Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Split more "string" functions. New helper function TclUniCharToUCS4(), not used yet but that's the next step. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | tip-575 |
Files: | files | file ages | folders |
SHA3-256: |
ec03d6f62dfe7d4f52b4f36e01835905 |
User & Date: | jan.nijtmans 2020-05-22 21:28:16.346 |
Context
2020-05-23
| ||
21:51 | Fix testsuite when "string bytelength" doesn't exist. check-in: 56fca3f26d user: jan.nijtmans tags: tip-575 | |
2020-05-22
| ||
21:28 | Split more "string" functions. New helper function TclUniCharToUCS4(), not used yet but that's the n... check-in: ec03d6f62d user: jan.nijtmans tags: tip-575 | |
15:27 | New "string" subcommands: "nextchar", "nextword", "prevchar", "prevword". Not implemented yet (for n... check-in: 04d41e57ba user: jan.nijtmans tags: tip-575 | |
Changes
Changes to generic/tclCmdMZ.c.
︙ | ︙ | |||
2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 | Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringEndCmd -- * * This procedure is invoked to process the "string wordend" Tcl command. * See the user documentation for details on what it does. * * Results: * A standard Tcl result. | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 | Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringPrevCharCmd -- * * This procedure is invoked to process the "string prevchar" Tcl * command. See the user documentation for details on what it does. * * Results: * A standard Tcl result. * * Side effects: * See the user documentation. * *---------------------------------------------------------------------- */ static int StringPrevCharCmd( TCL_UNUSED(ClientData), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; const char *p, *string; int cur, index, length, numChars; if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "string index"); return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); numChars = Tcl_NumUtfChars(string, length); if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); if (index >= numChars) { index = numChars - 1; } cur = 0; if (index > 0) { p = Tcl_UtfAtIndex(string, index); TclUtfToUCS4(p, &ch); for (cur = index; cur >= 0; cur--) { int delta = 0; const char *next; if (!Tcl_UniCharIsWordChar(ch)) { break; } next = Tcl_UtfPrev(p, string); do { next += delta; delta = TclUtfToUCS4(next, &ch); } while (next + delta < p); p = next; } if (cur != index) { cur += 1; } } Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringPrevWordCmd -- * * This procedure is invoked to process the "string prevword" Tcl * command. See the user documentation for details on what it does. * * Results: * A standard Tcl result. * * Side effects: * See the user documentation. * *---------------------------------------------------------------------- */ static int StringPrevWordCmd( TCL_UNUSED(ClientData), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; const char *p, *string; int cur, index, length, numChars; if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "string index"); return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); numChars = Tcl_NumUtfChars(string, length); if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); if (index >= numChars) { index = numChars - 1; } cur = 0; if (index > 0) { p = Tcl_UtfAtIndex(string, index); TclUtfToUCS4(p, &ch); for (cur = index; cur >= 0; cur--) { int delta = 0; const char *next; if (!Tcl_UniCharIsWordChar(ch)) { break; } next = Tcl_UtfPrev(p, string); do { next += delta; delta = TclUtfToUCS4(next, &ch); } while (next + delta < p); p = next; } if (cur != index) { cur += 1; } } Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringEndCmd -- * * This procedure is invoked to process the "string wordend" Tcl command. * See the user documentation for details on what it does. * * Results: * A standard Tcl result. |
︙ | ︙ | |||
2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 | } else { cur = numChars; } Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringEqualCmd -- * * This procedure is invoked to process the "string equal" Tcl command. * See the user documentation for details on what it does. Note that this | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 | } else { cur = numChars; } Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringNextCharCmd -- * * This procedure is invoked to process the "string nextchar" Tcl command. * See the user documentation for details on what it does. * * Results: * A standard Tcl result. * * Side effects: * See the user documentation. * *---------------------------------------------------------------------- */ static int StringNextCharCmd( TCL_UNUSED(ClientData), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; const char *p, *end, *string; int cur, index, length, numChars; if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "string index"); return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); numChars = Tcl_NumUtfChars(string, length); if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); if (index < 0) { index = 0; } if (index < numChars) { p = Tcl_UtfAtIndex(string, index); end = string+length; for (cur = index; p < end; cur++) { p += TclUtfToUCS4(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } } if (cur == index) { cur++; } } else { cur = numChars; } Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringNextWordCmd -- * * This procedure is invoked to process the "string nextword" Tcl command. * See the user documentation for details on what it does. * * Results: * A standard Tcl result. * * Side effects: * See the user documentation. * *---------------------------------------------------------------------- */ static int StringNextWordCmd( TCL_UNUSED(ClientData), Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { int ch; const char *p, *end, *string; int cur, index, length, numChars; if (objc != 3) { Tcl_WrongNumArgs(interp, 1, objv, "string index"); return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); numChars = Tcl_NumUtfChars(string, length); if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { return TCL_ERROR; } string = TclGetStringFromObj(objv[1], &length); if (index < 0) { index = 0; } if (index < numChars) { p = Tcl_UtfAtIndex(string, index); end = string+length; for (cur = index; p < end; cur++) { p += TclUtfToUCS4(p, &ch); if (!Tcl_UniCharIsWordChar(ch)) { break; } } if (cur == index) { cur++; } } else { cur = numChars; } Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur)); return TCL_OK; } /* *---------------------------------------------------------------------- * * StringEqualCmd -- * * This procedure is invoked to process the "string equal" Tcl command. * See the user documentation for details on what it does. Note that this |
︙ | ︙ | |||
3317 3318 3319 3320 3321 3322 3323 | {"index", StringIndexCmd, TclCompileStringIndexCmd, NULL, NULL, 0}, {"insert", StringInsertCmd, TclCompileStringInsertCmd, NULL, NULL, 0}, {"is", StringIsCmd, TclCompileStringIsCmd, NULL, NULL, 0}, {"last", StringLastCmd, TclCompileStringLastCmd, NULL, NULL, 0}, {"length", StringLenCmd, TclCompileStringLenCmd, NULL, NULL, 0}, {"map", StringMapCmd, TclCompileStringMapCmd, NULL, NULL, 0}, {"match", StringMatchCmd, TclCompileStringMatchCmd, NULL, NULL, 0}, | | | | | | 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 | {"index", StringIndexCmd, TclCompileStringIndexCmd, NULL, NULL, 0}, {"insert", StringInsertCmd, TclCompileStringInsertCmd, NULL, NULL, 0}, {"is", StringIsCmd, TclCompileStringIsCmd, NULL, NULL, 0}, {"last", StringLastCmd, TclCompileStringLastCmd, NULL, NULL, 0}, {"length", StringLenCmd, TclCompileStringLenCmd, NULL, NULL, 0}, {"map", StringMapCmd, TclCompileStringMapCmd, NULL, NULL, 0}, {"match", StringMatchCmd, TclCompileStringMatchCmd, NULL, NULL, 0}, {"nextchar", StringNextCharCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"nextword", StringNextWordCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"prevchar", StringPrevCharCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"prevword", StringPrevWordCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"range", StringRangeCmd, TclCompileStringRangeCmd, NULL, NULL, 0}, {"repeat", StringReptCmd, TclCompileBasic2ArgCmd, NULL, NULL, 0}, {"replace", StringRplcCmd, TclCompileStringReplaceCmd, NULL, NULL, 0}, {"reverse", StringRevCmd, TclCompileBasic1ArgCmd, NULL, NULL, 0}, {"tolower", StringLowerCmd, TclCompileStringToLowerCmd, NULL, NULL, 0}, {"toupper", StringUpperCmd, TclCompileStringToUpperCmd, NULL, NULL, 0}, {"totitle", StringTitleCmd, TclCompileStringToTitleCmd, NULL, NULL, 0}, |
︙ | ︙ |
Changes to generic/tclInt.h.
︙ | ︙ | |||
3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 | Tcl_ObjCmdProc *implementationProc, const char *nameStr); MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar #else MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); #endif MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr, Tcl_LoadHandle *loadHandle, Tcl_FSUnloadFileProc **unloadProcPtr, int flags); MODULE_SCOPE int TclpUtime(Tcl_Obj *pathPtr, struct utimbuf *tval); | > > | 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 | Tcl_ObjCmdProc *implementationProc, const char *nameStr); MODULE_SCOPE int TclUtfCmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar # define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1) #else MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr); #endif MODULE_SCOPE Tcl_Obj * TclpNativeToNormalized(ClientData clientData); MODULE_SCOPE Tcl_Obj * TclpFilesystemPathType(Tcl_Obj *pathPtr); MODULE_SCOPE int TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr, Tcl_LoadHandle *loadHandle, Tcl_FSUnloadFileProc **unloadProcPtr, int flags); MODULE_SCOPE int TclpUtime(Tcl_Obj *pathPtr, struct utimbuf *tval); |
︙ | ︙ |
Changes to generic/tclUtf.c.
︙ | ︙ | |||
2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 | const char *src, /* The UTF-8 string. */ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the UTF-8 string. */ { /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ return Tcl_UtfToUniChar(src, ucs4Ptr); } #endif /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 | > > > > > > > > > > > > > > | 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 | const char *src, /* The UTF-8 string. */ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the UTF-8 string. */ { /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ return Tcl_UtfToUniChar(src, ucs4Ptr); } int TclUniCharToUCS4( const Tcl_UniChar *src, /* The Tcl_UniChar string. */ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the Tcl_UniChar string. */ { if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) { *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000; return 2; } *ucs4Ptr = src[0]; return 1; } #endif /* * Local Variables: * mode: c * c-basic-offset: 4 * fill-column: 78 |
︙ | ︙ |