Tcl Source Code

Check-in [ec03d6f62d]
Login
Bounty program for improvements to Tcl and certain Tcl packages.

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Split more "string" functions. New helper function TclUniCharToUCS4(), not used yet but that's the next step.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | tip-575
Files: files | file ages | folders
SHA3-256: ec03d6f62dfe7d4f52b4f36e0183590549b4a8980900424d74d811c54945be5c
User & Date: jan.nijtmans 2020-05-22 21:28:16
Context
2020-05-23
21:51
Fix testsuite when "string bytelength" doesn't exist. check-in: 56fca3f26d user: jan.nijtmans tags: tip-575
2020-05-22
21:28
Split more "string" functions. New helper function TclUniCharToUCS4(), not used yet but that's the n... check-in: ec03d6f62d user: jan.nijtmans tags: tip-575
15:27
New "string" subcommands: "nextchar", "nextword", "prevchar", "prevword". Not implemented yet (for n... check-in: 04d41e57ba user: jan.nijtmans tags: tip-575
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to generic/tclCmdMZ.c.

2543
2544
2545
2546
2547
2548
2549












































































































































2550
2551
2552
2553
2554
2555
2556
....
2601
2602
2603
2604
2605
2606
2607




























































































































2608
2609
2610
2611
2612
2613
2614
....
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}
 
/*
 *----------------------------------------------------------------------
 *












































































































































 * StringEndCmd --
 *
 *	This procedure is invoked to process the "string wordend" Tcl command.
 *	See the user documentation for details on what it does.
 *
 * Results:
 *	A standard Tcl result.
................................................................................
    } else {
	cur = numChars;
    }
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}
 




























































































































/*
 *----------------------------------------------------------------------
 *
 * StringEqualCmd --
 *
 *	This procedure is invoked to process the "string equal" Tcl command.
 *	See the user documentation for details on what it does. Note that this
................................................................................
	{"index",	StringIndexCmd,	TclCompileStringIndexCmd, NULL, NULL, 0},
	{"insert",	StringInsertCmd, TclCompileStringInsertCmd, NULL, NULL, 0},
	{"is",		StringIsCmd,	TclCompileStringIsCmd, NULL, NULL, 0},
	{"last",	StringLastCmd,	TclCompileStringLastCmd, NULL, NULL, 0},
	{"length",	StringLenCmd,	TclCompileStringLenCmd, NULL, NULL, 0},
	{"map",		StringMapCmd,	TclCompileStringMapCmd, NULL, NULL, 0},
	{"match",	StringMatchCmd,	TclCompileStringMatchCmd, NULL, NULL, 0},
	{"nextchar",	StringEndCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"nextword",	StringEndCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"prevchar",	StringStartCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"prevword",	StringStartCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"range",	StringRangeCmd,	TclCompileStringRangeCmd, NULL, NULL, 0},
	{"repeat",	StringReptCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"replace",	StringRplcCmd,	TclCompileStringReplaceCmd, NULL, NULL, 0},
	{"reverse",	StringRevCmd,	TclCompileBasic1ArgCmd, NULL, NULL, 0},
	{"tolower",	StringLowerCmd,	TclCompileStringToLowerCmd, NULL, NULL, 0},
	{"toupper",	StringUpperCmd,	TclCompileStringToUpperCmd, NULL, NULL, 0},
	{"totitle",	StringTitleCmd,	TclCompileStringToTitleCmd, NULL, NULL, 0},







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
|
|
|







2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
....
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
....
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}
 
/*
 *----------------------------------------------------------------------
 *
 * StringPrevCharCmd --
 *
 *	This procedure is invoked to process the "string prevchar" Tcl
 *	command. See the user documentation for details on what it does.
 *
 * Results:
 *	A standard Tcl result.
 *
 * Side effects:
 *	See the user documentation.
 *
 *----------------------------------------------------------------------
 */

static int
StringPrevCharCmd(
    TCL_UNUSED(ClientData),
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    int ch;
    const char *p, *string;
    int cur, index, length, numChars;

    if (objc != 3) {
	Tcl_WrongNumArgs(interp, 1, objv, "string index");
	return TCL_ERROR;
    }

    string = TclGetStringFromObj(objv[1], &length);
    numChars = Tcl_NumUtfChars(string, length);
    if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
	return TCL_ERROR;
    }
    string = TclGetStringFromObj(objv[1], &length);
    if (index >= numChars) {
	index = numChars - 1;
    }
    cur = 0;
    if (index > 0) {
	p = Tcl_UtfAtIndex(string, index);

	TclUtfToUCS4(p, &ch);
	for (cur = index; cur >= 0; cur--) {
	    int delta = 0;
	    const char *next;

	    if (!Tcl_UniCharIsWordChar(ch)) {
		break;
	    }

	    next = Tcl_UtfPrev(p, string);
	    do {
		next += delta;
		delta = TclUtfToUCS4(next, &ch);
	    } while (next + delta < p);
	    p = next;
	}
	if (cur != index) {
	    cur += 1;
	}
    }
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * StringPrevWordCmd --
 *
 *	This procedure is invoked to process the "string prevword" Tcl
 *	command. See the user documentation for details on what it does.
 *
 * Results:
 *	A standard Tcl result.
 *
 * Side effects:
 *	See the user documentation.
 *
 *----------------------------------------------------------------------
 */

static int
StringPrevWordCmd(
    TCL_UNUSED(ClientData),
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    int ch;
    const char *p, *string;
    int cur, index, length, numChars;

    if (objc != 3) {
	Tcl_WrongNumArgs(interp, 1, objv, "string index");
	return TCL_ERROR;
    }

    string = TclGetStringFromObj(objv[1], &length);
    numChars = Tcl_NumUtfChars(string, length);
    if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
	return TCL_ERROR;
    }
    string = TclGetStringFromObj(objv[1], &length);
    if (index >= numChars) {
	index = numChars - 1;
    }
    cur = 0;
    if (index > 0) {
	p = Tcl_UtfAtIndex(string, index);

	TclUtfToUCS4(p, &ch);
	for (cur = index; cur >= 0; cur--) {
	    int delta = 0;
	    const char *next;

	    if (!Tcl_UniCharIsWordChar(ch)) {
		break;
	    }

	    next = Tcl_UtfPrev(p, string);
	    do {
		next += delta;
		delta = TclUtfToUCS4(next, &ch);
	    } while (next + delta < p);
	    p = next;
	}
	if (cur != index) {
	    cur += 1;
	}
    }
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * StringEndCmd --
 *
 *	This procedure is invoked to process the "string wordend" Tcl command.
 *	See the user documentation for details on what it does.
 *
 * Results:
 *	A standard Tcl result.
................................................................................
    } else {
	cur = numChars;
    }
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}
 

/*
 *----------------------------------------------------------------------
 *
 * StringNextCharCmd --
 *
 *	This procedure is invoked to process the "string nextchar" Tcl command.
 *	See the user documentation for details on what it does.
 *
 * Results:
 *	A standard Tcl result.
 *
 * Side effects:
 *	See the user documentation.
 *
 *----------------------------------------------------------------------
 */

static int
StringNextCharCmd(
    TCL_UNUSED(ClientData),
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    int ch;
    const char *p, *end, *string;
    int cur, index, length, numChars;

    if (objc != 3) {
	Tcl_WrongNumArgs(interp, 1, objv, "string index");
	return TCL_ERROR;
    }

    string = TclGetStringFromObj(objv[1], &length);
    numChars = Tcl_NumUtfChars(string, length);
    if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
	return TCL_ERROR;
    }
    string = TclGetStringFromObj(objv[1], &length);
    if (index < 0) {
	index = 0;
    }
    if (index < numChars) {
	p = Tcl_UtfAtIndex(string, index);
	end = string+length;
	for (cur = index; p < end; cur++) {
	    p += TclUtfToUCS4(p, &ch);
	    if (!Tcl_UniCharIsWordChar(ch)) {
		break;
	    }
	}
	if (cur == index) {
	    cur++;
	}
    } else {
	cur = numChars;
    }
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}


/*
 *----------------------------------------------------------------------
 *
 * StringNextWordCmd --
 *
 *	This procedure is invoked to process the "string nextword" Tcl command.
 *	See the user documentation for details on what it does.
 *
 * Results:
 *	A standard Tcl result.
 *
 * Side effects:
 *	See the user documentation.
 *
 *----------------------------------------------------------------------
 */

static int
StringNextWordCmd(
    TCL_UNUSED(ClientData),
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    int ch;
    const char *p, *end, *string;
    int cur, index, length, numChars;

    if (objc != 3) {
	Tcl_WrongNumArgs(interp, 1, objv, "string index");
	return TCL_ERROR;
    }

    string = TclGetStringFromObj(objv[1], &length);
    numChars = Tcl_NumUtfChars(string, length);
    if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
	return TCL_ERROR;
    }
    string = TclGetStringFromObj(objv[1], &length);
    if (index < 0) {
	index = 0;
    }
    if (index < numChars) {
	p = Tcl_UtfAtIndex(string, index);
	end = string+length;
	for (cur = index; p < end; cur++) {
	    p += TclUtfToUCS4(p, &ch);
	    if (!Tcl_UniCharIsWordChar(ch)) {
		break;
	    }
	}
	if (cur == index) {
	    cur++;
	}
    } else {
	cur = numChars;
    }
    Tcl_SetObjResult(interp, Tcl_NewWideIntObj(cur));
    return TCL_OK;
}

/*
 *----------------------------------------------------------------------
 *
 * StringEqualCmd --
 *
 *	This procedure is invoked to process the "string equal" Tcl command.
 *	See the user documentation for details on what it does. Note that this
................................................................................
	{"index",	StringIndexCmd,	TclCompileStringIndexCmd, NULL, NULL, 0},
	{"insert",	StringInsertCmd, TclCompileStringInsertCmd, NULL, NULL, 0},
	{"is",		StringIsCmd,	TclCompileStringIsCmd, NULL, NULL, 0},
	{"last",	StringLastCmd,	TclCompileStringLastCmd, NULL, NULL, 0},
	{"length",	StringLenCmd,	TclCompileStringLenCmd, NULL, NULL, 0},
	{"map",		StringMapCmd,	TclCompileStringMapCmd, NULL, NULL, 0},
	{"match",	StringMatchCmd,	TclCompileStringMatchCmd, NULL, NULL, 0},
	{"nextchar",	StringNextCharCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"nextword",	StringNextWordCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"prevchar",	StringPrevCharCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"prevword",	StringPrevWordCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"range",	StringRangeCmd,	TclCompileStringRangeCmd, NULL, NULL, 0},
	{"repeat",	StringReptCmd,	TclCompileBasic2ArgCmd, NULL, NULL, 0},
	{"replace",	StringRplcCmd,	TclCompileStringReplaceCmd, NULL, NULL, 0},
	{"reverse",	StringRevCmd,	TclCompileBasic1ArgCmd, NULL, NULL, 0},
	{"tolower",	StringLowerCmd,	TclCompileStringToLowerCmd, NULL, NULL, 0},
	{"toupper",	StringUpperCmd,	TclCompileStringToUpperCmd, NULL, NULL, 0},
	{"totitle",	StringTitleCmd,	TclCompileStringToTitleCmd, NULL, NULL, 0},

Changes to generic/tclInt.h.

3248
3249
3250
3251
3252
3253
3254

3255
3256

3257
3258
3259
3260
3261
3262
3263
			    Tcl_ObjCmdProc *implementationProc,
			    const char *nameStr);
MODULE_SCOPE int	TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
#   define TclUtfToUCS4 Tcl_UtfToUniChar

#else
    MODULE_SCOPE int	TclUtfToUCS4(const char *src, int *ucs4Ptr);

#endif
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int	TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
			    Tcl_LoadHandle *loadHandle,
			    Tcl_FSUnloadFileProc **unloadProcPtr, int flags);
MODULE_SCOPE int	TclpUtime(Tcl_Obj *pathPtr, struct utimbuf *tval);







>


>







3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
			    Tcl_ObjCmdProc *implementationProc,
			    const char *nameStr);
MODULE_SCOPE int	TclUtfCmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int	TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
#   define TclUtfToUCS4 Tcl_UtfToUniChar
#   define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
#else
    MODULE_SCOPE int	TclUtfToUCS4(const char *src, int *ucs4Ptr);
    MODULE_SCOPE int	TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
#endif
MODULE_SCOPE Tcl_Obj *	TclpNativeToNormalized(ClientData clientData);
MODULE_SCOPE Tcl_Obj *	TclpFilesystemPathType(Tcl_Obj *pathPtr);
MODULE_SCOPE int	TclpDlopen(Tcl_Interp *interp, Tcl_Obj *pathPtr,
			    Tcl_LoadHandle *loadHandle,
			    Tcl_FSUnloadFileProc **unloadProcPtr, int flags);
MODULE_SCOPE int	TclpUtime(Tcl_Obj *pathPtr, struct utimbuf *tval);

Changes to generic/tclUtf.c.

2630
2631
2632
2633
2634
2635
2636














2637
2638
2639
2640
2641
2642
2643
2644
2645
    const char *src,	/* The UTF-8 string. */
    int *ucs4Ptr)	/* Filled with the UCS4 codepoint represented
			 * by the UTF-8 string. */
{
    /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
    return Tcl_UtfToUniChar(src, ucs4Ptr);
}














#endif
 
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 4
 * fill-column: 78
 * End:
 */







>
>
>
>
>
>
>
>
>
>
>
>
>
>









2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
    const char *src,	/* The UTF-8 string. */
    int *ucs4Ptr)	/* Filled with the UCS4 codepoint represented
			 * by the UTF-8 string. */
{
    /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
    return Tcl_UtfToUniChar(src, ucs4Ptr);
}

int
TclUniCharToUCS4(
    const Tcl_UniChar *src,	/* The Tcl_UniChar string. */
    int *ucs4Ptr)	/* Filled with the UCS4 codepoint represented
			 * by the Tcl_UniChar string. */
{
    if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
	*ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
	return 2;
    }
    *ucs4Ptr = src[0];
    return 1;
}
#endif
 
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 4
 * fill-column: 78
 * End:
 */