Zsh Mailing List Archive Messages sorted by: Reverse Date, Date, Thread, Author
PATCH: named enum for lexical tokens

X-seq: zsh-workers 30715
From: Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
To: Zsh Hackers' List <zsh-workers@xxxxxxx>
Subject: PATCH: named enum for lexical tokens
Date: Fri, 5 Oct 2012 21:39:58 +0100
List-help: <mailto:zsh-workers-help@zsh.org>
List-id: Zsh Workers List <zsh-workers.zsh.org>
List-post: <mailto:zsh-workers@zsh.org>
Mailing-list: contact zsh-workers-help@xxxxxxx; run by ezmlm
People who learnt C from the zsh code (I hope there aren't any) might be
vague about the fact that variables can have an enum type...

People who've ever tried to debug the parser will definitely appreciate
that it's rather more useful to have gdb tell you the lexical token
you've just read is called "IF" rather than "55".

This tries to improve matters.  It appears to satisfy gcc, but it's
possible pickier compilers might notice some new type mismatches.  If
you have such a compiler or static checker it would be good if you could
see if there's anything I've missed.  (I absolutely do not *dare* run
the code through lint, but if anyone has some long, empty weekends
looming that would be a splendid thing to do and our quality control
department would be ecstatic, if we had one.)

I also renamed some arguments and one local variable that clash with the
typically well-named global variable "tok".  I was too lazy to rename the
typically well-named global variable itself; besides, I'd forget what I
renamed it to.

No functional changes expected.

Index: Src/init.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/init.c,v
retrieving revision 1.124
diff -p -u -r1.124 init.c
--- Src/init.c	30 Mar 2012 11:01:14 -0000	1.124
+++ Src/init.c	5 Oct 2012 20:29:34 -0000
@@ -149,7 +149,7 @@ loop(int toplevel, int justonce)
 	    continue;
 	}
 	if (hend(prog)) {
-	    int toksav = tok;
+	    enum lextok toksav = tok;
 
 	    non_empty = 1;
 	    if (toplevel &&
Index: Src/lex.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/lex.c,v
retrieving revision 1.72
diff -p -u -r1.72 lex.c
--- Src/lex.c	3 Oct 2012 18:11:15 -0000	1.72
+++ Src/lex.c	5 Oct 2012 20:29:35 -0000
@@ -42,7 +42,7 @@ char *zshlextext;
 /**/
 mod_export char *tokstr;
 /**/
-mod_export int tok;
+mod_export enum lextok tok;
 /**/
 mod_export int tokfd;
 
@@ -207,7 +207,7 @@ struct lexstack {
     int hlinesz;
     char *hline;
     char *hptr;
-    int tok;
+    enum lextok tok;
     int isnewlin;
     char *tokstr;
     char *zshlextext;
@@ -470,6 +470,10 @@ ctxtlex(void)
     case DINBRACK:
 	incmdpos = 0;
 	break;
+
+    default:
+	/* nothing to do, keep compiler happy */
+	break;
     }
     if (tok != DINPAR)
 	infor = tok == FOR ? 2 : 0;
@@ -698,11 +702,12 @@ isnumglob(void)
 }
 
 /**/
-static int
+static enum lextok
 gettok(void)
 {
     int c, d;
-    int peekfd = -1, peek;
+    int peekfd = -1;
+    enum lextok peek;
 
   beginning:
     tokstr = NULL;
@@ -1007,12 +1012,13 @@ gettok(void)
  */
 
 /**/
-static int
+static enum lextok
 gettokstr(int c, int sub)
 {
     int bct = 0, pct = 0, brct = 0, fdpar = 0;
     int intpos = 1, in_brace_param = 0;
-    int peek, inquote, unmatched = 0;
+    int inquote, unmatched = 0;
+    enum lextok peek;
 #ifdef DEBUG
     int ocmdsp = cmdsp;
 #endif
@@ -1692,6 +1698,7 @@ parse_subst_string(char *s)
 {
     int c, l = strlen(s), err;
     char *ptr;
+    enum lextok ctok;
 
     if (!*s || !strcmp(s, nulstring))
 	return 0;
@@ -1703,14 +1710,14 @@ parse_subst_string(char *s)
     bptr = tokstr = s;
     bsiz = l + 1;
     c = hgetc();
-    c = gettokstr(c, 1);
+    ctok = gettokstr(c, 1);
     err = errflag;
     strinend();
     inpop();
     DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
     lexrestore();
     errflag = err;
-    if (c == LEXERR) {
+    if (ctok == LEXERR) {
 	untokenize(s);
 	return 1;
     }
@@ -1720,9 +1727,9 @@ parse_subst_string(char *s)
      * before lexrestore()) == l, but that's not necessarily the case if
      * we stripped an RCQUOTE.
      */
-    if (c != STRING || (errflag && !noerrs)) {
+    if (ctok != STRING || (errflag && !noerrs)) {
 	fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
-		errflag ? "errflag" : "c != STRING");
+		errflag ? "errflag" : "ctok != STRING");
 	fflush(stderr);
 	untokenize(s);
 	return 1;
Index: Src/parse.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/parse.c,v
retrieving revision 1.90
diff -p -u -r1.90 parse.c
--- Src/parse.c	3 Aug 2011 18:45:19 -0000	1.90
+++ Src/parse.c	5 Oct 2012 20:29:35 -0000
@@ -666,7 +666,8 @@ par_sublist(int *complex)
 
 	*complex |= c;
 	if (tok == DBAR || tok == DAMPER) {
-	    int qtok = tok, sl;
+	    enum lextok qtok = tok;
+	    int sl;
 
 	    cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);
 	    zshlex();
@@ -1176,7 +1177,8 @@ par_case(int *complex)
 static void
 par_if(int *complex)
 {
-    int oecused = ecused, xtok, p, pp, type, usebrace = 0;
+    int oecused = ecused, p, pp, type, usebrace = 0;
+    enum lextok xtok;
     unsigned char nc;
 
     p = ecadd(0);
@@ -1367,7 +1369,8 @@ par_repeat(int *complex)
 static void
 par_subsh(int *complex)
 {
-    int oecused = ecused, otok = tok, p, pp;
+    enum lextok otok = tok;
+    int oecused = ecused, p, pp;
 
     p = ecadd(0);
     /* Extra word only needed for always block */
@@ -2110,7 +2113,7 @@ par_cond_2(void)
 		  && !s1[2]);
     condlex();
     if (tok == INANG || tok == OUTANG) {
-	int xtok = tok;
+	enum lextok xtok = tok;
 	condlex();
 	if (tok != STRING)
 	    YYERROR(ecused);
@@ -2371,7 +2374,7 @@ freeeprog(Eprog p)
 
 /**/
 char *
-ecgetstr(Estate s, int dup, int *tok)
+ecgetstr(Estate s, int dup, int *tokflag)
 {
     static char buf[4];
     wordcode c = *s->pc++;
@@ -2389,8 +2392,8 @@ ecgetstr(Estate s, int dup, int *tok)
     } else {
 	r = s->strs + (c >> 2);
     }
-    if (tok)
-	*tok = (c & 1);
+    if (tokflag)
+	*tokflag = (c & 1);
 
     /*** Since function dump files are mapped read-only, avoiding to
      *   to duplicate strings when they don't contain tokens may fail
@@ -2407,33 +2410,33 @@ ecgetstr(Estate s, int dup, int *tok)
 
 /**/
 char *
-ecrawstr(Eprog p, Wordcode pc, int *tok)
+ecrawstr(Eprog p, Wordcode pc, int *tokflag)
 {
     static char buf[4];
     wordcode c = *pc;
 
     if (c == 6 || c == 7) {
-	if (tok)
-	    *tok = (c & 1);
+	if (tokflag)
+	    *tokflag = (c & 1);
 	return "";
     } else if (c & 2) {
 	buf[0] = (char) ((c >>  3) & 0xff);
 	buf[1] = (char) ((c >> 11) & 0xff);
 	buf[2] = (char) ((c >> 19) & 0xff);
 	buf[3] = '\0';
-	if (tok)
-	    *tok = (c & 1);
+	if (tokflag)
+	    *tokflag = (c & 1);
 	return buf;
     } else {
-	if (tok)
-	    *tok = (c & 1);
+	if (tokflag)
+	    *tokflag = (c & 1);
 	return p->strs + (c >> 2);
     }
 }
 
 /**/
 char **
-ecgetarr(Estate s, int num, int dup, int *tok)
+ecgetarr(Estate s, int num, int dup, int *tokflag)
 {
     char **ret, **rp;
     int tf = 0, tmp = 0;
@@ -2445,15 +2448,15 @@ ecgetarr(Estate s, int num, int dup, int
 	tf |=  tmp;
     }
     *rp = NULL;
-    if (tok)
-	*tok = tf;
+    if (tokflag)
+	*tokflag = tf;
 
     return ret;
 }
 
 /**/
 LinkList
-ecgetlist(Estate s, int num, int dup, int *tok)
+ecgetlist(Estate s, int num, int dup, int *tokflag)
 {
     if (num) {
 	LinkList ret;
@@ -2464,12 +2467,12 @@ ecgetlist(Estate s, int num, int dup, in
 	    setsizednode(ret, i, ecgetstr(s, dup, &tmp));
 	    tf |= tmp;
 	}
-	if (tok)
-	    *tok = tf;
+	if (tokflag)
+	    *tokflag = tf;
 	return ret;
     }
-    if (tok)
-	*tok = 0;
+    if (tokflag)
+	*tokflag = 0;
     return NULL;
 }
 
Index: Src/subst.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/subst.c,v
retrieving revision 1.138
diff -p -u -r1.138 subst.c
--- Src/subst.c	21 Aug 2012 18:03:03 -0000	1.138
+++ Src/subst.c	5 Oct 2012 20:29:35 -0000
@@ -1215,7 +1215,7 @@ get_strarg(char *s, int *lenp)
 {
     convchar_t del;
     int len;
-    char tok = 0;
+    char ctok = 0;
 
     MB_METACHARINIT();
     len = MB_METACHARLENCONV(s, &del);
@@ -1243,25 +1243,25 @@ get_strarg(char *s, int *lenp)
 	del = ZWC('>');
 	break;
     case Inpar:
-	tok = Outpar;
+	ctok = Outpar;
 	break;
     case Inang:
-	tok = Outang;
+	ctok = Outang;
 	break;
     case Inbrace:
-	tok = Outbrace;
+	ctok = Outbrace;
 	break;
     case Inbrack:
-	tok = Outbrack;
+	ctok = Outbrack;
 	break;
     }
 
-    if (tok) {
+    if (ctok) {
 	/*
 	 * Looking for a matching token; we want the literal byte,
 	 * not a decoded multibyte character, so search specially.
 	 */
-	while (*s && *s != tok)
+	while (*s && *s != ctok)
 	    s++;
     } else {
 	convchar_t del2;
Index: Src/zsh.h
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/zsh.h,v
retrieving revision 1.184
diff -p -u -r1.184 zsh.h
--- Src/zsh.h	23 Aug 2012 19:19:27 -0000	1.184
+++ Src/zsh.h	5 Oct 2012 20:29:35 -0000
@@ -232,7 +232,7 @@ enum {
  * appear in strings and don't necessarily represent a single character.
  */
 
-enum {
+enum lextok {
     NULLTOK,		/* 0  */
     SEPER,
     NEWLIN,
Index: Src/Zle/zle_tricky.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/Zle/zle_tricky.c,v
retrieving revision 1.113
diff -p -u -r1.113 zle_tricky.c
--- Src/Zle/zle_tricky.c	14 Oct 2011 19:01:42 -0000	1.113
+++ Src/Zle/zle_tricky.c	5 Oct 2012 20:29:35 -0000
@@ -1071,7 +1071,8 @@ has_real_token(const char *s)
 static char *
 get_comp_string(void)
 {
-    int t0, tt0, i, j, k, cp, rd, sl, ocs, ins, oins, ia, parct, varq = 0;
+    enum lextok t0, tt0;
+    int i, j, k, cp, rd, sl, ocs, ins, oins, ia, parct, varq = 0;
     int ona = noaliases;
     /*
      * Index of word being considered
@@ -1152,7 +1153,8 @@ get_comp_string(void)
     lexflags = LEXFLAGS_ZLE;
     inpush(dupstrspace(linptr), 0, NULL);
     strinbeg(0);
-    wordpos = tt0 = cp = rd = ins = oins = linarr = parct = ia = redirpos = 0;
+    wordpos = cp = rd = ins = oins = linarr = parct = ia = redirpos = 0;
+    tt0 = NULLTOK;
 
     /* This loop is possibly the wrong way to do this.  It goes through *
      * the previously massaged command line using the lexer.  It stores *
@@ -1238,7 +1240,8 @@ get_comp_string(void)
 	    if (tt)
 		break;
 	    /* Otherwise reset the variables we are collecting data in. */
-	    wordpos = tt0 = cp = rd = ins = redirpos = 0;
+	    wordpos = cp = rd = ins = redirpos = 0;
+	    tt0 = NULLTOK;
 	}
 	if (lincmd && (tok == STRING || tok == FOR || tok == FOREACH ||
 		       tok == SELECT || tok == REPEAT || tok == CASE)) {
@@ -1251,7 +1254,7 @@ get_comp_string(void)
 	    if (wordpos != redirpos)
 		wordpos = redirpos = 0;
 	}
-	if (!lexflags && !tt0) {
+	if (!lexflags && tt0 == NULLTOK) {
 	    /* This is done when the lexer reached the word the cursor is on. */
 	    tt = tokstr ? dupstring(tokstr) : NULL;
 
@@ -1352,7 +1355,7 @@ get_comp_string(void)
 				 (sl - 1) : (zlemetacs_qsub - wb)]);
 	}
     } while (tok != LEXERR && tok != ENDINPUT &&
-	     (tok != SEPER || (lexflags && !tt0)));
+	     (tok != SEPER || (lexflags && tt0 == NULLTOK)));
     /* Calculate the number of words stored in the clwords array. */
     clwnum = (tt || !wordpos) ? wordpos : wordpos - 1;
     zsfree(clwords[clwnum]);
@@ -1388,7 +1391,7 @@ get_comp_string(void)
 
     if (inwhat == IN_MATH)
 	s = NULL;
-    else if (!t0 || t0 == ENDINPUT) {
+    else if (t0 == NULLTOK || t0 == ENDINPUT) {
 	/* There was no word (empty line). */
 	s = ztrdup("");
 	we = wb = zlemetacs;

-- 
Peter Stephenson <p.w.stephenson@xxxxxxxxxxxx>
Web page now at http://homepage.ntlworld.com/p.w.stephenson/
Messages sorted by: Reverse Date, Date, Thread, Author