Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

PATCH: sort ordering by function



I wrote this because I needed it.

I am faced with a set of directories named <day>-<month>-<year>,
e.g. 26-01-08.  The modification times on the directories aren't useful.
I want these in time order.

After the patch, the solution (assuming EXTENDED_GLOB is on and
KSH_ARRAYS isn't, this isn't supposed to be portable, it's just a
trivial example) is

  sd() {
    local -a match mbegin mend
    [[ $REPLY= (#b)(*)-(*)-(*) ]] && REPLY="$match[3]-$match[2]-$match[1]"
  }
  print -l *(o+sd)

Comments welcome.

Test code should appear when I have time, though if anyone has time to
write test code for this or anything else, that would be fantastic.

Index: Doc/Zsh/expn.yo
===================================================================
RCS file: /cvsroot/zsh/zsh/Doc/Zsh/expn.yo,v
retrieving revision 1.100
diff -u -r1.100 expn.yo
--- Doc/Zsh/expn.yo	17 Nov 2008 16:56:42 -0000	1.100
+++ Doc/Zsh/expn.yo	26 Jan 2009 16:20:05 -0000
@@ -2229,7 +2229,7 @@
 item(tt(o)var(c))(
 specifies how the names of the files should be sorted. If var(c) is
 tt(n) they are sorted by name (the default); if it is tt(L) they
-are sorted depending on the size (length) of the files; if tt(l) 
+are sorted depending on the size (length) of the files; if tt(l)
 they are sorted by the number of links; if tt(a), tt(m), or tt(c)
 they are sorted by the time of the last access, modification, or
 inode change respectively; if tt(d), files in subdirectories appear before
@@ -2242,6 +2242,16 @@
 so `tt(*(^-oL))' gives a list of all files sorted by file size in descending
 order, following any symbolic links.  Unless tt(oN) is used, multiple order
 specifiers may occur to resolve ties.
+
+tt(oe) and tt(o+) are special cases; they are each followed by shell code,
+delimited as for the tt(e) glob qualifier and the tt(+) glob qualifier
+respectively (see above).  The code is executed for each matched file with
+the parameter tt(REPLY) set to the name of the file on entry.  The code
+should modify the parameter tt(REPLY) in some fashion.  On return, the value
+of the parameter is used instead of the file name as the string on which to
+sort.  Unlike other sort operators, tt(oe) and tt(o+) may be repeated, but
+note that the maximum number of sort operators of any kind that may appear
+in any glob expression is 12.
 )
 item(tt(O)var(c))(
 like `tt(o)', but sorts in descending order; i.e. `tt(*(^oc))' is the
Index: Src/glob.c
===================================================================
RCS file: /cvsroot/zsh/zsh/Src/glob.c,v
retrieving revision 1.68
diff -u -r1.68 glob.c
--- Src/glob.c	8 Nov 2008 06:31:02 -0000	1.68
+++ Src/glob.c	26 Jan 2009 16:20:06 -0000
@@ -42,6 +42,11 @@
 
 struct gmatch {
     char *name;
+    /*
+     * Array of sort strings:  one for each GS_EXEC sort type in
+     * the glob qualifiers.
+     */
+    char **sortstrs;
     off_t size ALIGN64;
     long atime;
     long mtime;
@@ -68,8 +73,9 @@
 
 #define GS_NAME   1
 #define GS_DEPTH  2
+#define GS_EXEC	  4
 
-#define GS_SHIFT_BASE	4
+#define GS_SHIFT_BASE	8
 
 #define GS_SIZE  (GS_SHIFT_BASE)
 #define GS_ATIME (GS_SHIFT_BASE << 1)
@@ -135,6 +141,17 @@
 /**/
 mod_export char *glob_pre, *glob_suf;
 
+/* Element of a glob sort */
+struct globsort {
+    /* Sort type */
+    int tp;
+    /* Sort code to eval, if type is GS_EXEC */
+    char *exec;
+};
+
+/* Maximum entries in sort array */
+#define MAX_SORTS	(12)
+
 /* struct to easily save/restore current state */
 
 struct globdata {
@@ -157,7 +174,8 @@
     int gd_range, gd_amc, gd_units;
     int gd_gf_nullglob, gd_gf_markdirs, gd_gf_noglobdots, gd_gf_listtypes;
     int gd_gf_numsort;
-    int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts, gd_gf_sortlist[11];
+    int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts;
+    struct globsort gd_gf_sortlist[MAX_SORTS];
 
     char *gd_glob_pre, *gd_glob_suf;
 };
@@ -880,11 +898,13 @@
 static int
 gmatchcmp(Gmatch a, Gmatch b)
 {
-    int i, *s;
+    int i;
     off_t r = 0L;
+    struct globsort *s;
+    char **asortstrp = NULL, **bsortstrp = NULL;
 
     for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
-	switch (*s & ~GS_DESC) {
+	switch (s->tp & ~GS_DESC) {
 	case GS_NAME:
 	    r = zstrcmp(b->name, a->name, gf_numsort ? SORTIT_NUMERICALLY : 0);
 	    break;
@@ -910,6 +930,17 @@
 		r = slasha - slashb;
 	    }
 	    break;
+	case GS_EXEC:
+	    if (!asortstrp) {
+		asortstrp = a->sortstrs;
+		bsortstrp = b->sortstrs;
+	    } else {
+		asortstrp++;
+		bsortstrp++;
+	    }
+	    r = zstrcmp(*bsortstrp, *asortstrp,
+			gf_numsort ? SORTIT_NUMERICALLY : 0);
+	    break;
 	case GS_SIZE:
 	    r = b->size - a->size;
 	    break;
@@ -966,7 +997,7 @@
 	    break;
 	}
 	if (r)
-	    return (int) ((*s & GS_DESC) ? -r : r);
+	    return (int) ((s->tp & GS_DESC) ? -r : r);
     }
     return 0;
 }
@@ -1000,6 +1031,49 @@
     return qfirst;
 }
 
+
+/*
+ * Get a glob string for execution, following e or + qualifiers.
+ * Pointer is character after the e or +.
+ */
+
+/**/
+static char *
+glob_exec_string(char **sp)
+{
+    char sav, *tt, *sdata, *s = *sp;
+    int plus;
+
+    if (s[-1] == '+') {
+	plus = 0;
+	tt = itype_end(s, IIDENT, 0);
+	if (tt == s)
+	{
+	    zerr("missing identifier after `+'");
+	    return NULL;
+	}
+    } else {
+	tt = get_strarg(s, &plus);
+	if (!*tt)
+	{
+	    zerr("missing end of string");
+	    return NULL;
+	}
+    }
+
+    sav = *tt;
+    *tt = '\0';
+    sdata = dupstring(s + plus);
+    untokenize(sdata);
+    *tt = sav;
+    if (sav)
+	*sp = tt + plus;
+    else
+	*sp = tt;
+
+    return sdata;
+}
+
 /* Main entry point to the globbing code for filename globbing. *
  * np points to a node in the list list which will be expanded  *
  * into a series of nodes.                                      */
@@ -1449,7 +1523,16 @@
 		case 'O':
 		{
 		    int t;
+		    char *send;
 
+		    if (gf_nsorts == MAX_SORTS) {
+			zerr("too many glob sort specifiers");
+			restore_globstate(saved);
+			return;
+		    }
+
+		    /* usually just one character */
+		    send = s+1;
 		    switch (*s) {
 		    case 'n': t = GS_NAME; break;
 		    case 'L': t = GS_SIZE; break;
@@ -1459,60 +1542,50 @@
 		    case 'c': t = GS_CTIME; break;
 		    case 'd': t = GS_DEPTH; break;
 		    case 'N': t = GS_NONE; break;
+		    case 'e':
+		    case '+':
+		    {
+			t = GS_EXEC;
+			if ((gf_sortlist[gf_nsorts].exec =
+			     glob_exec_string(&send)) == NULL)
+			{
+			    restore_globstate(saved);
+			    return;
+			}
+			break;
+		    }
 		    default:
 			zerr("unknown sort specifier");
 			restore_globstate(saved);
 			return;
 		    }
-		    if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
-			t <<= GS_SHIFT;
-		    if (gf_sorts & t) {
-			zerr("doubled sort specifier");
-			restore_globstate(saved);
-			return;
+		    if (t != GS_EXEC) {
+			if ((sense & 2) && !(t & (GS_NAME|GS_DEPTH)))
+			    t <<= GS_SHIFT; /* HERE: GS_EXEC? */
+			if (gf_sorts & t) {
+			    zerr("doubled sort specifier");
+			    restore_globstate(saved);
+			    return;
+			}
 		    }
 		    gf_sorts |= t;
-		    gf_sortlist[gf_nsorts++] = t |
+		    gf_sortlist[gf_nsorts++].tp = t |
 			(((sense & 1) ^ (s[-1] == 'O')) ? GS_DESC : 0);
-		    s++;
+		    s = send;
 		    break;
 		}
 		case '+':
 		case 'e':
 		{
-		    char sav, *tt;
-		    int plus;
+		    char *tt;
 
-		    if (s[-1] == '+') {
-			plus = 0;
-			tt = itype_end(s, IIDENT, 0);
-			if (tt == s)
-			{
-			    zerr("missing identifier after `+'");
-			    tt = NULL;
-			}
-		    } else {
-			tt = get_strarg(s, &plus);
-			if (!*tt)
-			{
-			    zerr("missing end of string");
-			    tt = NULL;
-			}
-		    }
+		    tt = glob_exec_string(&s);
 
 		    if (tt == NULL) {
 			data = 0;
 		    } else {
-			sav = *tt;
-			*tt = '\0';
 			func = qualsheval;
-			sdata = dupstring(s + plus);
-			untokenize(sdata);
-			*tt = sav;
-			if (sav)
-			    s = tt + plus;
-			else
-			    s = tt;
+			sdata = tt;
 		    }
 		    break;
 		}
@@ -1632,7 +1705,7 @@
 	return;
     }
     if (!gf_nsorts) {
-	gf_sortlist[0] = gf_sorts = GS_NAME;
+	gf_sortlist[0].tp = gf_sorts = GS_NAME;
 	gf_nsorts = 1;
     }
     /* Initialise receptacle for matched files, *
@@ -1665,7 +1738,65 @@
 	}
     }
 
-    if (!(gf_sortlist[0] & GS_NONE)) {
+    if (!(gf_sortlist[0].tp & GS_NONE)) {
+	/*
+	 * Get the strings to use for sorting by executing
+	 * the code chunk.  We allow more than one of these.
+	 */
+	int nexecs = 0;
+	struct globsort *sortp;
+	struct globsort *lastsortp = gf_sortlist + gf_nsorts;
+
+	/* First find out if there are any GS_EXECs, counting them. */
+	for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
+	{
+	    if (sortp->tp & GS_EXEC)
+		nexecs++;
+	}
+
+	if (nexecs) {
+	    Gmatch tmpptr;
+	    int iexec = 0;
+
+	    /* Yes; allocate enough space for strings for each */
+	    for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
+		tmpptr->sortstrs = (char **)zhalloc(nexecs*sizeof(char*));
+
+	    /* Loop over each one, incrementing iexec */
+	    for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
+	    {
+		/* Ignore unless this is a GS_EXEC */
+		if (sortp->tp & GS_EXEC) {
+		    Eprog prog;
+
+		    if ((prog = parse_string(sortp->exec, 0))) {
+			int ef = errflag, lv = lastval, ret;
+
+			/* Parsed OK, execute for each name */
+			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) {
+			    setsparam("REPLY", ztrdup(tmpptr->name));
+			    execode(prog, 1, 0);
+			    if (!errflag)
+				tmpptr->sortstrs[iexec] =
+				    dupstring(getsparam("REPLY"));
+			    else
+				tmpptr->sortstrs[iexec] = tmpptr->name;
+			}
+
+			ret = lastval;
+			errflag = ef;
+			lastval = lv;
+		    } else {
+			/* Failed, let's be safe */
+			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
+			    tmpptr->sortstrs[iexec] = tmpptr->name;
+		    }
+
+		    iexec++;
+		}
+	    }
+	}
+
 	/* Sort arguments in to lexical (and possibly numeric) order. *
 	 * This is reversed to facilitate insertion into the list.    */
 	qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
@@ -1682,7 +1813,7 @@
     else if (end > matchct)
 	end = matchct;
     if ((end -= first) > 0) {
-	if (gf_sortlist[0] & GS_NONE) {
+	if (gf_sortlist[0].tp & GS_NONE) {
 	    /* Match list was never reversed, so insert back to front. */
 	    matchptr = matchbuf + matchct - first - 1;
 	    while (end-- > 0) {


-- 
Peter Stephenson <pws@xxxxxxx>                  Software Engineer
CSR PLC, Churchill House, Cambridge Business Park, Cowley Road
Cambridge, CB4 0WZ, UK                          Tel: +44 (0)1223 692070



Messages sorted by: Reverse Date, Date, Thread, Author