Zsh Mailing List Archive
Messages sorted by: Reverse Date, Date, Thread, Author

[PATCH] db/gdbm rewrite



Hello,
gdbm module has specific design:
– hashtable is always empty
– getnode() returns heap-arena allocated (hcalloc) Param each time a key
is referenced
– the Param is PM_SCALAR, but it has u.hash set – this is to pass aroud
GDBM_FILE in u.hash.tmpdata
– the Param interfaces to database, never creates u.str (it cannot,
would overwrite u.hash), returns GDBM internal strings

I've rewritten the module:
– hashtable is filled normally, initially with ~PM_UPTODATE
– Param "dereference" detects ~PM_UPTODATE, uses database, sets u.str,
sets PM_UPTODATE
– GDBM_FILE is passed around in gsu_scalar extension:
        struct gsu_scalar_ext {
            struct gsu_scalar std;
            GDBM_FILE dbf;
        };
– normal ztrdup() strings are returned from Param


This way there is no subtle memory leak when doing:

while (( 1 )); do val=tied_param[key]; done

Tested this and top values were 100 MiB and rising.

Created also extensive tests (following V10private.ztst), doing things
like +=( ), unset on not existing (in Zsh) key, but existing in
database, scoping.

I develop this as pluggable module psprint/zgdbm, here I submit a few
days of work. I might further expand and submit to upstream, but what's
there now is a finished work, confirmed by complete testing.

Sending as patch and alternatively 2 separate files.

PS. Maybe this:

#define PM_UPTODATE     (1<<19) /* Parameter has up-to-date data (e.g.
loaded from DB) */

could be moved to zsh.h. It can be fairly common for every frontend-like
module to use parameters that are not "up to date".

-- 
  Sebastian Gniazdowski
  psprint2@xxxxxxxxxxxx
diff --git a/Src/Modules/db_gdbm.c b/Src/Modules/db_gdbm.c
index 310e329..eb83e7d 100644
--- a/Src/Modules/db_gdbm.c
+++ b/Src/Modules/db_gdbm.c
@@ -6,6 +6,9 @@
  * Copyright (c) 2008 Clint Adams
  * All rights reserved.
  *
+ * Modifications copyright (c) 2017 Sebastian Gniazdowski
+ * All rights reserved.
+ *
  * Permission is hereby granted, without written agreement and without
  * license or royalty fees, to use, copy, modify, and distribute this
  * software and to distribute modified versions of this software for any
@@ -31,6 +34,12 @@
 #include "db_gdbm.mdh"
 #include "db_gdbm.pro"
 
+#ifndef PM_UPTODATE
+#define PM_UPTODATE     (1<<19) /* Parameter has up-to-date data (e.g. loaded from DB) */
+#endif
+
+static Param createhash( char *name, int flags );
+
 /*
  * Make sure we have all the bits I'm using for memory mapping, otherwise
  * I don't know what I'm doing.
@@ -41,8 +50,33 @@
 
 static char *backtype = "db/gdbm";
 
-static const struct gsu_scalar gdbm_gsu =
-{ gdbmgetfn, gdbmsetfn, gdbmunsetfn };
+/*
+ * Longer GSU structure, to carry GDBM_FILE of owning
+ * database. Every parameter (hash value) receives GSU
+ * pointer and thus also receives GDBM_FILE - this way
+ * parameters can access proper database.
+ *
+ * Main HashTable parameter has the same instance of
+ * the custom GSU struct in u.hash->tmpdata field.
+ * When database is closed, `dbf` field is set to NULL
+ * and hash values know to not access database when
+ * being unset (total purge at zuntie).
+ *
+ * When database closing is ended, custom GSU struct
+ * is freed. Only new ztie creates new custom GSU
+ * struct instance.
+ */
+
+struct gsu_scalar_ext {
+    struct gsu_scalar std;
+    GDBM_FILE dbf;
+};
+
+/* Source structure - will be copied to allocated one,
+ * with `dbf` filled. `dbf` allocation <-> gsu allocation. */
+static const struct gsu_scalar_ext gdbm_gsu_ext =
+{ { gdbmgetfn, gdbmsetfn, gdbmunsetfn }, 0 };
+
 /**/
 static const struct gsu_hash gdbm_hash_gsu =
 { hashgetfn, gdbmhashsetfn, gdbmhashunsetfn };
@@ -77,8 +111,7 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func))
     }
 
     /* Here should be a lookup of the backend type against
-     * a registry.
-     */
+     * a registry, if generam DB mechanism is to be added */
     if (strcmp(OPT_ARG(ops, 'd'), backtype) != 0) {
         zwarnnam(nam, "unsupported backend type `%s'", OPT_ARG(ops, 'd'));
 	return 1;
@@ -92,7 +125,8 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func))
 	/*
 	 * Unset any existing parameter.  Note there's no implicit
 	 * "local" here, but if the existing parameter is local
-	 * that will be reflected in the new one.
+	 * then new parameter will be also local without following
+         * unset.
 	 *
 	 * We need to do this before attempting to open the DB
 	 * in case this variable is already tied to a DB.
@@ -107,14 +141,13 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func))
 
     dbf = gdbm_open(resource_name, 0, read_write, 0666, 0);
     if(dbf)
-	addmodulefd(gdbm_fdesc(dbf), FDT_INTERNAL);
+	addmodulefd(gdbm_fdesc(dbf), FDT_MODULE);
     else {
 	zwarnnam(nam, "error opening database file %s", resource_name);
 	return 1;
     }
 
-    if (!(tied_param = createspecialhash(pmname, &getgdbmnode, &scangdbmkeys,
-					 pmflags))) {
+    if (!(tied_param = createhash(pmname, pmflags))) {
         zwarnnam(nam, "cannot create the requested parameter %s", pmname);
 	fdtable[gdbm_fdesc(dbf)] = FDT_UNUSED;
 	gdbm_close(dbf);
@@ -122,7 +155,15 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func))
     }
 
     tied_param->gsu.h = &gdbm_hash_gsu;
-    tied_param->u.hash->tmpdata = (void *)dbf;
+
+    /* Allocate parameter sub-gsu, fill dbf field. 
+     * dbf allocation is 1 to 1 accompanied by
+     * gsu_scalar_ext allocation. */
+
+    struct gsu_scalar_ext *dbf_carrier = (struct gsu_scalar_ext *) zalloc(sizeof(struct gsu_scalar_ext));
+    dbf_carrier->std = gdbm_gsu_ext.std;
+    dbf_carrier->dbf = dbf;
+    tied_param->u.hash->tmpdata = (void *)dbf_carrier;
 
     return 0;
 }
@@ -161,6 +202,18 @@ bin_zuntie(char *nam, char **args, Options ops, UNUSED(int func))
     return ret;
 }
 
+/*
+ * The param is actual param in hash â?? always, because
+ * getgdbmnode creates every new key seen. However, it
+ * might be not PM_UPTODATE - which means that database
+ * wasn't yet queried.
+ *
+ * It will be left in this state if database doesn't
+ * contain such key. That might be a drawback, maybe
+ * setting to empty value has sense, as no other writer
+ * can exist. This would remove subtle hcalloc(1) leak.
+ */
+
 /**/
 static char *
 gdbmgetfn(Param pm)
@@ -169,18 +222,43 @@ gdbmgetfn(Param pm)
     int ret;
     GDBM_FILE dbf;
 
+    /* Key already retrieved? There is no sense of asking the
+     * database again, because:
+     * - there can be only multiple readers
+     * - so, no writer + reader use is allowed
+     *
+     * Thus:
+     * - if we are writers, we for sure have newest copy of data
+     * - if we are readers, we for sure have newest copy of data
+     */
+    if ( pm->node.flags & PM_UPTODATE ) {
+        return pm->u.str ? pm->u.str : (char *) hcalloc(1);
+    }
+
     key.dptr = pm->node.nam;
     key.dsize = strlen(key.dptr);
 
-    dbf = (GDBM_FILE)(pm->u.hash->tmpdata);
-    ret = gdbm_exists(dbf, key);
-    if(ret) {
+    dbf = ((struct gsu_scalar_ext *)pm->gsu.s)->dbf;
+
+    if((ret = gdbm_exists(dbf, key))) {
+        /* We have data â?? store it, return it */
+        pm->node.flags |= PM_UPTODATE;
+
         content = gdbm_fetch(dbf, key);
-    } else {
-        content.dptr = dupstring("");
+
+        /* Ensure there's no leak */
+        if (pm->u.str) {
+            zsfree(pm->u.str);
+        }
+
+        pm->u.str = ztrduppfx( content.dptr, content.dsize );
+
+        /* Can return pointer, correctly saved inside hash */
+        return pm->u.str;
     }
 
-    return content.dptr;
+    /* Can this be "" ? */
+    return (char *) hcalloc(1);
 }
 
 /**/
@@ -190,47 +268,81 @@ gdbmsetfn(Param pm, char *val)
     datum key, content;
     GDBM_FILE dbf;
 
+    /* Set is done on parameter and on database.
+     * See the allowed workers / readers comment
+     * at gdbmgetfn() */
+
+    /* Parameter */
+    if (pm->u.str) {
+        zsfree(pm->u.str);
+        pm->u.str = NULL;
+        pm->node.flags &= ~(PM_UPTODATE);
+    }
+
+    if (val) {
+        pm->u.str = ztrdup(val);
+        pm->node.flags |= PM_UPTODATE;
+    }
+
+    /* Database */
     key.dptr = pm->node.nam;
     key.dsize = strlen(key.dptr);
-    content.dptr = val;
-    content.dsize = strlen(content.dptr);
-
-    dbf = (GDBM_FILE)(pm->u.hash->tmpdata);
-    (void)gdbm_store(dbf, key, content, GDBM_REPLACE);
+    dbf = ((struct gsu_scalar_ext *)pm->gsu.s)->dbf;
+
+    if (val) {
+        if (dbf) {
+            content.dptr = val;
+            content.dsize = strlen(content.dptr);
+            (void)gdbm_store(dbf, key, content, GDBM_REPLACE);
+        }
+    } else {
+        if (dbf) {
+            (void)gdbm_delete(dbf, key);
+        }
+    }
 }
 
 /**/
 static void
 gdbmunsetfn(Param pm, UNUSED(int um))
 {
-    datum key;
-    GDBM_FILE dbf;
-
-    key.dptr = pm->node.nam;
-    key.dsize = strlen(key.dptr);
-
-    dbf = (GDBM_FILE)(pm->u.hash->tmpdata);
-    (void)gdbm_delete(dbf, key);
+    /* Set with NULL */
+    gdbmsetfn(pm, NULL);
 }
 
 /**/
 static HashNode
 getgdbmnode(HashTable ht, const char *name)
 {
-    int len;
-    char *nameu;
-    Param pm = NULL;
-
-    nameu = dupstring(name);
-    unmetafy(nameu, &len);
-
-    pm = (Param) hcalloc(sizeof(struct param));
-    pm->node.nam = nameu;
-    pm->node.flags = PM_SCALAR;
-    pm->gsu.s = &gdbm_gsu;
-    pm->u.hash = ht;
+    HashNode hn = gethashnode2( ht, name );
+    Param val_pm = (Param) hn;
+
+    /* Entry for key doesn't exist? Create it now,
+     * it will be interfacing between the database
+     * and Zsh - through special gdbm_gsu. So, any
+     * seen key results in new interfacing parameter.
+     *
+     * Previous code was returning heap arena Param
+     * that wasn't actually added to the hash. It was
+     * plainly name / database-key holder. Here we
+     * add the Param to its hash, it is not PM_UPTODATE.
+     * It will be loaded from database *and filled*
+     * or left in that state if the database doesn't
+     * contain it.
+     *
+     * No heap arena memory is used, memory usage is
+     * now limited - by number of distinct keys seen,
+     * not by number of key *uses*.
+     * */
+
+    if ( ! val_pm ) {
+        val_pm = (Param) zshcalloc( sizeof (*val_pm) );
+        val_pm->node.flags = PM_SCALAR | PM_HASHELEM; /* no PM_UPTODATE */
+        val_pm->gsu.s = (GsuScalar) ht->tmpdata;
+        ht->addnode( ht, ztrdup( name ), val_pm ); // sets pm->node.nam
+    }
 
-    return &pm->node;
+    return (HashNode) val_pm;
 }
 
 /**/
@@ -239,29 +351,33 @@ scangdbmkeys(HashTable ht, ScanFunc func, int flags)
 {
     Param pm = NULL;
     datum key, content;
-    GDBM_FILE dbf = (GDBM_FILE)(ht->tmpdata);
-
-    pm = (Param) hcalloc(sizeof(struct param));
-
-    pm->node.flags = PM_SCALAR;
-    pm->gsu.s = &nullsetscalar_gsu;
+    GDBM_FILE dbf = ((struct gsu_scalar_ext *)ht->tmpdata)->dbf;
 
+    /* Iterate keys adding them to hash, so
+     * we have Param to use in `func` */
     key = gdbm_firstkey(dbf);
 
     while(key.dptr) {
-	content = gdbm_fetch(dbf, key);
-
-	pm->node.nam = key.dptr;
-	pm->u.str = content.dptr;
-	pm->gsu.s = &nullsetscalar_gsu;
+        /* This returns database-interfacing Param,
+         * it will return u.str or first fetch data
+         * if not PM_UPTODATE (newly created) */
+        char *zkey = ztrduppfx(key.dptr, key.dsize);
+        HashNode hn = getgdbmnode(ht, zkey);
+        zsfree( zkey );
 
-	func(&pm->node, flags);
+	func(hn, flags);
 
+        /* Iterate - no problem as interfacing Param
+         * will do at most only fetches, not stores */
         key = gdbm_nextkey(dbf, key);
     }
 
 }
 
+/*
+ * Replace database with new hash
+ */
+
 /**/
 static void
 gdbmhashsetfn(Param pm, HashTable ht)
@@ -274,7 +390,7 @@ gdbmhashsetfn(Param pm, HashTable ht)
     if (!pm->u.hash || pm->u.hash == ht)
 	return;
 
-    if (!(dbf = (GDBM_FILE)(pm->u.hash->tmpdata)))
+    if (!(dbf = ((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbf))
 	return;
 
     key = gdbm_firstkey(dbf);
@@ -292,6 +408,9 @@ gdbmhashsetfn(Param pm, HashTable ht)
     if (!ht)
 	return;
 
+     /* Put new strings into database, waiting
+      * for their interfacing-Params to be created */
+
     for (i = 0; i < ht->hsize; i++)
 	for (hn = ht->nodes[i]; hn; hn = hn->next) {
 	    struct value v;
@@ -319,15 +438,16 @@ gdbmhashsetfn(Param pm, HashTable ht)
 static void
 gdbmuntie(Param pm)
 {
-    GDBM_FILE dbf = (GDBM_FILE)(pm->u.hash->tmpdata);
+    GDBM_FILE dbf = ((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbf;
     HashTable ht = pm->u.hash;
 
     if (dbf) { /* paranoia */
 	fdtable[gdbm_fdesc(dbf)] = FDT_UNUSED;
-	gdbm_close(dbf);
-    }
+        gdbm_close(dbf);
 
-    ht->tmpdata = NULL;
+        /* Let hash fields know there's no backend */
+        ((struct gsu_scalar_ext *)ht->tmpdata)->dbf = NULL;
+    }
 
     /* for completeness ... createspecialhash() should have an inverse */
     ht->getnode = ht->getnode2 = gethashnode2;
@@ -342,8 +462,19 @@ static void
 gdbmhashunsetfn(Param pm, UNUSED(int exp))
 {
     gdbmuntie(pm);
-    /* hash table is now normal, so proceed normally... */
+
+    /* Remember custom GSU structure assigned to
+     * u.hash->tmpdata before hash gets deleted */
+    void * gsu_ext = pm->u.hash->tmpdata;
+
+    /* Uses normal unsetter. Will delete all owned
+     * parameters and also hashtable. */
     pm->gsu.h->setfn(pm, NULL);
+
+    /* Don't need custom GSU structure with its
+     * GDBM_FILE pointer anymore */
+    zfree( gsu_ext, sizeof(struct gsu_scalar_ext));
+
     pm->node.flags |= PM_UNSET;
 }
 
@@ -401,3 +532,30 @@ finish_(UNUSED(Module m))
 {
     return 0;
 }
+
+static Param createhash( char *name, int flags ) {
+    Param pm;
+    HashTable ht;
+
+    pm = createparam(name, PM_SPECIAL | PM_HASHED);
+    if (!pm) {
+        return NULL;
+    }
+
+    if (pm->old)
+	pm->level = locallevel;
+
+    /* This creates standard hash. */
+    ht = pm->u.hash = newparamtable(32, name);
+    if (!pm->u.hash) {
+        paramtab->removenode(paramtab, name);
+        paramtab->freenode(&pm->node);
+        zwarnnam(name, "Out of memory when allocating hash");
+    }
+
+    /* These provide special features */
+    ht->getnode = ht->getnode2 = getgdbmnode;
+    ht->scantab = scangdbmkeys;
+
+    return pm;
+}
diff --git a/Test/V11db_gdbm.ztst b/Test/V11db_gdbm.ztst
new file mode 100644
index 0000000..fcea55f
--- /dev/null
+++ b/Test/V11db_gdbm.ztst
@@ -0,0 +1,215 @@
+# Tests for the zsh/param/private module
+
+%prep
+
+ #module_path+=( ~/github/zgdbm/module/Src )
+ #modname="psprint/zgdbm"
+ modname="zsh/db/gdbm"
+ dbfile=db.gdbm
+ if ! zmodload $modname ; then
+   ZTST_unimplemented="can't load $modname module for testing"
+ fi
+ rm -f db.gdbm
+
+%test
+
+ (zmodload -u $modname && zmodload $modname)
+0:unload and reload the module without crashing
+
+ ztie -d db/gdbm -f $dbfile dbase
+ zuntie dbase
+0:create the database
+
+ ztie -r -d db/gdbm -f $dbfile dbase
+ zuntie -u dbase
+0:open the database read-only
+
+ ztie -d db/gdbm -f $dbfile dbase
+ dbase[testkey]=testdata
+ zuntie dbase
+ ztie -r -d db/gdbm -f $dbfile dbase
+ echo $dbase[testkey]
+ zuntie -u dbase
+0:store key in database
+>testdata
+
+ ztie -d db/gdbm -f $dbfile dbase2
+ unset 'dbase2[testkey]'
+ zuntie dbase2
+ ztie -d db/gdbm -f $dbfile dbase
+ echo $dbase[testkey]
+ zuntie dbase
+0:remove key from database (different variables)
+>
+
+ ztie -d db/gdbm -f $dbfile dbase
+ dbase[testkey]=testdata
+ zuntie dbase
+ ztie -r -d db/gdbm -f $dbfile dbase
+ echo $dbase[testkey]
+ zuntie -u dbase
+ ztie -d db/gdbm -f $dbfile dbase
+ unset 'dbase[testkey]'
+ zuntie dbase
+ ztie -r -d db/gdbm -f $dbfile dbase
+ echo $dbase[testkey]
+ zuntie -u dbase
+0:store & remove key from database (the same variables)
+>testdata
+>
+
+ ztie -d db/gdbm -f $dbfile dbase
+ dbase[testkey]=testdata
+ dbase[testkey2]=$dbase[testkey]
+ dbase[testkey3]=$dbase[testkey]x$dbase[testkey2]
+ zuntie dbase
+ ztie -d db/gdbm -f $dbfile dbase
+ echo $dbase[testkey]
+ echo $dbase[testkey2]
+ echo $dbase[testkey3]
+ zuntie dbase
+0:store 2 keys fetching 1st
+>testdata
+>testdata
+>testdataxtestdata
+
+ ztie -d db/gdbm -f $dbfile dbase
+ val=$dbase[testkey2]
+ unset 'dbase[testkey2]'
+ echo $val
+ zuntie dbase
+0:unset key that was fetched
+>testdata
+
+ ztie -r -d db/gdbm -f $dbfile dbase
+ print -rl -- "${(kv)dbase[@]}"
+ zuntie -u dbase
+0:scan read-only tied hash
+>testkey
+>testdata
+>testkey3
+>testdataxtestdata
+
+ ztie -d db/gdbm -f $dbfile dbase
+ local -a arr
+ arr=( "${(kv)dbase[@]}" )
+ print -rl -- "${arr[@]}"
+ zuntie dbase
+0:different scan, also read-write mode
+>testkey
+>testdata
+>testkey3
+>testdataxtestdata
+
+ ztie -d db/gdbm -f $dbfile dbase
+ dbase=( a b c d )
+ zuntie dbase
+ ztie -d db/gdbm -f $dbfile dbase
+ print -rl -- "${(kv)dbase[@]}"
+ zuntie dbase
+0:replace hash / database, scan
+>c
+>d
+>a
+>b
+
+ ztie -d db/gdbm -f $dbfile dbase
+ local -a arr
+ arr=( "${dbase[@]}" )
+ print -rl -- "${arr[@]}"
+ zuntie dbase
+0:scan with no (kv)
+>d
+>b
+
+ ztie -d db/gdbm -f $dbfile dbase
+ print -rl -- "${(k)dbase[@]}"
+ zuntie dbase
+0:scan with keys only (k)
+>c
+>a
+
+ ztie -d db/gdbm -f $dbfile dbase
+ print -rl -- "${(v)dbase[@]}"
+ zuntie dbase
+0:scan with keys only explicit (v)
+>d
+>b
+
+ rm -f $dbfile
+ ztie -r -d db/gdbm -f $dbfile dbase 2>/dev/null
+1:read-only open non-existent database
+
+ ztie -d db/gdbm -f $dbfile dbase
+ dbase+=( a b )
+ echo $dbase[a]
+ zuntie dbase
+ ztie -r -d db/gdbm -f $dbfile dbase
+ echo $dbase[a]
+ print -rl -- "${(kv)dbase[@]}"
+ zuntie -u dbase
+ ztie -d db/gdbm -f $dbfile dbase
+ dbase+=( c d )
+ echo $dbase[a]
+ echo $dbase[c]
+ print -rl -- "${(kv)dbase[@]}"
+ zuntie dbase
+ ztie -r -d db/gdbm -f $dbfile dbase
+ echo $dbase[a]
+ echo $dbase[c]
+ print -rl -- "${(kv)dbase[@]}"
+ zuntie -u dbase
+0:Append with +=( ), also with existing data, also (kv) scan
+>b
+>b
+>a
+>b
+>b
+>d
+>c
+>d
+>a
+>b
+>b
+>d
+>c
+>d
+>a
+>b
+
+ ztie -d db/gdbm -f $dbfile dbase
+ echo ${(t)dbase}
+ zuntie dbase
+0:Type of tied parameter
+>association-special
+
+ typeset -ga dbase
+ ztie -d db/gdbm -f $dbfile dbase
+ echo ${(t)dbase}
+ zuntie dbase
+0:Type of tied parameter, with preceding unset
+>association-special
+
+ local -a dbase
+ ztie -d db/gdbm -f $dbfile dbase
+ echo ${(t)dbase}
+ zuntie dbase
+0:Type of tied parameter, with local parameter already existing
+>association-local-special
+
+ local -a dbase
+ dbase=( fromarray )
+ () {
+     local -a dbase
+     ztie -d db/gdbm -f $dbfile dbase
+     echo ${(t)dbase}
+     zuntie dbase
+ }
+ echo $dbase[1]
+ ztie -d db/gdbm -f $dbfile dbase2
+ echo "Can connect, so untie happened:" $dbase2[a]
+ zuntie dbase2
+0:Test of automatic untie (use of local scope) and of scoping
+>association-local-special
+>fromarray
+>Can connect, so untie happened: b
# Tests for the zsh/param/private module

%prep

 #module_path+=( ~/github/zgdbm/module/Src )
 #modname="psprint/zgdbm"
 modname="zsh/db/gdbm"
 dbfile=db.gdbm
 if ! zmodload $modname ; then
   ZTST_unimplemented="can't load $modname module for testing"
 fi
 rm -f db.gdbm

%test

 (zmodload -u $modname && zmodload $modname)
0:unload and reload the module without crashing

 ztie -d db/gdbm -f $dbfile dbase
 zuntie dbase
0:create the database

 ztie -r -d db/gdbm -f $dbfile dbase
 zuntie -u dbase
0:open the database read-only

 ztie -d db/gdbm -f $dbfile dbase
 dbase[testkey]=testdata
 zuntie dbase
 ztie -r -d db/gdbm -f $dbfile dbase
 echo $dbase[testkey]
 zuntie -u dbase
0:store key in database
>testdata

 ztie -d db/gdbm -f $dbfile dbase2
 unset 'dbase2[testkey]'
 zuntie dbase2
 ztie -d db/gdbm -f $dbfile dbase
 echo $dbase[testkey]
 zuntie dbase
0:remove key from database (different variables)
>

 ztie -d db/gdbm -f $dbfile dbase
 dbase[testkey]=testdata
 zuntie dbase
 ztie -r -d db/gdbm -f $dbfile dbase
 echo $dbase[testkey]
 zuntie -u dbase
 ztie -d db/gdbm -f $dbfile dbase
 unset 'dbase[testkey]'
 zuntie dbase
 ztie -r -d db/gdbm -f $dbfile dbase
 echo $dbase[testkey]
 zuntie -u dbase
0:store & remove key from database (the same variables)
>testdata
>

 ztie -d db/gdbm -f $dbfile dbase
 dbase[testkey]=testdata
 dbase[testkey2]=$dbase[testkey]
 dbase[testkey3]=$dbase[testkey]x$dbase[testkey2]
 zuntie dbase
 ztie -d db/gdbm -f $dbfile dbase
 echo $dbase[testkey]
 echo $dbase[testkey2]
 echo $dbase[testkey3]
 zuntie dbase
0:store 2 keys fetching 1st
>testdata
>testdata
>testdataxtestdata

 ztie -d db/gdbm -f $dbfile dbase
 val=$dbase[testkey2]
 unset 'dbase[testkey2]'
 echo $val
 zuntie dbase
0:unset key that was fetched
>testdata

 ztie -r -d db/gdbm -f $dbfile dbase
 print -rl -- "${(kv)dbase[@]}"
 zuntie -u dbase
0:scan read-only tied hash
>testkey
>testdata
>testkey3
>testdataxtestdata

 ztie -d db/gdbm -f $dbfile dbase
 local -a arr
 arr=( "${(kv)dbase[@]}" )
 print -rl -- "${arr[@]}"
 zuntie dbase
0:different scan, also read-write mode
>testkey
>testdata
>testkey3
>testdataxtestdata

 ztie -d db/gdbm -f $dbfile dbase
 dbase=( a b c d )
 zuntie dbase
 ztie -d db/gdbm -f $dbfile dbase
 print -rl -- "${(kv)dbase[@]}"
 zuntie dbase
0:replace hash / database, scan
>c
>d
>a
>b

 ztie -d db/gdbm -f $dbfile dbase
 local -a arr
 arr=( "${dbase[@]}" )
 print -rl -- "${arr[@]}"
 zuntie dbase
0:scan with no (kv)
>d
>b

 ztie -d db/gdbm -f $dbfile dbase
 print -rl -- "${(k)dbase[@]}"
 zuntie dbase
0:scan with keys only (k)
>c
>a

 ztie -d db/gdbm -f $dbfile dbase
 print -rl -- "${(v)dbase[@]}"
 zuntie dbase
0:scan with keys only explicit (v)
>d
>b

 rm -f $dbfile
 ztie -r -d db/gdbm -f $dbfile dbase 2>/dev/null
1:read-only open non-existent database

 ztie -d db/gdbm -f $dbfile dbase
 dbase+=( a b )
 echo $dbase[a]
 zuntie dbase
 ztie -r -d db/gdbm -f $dbfile dbase
 echo $dbase[a]
 print -rl -- "${(kv)dbase[@]}"
 zuntie -u dbase
 ztie -d db/gdbm -f $dbfile dbase
 dbase+=( c d )
 echo $dbase[a]
 echo $dbase[c]
 print -rl -- "${(kv)dbase[@]}"
 zuntie dbase
 ztie -r -d db/gdbm -f $dbfile dbase
 echo $dbase[a]
 echo $dbase[c]
 print -rl -- "${(kv)dbase[@]}"
 zuntie -u dbase
0:Append with +=( ), also with existing data, also (kv) scan
>b
>b
>a
>b
>b
>d
>c
>d
>a
>b
>b
>d
>c
>d
>a
>b

 ztie -d db/gdbm -f $dbfile dbase
 echo ${(t)dbase}
 zuntie dbase
0:Type of tied parameter
>association-special

 typeset -ga dbase
 ztie -d db/gdbm -f $dbfile dbase
 echo ${(t)dbase}
 zuntie dbase
0:Type of tied parameter, with preceding unset
>association-special

 local -a dbase
 ztie -d db/gdbm -f $dbfile dbase
 echo ${(t)dbase}
 zuntie dbase
0:Type of tied parameter, with local parameter already existing
>association-local-special

 local -a dbase
 dbase=( fromarray )
 () {
     local -a dbase
     ztie -d db/gdbm -f $dbfile dbase
     echo ${(t)dbase}
     zuntie dbase
 }
 echo $dbase[1]
 ztie -d db/gdbm -f $dbfile dbase2
 echo "Can connect, so untie happened:" $dbase2[a]
 zuntie dbase2
0:Test of automatic untie (use of local scope) and of scoping
>association-local-special
>fromarray
>Can connect, so untie happened: b
/*
 * db_gdbm.c - bindings for gdbm
 *
 * This file is part of zsh, the Z shell.
 *
 * Copyright (c) 2008 Clint Adams
 * All rights reserved.
 *
 * Modifications copyright (c) 2017 Sebastian Gniazdowski
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following
 * two paragraphs appear in all copies of this software.
 *
 * In no event shall Clint Adams or the Zsh Development
 * Group be liable to any party for direct, indirect, special, incidental, or
 * consequential damages arising out of the use of this software and its
 * documentation, even if Peter Stephenson, Sven Wischnowsky and the Zsh
 * Development Group have been advised of the possibility of such damage.
 *
 * Clint Adams and the Zsh Development Group
 * specifically disclaim any warranties, including, but not limited to, the
 * implied warranties of merchantability and fitness for a particular purpose.
 * The software provided hereunder is on an "as is" basis, and Peter
 * Stephenson, Sven Wischnowsky and the Zsh Development Group have no
 * obligation to provide maintenance, support, updates, enhancements, or
 * modifications.
 *
 */

#include "db_gdbm.mdh"
#include "db_gdbm.pro"

#ifndef PM_UPTODATE
#define PM_UPTODATE     (1<<19) /* Parameter has up-to-date data (e.g. loaded from DB) */
#endif

static Param createhash( char *name, int flags );

/*
 * Make sure we have all the bits I'm using for memory mapping, otherwise
 * I don't know what I'm doing.
 */
#if defined(HAVE_GDBM_H) && defined(HAVE_GDBM_OPEN)

#include <gdbm.h>

static char *backtype = "db/gdbm";

/*
 * Longer GSU structure, to carry GDBM_FILE of owning
 * database. Every parameter (hash value) receives GSU
 * pointer and thus also receives GDBM_FILE - this way
 * parameters can access proper database.
 *
 * Main HashTable parameter has the same instance of
 * the custom GSU struct in u.hash->tmpdata field.
 * When database is closed, `dbf` field is set to NULL
 * and hash values know to not access database when
 * being unset (total purge at zuntie).
 *
 * When database closing is ended, custom GSU struct
 * is freed. Only new ztie creates new custom GSU
 * struct instance.
 */

struct gsu_scalar_ext {
    struct gsu_scalar std;
    GDBM_FILE dbf;
};

/* Source structure - will be copied to allocated one,
 * with `dbf` filled. `dbf` allocation <-> gsu allocation. */
static const struct gsu_scalar_ext gdbm_gsu_ext =
{ { gdbmgetfn, gdbmsetfn, gdbmunsetfn }, 0 };

/**/
static const struct gsu_hash gdbm_hash_gsu =
{ hashgetfn, gdbmhashsetfn, gdbmhashunsetfn };

static struct builtin bintab[] = {
    BUILTIN("ztie", 0, bin_ztie, 1, -1, 0, "d:f:r", NULL),
    BUILTIN("zuntie", 0, bin_zuntie, 1, -1, 0, "u", NULL),
};

/**/
static int
bin_ztie(char *nam, char **args, Options ops, UNUSED(int func))
{
    char *resource_name, *pmname;
    GDBM_FILE dbf = NULL;
    int read_write = GDBM_SYNC, pmflags = PM_REMOVABLE;
    Param tied_param;

    if(!OPT_ISSET(ops,'d')) {
        zwarnnam(nam, "you must pass `-d %s'", backtype);
	return 1;
    }
    if(!OPT_ISSET(ops,'f')) {
        zwarnnam(nam, "you must pass `-f' with a filename", NULL);
	return 1;
    }
    if (OPT_ISSET(ops,'r')) {
	read_write |= GDBM_READER;
	pmflags |= PM_READONLY;
    } else {
	read_write |= GDBM_WRCREAT;
    }

    /* Here should be a lookup of the backend type against
     * a registry, if generam DB mechanism is to be added */
    if (strcmp(OPT_ARG(ops, 'd'), backtype) != 0) {
        zwarnnam(nam, "unsupported backend type `%s'", OPT_ARG(ops, 'd'));
	return 1;
    }

    resource_name = OPT_ARG(ops, 'f');
    pmname = *args;

    if ((tied_param = (Param)paramtab->getnode(paramtab, pmname)) &&
	!(tied_param->node.flags & PM_UNSET)) {
	/*
	 * Unset any existing parameter.  Note there's no implicit
	 * "local" here, but if the existing parameter is local
	 * then new parameter will be also local without following
         * unset.
	 *
	 * We need to do this before attempting to open the DB
	 * in case this variable is already tied to a DB.
	 *
	 * This can fail if the variable is readonly or restricted.
	 * We could call unsetparam() and check errflag instead
	 * of the return status.
	 */
	if (unsetparam_pm(tied_param, 0, 1))
	    return 1;
    }

    dbf = gdbm_open(resource_name, 0, read_write, 0666, 0);
    if(dbf)
	addmodulefd(gdbm_fdesc(dbf), FDT_MODULE);
    else {
	zwarnnam(nam, "error opening database file %s", resource_name);
	return 1;
    }

    if (!(tied_param = createhash(pmname, pmflags))) {
        zwarnnam(nam, "cannot create the requested parameter %s", pmname);
	fdtable[gdbm_fdesc(dbf)] = FDT_UNUSED;
	gdbm_close(dbf);
	return 1;
    }

    tied_param->gsu.h = &gdbm_hash_gsu;

    /* Allocate parameter sub-gsu, fill dbf field. 
     * dbf allocation is 1 to 1 accompanied by
     * gsu_scalar_ext allocation. */

    struct gsu_scalar_ext *dbf_carrier = (struct gsu_scalar_ext *) zalloc(sizeof(struct gsu_scalar_ext));
    dbf_carrier->std = gdbm_gsu_ext.std;
    dbf_carrier->dbf = dbf;
    tied_param->u.hash->tmpdata = (void *)dbf_carrier;

    return 0;
}

/**/
static int
bin_zuntie(char *nam, char **args, Options ops, UNUSED(int func))
{
    Param pm;
    char *pmname;
    int ret = 0;

    for (pmname = *args; *args++; pmname = *args) {
	pm = (Param) paramtab->getnode(paramtab, pmname);
	if(!pm) {
	    zwarnnam(nam, "cannot untie %s", pmname);
	    ret = 1;
	    continue;
	}
	if (pm->gsu.h != &gdbm_hash_gsu) {
	    zwarnnam(nam, "not a tied gdbm hash: %s", pmname);
	    ret = 1;
	    continue;
	}

	queue_signals();
	if (OPT_ISSET(ops,'u'))
	    gdbmuntie(pm);	/* clear read-only-ness */
	if (unsetparam_pm(pm, 0, 1)) {
	    /* assume already reported */
	    ret = 1;
	}
	unqueue_signals();
    }

    return ret;
}

/*
 * The param is actual param in hash â?? always, because
 * getgdbmnode creates every new key seen. However, it
 * might be not PM_UPTODATE - which means that database
 * wasn't yet queried.
 *
 * It will be left in this state if database doesn't
 * contain such key. That might be a drawback, maybe
 * setting to empty value has sense, as no other writer
 * can exist. This would remove subtle hcalloc(1) leak.
 */

/**/
static char *
gdbmgetfn(Param pm)
{
    datum key, content;
    int ret;
    GDBM_FILE dbf;

    /* Key already retrieved? There is no sense of asking the
     * database again, because:
     * - there can be only multiple readers
     * - so, no writer + reader use is allowed
     *
     * Thus:
     * - if we are writers, we for sure have newest copy of data
     * - if we are readers, we for sure have newest copy of data
     */
    if ( pm->node.flags & PM_UPTODATE ) {
        return pm->u.str ? pm->u.str : (char *) hcalloc(1);
    }

    key.dptr = pm->node.nam;
    key.dsize = strlen(key.dptr);

    dbf = ((struct gsu_scalar_ext *)pm->gsu.s)->dbf;

    if((ret = gdbm_exists(dbf, key))) {
        /* We have data â?? store it, return it */
        pm->node.flags |= PM_UPTODATE;

        content = gdbm_fetch(dbf, key);

        /* Ensure there's no leak */
        if (pm->u.str) {
            zsfree(pm->u.str);
        }

        pm->u.str = ztrduppfx( content.dptr, content.dsize );

        /* Can return pointer, correctly saved inside hash */
        return pm->u.str;
    }

    /* Can this be "" ? */
    return (char *) hcalloc(1);
}

/**/
static void
gdbmsetfn(Param pm, char *val)
{
    datum key, content;
    GDBM_FILE dbf;

    /* Set is done on parameter and on database.
     * See the allowed workers / readers comment
     * at gdbmgetfn() */

    /* Parameter */
    if (pm->u.str) {
        zsfree(pm->u.str);
        pm->u.str = NULL;
        pm->node.flags &= ~(PM_UPTODATE);
    }

    if (val) {
        pm->u.str = ztrdup(val);
        pm->node.flags |= PM_UPTODATE;
    }

    /* Database */
    key.dptr = pm->node.nam;
    key.dsize = strlen(key.dptr);
    dbf = ((struct gsu_scalar_ext *)pm->gsu.s)->dbf;

    if (val) {
        if (dbf) {
            content.dptr = val;
            content.dsize = strlen(content.dptr);
            (void)gdbm_store(dbf, key, content, GDBM_REPLACE);
        }
    } else {
        if (dbf) {
            (void)gdbm_delete(dbf, key);
        }
    }
}

/**/
static void
gdbmunsetfn(Param pm, UNUSED(int um))
{
    /* Set with NULL */
    gdbmsetfn(pm, NULL);
}

/**/
static HashNode
getgdbmnode(HashTable ht, const char *name)
{
    HashNode hn = gethashnode2( ht, name );
    Param val_pm = (Param) hn;

    /* Entry for key doesn't exist? Create it now,
     * it will be interfacing between the database
     * and Zsh - through special gdbm_gsu. So, any
     * seen key results in new interfacing parameter.
     *
     * Previous code was returning heap arena Param
     * that wasn't actually added to the hash. It was
     * plainly name / database-key holder. Here we
     * add the Param to its hash, it is not PM_UPTODATE.
     * It will be loaded from database *and filled*
     * or left in that state if the database doesn't
     * contain it.
     *
     * No heap arena memory is used, memory usage is
     * now limited - by number of distinct keys seen,
     * not by number of key *uses*.
     * */

    if ( ! val_pm ) {
        val_pm = (Param) zshcalloc( sizeof (*val_pm) );
        val_pm->node.flags = PM_SCALAR | PM_HASHELEM; /* no PM_UPTODATE */
        val_pm->gsu.s = (GsuScalar) ht->tmpdata;
        ht->addnode( ht, ztrdup( name ), val_pm ); // sets pm->node.nam
    }

    return (HashNode) val_pm;
}

/**/
static void
scangdbmkeys(HashTable ht, ScanFunc func, int flags)
{
    Param pm = NULL;
    datum key, content;
    GDBM_FILE dbf = ((struct gsu_scalar_ext *)ht->tmpdata)->dbf;

    /* Iterate keys adding them to hash, so
     * we have Param to use in `func` */
    key = gdbm_firstkey(dbf);

    while(key.dptr) {
        /* This returns database-interfacing Param,
         * it will return u.str or first fetch data
         * if not PM_UPTODATE (newly created) */
        char *zkey = ztrduppfx(key.dptr, key.dsize);
        HashNode hn = getgdbmnode(ht, zkey);
        zsfree( zkey );

	func(hn, flags);

        /* Iterate - no problem as interfacing Param
         * will do at most only fetches, not stores */
        key = gdbm_nextkey(dbf, key);
    }

}

/*
 * Replace database with new hash
 */

/**/
static void
gdbmhashsetfn(Param pm, HashTable ht)
{
    int i;
    HashNode hn;
    GDBM_FILE dbf;
    datum key, content;

    if (!pm->u.hash || pm->u.hash == ht)
	return;

    if (!(dbf = ((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbf))
	return;

    key = gdbm_firstkey(dbf);
    while (key.dptr) {
	queue_signals();
	(void)gdbm_delete(dbf, key);
	free(key.dptr);
	unqueue_signals();
	key = gdbm_firstkey(dbf);
    }

    /* just deleted everything, clean up */
    (void)gdbm_reorganize(dbf);

    if (!ht)
	return;

     /* Put new strings into database, waiting
      * for their interfacing-Params to be created */

    for (i = 0; i < ht->hsize; i++)
	for (hn = ht->nodes[i]; hn; hn = hn->next) {
	    struct value v;

	    v.isarr = v.flags = v.start = 0;
	    v.end = -1;
	    v.arr = NULL;
	    v.pm = (Param) hn;

	    key.dptr = v.pm->node.nam;
	    key.dsize = strlen(key.dptr);

	    queue_signals();

	    content.dptr = getstrvalue(&v);
	    content.dsize = strlen(content.dptr);

	    (void)gdbm_store(dbf, key, content, GDBM_REPLACE);	

	    unqueue_signals();
	}
}

/**/
static void
gdbmuntie(Param pm)
{
    GDBM_FILE dbf = ((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbf;
    HashTable ht = pm->u.hash;

    if (dbf) { /* paranoia */
	fdtable[gdbm_fdesc(dbf)] = FDT_UNUSED;
        gdbm_close(dbf);

        /* Let hash fields know there's no backend */
        ((struct gsu_scalar_ext *)ht->tmpdata)->dbf = NULL;
    }

    /* for completeness ... createspecialhash() should have an inverse */
    ht->getnode = ht->getnode2 = gethashnode2;
    ht->scantab = NULL;

    pm->node.flags &= ~(PM_SPECIAL|PM_READONLY);
    pm->gsu.h = &stdhash_gsu;
}

/**/
static void
gdbmhashunsetfn(Param pm, UNUSED(int exp))
{
    gdbmuntie(pm);

    /* Remember custom GSU structure assigned to
     * u.hash->tmpdata before hash gets deleted */
    void * gsu_ext = pm->u.hash->tmpdata;

    /* Uses normal unsetter. Will delete all owned
     * parameters and also hashtable. */
    pm->gsu.h->setfn(pm, NULL);

    /* Don't need custom GSU structure with its
     * GDBM_FILE pointer anymore */
    zfree( gsu_ext, sizeof(struct gsu_scalar_ext));

    pm->node.flags |= PM_UNSET;
}

#else
# error no gdbm
#endif /* have gdbm */

static struct features module_features = {
    bintab, sizeof(bintab)/sizeof(*bintab),
    NULL, 0,
    NULL, 0,
    NULL, 0,
    0
};

/**/
int
setup_(UNUSED(Module m))
{
    return 0;
}

/**/
int
features_(Module m, char ***features)
{
    *features = featuresarray(m, &module_features);
    return 0;
}

/**/
int
enables_(Module m, int **enables)
{
    return handlefeatures(m, &module_features, enables);
}

/**/
int
boot_(UNUSED(Module m))
{
    return 0;
}

/**/
int
cleanup_(Module m)
{
    return setfeatureenables(m, &module_features, NULL);
}

/**/
int
finish_(UNUSED(Module m))
{
    return 0;
}

static Param createhash( char *name, int flags ) {
    Param pm;
    HashTable ht;

    pm = createparam(name, PM_SPECIAL | PM_HASHED);
    if (!pm) {
        return NULL;
    }

    if (pm->old)
	pm->level = locallevel;

    /* This creates standard hash. */
    ht = pm->u.hash = newparamtable(32, name);
    if (!pm->u.hash) {
        paramtab->removenode(paramtab, name);
        paramtab->freenode(&pm->node);
        zwarnnam(name, "Out of memory when allocating hash");
    }

    /* These provide special features */
    ht->getnode = ht->getnode2 = getgdbmnode;
    ht->scantab = scangdbmkeys;

    return pm;
}


Messages sorted by: Reverse Date, Date, Thread, Author