1999-08-13 01:37:52 -04:00
|
|
|
/*
|
|
|
|
* sdbm - ndbm work-alike hashed database library
|
|
|
|
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
|
|
|
|
* author: oz@nexus.yorku.ca
|
|
|
|
* status: public domain.
|
|
|
|
*
|
|
|
|
* core routines
|
|
|
|
*/
|
|
|
|
|
2007-06-09 23:06:15 -04:00
|
|
|
#include "ruby/config.h"
|
2009-03-26 10:32:22 -04:00
|
|
|
#include "ruby/defines.h"
|
2010-05-12 18:39:10 -04:00
|
|
|
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
|
2010-05-10 22:49:46 -04:00
|
|
|
#include "sdbm.h"
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* sdbm - ndbm work-alike hashed database library
|
|
|
|
* tuning and portability constructs [not nearly enough]
|
|
|
|
* author: oz@nexus.yorku.ca
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define BYTESIZ 8
|
|
|
|
|
|
|
|
#ifdef BSD42
|
|
|
|
#define SEEK_SET L_SET
|
2011-03-24 09:49:26 -04:00
|
|
|
#define memset(s,c,n) bzero((s), (n)) /* only when c is zero */
|
|
|
|
#define memcpy(s1,s2,n) bcopy((s2), (s1), (n))
|
|
|
|
#define memcmp(s1,s2,n) bcmp((s1),(s2),(n))
|
1999-08-13 01:37:52 -04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* important tuning parms (hah)
|
|
|
|
*/
|
|
|
|
|
2010-10-15 21:06:13 -04:00
|
|
|
#ifndef SEEDUPS
|
|
|
|
#define SEEDUPS 1 /* always detect duplicates */
|
|
|
|
#endif
|
|
|
|
#ifndef BADMESS
|
|
|
|
#define BADMESS 1 /* generate a message for worst case:
|
1999-08-13 01:37:52 -04:00
|
|
|
cannot make room after SPLTMAX splits */
|
2010-10-15 21:06:13 -04:00
|
|
|
#endif
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
/*
|
|
|
|
* misc
|
|
|
|
*/
|
|
|
|
#ifdef DEBUG
|
|
|
|
#define debug(x) printf x
|
|
|
|
#else
|
|
|
|
#define debug(x)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef BIG_E
|
|
|
|
#define GET_SHORT(p, i) (((unsigned)((unsigned char *)(p))[(i)*2] << 8) + (((unsigned char *)(p))[(i)*2 + 1]))
|
|
|
|
#define PUT_SHORT(p, i, s) (((unsigned char *)(p))[(i)*2] = (unsigned char)((s) >> 8), ((unsigned char *)(p))[(i)*2 + 1] = (unsigned char)(s))
|
|
|
|
#else
|
2011-03-24 09:49:26 -04:00
|
|
|
#define GET_SHORT(p, i) ((p)[(i)])
|
|
|
|
#define PUT_SHORT(p, i, s) ((p)[(i)] = (s))
|
1999-08-13 01:37:52 -04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*#include "pair.h"*/
|
|
|
|
static int fitpair proto((char *, int));
|
|
|
|
static void putpair proto((char *, datum, datum));
|
|
|
|
static datum getpair proto((char *, datum));
|
|
|
|
static int delpair proto((char *, datum));
|
|
|
|
static int chkpage proto((char *));
|
|
|
|
static datum getnkey proto((char *, int));
|
|
|
|
static void splpage proto((char *, char *, long));
|
2010-10-15 21:06:13 -04:00
|
|
|
#if SEEDUPS
|
1999-08-13 01:37:52 -04:00
|
|
|
static int duppair proto((char *, datum));
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2007-07-15 09:24:39 -04:00
|
|
|
#ifdef DOSISH
|
1999-08-13 01:37:52 -04:00
|
|
|
#include <io.h>
|
|
|
|
#endif
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#ifdef BSD42
|
|
|
|
#include <sys/file.h>
|
|
|
|
#else
|
|
|
|
#include <fcntl.h>
|
|
|
|
/*#include <memory.h>*/
|
|
|
|
#endif
|
|
|
|
#ifndef O_BINARY
|
|
|
|
#define O_BINARY 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
#ifndef EPERM
|
|
|
|
#define EPERM EACCES
|
|
|
|
#endif
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#ifdef __STDC__
|
|
|
|
#include <stddef.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef NULL
|
|
|
|
#define NULL 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* externals
|
|
|
|
*/
|
2011-10-24 09:49:58 -04:00
|
|
|
#if !defined(__sun) && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(errno)
|
1999-08-13 01:37:52 -04:00
|
|
|
extern int errno;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* forward
|
|
|
|
*/
|
|
|
|
static int getdbit proto((DBM *, long));
|
|
|
|
static int setdbit proto((DBM *, long));
|
|
|
|
static int getpage proto((DBM *, long));
|
|
|
|
static datum getnext proto((DBM *));
|
|
|
|
static int makroom proto((DBM *, long, int));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* useful macros
|
|
|
|
*/
|
|
|
|
#define bad(x) ((x).dptr == NULL || (x).dsize < 0)
|
|
|
|
#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
|
|
|
|
#define ioerr(db) ((db)->flags |= DBM_IOERR)
|
|
|
|
|
|
|
|
#define OFF_PAG(off) (long) (off) * PBLKSIZ
|
|
|
|
#define OFF_DIR(off) (long) (off) * DBLKSIZ
|
|
|
|
|
|
|
|
static long masks[] = {
|
|
|
|
000000000000L, 000000000001L, 000000000003L,
|
|
|
|
000000000007L, 000000000017L, 000000000037L,
|
|
|
|
000000000077L, 000000000177L, 000000000377L,
|
|
|
|
000000000777L, 000000001777L, 000000003777L,
|
|
|
|
000000007777L, 000000017777L, 000000037777L,
|
|
|
|
000000077777L, 000000177777L, 000000377777L,
|
|
|
|
000000777777L, 000001777777L, 000003777777L,
|
|
|
|
000007777777L, 000017777777L, 000037777777L,
|
|
|
|
000077777777L, 000177777777L, 000377777777L,
|
|
|
|
000777777777L, 001777777777L, 003777777777L,
|
|
|
|
007777777777L, 017777777777L
|
|
|
|
};
|
|
|
|
|
|
|
|
datum nullitem = {NULL, 0};
|
|
|
|
|
|
|
|
DBM *
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_open(register char *file, register int flags, register int mode)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register DBM *db;
|
|
|
|
register char *dirname;
|
|
|
|
register char *pagname;
|
2011-03-25 02:46:57 -04:00
|
|
|
register size_t n;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
|
|
if (file == NULL || !*file)
|
|
|
|
return errno = EINVAL, (DBM *) NULL;
|
|
|
|
/*
|
2013-05-18 23:10:21 -04:00
|
|
|
* need space for two separate filenames
|
1999-08-13 01:37:52 -04:00
|
|
|
*/
|
|
|
|
n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
|
|
|
|
|
2011-03-25 02:46:57 -04:00
|
|
|
if ((dirname = malloc(n)) == NULL)
|
1999-08-13 01:37:52 -04:00
|
|
|
return errno = ENOMEM, (DBM *) NULL;
|
|
|
|
/*
|
|
|
|
* build the file names
|
|
|
|
*/
|
|
|
|
dirname = strcat(strcpy(dirname, file), DIRFEXT);
|
|
|
|
pagname = strcpy(dirname + strlen(dirname) + 1, file);
|
|
|
|
pagname = strcat(pagname, PAGFEXT);
|
|
|
|
|
|
|
|
db = sdbm_prep(dirname, pagname, flags, mode);
|
|
|
|
free((char *) dirname);
|
|
|
|
return db;
|
|
|
|
}
|
|
|
|
|
2011-10-27 17:07:23 -04:00
|
|
|
static int
|
|
|
|
fd_set_cloexec(int fd)
|
|
|
|
{
|
|
|
|
/* MinGW don't have F_GETFD and FD_CLOEXEC. [ruby-core:40281] */
|
|
|
|
#ifdef F_GETFD
|
|
|
|
int flags, ret;
|
|
|
|
flags = fcntl(fd, F_GETFD); /* should not fail except EBADF. */
|
|
|
|
if (flags == -1) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (2 < fd) {
|
|
|
|
if (!(flags & FD_CLOEXEC)) {
|
|
|
|
flags |= FD_CLOEXEC;
|
|
|
|
ret = fcntl(fd, F_SETFD, flags);
|
|
|
|
if (ret == -1) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
1999-08-13 01:37:52 -04:00
|
|
|
DBM *
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_prep(char *dirname, char *pagname, int flags, int mode)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register DBM *db;
|
|
|
|
struct stat dstat;
|
|
|
|
|
|
|
|
if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
|
|
|
|
return errno = ENOMEM, (DBM *) NULL;
|
|
|
|
|
2011-10-27 11:11:06 -04:00
|
|
|
db->pagf = -1;
|
|
|
|
db->dirf = -1;
|
1999-08-13 01:37:52 -04:00
|
|
|
db->flags = 0;
|
|
|
|
db->hmask = 0;
|
|
|
|
db->blkptr = 0;
|
|
|
|
db->keyptr = 0;
|
|
|
|
/*
|
2010-04-22 04:04:13 -04:00
|
|
|
* adjust user flags so that WRONLY becomes RDWR,
|
1999-08-13 01:37:52 -04:00
|
|
|
* as required by this package. Also set our internal
|
|
|
|
* flag for RDONLY.
|
|
|
|
*/
|
|
|
|
if (flags & O_WRONLY)
|
|
|
|
flags = (flags & ~O_WRONLY) | O_RDWR;
|
|
|
|
if (flags & O_RDONLY)
|
|
|
|
db->flags = DBM_RDONLY;
|
|
|
|
/*
|
|
|
|
* open the files in sequence, and stat the dirfile.
|
|
|
|
* If we fail anywhere, undo everything, return NULL.
|
|
|
|
*/
|
|
|
|
flags |= O_BINARY;
|
2011-10-29 03:12:09 -04:00
|
|
|
#ifdef O_CLOEXEC
|
|
|
|
flags |= O_CLOEXEC;
|
|
|
|
#endif
|
2011-10-27 11:11:06 -04:00
|
|
|
|
|
|
|
if ((db->pagf = open(pagname, flags, mode)) == -1) goto err;
|
2011-10-27 17:07:23 -04:00
|
|
|
if (fd_set_cloexec(db->pagf) == -1) goto err;
|
2011-10-27 11:11:06 -04:00
|
|
|
if ((db->dirf = open(dirname, flags, mode)) == -1) goto err;
|
2011-10-27 17:07:23 -04:00
|
|
|
if (fd_set_cloexec(db->dirf) == -1) goto err;
|
1999-08-13 01:37:52 -04:00
|
|
|
/*
|
|
|
|
* need the dirfile size to establish max bit number.
|
|
|
|
*/
|
2011-10-27 11:11:06 -04:00
|
|
|
if (fstat(db->dirf, &dstat) == -1) goto err;
|
1999-08-13 01:37:52 -04:00
|
|
|
/*
|
|
|
|
* zero size: either a fresh database, or one with a single,
|
|
|
|
* unsplit data page: dirpage is all zeros.
|
|
|
|
*/
|
2011-10-27 11:11:06 -04:00
|
|
|
db->dirbno = (!dstat.st_size) ? 0 : -1;
|
|
|
|
db->pagbno = -1;
|
|
|
|
db->maxbno = dstat.st_size * (long) BYTESIZ;
|
|
|
|
|
|
|
|
(void) memset(db->pagbuf, 0, PBLKSIZ);
|
|
|
|
(void) memset(db->dirbuf, 0, DBLKSIZ);
|
|
|
|
/*
|
|
|
|
* success
|
|
|
|
*/
|
|
|
|
return db;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (db->pagf != -1)
|
|
|
|
(void) close(db->pagf);
|
|
|
|
if (db->dirf != -1)
|
|
|
|
(void) close(db->dirf);
|
1999-08-13 01:37:52 -04:00
|
|
|
free((char *) db);
|
|
|
|
return (DBM *) NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_close(register DBM *db)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
if (db == NULL)
|
|
|
|
errno = EINVAL;
|
|
|
|
else {
|
|
|
|
(void) close(db->dirf);
|
|
|
|
(void) close(db->pagf);
|
|
|
|
free((char *) db);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
datum
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_fetch(register DBM *db, datum key)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
if (db == NULL || bad(key))
|
|
|
|
return errno = EINVAL, nullitem;
|
|
|
|
|
|
|
|
if (getpage(db, exhash(key)))
|
|
|
|
return getpair(db->pagbuf, key);
|
|
|
|
|
|
|
|
return ioerr(db), nullitem;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_delete(register DBM *db, datum key)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
if (db == NULL || bad(key))
|
|
|
|
return errno = EINVAL, -1;
|
|
|
|
if (sdbm_rdonly(db))
|
|
|
|
return errno = EPERM, -1;
|
|
|
|
|
|
|
|
if (getpage(db, exhash(key))) {
|
|
|
|
if (!delpair(db->pagbuf, key))
|
|
|
|
return -1;
|
|
|
|
/*
|
|
|
|
* update the page file
|
|
|
|
*/
|
|
|
|
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|
|
|
|
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
|
|
|
|
return ioerr(db), -1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ioerr(db), -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_store(register DBM *db, datum key, datum val, int flags)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
int need;
|
|
|
|
register long hash;
|
|
|
|
|
|
|
|
if (db == NULL || bad(key))
|
|
|
|
return errno = EINVAL, -1;
|
|
|
|
if (sdbm_rdonly(db))
|
|
|
|
return errno = EPERM, -1;
|
|
|
|
|
|
|
|
need = key.dsize + val.dsize;
|
|
|
|
/*
|
|
|
|
* is the pair too big (or too small) for this database ??
|
|
|
|
*/
|
|
|
|
if (need < 0 || need > PAIRMAX)
|
|
|
|
return errno = EINVAL, -1;
|
|
|
|
|
|
|
|
if (getpage(db, (hash = exhash(key)))) {
|
|
|
|
/*
|
|
|
|
* if we need to replace, delete the key/data pair
|
|
|
|
* first. If it is not there, ignore.
|
|
|
|
*/
|
|
|
|
if (flags == DBM_REPLACE)
|
|
|
|
(void) delpair(db->pagbuf, key);
|
2010-10-15 21:06:13 -04:00
|
|
|
#if SEEDUPS
|
1999-08-13 01:37:52 -04:00
|
|
|
else if (duppair(db->pagbuf, key))
|
|
|
|
return 1;
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* if we do not have enough room, we have to split.
|
|
|
|
*/
|
|
|
|
if (!fitpair(db->pagbuf, need))
|
|
|
|
if (!makroom(db, hash, need))
|
|
|
|
return ioerr(db), -1;
|
|
|
|
/*
|
|
|
|
* we have enough room or split is successful. insert the key,
|
|
|
|
* and update the page file.
|
|
|
|
*/
|
|
|
|
(void) putpair(db->pagbuf, key, val);
|
|
|
|
|
|
|
|
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|
|
|
|
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
|
|
|
|
return ioerr(db), -1;
|
|
|
|
/*
|
|
|
|
* success
|
|
|
|
*/
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ioerr(db), -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* makroom - make room by splitting the overfull page
|
|
|
|
* this routine will attempt to make room for SPLTMAX times before
|
|
|
|
* giving up.
|
|
|
|
*/
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
makroom(register DBM *db, long int hash, int need)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
long newp;
|
|
|
|
char twin[PBLKSIZ];
|
2008-10-04 09:33:22 -04:00
|
|
|
#if defined _WIN32 && !defined __CYGWIN__
|
1999-08-13 01:37:52 -04:00
|
|
|
char zer[PBLKSIZ];
|
|
|
|
long oldtail;
|
|
|
|
#endif
|
|
|
|
char *pag = db->pagbuf;
|
|
|
|
char *new = twin;
|
|
|
|
register int smax = SPLTMAX;
|
|
|
|
|
|
|
|
do {
|
|
|
|
/*
|
|
|
|
* split the current page
|
|
|
|
*/
|
|
|
|
(void) splpage(pag, new, db->hmask + 1);
|
|
|
|
/*
|
|
|
|
* address of the new page
|
|
|
|
*/
|
|
|
|
newp = (hash & db->hmask) | (db->hmask + 1);
|
|
|
|
debug(("newp: %ld\n", newp));
|
|
|
|
/*
|
2013-05-18 23:10:21 -04:00
|
|
|
* write delay, read avoidance/cache shuffle:
|
1999-08-13 01:37:52 -04:00
|
|
|
* select the page for incoming pair: if key is to go to the new page,
|
|
|
|
* write out the previous one, and copy the new one over, thus making
|
|
|
|
* it the current page. If not, simply write the new page, and we are
|
|
|
|
* still looking at the page of interest. current page is not updated
|
|
|
|
* here, as sdbm_store will do so, after it inserts the incoming pair.
|
|
|
|
*/
|
|
|
|
|
2008-10-04 09:33:22 -04:00
|
|
|
#if defined _WIN32 && !defined __CYGWIN__
|
1999-08-13 01:37:52 -04:00
|
|
|
/*
|
|
|
|
* Fill hole with 0 if made it.
|
|
|
|
* (hole is NOT read as 0)
|
|
|
|
*/
|
|
|
|
oldtail = lseek(db->pagf, 0L, SEEK_END);
|
|
|
|
memset(zer, 0, PBLKSIZ);
|
|
|
|
while (OFF_PAG(newp) > oldtail) {
|
|
|
|
if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
|
|
|
|
write(db->pagf, zer, PBLKSIZ) < 0) {
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
oldtail += PBLKSIZ;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (hash & (db->hmask + 1)) {
|
|
|
|
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|
|
|
|
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
|
|
|
|
return 0;
|
|
|
|
db->pagbno = newp;
|
|
|
|
(void) memcpy(pag, new, PBLKSIZ);
|
|
|
|
}
|
|
|
|
else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
|
|
|
|
|| write(db->pagf, new, PBLKSIZ) < 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!setdbit(db, db->curbit))
|
|
|
|
return 0;
|
|
|
|
/*
|
|
|
|
* see if we have enough room now
|
|
|
|
*/
|
|
|
|
if (fitpair(pag, need))
|
|
|
|
return 1;
|
|
|
|
/*
|
|
|
|
* try again... update curbit and hmask as getpage would have
|
|
|
|
* done. because of our update of the current page, we do not
|
|
|
|
* need to read in anything. BUT we have to write the current
|
|
|
|
* [deferred] page out, as the window of failure is too great.
|
|
|
|
*/
|
2010-04-22 04:04:13 -04:00
|
|
|
db->curbit = 2 * db->curbit +
|
1999-08-13 01:37:52 -04:00
|
|
|
((hash & (db->hmask + 1)) ? 2 : 1);
|
|
|
|
db->hmask |= (db->hmask + 1);
|
|
|
|
|
|
|
|
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
|
|
|
|
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} while (--smax);
|
|
|
|
/*
|
|
|
|
* if we are here, this is real bad news. After SPLTMAX splits,
|
|
|
|
* we still cannot fit the key. say goodnight.
|
|
|
|
*/
|
2010-10-15 21:06:13 -04:00
|
|
|
#if BADMESS
|
|
|
|
(void) (write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44) < 0);
|
1999-08-13 01:37:52 -04:00
|
|
|
#endif
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* the following two routines will break if
|
|
|
|
* deletions aren't taken into account. (ndbm bug)
|
|
|
|
*/
|
|
|
|
datum
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_firstkey(register DBM *db)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
if (db == NULL)
|
|
|
|
return errno = EINVAL, nullitem;
|
|
|
|
/*
|
|
|
|
* start at page 0
|
|
|
|
*/
|
|
|
|
(void) memset(db->pagbuf, 0, PBLKSIZ);
|
|
|
|
if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
|
|
|
|
|| read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
|
|
|
|
return ioerr(db), nullitem;
|
|
|
|
db->pagbno = 0;
|
|
|
|
db->blkptr = 0;
|
|
|
|
db->keyptr = 0;
|
|
|
|
|
|
|
|
return getnext(db);
|
|
|
|
}
|
|
|
|
|
|
|
|
datum
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_nextkey(register DBM *db)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
if (db == NULL)
|
|
|
|
return errno = EINVAL, nullitem;
|
|
|
|
return getnext(db);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* all important binary trie traversal
|
|
|
|
*/
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
getpage(register DBM *db, register long int hash)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register int hbit;
|
|
|
|
register long dbit;
|
|
|
|
register long pagb;
|
|
|
|
|
|
|
|
dbit = 0;
|
|
|
|
hbit = 0;
|
|
|
|
while (dbit < db->maxbno && getdbit(db, dbit))
|
|
|
|
dbit = 2 * dbit + ((hash & ((long) 1 << hbit++)) ? 2 : 1);
|
|
|
|
|
|
|
|
debug(("dbit: %d...", dbit));
|
|
|
|
|
|
|
|
db->curbit = dbit;
|
|
|
|
db->hmask = masks[hbit];
|
|
|
|
|
|
|
|
pagb = hash & db->hmask;
|
|
|
|
/*
|
|
|
|
* see if the block we need is already in memory.
|
|
|
|
* note: this lookaside cache has about 10% hit rate.
|
|
|
|
*/
|
2010-04-22 04:04:13 -04:00
|
|
|
if (pagb != db->pagbno) {
|
1999-08-13 01:37:52 -04:00
|
|
|
/*
|
|
|
|
* note: here, we assume a "hole" is read as 0s.
|
|
|
|
* if not, must zero pagbuf first.
|
|
|
|
*/
|
|
|
|
(void) memset(db->pagbuf, 0, PBLKSIZ);
|
|
|
|
|
|
|
|
if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
|
|
|
|
|| read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
|
|
|
|
return 0;
|
|
|
|
if (!chkpage(db->pagbuf)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
db->pagbno = pagb;
|
|
|
|
|
|
|
|
debug(("pag read: %d\n", pagb));
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
getdbit(register DBM *db, register long int dbit)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register long c;
|
|
|
|
register long dirb;
|
|
|
|
|
|
|
|
c = dbit / BYTESIZ;
|
|
|
|
dirb = c / DBLKSIZ;
|
|
|
|
|
|
|
|
if (dirb != db->dirbno) {
|
|
|
|
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
|
|
|
|
|| read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
|
|
|
|
return 0;
|
|
|
|
db->dirbno = dirb;
|
|
|
|
|
|
|
|
debug(("dir read: %d\n", dirb));
|
|
|
|
}
|
|
|
|
|
|
|
|
return db->dirbuf[c % DBLKSIZ] & (1 << (dbit % BYTESIZ));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
setdbit(register DBM *db, register long int dbit)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register long c;
|
|
|
|
register long dirb;
|
|
|
|
|
|
|
|
c = dbit / BYTESIZ;
|
|
|
|
dirb = c / DBLKSIZ;
|
|
|
|
|
|
|
|
if (dirb != db->dirbno) {
|
|
|
|
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
|
|
|
|
|| read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
|
|
|
|
return 0;
|
|
|
|
db->dirbno = dirb;
|
|
|
|
|
|
|
|
debug(("dir read: %d\n", dirb));
|
|
|
|
}
|
|
|
|
|
|
|
|
db->dirbuf[c % DBLKSIZ] |= (1 << (dbit % BYTESIZ));
|
|
|
|
|
|
|
|
if (dbit >= db->maxbno)
|
|
|
|
db->maxbno += (long) DBLKSIZ * BYTESIZ;
|
|
|
|
|
|
|
|
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
|
|
|
|
|| write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* getnext - get the next key in the page, and if done with
|
|
|
|
* the page, try the next page in sequence
|
|
|
|
*/
|
|
|
|
static datum
|
2006-02-12 23:53:22 -05:00
|
|
|
getnext(register DBM *db)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
datum key;
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
db->keyptr++;
|
|
|
|
key = getnkey(db->pagbuf, db->keyptr);
|
|
|
|
if (key.dptr != NULL)
|
|
|
|
return key;
|
|
|
|
/*
|
|
|
|
* we either run out, or there is nothing on this page..
|
|
|
|
* try the next one... If we lost our position on the
|
|
|
|
* file, we will have to seek.
|
|
|
|
*/
|
|
|
|
db->keyptr = 0;
|
|
|
|
if (db->pagbno != db->blkptr++)
|
|
|
|
if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
|
|
|
|
break;
|
|
|
|
db->pagbno = db->blkptr;
|
|
|
|
if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
|
|
|
|
break;
|
|
|
|
if (!chkpage(db->pagbuf)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ioerr(db), nullitem;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* pair.c */
|
|
|
|
/*
|
|
|
|
* sdbm - ndbm work-alike hashed database library
|
|
|
|
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
|
|
|
|
* author: oz@nexus.yorku.ca
|
|
|
|
* status: public domain.
|
|
|
|
*
|
|
|
|
* page-level routines
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef BSD42
|
|
|
|
/*#include <memory.h>*/
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
|
|
|
|
|
2010-04-22 04:04:13 -04:00
|
|
|
/*
|
|
|
|
* forward
|
1999-08-13 01:37:52 -04:00
|
|
|
*/
|
|
|
|
static int seepair proto((char *, int, char *, int));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* page format:
|
|
|
|
* +------------------------------+
|
|
|
|
* ino | n | keyoff | datoff | keyoff |
|
|
|
|
* +------------+--------+--------+
|
|
|
|
* | datoff | - - - ----> |
|
|
|
|
* +--------+---------------------+
|
|
|
|
* | F R E E A R E A |
|
|
|
|
* +--------------+---------------+
|
|
|
|
* | <---- - - - | data |
|
|
|
|
* +--------+-----+----+----------+
|
|
|
|
* | key | data | key |
|
|
|
|
* +--------+----------+----------+
|
|
|
|
*
|
|
|
|
* calculating the offsets for free area: if the number
|
|
|
|
* of entries (ino[0]) is zero, the offset to the END of
|
|
|
|
* the free area is the block size. Otherwise, it is the
|
|
|
|
* nth (ino[ino[0]]) entry's offset.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
fitpair(char *pag, int need)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register int n;
|
|
|
|
register int off;
|
|
|
|
register int free;
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
|
|
|
|
off = ((n = GET_SHORT(ino,0)) > 0) ? GET_SHORT(ino,n) : PBLKSIZ;
|
2012-02-27 03:40:37 -05:00
|
|
|
free = off - (n + 1) * (int)sizeof(short);
|
|
|
|
need += 2 * (int)sizeof(short);
|
1999-08-13 01:37:52 -04:00
|
|
|
|
|
|
|
debug(("free %d need %d\n", free, need));
|
|
|
|
|
|
|
|
return need <= free;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-02-12 23:53:22 -05:00
|
|
|
putpair(char *pag, datum key, datum val)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register int n;
|
|
|
|
register int off;
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
|
|
|
|
off = ((n = GET_SHORT(ino,0)) > 0) ? GET_SHORT(ino,n) : PBLKSIZ;
|
|
|
|
/*
|
|
|
|
* enter the key first
|
|
|
|
*/
|
|
|
|
off -= key.dsize;
|
|
|
|
if (key.dsize)
|
|
|
|
(void) memcpy(pag + off, key.dptr, key.dsize);
|
|
|
|
PUT_SHORT(ino,n + 1,off);
|
|
|
|
/*
|
|
|
|
* now the data
|
|
|
|
*/
|
|
|
|
off -= val.dsize;
|
|
|
|
if (val.dsize)
|
|
|
|
(void) memcpy(pag + off, val.dptr, val.dsize);
|
|
|
|
PUT_SHORT(ino,n + 2,off);
|
|
|
|
/*
|
|
|
|
* adjust item count
|
|
|
|
*/
|
|
|
|
PUT_SHORT(ino,0,GET_SHORT(ino,0) + 2);
|
|
|
|
}
|
|
|
|
|
|
|
|
static datum
|
2006-02-12 23:53:22 -05:00
|
|
|
getpair(char *pag, datum key)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register int i;
|
|
|
|
register int n;
|
|
|
|
datum val;
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
|
|
|
|
if ((n = GET_SHORT(ino,0)) == 0)
|
|
|
|
return nullitem;
|
|
|
|
|
|
|
|
if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
|
|
|
|
return nullitem;
|
|
|
|
|
|
|
|
val.dptr = pag + GET_SHORT(ino,i + 1);
|
|
|
|
val.dsize = GET_SHORT(ino,i) - GET_SHORT(ino,i + 1);
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
2010-10-15 21:06:13 -04:00
|
|
|
#if SEEDUPS
|
1999-08-13 01:37:52 -04:00
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
duppair(char *pag, datum key)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
return GET_SHORT(ino,0) > 0 &&
|
|
|
|
seepair(pag, GET_SHORT(ino,0), key.dptr, key.dsize) > 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static datum
|
2006-02-12 23:53:22 -05:00
|
|
|
getnkey(char *pag, int num)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
datum key;
|
|
|
|
register int off;
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
|
|
|
|
num = num * 2 - 1;
|
|
|
|
if (GET_SHORT(ino,0) == 0 || num > GET_SHORT(ino,0))
|
|
|
|
return nullitem;
|
|
|
|
|
|
|
|
off = (num > 1) ? GET_SHORT(ino,num - 1) : PBLKSIZ;
|
|
|
|
|
|
|
|
key.dptr = pag + GET_SHORT(ino,num);
|
|
|
|
key.dsize = off - GET_SHORT(ino,num);
|
|
|
|
|
|
|
|
return key;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
delpair(char *pag, datum key)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register int n;
|
|
|
|
register int i;
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
|
|
|
|
if ((n = GET_SHORT(ino,0)) == 0)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
|
|
|
|
return 0;
|
|
|
|
/*
|
|
|
|
* found the key. if it is the last entry
|
|
|
|
* [i.e. i == n - 1] we just adjust the entry count.
|
|
|
|
* hard case: move all data down onto the deleted pair,
|
|
|
|
* shift offsets onto deleted offsets, and adjust them.
|
|
|
|
* [note: 0 < i < n]
|
|
|
|
*/
|
|
|
|
if (i < n - 1) {
|
|
|
|
register int m;
|
|
|
|
register char *dst = pag + (i == 1 ? PBLKSIZ : GET_SHORT(ino,i - 1));
|
|
|
|
register char *src = pag + GET_SHORT(ino,i + 1);
|
2011-03-25 02:46:57 -04:00
|
|
|
register ptrdiff_t zoo = dst - src;
|
1999-08-13 01:37:52 -04:00
|
|
|
|
2011-03-25 02:46:57 -04:00
|
|
|
debug(("free-up %"PRIdPTRDIFF" ", zoo));
|
1999-08-13 01:37:52 -04:00
|
|
|
/*
|
|
|
|
* shift data/keys down
|
|
|
|
*/
|
|
|
|
m = GET_SHORT(ino,i + 1) - GET_SHORT(ino,n);
|
|
|
|
#ifdef DUFF
|
|
|
|
#define MOVB *--dst = *--src
|
|
|
|
|
|
|
|
if (m > 0) {
|
|
|
|
register int loop = (m + 8 - 1) >> 3;
|
|
|
|
|
|
|
|
switch (m & (8 - 1)) {
|
|
|
|
case 0: do {
|
|
|
|
MOVB; case 7: MOVB;
|
|
|
|
case 6: MOVB; case 5: MOVB;
|
|
|
|
case 4: MOVB; case 3: MOVB;
|
|
|
|
case 2: MOVB; case 1: MOVB;
|
|
|
|
} while (--loop);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
#ifdef MEMMOVE
|
|
|
|
memmove(dst, src, m);
|
|
|
|
#else
|
|
|
|
while (m--)
|
|
|
|
*--dst = *--src;
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* adjust offset index up
|
|
|
|
*/
|
|
|
|
while (i < n - 1) {
|
|
|
|
PUT_SHORT(ino,i, GET_SHORT(ino,i + 2) + zoo);
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
PUT_SHORT(ino, 0, GET_SHORT(ino, 0) - 2);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* search for the key in the page.
|
|
|
|
* return offset index in the range 0 < i < n.
|
|
|
|
* return 0 if not found.
|
|
|
|
*/
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
seepair(char *pag, register int n, register char *key, register int siz)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register int i;
|
|
|
|
register int off = PBLKSIZ;
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
|
|
|
|
for (i = 1; i < n; i += 2) {
|
|
|
|
if (siz == off - GET_SHORT(ino,i) &&
|
|
|
|
memcmp(key, pag + GET_SHORT(ino,i), siz) == 0)
|
|
|
|
return i;
|
|
|
|
off = GET_SHORT(ino,i + 1);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-02-12 23:53:22 -05:00
|
|
|
splpage(char *pag, char *new, long int sbit)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
datum key;
|
|
|
|
datum val;
|
|
|
|
|
|
|
|
register int n;
|
|
|
|
register int off = PBLKSIZ;
|
|
|
|
char cur[PBLKSIZ];
|
|
|
|
register short *ino = (short *) cur;
|
|
|
|
|
|
|
|
(void) memcpy(cur, pag, PBLKSIZ);
|
|
|
|
(void) memset(pag, 0, PBLKSIZ);
|
|
|
|
(void) memset(new, 0, PBLKSIZ);
|
|
|
|
|
|
|
|
n = GET_SHORT(ino,0);
|
|
|
|
for (ino++; n > 0; ino += 2) {
|
2010-04-22 04:04:13 -04:00
|
|
|
key.dptr = cur + GET_SHORT(ino,0);
|
1999-08-13 01:37:52 -04:00
|
|
|
key.dsize = off - GET_SHORT(ino,0);
|
|
|
|
val.dptr = cur + GET_SHORT(ino,1);
|
|
|
|
val.dsize = GET_SHORT(ino,0) - GET_SHORT(ino,1);
|
|
|
|
/*
|
|
|
|
* select the page pointer (by looking at sbit) and insert
|
|
|
|
*/
|
|
|
|
(void) putpair((exhash(key) & sbit) ? new : pag, key, val);
|
|
|
|
|
|
|
|
off = GET_SHORT(ino,1);
|
|
|
|
n -= 2;
|
|
|
|
}
|
|
|
|
|
2010-04-22 04:04:13 -04:00
|
|
|
debug(("%d split %d/%d\n", ((short *) cur)[0] / 2,
|
1999-08-13 01:37:52 -04:00
|
|
|
((short *) new)[0] / 2,
|
|
|
|
((short *) pag)[0] / 2));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2010-04-22 04:04:13 -04:00
|
|
|
* check page sanity:
|
1999-08-13 01:37:52 -04:00
|
|
|
* number of entries should be something
|
|
|
|
* reasonable, and all offsets in the index should be in order.
|
|
|
|
* this could be made more rigorous.
|
|
|
|
*/
|
|
|
|
static int
|
2006-02-12 23:53:22 -05:00
|
|
|
chkpage(char *pag)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register int n;
|
|
|
|
register int off;
|
|
|
|
register short *ino = (short *) pag;
|
|
|
|
|
2010-05-17 09:21:17 -04:00
|
|
|
if ((n = GET_SHORT(ino,0)) < 0 || n > PBLKSIZ / (int)sizeof(short))
|
1999-08-13 01:37:52 -04:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (n > 0) {
|
|
|
|
off = PBLKSIZ;
|
|
|
|
for (ino++; n > 0; ino += 2) {
|
|
|
|
if (GET_SHORT(ino,0) > off || GET_SHORT(ino,1) > off ||
|
|
|
|
GET_SHORT(ino,1) > GET_SHORT(ino,0))
|
|
|
|
return 0;
|
|
|
|
off = GET_SHORT(ino,1);
|
|
|
|
n -= 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* hash.c */
|
|
|
|
/*
|
|
|
|
* sdbm - ndbm work-alike hashed database library
|
|
|
|
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
|
|
|
|
* author: oz@nexus.yorku.ca
|
|
|
|
* status: public domain. keep it that way.
|
|
|
|
*
|
|
|
|
* hashing routine
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* polynomial conversion ignoring overflows
|
|
|
|
* [this seems to work remarkably well, in fact better
|
|
|
|
* then the ndbm hash function. Replace at your own risk]
|
|
|
|
* use: 65599 nice.
|
2010-04-22 04:04:13 -04:00
|
|
|
* 65587 even better.
|
1999-08-13 01:37:52 -04:00
|
|
|
*/
|
|
|
|
long
|
2006-02-12 23:53:22 -05:00
|
|
|
sdbm_hash(register char *str, register int len)
|
1999-08-13 01:37:52 -04:00
|
|
|
{
|
|
|
|
register unsigned long n = 0;
|
|
|
|
|
|
|
|
#ifdef DUFF
|
|
|
|
|
|
|
|
#define HASHC n = *str++ + 65599 * n
|
|
|
|
|
|
|
|
if (len > 0) {
|
|
|
|
register int loop = (len + 8 - 1) >> 3;
|
|
|
|
|
|
|
|
switch(len & (8 - 1)) {
|
|
|
|
case 0: do {
|
|
|
|
HASHC; case 7: HASHC;
|
|
|
|
case 6: HASHC; case 5: HASHC;
|
|
|
|
case 4: HASHC; case 3: HASHC;
|
|
|
|
case 2: HASHC; case 1: HASHC;
|
|
|
|
} while (--loop);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
while (len--)
|
|
|
|
n = ((*str++) & 255) + 65587L * n;
|
|
|
|
#endif
|
|
|
|
return n;
|
|
|
|
}
|