jdict

command-line tool for looking up terms in yomidict dictionaries
git clone anongit@rnpnr.xyz:jdict.git
Log | Files | Refs | Feed | README | LICENSE

Commit: 73755710cecc6c69ec90197ba001b9a580441bfd
Parent: 53c4625aea0b09dbaf8c46a380454b4a015a6991
Author: Randy Palamar
Date:   Sat, 18 Nov 2023 13:44:56 -0700

use fixed-length strings internally

This is slightly slower than sticking to NUL-terminated strings,
but the semantics are much nicer.

Diffstat:
M config.def.h | 2 +-
M jdict.c | 66 +++++++++++++++++++++++++++++++++---------------------------------
M util.c | 50 ++++++++++++++++++++++++++++++--------------------
M util.h | 15 +++++++++++----
4 files changed, 75 insertions(+), 58 deletions(-)

diff --git a/config.def.h b/config.def.h @@ -7,7 +7,7 @@ static char *prefix = "/usr/share/yomidicts"; /* field separator for output printing */ -static char *fsep = "\t"; +static s8 fsep = s8("\t"); /* repl prompt and quit strings */ static char *repl_prompt = "\033[32;1m入力:\033[0m "; diff --git a/jdict.c b/jdict.c @@ -22,8 +22,8 @@ #define BUFLEN 256 typedef struct { - char *term; - char **defs; + s8 term; + s8 *defs; size_t ndefs; } DictEnt; @@ -58,9 +58,9 @@ free_ents(DictEnt *ents, size_t nents) for (i = 0; i < nents; i++) { for (j = 0; j < ents[i].ndefs; j++) - free(ents[i].defs[j]); + free(ents[i].defs[j].s); free(ents[i].defs); - free(ents[i].term); + free(ents[i].term.s); } free(ents); } @@ -68,14 +68,7 @@ free_ents(DictEnt *ents, size_t nents) static int entcmp(DictEnt *a, DictEnt *b) { - if (a->term == NULL || b->term == NULL) { - if (a->term == NULL && b->term) - return -1; - else if (a->term && b->term == NULL) - return 1; - return 0; - } - return strcmp(a->term, b->term); + return s8cmp(a->term, b->term); } static void @@ -86,7 +79,7 @@ merge_ents(DictEnt *a, DictEnt *b) if (nlen == 0) return; - a->defs = xreallocarray(a->defs, nlen, sizeof(char *)); + a->defs = xreallocarray(a->defs, nlen, sizeof(s8)); for (i = 0; i < b->ndefs; i++) a->defs[a->ndefs + i] = b->defs[i]; @@ -103,7 +96,7 @@ dedup(Dict *d) for (j = i+1; j < d->nents && !entcmp(&d->ents[i], &d->ents[j]); j++) { merge_ents(&d->ents[i], &d->ents[j]); /* don't leak memory after merging */ - free(d->ents[j].term); + free(d->ents[j].term.s); free(d->ents[j].defs); } memcpy(&dents[len++], &d->ents[i], sizeof(DictEnt)); @@ -177,12 +170,12 @@ make_ent(YomiTok *toks, char *data) } d = xreallocarray(NULL, 1, sizeof(DictEnt)); - d->term = xmemdup(data + tstr->start, tstr->end - tstr->start); + d->term = s8dup(data + tstr->start, tstr->end - tstr->start); d->ndefs = tdefs->len; - d->defs = xreallocarray(NULL, d->ndefs, sizeof(char *)); + d->defs = xreallocarray(NULL, d->ndefs, sizeof(s8)); for (i = 1; 
i <= d->ndefs; i++) - d->defs[i - 1] = xmemdup(data + tdefs[i].start, - tdefs[i].end - tdefs[i].start); + d->defs[i - 1] = s8dup(data + tdefs[i].start, + tdefs[i].end - tdefs[i].start); return d; } @@ -348,14 +341,14 @@ make_dicts(Dict *dicts, size_t ndicts) } static DictEnt * -find_ent(const char *term, DictEnt *ents, size_t nents) +find_ent(s8 term, DictEnt *ents, size_t nents) { int r; if (nents == 0) return NULL; - r = strcmp(term, ents[nents/2].term); + r = s8cmp(term, ents[nents/2].term); if (r == 0) return &ents[nents/2]; if (r < 0) @@ -368,7 +361,7 @@ find_ent(const char *term, DictEnt *ents, size_t nents) } static void -find_and_print(const char *term, Dict *d) +find_and_print(s8 term, Dict *d) { DictEnt *ent = find_ent(term, d->ents, d->nents); size_t i; @@ -377,16 +370,17 @@ find_and_print(const char *term, Dict *d) return; for (i = 0; i < ent->ndefs; i++) { + if (!s8cmp(fsep, s8("\n"))) + ent->defs[i] = unescape(ent->defs[i]); fputs(d->name, stdout); - fputs(fsep, stdout); - fputs(!strcmp(fsep, "\n")? 
unescape(ent->defs[i]) - : ent->defs[i], stdout); + fwrite(fsep.s, fsep.len, 1, stdout); + fwrite(ent->defs[i].s, ent->defs[i].len, 1, stdout); fputc('\n', stdout); } } static void -find_and_print_defs(Dict *dict, char **terms, size_t nterms) +find_and_print_defs(Dict *dict, s8 *terms, size_t nterms) { size_t i; @@ -405,19 +399,22 @@ find_and_print_defs(Dict *dict, char **terms, size_t nterms) static void repl(Dict *dicts, size_t ndicts) { - char buf[BUFLEN]; + char t[BUFLEN]; + s8 buf = {t, BUFLEN}; size_t i; make_dicts(dicts, ndicts); - fsep = "\n"; + fsep = s8("\n"); for (;;) { fputs(repl_prompt, stdout); fflush(stdout); - if (fgets(buf, LEN(buf), stdin) == NULL) + buf.len = BUFLEN; + if (fgets(buf.s, buf.len, stdin) == NULL) break; + buf.len = strlen(buf.s); for (i = 0; i < ndicts; i++) - find_and_print(trim(buf), &dicts[i]); + find_and_print(s8trim(buf), &dicts[i]); } puts(repl_quit); @@ -428,7 +425,8 @@ repl(Dict *dicts, size_t ndicts) int main(int argc, char *argv[]) { - char **terms = NULL, *t; + s8 *terms = NULL; + char *t; Dict *dicts = NULL; size_t i, ndicts = 0, nterms = 0; int iflag = 0; @@ -449,7 +447,8 @@ main(int argc, char *argv[]) die("invalid dictionary name: %s\n", t); break; case 'F': - fsep = unescape(EARGF(usage())); + t = EARGF(usage()); + fsep = unescape((s8){t, strlen(t)}); break; case 'i': iflag = 1; @@ -465,8 +464,9 @@ main(int argc, char *argv[]) /* remaining argv elements are terms to search for */ for (i = 0; argc && *argv; argv++, i++, argc--) { - terms = xreallocarray(terms, ++nterms, sizeof(char *)); - terms[i] = *argv; + terms = xreallocarray(terms, ++nterms, sizeof(s8)); + terms[i].s = *argv; + terms[i].len = strlen(terms[i].s); } if (nterms == 0 && iflag == 0) diff --git a/util.c b/util.c @@ -20,29 +20,38 @@ die(const char *fmt, ...) 
exit(1); } +int +s8cmp(s8 a, s8 b) +{ + if (a.len == 0 || a.len != b.len) + return a.len - b.len; + return memcmp(a.s, b.s, a.len); +} + /* * trim whitespace from start and end of str - * returns start of trimmed str + * returns a new s8 (same memory) */ -char * -trim(char *s) +s8 +s8trim(s8 str) { - char *p = &s[strlen(s)-1]; + char *p = &str.s[str.len-1]; - for (; isspace(*p); *p = 0, p--); - for (; *s && isspace(*s); s++); + for (; str.len && isspace(*p); str.len--, p--); + for (; str.len && isspace(*str.s); str.len--, str.s++); - return s; + return str; } /* replace escaped control chars with their actual char */ -char * -unescape(char *s) +s8 +unescape(s8 str) { - char *t = s; + char *t = str.s; + ptrdiff_t rem = str.len; int off; - while ((t = strchr(t, '\\')) != NULL) { + while ((t = memchr(t, '\\', rem)) != NULL) { off = 1; switch (t[1]) { case 'n': t[0] = '\n'; t++; break; @@ -50,10 +59,11 @@ unescape(char *s) case 'u': t++; continue; default: off++; } - memmove(t, t + off, strlen(t + off) + 1); + rem = str.len-- - (t - str.s) - off; + memmove(t, t + off, rem); } - return s; + return str; } void * @@ -67,13 +77,13 @@ xreallocarray(void *o, size_t n, size_t s) return new; } -char * -xmemdup(void *src, ptrdiff_t len) +s8 +s8dup(void *src, ptrdiff_t len) { - char *p; + s8 str = {0, len}; if (len < 0) - die("xmemdup(): negative len\n"); - p = xreallocarray(NULL, 1, len + 1); - p[len] = 0; - return memcpy(p, src, len); + die("s8dup(): negative len\n"); + str.s = xreallocarray(NULL, 1, len); + memcpy(str.s, src, len); + return str; } diff --git a/util.h b/util.h @@ -1,8 +1,15 @@ /* See LICENSE for license details. 
*/ #define LEN(a) (sizeof(a) / sizeof(*a)) -void die(const char *, ...); -char *unescape(char *); -char *trim(char *); -char *xmemdup(void *, ptrdiff_t); +typedef struct { + char *s; + ptrdiff_t len; +} s8; +#define s8(s) (s8){s, LEN(s) - 1} + +int s8cmp(s8, s8); +s8 s8dup(void *, ptrdiff_t); +s8 s8trim(s8); +s8 unescape(s8); void *xreallocarray(void *, size_t, size_t); +void die(const char *, ...);