/* jdict.c (7809B) */
1 /* See LICENSE for license details. */ 2 #include <dirent.h> 3 #include <fcntl.h> 4 #include <limits.h> 5 #include <stddef.h> 6 #include <stdint.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <sys/mman.h> 11 #include <unistd.h> 12 13 #include "arg.h" 14 #include "util.c" 15 #include "yomidict.c" 16 17 #define YOMI_TOKS_PER_ENT 10 18 19 /* buffer length for interactive mode */ 20 #define BUFLEN 256 21 22 /* Number of hash table slots (1 << HT_EXP) */ 23 #define HT_EXP 20 24 25 typedef uint64_t u64; 26 typedef uint32_t u32; 27 typedef int32_t i32; 28 29 typedef struct { 30 s8 term; 31 s8 *defs; 32 size_t ndefs; 33 } DictEnt; 34 35 struct ht { 36 DictEnt **ents; 37 i32 len; 38 }; 39 40 typedef struct { 41 const char *rom; 42 const char *name; 43 struct ht ht; 44 } Dict; 45 46 #include "config.h" 47 48 char *argv0; 49 50 static void 51 usage(void) 52 { 53 die("usage: %s [-d path] [-F FS] [-i] term ...\n", argv0); 54 } 55 56 static void 57 merge_ents(DictEnt *a, DictEnt *b) 58 { 59 size_t i, nlen = a->ndefs + b->ndefs; 60 61 if (nlen == 0) 62 return; 63 64 a->defs = xreallocarray(a->defs, nlen, sizeof(s8)); 65 66 for (i = 0; i < b->ndefs; i++) 67 a->defs[a->ndefs + i] = b->defs[i]; 68 a->ndefs = nlen; 69 } 70 71 /* FNV-1a hash */ 72 static u64 73 hash(s8 v) 74 { 75 u64 h = 0x3243f6a8885a308d; /* digits of pi */ 76 for (; v.len; v.len--) { 77 h ^= v.s[v.len - 1] & 0xFF; 78 h *= 1111111111111111111; /* random prime */ 79 } 80 return h; 81 } 82 83 static i32 84 ht_lookup(u64 hash, int exp, i32 idx) 85 { 86 u32 mask = ((u32)1 << exp) - 1; 87 u32 step = (hash >> (64 - exp)) | 1; 88 return (idx + step) & mask; 89 } 90 91 static DictEnt * 92 intern(struct ht *t, DictEnt *e) 93 { 94 s8 key = e->term; 95 u64 h = hash(key); 96 i32 i = h; 97 for (;;) { 98 i = ht_lookup(h, HT_EXP, i); 99 if (!t->ents[i]) { 100 /* empty slot */ 101 if ((u32)t->len + 1 == (u32)1<<(HT_EXP - 1)) { 102 fputs("intern: ht exceeded 0.5 fill factor\n", stderr); 103 return 
NULL; 104 } 105 t->len++; 106 t->ents[i] = e; 107 return e; 108 } else if (!s8cmp(t->ents[i]->term, e->term)) { 109 /* found; return the stored instance */ 110 return t->ents[i]; 111 } 112 } 113 } 114 115 static size_t 116 count_term_banks(const char *path) 117 { 118 DIR *dir; 119 struct dirent *dent; 120 size_t nbanks = 0; 121 122 if (!(dir = opendir(path))) 123 die("opendir(): failed to open: %s\n", path); 124 125 /* count term banks in path */ 126 while ((dent = readdir(dir)) != NULL) 127 if (dent->d_type == DT_REG) 128 nbanks++; 129 /* remove index.json from count */ 130 nbanks--; 131 132 closedir(dir); 133 return nbanks; 134 } 135 136 /* takes a token of type YOMI_ENTRY and creates a DictEnt */ 137 static DictEnt * 138 make_ent(YomiTok *toks, char *data) 139 { 140 size_t i; 141 DictEnt *d; 142 YomiTok *tstr = NULL, *tdefs = NULL; 143 144 if (toks[0].type != YOMI_ENTRY) { 145 fprintf(stderr, "toks[0].type = %d\n", toks[0].type); 146 return NULL; 147 } 148 149 for (i = 1; i < toks[0].len; i++) 150 switch (toks[i].type) { 151 case YOMI_STR: 152 if (tstr == NULL) 153 tstr = &toks[i]; 154 break; 155 case YOMI_ARRAY: 156 if (tdefs == NULL) 157 tdefs = &toks[i]; 158 default: /* FALLTHROUGH */ 159 break; 160 } 161 162 /* check if entry was valid */ 163 if (tdefs == NULL || tstr == NULL) { 164 fprintf(stderr, "make_ent: %s == NULL\n", 165 tdefs == NULL? 
"tdefs" : "tstr"); 166 return NULL; 167 } 168 169 d = xreallocarray(NULL, 1, sizeof(DictEnt)); 170 d->term = s8dup(data + tstr->start, tstr->end - tstr->start); 171 d->ndefs = tdefs->len; 172 d->defs = xreallocarray(NULL, d->ndefs, sizeof(s8)); 173 for (i = 1; i <= d->ndefs; i++) 174 d->defs[i - 1] = s8dup(data + tdefs[i].start, 175 tdefs[i].end - tdefs[i].start); 176 177 return d; 178 } 179 180 static void 181 parse_term_bank(struct ht *ht, const char *tbank) 182 { 183 int i = 0, r, ntoks, fd; 184 size_t flen; 185 char *data; 186 YomiTok *toks = NULL; 187 YomiScanner s = {0}; 188 DictEnt *e, *n; 189 190 if ((fd = open(tbank, O_RDONLY)) < 0) 191 die("can't open file: %s\n", tbank); 192 flen = lseek(fd, 0, SEEK_END); 193 data = mmap(NULL, flen, PROT_READ, MAP_PRIVATE, fd, 0); 194 close(fd); 195 196 if (data == MAP_FAILED) 197 die("couldn't mmap file: %s\n", tbank); 198 199 /* allocate tokens */ 200 ntoks = (1 << HT_EXP) * YOMI_TOKS_PER_ENT + 1; 201 toks = xreallocarray(toks, ntoks, sizeof(YomiTok)); 202 203 yomi_scanner_init(&s, data, flen); 204 while ((r = yomi_scan(&s, toks, ntoks)) < 0) { 205 switch (r) { 206 case YOMI_ERROR_NOMEM: 207 goto cleanup; 208 case YOMI_ERROR_INVAL: 209 case YOMI_ERROR_MALFO: 210 fprintf(stderr, "yomi_parse: %s\n", 211 r == YOMI_ERROR_INVAL? 
"YOMI_ERROR_INVAL" 212 : "YOMI_ERROR_MALFO"); 213 goto cleanup; 214 } 215 } 216 217 for (i = 0; i < r; i++) { 218 if (toks[i].type != YOMI_ENTRY) 219 continue; 220 221 if ((e = make_ent(&toks[i], data)) == NULL) 222 break; 223 if ((n = intern(ht, e)) == NULL) 224 break; 225 if (n == e) 226 continue; 227 /* hash table entry already exists, append new defs */ 228 if (s8cmp(n->term, e->term)) { 229 fputs("hash collision: ", stderr); 230 fwrite(e->term.s, e->term.len, 1, stderr); 231 fputc('\t', stderr); 232 fwrite(n->term.s, n->term.len, 1, stderr); 233 fputc('\n', stderr); 234 } 235 merge_ents(n, e); 236 free(e->term.s); 237 free(e->defs); 238 free(e); 239 } 240 241 cleanup: 242 munmap(data, flen); 243 free(toks); 244 } 245 246 static int 247 make_dict(Dict *d) 248 { 249 char path[PATH_MAX - 20], tbank[PATH_MAX]; 250 size_t nbanks; 251 252 d->ht.ents = xreallocarray(NULL, sizeof(DictEnt *), 1 << HT_EXP); 253 254 snprintf(path, LEN(path), "%s/%s", prefix, d->rom); 255 if ((nbanks = count_term_banks(path)) == 0) { 256 fprintf(stderr, "no term banks found: %s\n", path); 257 return 0; 258 } 259 260 for (size_t i = 1; i <= nbanks; i++) { 261 snprintf(tbank, LEN(tbank), "%s/term_bank_%zu.json", path, i); 262 parse_term_bank(&d->ht, tbank); 263 } 264 265 return 1; 266 } 267 268 static void 269 make_dicts(Dict *dicts, size_t ndicts) 270 { 271 for (size_t i = 0; i < ndicts; i++) 272 if (!make_dict(&dicts[i])) 273 die("make_dict(%s): returned NULL\n", dicts[i].rom); 274 } 275 276 static DictEnt * 277 find_ent(s8 term, Dict *d) 278 { 279 u64 h = hash(term); 280 i32 i = ht_lookup(h, HT_EXP, (i32)h); 281 return d->ht.ents[i]; 282 } 283 284 static void 285 find_and_print(s8 term, Dict *d) 286 { 287 DictEnt *ent = find_ent(term, d); 288 size_t i; 289 290 if (!ent || s8cmp(term, ent->term)) 291 return; 292 293 for (i = 0; i < ent->ndefs; i++) { 294 if (!s8cmp(fsep, s8("\n"))) 295 ent->defs[i] = unescape(ent->defs[i]); 296 fputs(d->name, stdout); 297 fwrite(fsep.s, fsep.len, 1, 
stdout); 298 fwrite(ent->defs[i].s, ent->defs[i].len, 1, stdout); 299 fputc('\n', stdout); 300 } 301 } 302 303 static void 304 find_and_print_defs(Dict *dict, s8 *terms, size_t nterms) 305 { 306 size_t i; 307 308 if (!make_dict(dict)) { 309 fputs("failed to allocate dict: ", stdout); 310 puts(dict->rom); 311 return; 312 } 313 314 for (i = 0; i < nterms; i++) 315 find_and_print(terms[i], dict); 316 } 317 318 static void 319 repl(Dict *dicts, size_t ndicts) 320 { 321 char t[BUFLEN]; 322 s8 buf = {t, BUFLEN}; 323 size_t i; 324 325 make_dicts(dicts, ndicts); 326 327 fsep = s8("\n"); 328 for (;;) { 329 fputs(repl_prompt, stdout); 330 fflush(stdout); 331 buf.len = BUFLEN; 332 if (fgets(buf.s, buf.len, stdin) == NULL) 333 break; 334 buf.len = strlen(buf.s); 335 for (i = 0; i < ndicts; i++) 336 find_and_print(s8trim(buf), &dicts[i]); 337 } 338 puts(repl_quit); 339 } 340 341 int 342 main(int argc, char *argv[]) 343 { 344 s8 *terms = NULL; 345 char *t; 346 Dict *dicts = NULL; 347 size_t i, ndicts = 0, nterms = 0; 348 int iflag = 0; 349 350 argv0 = argv[0]; 351 352 ARGBEGIN { 353 case 'd': 354 t = EARGF(usage()); 355 for (i = 0; i < LEN(default_dict_map); i++) { 356 if (strcmp(t, default_dict_map[i].rom) == 0) { 357 dicts = &default_dict_map[i]; 358 ndicts++; 359 break; 360 } 361 } 362 if (dicts == NULL) 363 die("invalid dictionary name: %s\n", t); 364 break; 365 case 'F': 366 t = EARGF(usage()); 367 fsep = unescape((s8){t, strlen(t)}); 368 break; 369 case 'i': 370 iflag = 1; 371 break; 372 default: 373 usage(); 374 } ARGEND 375 376 if (ndicts == 0) { 377 dicts = default_dict_map; 378 ndicts = LEN(default_dict_map); 379 } 380 381 /* remaining argv elements are terms to search for */ 382 for (i = 0; argc && *argv; argv++, i++, argc--) { 383 terms = xreallocarray(terms, ++nterms, sizeof(s8)); 384 terms[i].s = *argv; 385 terms[i].len = strlen(terms[i].s); 386 } 387 388 if (nterms == 0 && iflag == 0) 389 usage(); 390 391 if (iflag == 0) 392 for (i = 0; i < ndicts; i++) 393 
find_and_print_defs(&dicts[i], terms, nterms); 394 else 395 repl(dicts, ndicts); 396 397 free(terms); 398 399 return 0; 400 }