jdict.c (13983B)
1 /* See LICENSE for license details. */ 2 #ifndef asm 3 #ifdef __asm 4 #define asm __asm 5 #else 6 #define asm __asm__ 7 #endif 8 #endif 9 10 #define FORCE_INLINE inline __attribute__((always_inline)) 11 12 #ifdef __ARM_ARCH_ISA_A64 13 /* TODO? debuggers just loop here forever and need a manual PC increment (jump +1 in gdb) */ 14 #define debugbreak() asm volatile ("brk 0xf000") 15 #elif __x86_64__ 16 #define debugbreak() asm volatile ("int3; nop") 17 #endif 18 19 #ifdef _DEBUG 20 #define ASSERT(c) do { debugbreak(); } while (0) 21 #else 22 #define ASSERT(c) {} 23 #endif 24 25 #ifndef unreachable 26 #define unreachable() __builtin_unreachable() 27 #endif 28 29 #define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a)) 30 #define ISSPACE(c) ((c) == ' ' || (c) == '\n' || (c) == '\t') 31 32 #define MEGABYTE (1024ULL * 1024ULL) 33 34 typedef struct { 35 size len; 36 u8 *s; 37 } s8; 38 #define s8(cstr) (s8){.len = ARRAY_COUNT(cstr) - 1, .s = (u8 *)cstr} 39 40 typedef struct { 41 u8 *data; 42 u32 cap; 43 u32 widx; 44 i32 fd; 45 b32 errors; 46 } Stream; 47 48 typedef struct { 49 u8 *beg, *end; 50 #ifdef _DEBUG_ARENA 51 size min_capacity_remaining; 52 #endif 53 } Arena; 54 55 #include "yomidict.c" 56 57 #define YOMI_TOKS_PER_ENT 10 58 59 /* Number of hash table slots (1 << HT_EXP) */ 60 #define HT_EXP 20 61 62 typedef struct DictDef { 63 s8 text; 64 struct DictDef *next; 65 } DictDef; 66 67 typedef struct { 68 s8 term; 69 DictDef *def; 70 } DictEnt; 71 72 struct ht { 73 DictEnt **ents; 74 i32 len; 75 }; 76 77 typedef struct { 78 s8 rom; 79 s8 name; 80 struct ht ht; 81 } Dict; 82 83 #include "config.h" 84 85 static void __attribute__((noreturn)) os_exit(i32); 86 87 static b32 os_write(iptr, s8); 88 static b32 os_read_stdin(u8 *, size); 89 90 static iptr os_begin_path_stream(Stream *, Arena *, u32); 91 static s8 os_get_valid_file(iptr, s8, Arena *, u32); 92 static void os_end_path_stream(iptr); 93 94 static Stream error_stream; 95 static Stream stdout_stream; 96 97 static void 98 stream_flush(Stream *s) 99 { 100 if (s->fd <= 0) { 101 s->errors = 1; 102 } else if (s->widx) { 103 s->errors = !os_write(s->fd, (s8){.len = s->widx, .s = s->data}); 104 if (!s->errors) s->widx = 0; 105 } 106 } 107 108 static void 109 stream_append_byte(Stream *s, u8 b) 110 { 111 if (s->widx + 1 > s->cap) 112 stream_flush(s); 113 if (!s->errors) 114 s->data[s->widx++] = b; 115 } 116 117 static void 118 stream_append_s8(Stream *s, s8 str) 119 { 120 if (str.len > s->cap - s->widx) 121 stream_flush(s); 122 s->errors |= (s->cap - s->widx) < str.len; 123 if (!s->errors) { 124 for (size i = 0; i < str.len; i++) 125 s->data[s->widx++] = str.s[i]; 126 } 127 } 128 129 static void 130 stream_ensure_newline(Stream *s) 131 { 132 if (s->widx && s->data[s->widx - 1] != '\n') 133 stream_append_byte(s, '\n'); 134 } 135 136 #ifdef _DEBUG_ARENA 137 static void 138 stream_append_u64(Stream *s, u64 n) 139 { 140 u8 tmp[64]; 141 u8 *end = tmp + sizeof(tmp); 142 u8 *beg = end; 143 do { *--beg = '0' + (n % 10); } while (n /= 10); 144 stream_append_s8(s, (s8){.len = end - beg, .s = beg}); 145 } 146 #endif 147 148 static s8 149 cstr_to_s8(char *cstr) 150 { 151 s8 result = {.s = (u8 *)cstr}; 152 if (cstr) while (*cstr) { result.len++; cstr++; } 153 return result; 154 } 155 156 static void __attribute__((noreturn)) 157 die(Stream *s) 158 { 159 stream_ensure_newline(s); 160 stream_flush(s); 161 os_exit(1); 162 } 163 164 static void * 165 mem_clear(void *p_, u8 c, size len) 166 { 167 u8 *p = p_; 168 while (len) p[--len] = c; 169 return p; 170 } 171 172 enum arena_flags { 173 ARENA_NONE = 0 << 0, 174 ARENA_NO_CLEAR = 1 << 0, 175 ARENA_ALLOC_END = 1 << 1, 176 }; 177 178 #define alloc(a, t, n, flags) (t *)alloc_(a, sizeof(t), _Alignof(t), n, flags) 179 static void * 180 alloc_(Arena *a, size len, size align, size count, u32 flags) 181 { 182 size padding; 183 if (flags & ARENA_ALLOC_END) padding = (usize)a->end & (align - 1); 184 else padding = -(usize)a->beg & (align - 1); 185 186 size available = a->end - a->beg - padding; 187 if (available <= 0 || available / len <= count) 188 ASSERT(0); 189 190 void *result; 191 if (flags & ARENA_ALLOC_END) { 192 a->end -= padding + count * len; 193 result = a->end; 194 } else { 195 result = a->beg + padding; 196 a->beg += padding + count * len; 197 } 198 199 #ifdef _DEBUG_ARENA 200 if (a->end - a->beg < a->min_capacity_remaining) 201 a->min_capacity_remaining = a->end - a->beg; 202 #endif 203 204 if (flags & ARENA_NO_CLEAR) return result; 205 else return mem_clear(result, 0, count * len); 206 } 207 208 static void 209 usage(s8 argv0) 210 { 211 stream_append_s8(&error_stream, s8("usage: ")); 212 stream_append_s8(&error_stream, argv0); 213 stream_append_s8(&error_stream, s8(" [-d path] [-F FS] [-i] term ...\n")); 214 die(&error_stream); 215 } 216 217 static s8 218 s8_dup(Arena *a, s8 old) 219 { 220 s8 result = {.len = old.len, .s = alloc(a, u8, old.len, ARENA_NO_CLEAR)}; 221 for (size i = 0; i < old.len; i++) 222 result.s[i] = old.s[i]; 223 return result; 224 } 225 226 static b32 227 s8_equal(s8 a, s8 b) 228 { 229 i32 result = 0; 230 if (a.len != b.len) 231 return 0; 232 /* NOTE: we assume short strings in this program */ 233 for (size i = 0; i < a.len; i++) 234 result += b.s[i] - a.s[i]; 235 return result == 0; 236 } 237 238 static s8 239 s8_cut_head(s8 s, size count) 240 { 241 s8 result = s; 242 result.s += count; 243 result.len -= count; 244 return result; 245 } 246 247 /* 248 * trim whitespace from start and end of str 249 * returns a new s8 (same memory) 250 */ 251 static s8 252 s8trim(s8 str) 253 { 254 u8 *p = str.s + str.len - 1; 255 256 for (; str.len && ISSPACE(*p); str.len--, p--); 257 for (; str.len && ISSPACE(*str.s); str.len--, str.s++); 258 259 return str; 260 } 261 262 /* replace escaped control chars with their actual char */ 263 static s8 264 unescape(s8 str) 265 { 266 for (size i = 0; i < str.len; i++) { 267 if (str.s[i] == '\\') { 268 switch (str.s[i + 1]) { 269 case 'n': str.s[i] = '\n'; break; 270 case 't': str.s[i] = '\t'; break; 271 default: continue; 272 } 273 str.len--; 274 for (size j = i + 1; j < str.len; j++) 275 str.s[j] = str.s[j + 1]; 276 } 277 } 278 return str; 279 } 280 281 /* FNV-1a hash */ 282 static u64 283 hash(s8 v) 284 { 285 u64 h = 0x3243f6a8885a308d; /* digits of pi */ 286 for (; v.len; v.len--) { 287 h ^= v.s[v.len - 1] & 0xFF; 288 h *= 1111111111111111111; /* random prime */ 289 } 290 return h; 291 } 292 293 static i32 294 ht_lookup(u64 hash, int exp, i32 idx) 295 { 296 u32 mask = ((u32)1 << exp) - 1; 297 u32 step = (hash >> (64 - exp)) | 1; 298 return (idx + step) & mask; 299 } 300 301 static DictEnt ** 302 intern(struct ht *t, s8 key) 303 { 304 u64 h = hash(key); 305 i32 i = h; 306 for (;;) { 307 i = ht_lookup(h, HT_EXP, i); 308 if (!t->ents[i]) { 309 /* empty slot */ 310 #ifdef _DEBUG 311 if ((u32)t->len + 1 == (u32)1<<(HT_EXP - 1)) { 312 stream_append_s8(&error_stream, 313 s8("intern: ht exceeded 0.5 fill factor\n")); 314 } 315 #endif 316 t->len++; 317 return t->ents + i; 318 } else if (s8_equal(t->ents[i]->term, key)) { 319 /* found; return the stored instance */ 320 return t->ents + i; 321 } 322 /* NOTE: else relookup and try again */ 323 } 324 } 325 326 static void 327 parse_term_bank(Arena *a, struct ht *ht, s8 data) 328 { 329 /* allocate tokens */ 330 size ntoks = (1 << HT_EXP) * YOMI_TOKS_PER_ENT + 1; 331 YomiTok *toks = alloc(a, YomiTok, ntoks, ARENA_ALLOC_END|ARENA_NO_CLEAR); 332 333 YomiScanner s = {0}; 334 yomi_scanner_init(&s, (char *)data.s, data.len); 335 i32 r; 336 while ((r = yomi_scan(&s, toks, ntoks)) < 0) { 337 switch (r) { 338 case YOMI_ERROR_NOMEM: 339 goto cleanup; 340 case YOMI_ERROR_INVAL: 341 case YOMI_ERROR_MALFO: 342 stream_append_s8(&error_stream, s8("yomi_parse: ")); 343 if (r == YOMI_ERROR_INVAL) 344 stream_append_s8(&error_stream, s8("YOMI_ERROR_INVAL\n")); 345 else 346 stream_append_s8(&error_stream, s8("YOMI_ERROR_MALFO\n")); 347 goto cleanup; 348 } 349 } 350 351 for (i32 i = 0; i < r; i++) { 352 YomiTok *base_tok = toks + i; 353 if (base_tok->type != YOMI_ENTRY) 354 continue; 355 356 YomiTok *tstr = 0, *tdefs = 0; 357 for (usize j = 1; j < base_tok->len; j++) { 358 switch (base_tok[j].type) { 359 case YOMI_STR: if (!tstr) tstr = base_tok + j; break; 360 case YOMI_ARRAY: if (!tdefs) tdefs = base_tok + j; break; 361 default: break; 362 } 363 } 364 365 /* check if entry was valid */ 366 if (!tdefs || !tstr) { 367 stream_append_s8(&error_stream, s8("parse_term_bank: invalid entry: missing ")); 368 if (!tdefs) stream_append_s8(&error_stream, s8("definition token\n")); 369 else stream_append_s8(&error_stream, s8("name token\n")); 370 break; 371 } 372 373 s8 mem_term = {.len = tstr->end - tstr->start, .s = data.s + tstr->start}; 374 DictEnt **n = intern(ht, mem_term); 375 376 if (!*n) { 377 *n = alloc(a, DictEnt, 1, 0); 378 (*n)->term = s8_dup(a, mem_term); 379 } else { 380 if (!s8_equal((*n)->term, mem_term)) { 381 stream_append_s8(&error_stream, s8("hash collision: ")); 382 stream_append_s8(&error_stream, mem_term); 383 stream_append_byte(&error_stream, '\t'); 384 stream_append_s8(&error_stream, (*n)->term); 385 stream_append_byte(&error_stream, '\n'); 386 } 387 } 388 389 for (usize i = 1; i <= tdefs->len; i++) { 390 DictDef *def = alloc(a, DictDef, 1, ARENA_NO_CLEAR); 391 def->text = s8_dup(a, (s8){.len = tdefs[i].end - tdefs[i].start, 392 .s = data.s + tdefs[i].start}); 393 def->next = (*n)->def; 394 (*n)->def = def; 395 } 396 } 397 398 cleanup: 399 stream_ensure_newline(&error_stream); 400 } 401 402 static int 403 make_dict(Arena *a, Dict *d) 404 { 405 u8 *starting_arena_end = a->end; 406 Stream path = {.cap = 1 * MEGABYTE}; 407 path.data = alloc(a, u8, path.cap, ARENA_ALLOC_END|ARENA_NO_CLEAR); 408 d->ht.ents = alloc(a, DictEnt *, 1 << HT_EXP, 0); 409 410 stream_append_s8(&path, prefix); 411 stream_append_s8(&path, os_path_sep); 412 stream_append_s8(&path, d->rom); 413 iptr path_stream = os_begin_path_stream(&path, a, ARENA_ALLOC_END); 414 415 u8 *arena_end = a->end; 416 s8 fn_pre = s8("term"); 417 for (s8 filedata = os_get_valid_file(path_stream, fn_pre, a, ARENA_ALLOC_END); 418 filedata.len; 419 filedata = os_get_valid_file(path_stream, fn_pre, a, ARENA_ALLOC_END)) 420 { 421 parse_term_bank(a, &d->ht, filedata); 422 a->end = arena_end; 423 } 424 os_end_path_stream(path_stream); 425 426 a->end = starting_arena_end; 427 428 return 1; 429 } 430 431 static void 432 make_dicts(Arena *a, Dict *dicts, u32 ndicts) 433 { 434 for (u32 i = 0; i < ndicts; i++) { 435 if (!make_dict(a, &dicts[i])) { 436 stream_append_s8(&error_stream, s8("make_dict failed for: ")); 437 stream_append_s8(&error_stream, dicts[i].rom); 438 stream_append_byte(&error_stream, '\n'); 439 } 440 } 441 } 442 443 static DictEnt * 444 find_ent(s8 term, Dict *d) 445 { 446 u64 h = hash(term); 447 i32 i = ht_lookup(h, HT_EXP, (i32)h); 448 return d->ht.ents[i]; 449 } 450 451 static void 452 find_and_print(s8 term, Dict *d) 453 { 454 DictEnt *ent = find_ent(term, d); 455 456 if (!ent || !s8_equal(term, ent->term)) 457 return; 458 459 b32 print_for_readability = s8_equal(fsep, s8("\n")); 460 b32 printed_header = 0; 461 for (DictDef *def = ent->def; def; def = def->next) { 462 if (print_for_readability) 463 def->text = unescape(def->text); 464 /* NOTE: some dictionaries are "hand-made" by idiots and have definitions 465 * with only white space in them */ 466 def->text = s8trim(def->text); 467 if (def->text.len) { 468 if (!print_for_readability) { 469 stream_append_s8(&stdout_stream, d->name); 470 } else if (!printed_header) { 471 stream_append_s8(&stdout_stream, s8("\x1b[36;1m")); 472 stream_append_s8(&stdout_stream, d->name); 473 stream_append_s8(&stdout_stream, s8("\x1b[0m")); 474 printed_header = 1; 475 } 476 477 stream_append_s8(&stdout_stream, fsep); 478 stream_append_s8(&stdout_stream, def->text); 479 stream_append_byte(&stdout_stream, '\n'); 480 } 481 } 482 if (print_for_readability && printed_header) 483 stream_append_byte(&stdout_stream, '\n'); 484 stream_flush(&stdout_stream); 485 } 486 487 static void 488 find_and_print_defs(Arena *a, Dict *dict, s8 *terms, u32 nterms) 489 { 490 if (!make_dict(a, dict)) { 491 stream_append_s8(&error_stream, s8("failed to allocate dict: ")); 492 stream_append_s8(&error_stream, dict->rom); 493 stream_append_byte(&stdout_stream, '\n'); 494 return; 495 } 496 497 for (u32 i = 0; i < nterms; i++) 498 find_and_print(terms[i], dict); 499 } 500 501 static b32 502 get_stdin_line(Stream *buf) 503 { 504 b32 result = 0; 505 for (; buf->widx < buf->cap; buf->widx++) { 506 u8 *c = buf->data + buf->widx; 507 if (!os_read_stdin(c, 1) || *c == (u8)-1) { 508 break; 509 } else if (*c == '\n') { 510 result = 1; 511 break; 512 } 513 } 514 return result; 515 } 516 517 static void 518 repl(Arena *a, Dict *dicts, u32 ndicts) 519 { 520 Stream buf = {.cap = 4096}; 521 buf.data = alloc(a, u8, buf.cap, ARENA_NO_CLEAR); 522 523 make_dicts(a, dicts, ndicts); 524 525 fsep = s8("\n"); 526 for (;;) { 527 stream_append_s8(&stdout_stream, repl_prompt); 528 stream_flush(&stdout_stream); 529 if (!get_stdin_line(&buf)) 530 break; 531 s8 trimmed = s8trim((s8){.len = buf.widx, .s = buf.data}); 532 for (u32 i = 0; i < ndicts; i++) 533 find_and_print(trimmed, &dicts[i]); 534 buf.widx = 0; 535 } 536 stream_append_s8(&stdout_stream, repl_quit); 537 } 538 539 static i32 540 jdict(Arena *a, i32 argc, char *argv[]) 541 { 542 Dict *dicts = 0; 543 i32 ndicts = 0, nterms = 0; 544 i32 iflag = 0; 545 546 s8 argv0 = cstr_to_s8(argv[0]); 547 for (argv++, argc--; argv[0] && argv[0][0] == '-' && argv[0][1]; argc--, argv++) { 548 /* NOTE: '--' to end parameters */ 549 if (argv[0][1] == '-' && argv[0][2] == 0) { 550 argv++; 551 argc--; 552 break; 553 } 554 switch (argv[0][1]) { 555 case 'F': 556 if (!argv[1] || !argv[1][0]) 557 usage(argv0); 558 fsep = unescape(cstr_to_s8(argv[1])); 559 argv++; 560 break; 561 case 'd': { 562 if (!argv[1] || !argv[1][0]) 563 usage(argv0); 564 s8 dname = cstr_to_s8(argv[1]); 565 for (u32 j = 0; j < ARRAY_COUNT(default_dict_map); j++) { 566 if (s8_equal(dname, default_dict_map[j].rom)) { 567 dicts = &default_dict_map[j]; 568 ndicts++; 569 break; 570 } 571 } 572 if (!dicts) { 573 stream_append_s8(&error_stream, s8("invalid dictionary name: ")); 574 stream_append_s8(&error_stream, dname); 575 die(&error_stream); 576 } 577 argv++; 578 } break; 579 case 'i': iflag = 1; break; 580 default: usage(argv0); break; 581 } 582 } 583 584 if (ndicts == 0) { 585 dicts = default_dict_map; 586 ndicts = ARRAY_COUNT(default_dict_map); 587 } 588 589 /* NOTE: remaining argv elements are search terms */ 590 nterms = argc; 591 s8 *terms = alloc(a, s8, nterms, 0); 592 for (i32 i = 0; argc && *argv; argv++, i++, argc--) 593 terms[i] = cstr_to_s8(*argv); 594 595 if (nterms == 0 && iflag == 0) 596 usage(argv0); 597 598 if (iflag == 0) 599 for (i32 i = 0; i < ndicts; i++) 600 find_and_print_defs(a, &dicts[i], terms, nterms); 601 else 602 repl(a, dicts, ndicts); 603 604 #ifdef _DEBUG_ARENA 605 stream_append_s8(&error_stream, s8("min remaining arena capacity: ")); 606 stream_append_u64(&error_stream, memory.min_capacity_remaining); 607 stream_append_s8(&error_stream, s8("\nremaining arena capacity: ")); 608 stream_append_u64(&error_stream, memory.end - memory.beg); 609 #endif 610 611 stream_ensure_newline(&error_stream); 612 stream_flush(&error_stream); 613 614 stream_ensure_newline(&stdout_stream); 615 stream_flush(&stdout_stream); 616 617 return 0; 618 }