Commit: c96174551d79bbb13a28d33e120c1418517fbc6b
Parent: a84487ddffe62190b7fb2060d1612f6f17db2430
Author: Randy Palamar
Date: Mon, 14 Oct 2024 21:42:23 -0600
stop mmap-ing term banks
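Using lseek to find the file size is slow, and mmap performance is unpredictable.
Instead, stat each term bank and read it into the arena with a plain read.
This gives a small but measurable performance improvement (about 8% less wall-clock time):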
old:
avgtime -n 128 ./jdict -d koujien 驀進
real 0.103594
user 0.086875
sys 0.006719
new:
avgtime -n 128 ./jdict -d koujien 驀進
real 0.095000
user 0.078125
sys 0.006953
Diffstat:
M jdict.c | 55 ++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 40 insertions(+), 15 deletions(-)
diff --git a/jdict.c b/jdict.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <stdint.h>
@@ -95,6 +96,38 @@ os_new_arena(size cap)
return a;
}
+/* Returns the size in bytes that stat(2) reports for the path `file`.
+ * If stat fails a message is written to stderr and the process exits
+ * with status 1. */
+static size
+os_file_size(char *file)
+{
+	struct stat info;
+	int err = stat(file, &info);
+	if (err < 0) {
+		fprintf(stderr, "failed to stat: %s\n", file);
+		exit(1);
+	}
+	size nbytes = info.st_size;
+	return nbytes;
+}
+
+/* Reads exactly file_size bytes from the start of `file` into buf and
+ * returns them as an s8 ({.len = file_size, .s = buf}).
+ * buf must have room for at least file_size bytes.
+ * Writes a message to stderr and exits with status 1 if the file cannot
+ * be opened or if fewer than file_size bytes could be read. */
+static s8
+os_read_file(char *file, u8 *buf, size file_size)
+{
+	i32 fd = open(file, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "failed to open: %s\n", file);
+		exit(1);
+	}
+
+	s8 result = {.len = file_size, .s = buf};
+
+	/* NOTE: read(2) is allowed to return fewer bytes than requested,
+	 * so keep reading until the buffer is full, EOF (0) is reached,
+	 * or an error (< 0) is reported */
+	size total = 0;
+	while (total < result.len) {
+		ssize_t rlen = read(fd, result.s + total, result.len - total);
+		if (rlen <= 0)
+			break;
+		total += rlen;
+	}
+	close(fd);
+
+	/* a short total means the read failed or the file is smaller than file_size */
+	if (total != result.len) {
+		fprintf(stderr, "failed to read whole file: %s\n", file);
+		exit(1);
+	}
+
+	return result;
+}
+
static void *
mem_clear(void *p_, u8 c, size len)
{
@@ -294,26 +327,20 @@ count_term_banks(const char *path)
}
static void
-parse_term_bank(Arena *a, struct ht *ht, const char *tbank)
+parse_term_bank(Arena *a, struct ht *ht, char *tbank)
{
Arena start = *a;
- i32 fd = open(tbank, O_RDONLY);
- if (fd < 0)
- die("can't open file: %s\n", tbank);
- size flen = lseek(fd, 0, SEEK_END);
- u8 *data = mmap(NULL, flen, PROT_READ, MAP_PRIVATE, fd, 0);
- close(fd);
-
- if (data == MAP_FAILED)
- die("couldn't mmap file: %s\n", tbank);
+ size file_size = os_file_size(tbank);
+ u8 *file_buf = alloc(a, u8, file_size, ARENA_ALLOC_END|ARENA_NO_CLEAR);
+ s8 data = os_read_file(tbank, file_buf, file_size);
/* allocate tokens */
size ntoks = (1 << HT_EXP) * YOMI_TOKS_PER_ENT + 1;
YomiTok *toks = alloc(a, YomiTok, ntoks, ARENA_ALLOC_END|ARENA_NO_CLEAR);
YomiScanner s = {0};
- yomi_scanner_init(&s, (char *)data, flen);
+ yomi_scanner_init(&s, (char *)data.s, data.len);
i32 r;
while ((r = yomi_scan(&s, toks, ntoks)) < 0) {
switch (r) {
@@ -355,7 +382,7 @@ parse_term_bank(Arena *a, struct ht *ht, const char *tbank)
break;
}
- s8 mem_term = {.len = tstr->end - tstr->start, .s = data + tstr->start};
+ s8 mem_term = {.len = tstr->end - tstr->start, .s = data.s + tstr->start};
DictEnt **n = intern(ht, mem_term);
if (!*n) {
@@ -374,15 +401,13 @@ parse_term_bank(Arena *a, struct ht *ht, const char *tbank)
for (size_t i = 1; i <= tdefs->len; i++) {
DictDef *def = alloc(a, DictDef, 1, ARENA_NO_CLEAR);
def->text = s8_dup(a, (s8){.len = tdefs[i].end - tdefs[i].start,
- .s = data + tdefs[i].start});
+ .s = data.s + tdefs[i].start});
def->next = (*n)->def;
(*n)->def = def;
}
}
cleanup:
- munmap(data, flen);
-
/* NOTE: clear temporary allocations */
a->end = start.end;
}