yomidict.c (3984B)
/* See LICENSE for license details.
 *
 * yomidict.c implements a simple lexer for yomichan dictionary
 * text. This is all it knows how to do. Finding and reading term
 * banks as well as searching through lexed tokens should be
 * implemented elsewhere.
 */

#define ul unsigned long

#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')

/* Kind of a lexed token. */
typedef enum {
	YOMI_UNDEF = 0,
	YOMI_ENTRY = 1,
	YOMI_ARRAY = 2,
	YOMI_STR = 4,
	YOMI_NUM = 8
} YomiType;

/*
 * One lexed token. start and end are byte offsets into the scanned
 * data and stay at (unsigned long)-1 until they are known:
 *   - YOMI_STR:   start is the first byte after the opening quote,
 *                 end is the offset of the closing quote.
 *   - YOMI_NUM:   start is the first byte of the number, end is the
 *                 offset of the delimiter that follows it.
 *   - YOMI_ARRAY,
 *     YOMI_ENTRY: start is the offset of '[', end is one past the
 *                 matching ']'.
 * len is bumped once for every string or number lexed while this
 * token is the current parent, and once for every YOMI_ENTRY opened
 * directly inside a YOMI_ARRAY.
 */
typedef struct {
	unsigned long start;
	unsigned long end;
	unsigned long len;
	int parent; /* parent tok number */
	YomiType type;
} YomiTok;

typedef struct {
	const char *data;
	ul len;
	ul pos; /* offset in yomi bank */
	ul toknext;
	int parent; /* parent tok of current element */
} YomiScanner;

enum {
	YOMI_ERROR_NOMEM = -1,
	YOMI_ERROR_INVAL = -2,
	YOMI_ERROR_MALFO = -3
};

/*
 * Point the scanner at data (datalen bytes) and reset it so that
 * scanning starts at offset 0 with no tokens used and no parent.
 */
static void
yomi_scanner_init(YomiScanner *s, const char *data, ul datalen)
{
	s->data = data;
	s->len = datalen;
	s->pos = 0;
	s->toknext = 0;
	s->parent = -1;
}

/*
 * Hand out the next unused slot of toks, or NULL when all ntoks
 * slots are taken. The slot is reset to "no parent, no bounds,
 * no children, undefined type" before it is returned.
 */
static YomiTok *
alloctok(YomiScanner *s, YomiTok *toks, ul ntoks)
{
	YomiTok *t;

	if (ntoks <= s->toknext)
		return NULL;

	t = &toks[s->toknext++];
	t->parent = -1;
	t->start = (ul)-1;
	t->end = (ul)-1;
	t->len = 0;
	t->type = YOMI_UNDEF;

	return t;
}

/*
 * Lex a string whose opening quote is at s->pos. On success t is
 * filled in, s->pos is left on the closing quote and 0 is returned.
 * If the data ends before the closing quote, s->pos is rewound to
 * the opening quote and YOMI_ERROR_MALFO is returned.
 */
static int
string(YomiScanner *s, YomiTok *t)
{
	const char *d = s->data;
	ul start = s->pos++;

	for (; s->pos < s->len; s->pos++) {
		/*
		 * A backslash always consumes the character after it.
		 * Only skipping \" would pair the second backslash of
		 * an escaped backslash (\\) with a closing quote.
		 */
		if (d[s->pos] == '\\') {
			s->pos++;
			continue;
		}

		/* end of str */
		if (d[s->pos] == '\"') {
			t->start = start + 1;
			t->end = s->pos;
			t->parent = s->parent;
			t->type = YOMI_STR;
			return 0;
		}
	}

	s->pos = start;
	return YOMI_ERROR_MALFO;
}

/*
 * Lex an integer starting at s->pos: an optional leading '-'
 * followed by decimal digits, terminated by whitespace, ',' or ']'.
 * On success t is filled in, s->pos is left on the last byte of the
 * number (so the caller's increment lands on the delimiter) and 0 is
 * returned. On failure s->pos is rewound to the start and
 * YOMI_ERROR_INVAL (unexpected byte, or '-' without digits) or
 * YOMI_ERROR_MALFO (data ended before a delimiter) is returned.
 */
static int
number(YomiScanner *s, YomiTok *t)
{
	const char *d = s->data;
	ul start = s->pos;
	ul digits;

	/* optional sign */
	if (s->pos < s->len && d[s->pos] == '-')
		s->pos++;
	digits = s->pos;

	for (; s->pos < s->len; s->pos++) {
		switch (d[s->pos]) {
		case ' ':
		case ',':
		case '\n':
		case '\r':
		case '\t':
		case ']':
			/* a sign must be followed by at least one digit */
			if (digits != start && s->pos == digits) {
				s->pos = start;
				return YOMI_ERROR_INVAL;
			}
			t->parent = s->parent;
			t->start = start;
			t->end = s->pos;
			t->type = YOMI_NUM;
			s->pos--;
			return 0;
		}
		if (!ISDIGIT(d[s->pos])) {
			s->pos = start;
			return YOMI_ERROR_INVAL;
		}
	}
	s->pos = start;
	return YOMI_ERROR_MALFO;
}

/*
 * Lex s->data from s->pos to the end, storing tokens in toks
 * (capacity ntoks).
 *
 * Returns the token count (s->toknext on entry plus the tokens
 * produced by this call) or a negative value on failure:
 *   -1 (numerically YOMI_ERROR_NOMEM) when toks is NULL or full,
 *   YOMI_ERROR_INVAL on an unexpected byte or an unmatched ']',
 *   YOMI_ERROR_MALFO when a string or number runs off the data.
 * When a string or number fails, the token slot reserved for it is
 * released again and s->pos is left at the start of that element.
 */
static int
yomi_scan(YomiScanner *s, YomiTok *toks, ul ntoks)
{
	YomiTok *tok;
	int r, count = (int)s->toknext;

	if (toks == NULL)
		return -1;

	for (; s->pos < s->len; s->pos++) {
		switch (s->data[s->pos]) {
		case '[': /* YOMI_ARRAY || YOMI_ENTRY */
			tok = alloctok(s, toks, ntoks);
			if (!tok)
				return YOMI_ERROR_NOMEM;
			count++;

			/* a '[' directly inside an array is an entry */
			if (s->parent == -1 || toks[s->parent].type != YOMI_ARRAY) {
				tok->type = YOMI_ARRAY;
			} else {
				tok->type = YOMI_ENTRY;
				toks[s->parent].len++;
			}

			tok->start = s->pos;
			tok->parent = s->parent;
			s->parent = (int)(s->toknext - 1); /* the current tok */
			break;

		case ']':
			if (s->toknext < 1 || s->parent == -1)
				return YOMI_ERROR_INVAL;

			/* close the innermost token that is still open */
			tok = &toks[s->parent];
			for (;;) {
				if (tok->start != (ul)-1 && tok->end == (ul)-1) {
					/* inside unfinished tok */
					tok->end = s->pos + 1;
					s->parent = tok->parent;
					break;
				} else if (tok->parent == -1) {
					/* this is the super tok */
					break;
				} else {
					tok = &toks[tok->parent];
				}
			}
			break;

		case ',':
			if (s->parent != -1 &&
			    toks[s->parent].type != YOMI_ARRAY &&
			    toks[s->parent].type != YOMI_ENTRY)
				s->parent = toks[s->parent].parent;
			break;

		case '\"':
			tok = alloctok(s, toks, ntoks);
			if (tok == NULL)
				return YOMI_ERROR_NOMEM;

			r = string(s, tok);
			if (r != 0) {
				/* give the unused slot back */
				s->toknext--;
				return r;
			}

			count++;
			if (s->parent != -1)
				toks[s->parent].len++;
			else
				toks[0].len++;
			break;

		case ' ':
		case '\n':
		case '\r':
		case '\t':
			break;

		default:
			tok = alloctok(s, toks, ntoks);
			if (tok == NULL)
				return YOMI_ERROR_NOMEM;

			r = number(s, tok);
			if (r != 0) {
				/* give the unused slot back */
				s->toknext--;
				return r;
			}

			count++;
			if (s->parent != -1)
				toks[s->parent].len++;
			else
				toks[0].len++;
		}
	}
	return count;
}