yomidict.c (4024B)
/* See LICENSE for license details.
 *
 * yomidict.c implements a simple lexer for yomichan dictionary
 * text. This is all it knows how to do. Finding and reading term
 * banks as well as searching through lexed tokens should be
 * implemented elsewhere.
 */
#include <stddef.h>
#include <stdlib.h>

#define ul unsigned long

#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')

/* Kinds of token produced by yomi_scan(). */
typedef enum {
	YOMI_UNDEF = 0,
	YOMI_ENTRY = 1,
	YOMI_ARRAY = 2,
	YOMI_STR = 4,
	YOMI_NUM = 8
} YomiType;

/*
 * A lexed token. start and end are byte offsets into the scanned data,
 * with end being one past the last byte of the token:
 *   - strings:        start is the byte after the opening quote and
 *                     end is the offset of the closing quote
 *   - numbers:        start is the first byte (sign or digit) and
 *                     end is the offset of the terminating byte
 *   - arrays/entries: start is the offset of '[' and end is one past
 *                     the matching ']'; end stays (ul)-1 while the
 *                     token is still open
 * len counts the children that yomi_scan() attributed to this token.
 */
typedef struct {
	unsigned long start;
	unsigned long end;
	unsigned long len;
	int parent; /* parent tok number, -1 if none */
	YomiType type;
} YomiTok;

/* Lexer state; set up with yomi_scanner_init(). */
typedef struct {
	const char *data;
	ul len;
	ul pos; /* offset in yomi bank */
	ul toknext; /* index of the next unused tok */
	int parent; /* parent tok of current element */
} YomiScanner;

enum {
	YOMI_ERROR_NOMEM = -1, /* out of toks (or no tok storage given) */
	YOMI_ERROR_INVAL = -2, /* unexpected byte in the input */
	YOMI_ERROR_MALFO = -3  /* data ended in the middle of a token */
};

/* Point s at data (datalen bytes) and reset it to the start of input. */
static void
yomi_scanner_init(YomiScanner *s, const char *data, ul datalen)
{
	s->data = data;
	s->len = datalen;
	s->pos = 0;
	s->toknext = 0;
	s->parent = -1;
}

/*
 * Claim the next unused tok from toks (capacity ntoks) and reset its
 * fields. Returns NULL when every tok is already in use.
 */
static YomiTok *
alloctok(YomiScanner *s, YomiTok *toks, ul ntoks)
{
	YomiTok *t;

	if (ntoks <= s->toknext)
		return NULL;

	t = &toks[s->toknext++];
	t->parent = -1;
	t->start = (ul)-1;
	t->end = (ul)-1;
	t->len = 0;
	t->type = YOMI_UNDEF;

	return t;
}

/*
 * Lex the string whose opening quote is at s->pos and describe it in t.
 *
 * On success returns 0 and leaves s->pos on the closing quote. When the
 * data ends before a closing quote is found s->pos is restored and
 * YOMI_ERROR_MALFO is returned.
 */
static int
string(YomiScanner *s, YomiTok *t)
{
	const char *d = s->data;
	ul start = s->pos++;

	for (; s->pos < s->len; s->pos++) {
		/* A backslash escapes whatever byte follows it. Stepping
		 * over that byte keeps both \" and the second half of \\
		 * from being examined, so "a\\" ends at its final quote. */
		if (d[s->pos] == '\\') {
			s->pos++;
			continue;
		}

		/* end of str */
		if (d[s->pos] == '\"') {
			t->start = start + 1;
			t->end = s->pos;
			t->parent = s->parent;
			t->type = YOMI_STR;
			return 0;
		}
	}

	s->pos = start;
	return YOMI_ERROR_MALFO;
}

/*
 * Lex the integer starting at s->pos and describe it in t. An optional
 * leading '-' is accepted; at least one digit must follow it.
 *
 * On success returns 0 and leaves s->pos on the last byte of the number
 * so that the terminating byte is seen by the caller. Returns
 * YOMI_ERROR_INVAL for an unexpected byte and YOMI_ERROR_MALFO when the
 * data ends before a terminator; s->pos is unchanged in both cases.
 */
static int
number(YomiScanner *s, YomiTok *t)
{
	const char *d = s->data;
	ul start = s->pos;
	ul first; /* offset of the first expected digit */
	ul i = start;

	if (i < s->len && d[i] == '-')
		i++;
	first = i;

	for (; i < s->len; i++) {
		switch (d[i]) {
		case ' ':
		case ',':
		case '\n':
		case '\r':
		case '\t':
		case ']':
			/* a sign with no digits is not a number */
			if (i == first)
				return YOMI_ERROR_INVAL;
			t->parent = s->parent;
			t->start = start;
			t->end = i;
			t->type = YOMI_NUM;
			s->pos = i - 1;
			return 0;
		default:
			break;
		}
		if (!ISDIGIT(d[i]))
			return YOMI_ERROR_INVAL;
	}

	return YOMI_ERROR_MALFO;
}

/*
 * Lex s->data from s->pos onwards, storing tokens in toks (capacity
 * ntoks).
 *
 * A '[' opens a YOMI_ARRAY unless the current parent is itself a
 * YOMI_ARRAY, in which case it opens a YOMI_ENTRY and is counted in the
 * parent's len. Strings and numbers are counted in the len of the
 * current parent, or of toks[0] when there is no parent.
 *
 * Returns the number of toks in use (s->toknext on entry plus the toks
 * produced by this call) or one of the negative YOMI_ERROR_* values.
 * When a string or number fails to lex, the tok claimed for it is
 * handed back so the scanner can be called again from the same
 * position.
 */
static int
yomi_scan(YomiScanner *s, YomiTok *toks, ul ntoks)
{
	YomiTok *tok;
	int r, count = s->toknext;

	if (toks == NULL)
		return YOMI_ERROR_NOMEM;

	for (; s->pos < s->len; s->pos++) {
		switch (s->data[s->pos]) {
		case '[': /* YOMI_ARRAY || YOMI_ENTRY */
			count++;

			tok = alloctok(s, toks, ntoks);
			if (!tok)
				return YOMI_ERROR_NOMEM;

			if (s->parent == -1 || toks[s->parent].type != YOMI_ARRAY) {
				tok->type = YOMI_ARRAY;
			} else {
				tok->type = YOMI_ENTRY;
				toks[s->parent].len++;
			}

			tok->start = s->pos;
			tok->parent = s->parent;
			s->parent = s->toknext - 1; /* the current tok */
			break;

		case ']':
			if (s->toknext < 1 || s->parent == -1)
				return YOMI_ERROR_INVAL;

			/* walk up the parents until an open tok is found */
			tok = &toks[s->parent];
			for (;;) {
				if (tok->start != (ul)-1 && tok->end == (ul)-1) {
					/* inside unfinished tok */
					tok->end = s->pos + 1;
					s->parent = tok->parent;
					break;
				} else if (tok->parent == -1) {
					/* this is the super tok */
					break;
				} else {
					tok = &toks[tok->parent];
				}
			}
			break;

		case ',':
			if (s->parent != -1 &&
			    toks[s->parent].type != YOMI_ARRAY &&
			    toks[s->parent].type != YOMI_ENTRY)
				s->parent = toks[s->parent].parent;
			break;

		case '\"':
			tok = alloctok(s, toks, ntoks);
			if (tok == NULL)
				return YOMI_ERROR_NOMEM;

			r = string(s, tok);
			if (r != 0) {
				s->toknext--; /* hand the unused tok back */
				return r;
			}

			count++;
			if (s->parent != -1)
				toks[s->parent].len++;
			else
				toks[0].len++;
			break;

		case ' ':
		case '\n':
		case '\r':
		case '\t':
			break;

		default:
			tok = alloctok(s, toks, ntoks);
			if (tok == NULL)
				return YOMI_ERROR_NOMEM;

			r = number(s, tok);
			if (r != 0) {
				s->toknext--; /* hand the unused tok back */
				return r;
			}

			count++;
			if (s->parent != -1)
				toks[s->parent].len++;
			else
				toks[0].len++;
		}
	}
	return count;
}