jdict

command line tool for looking up terms in yomidict dictionaries
git clone anongit@rnpnr.xyz:jdict.git
Log | Files | Refs | Feed | README | LICENSE

yomidict.c (3984B)


      1 /* See LICENSE for license details.
      2  *
      3  * yomidict.c implements a simple lexer for yomichan dictionary
      4  * text. This is all it knows how to do. Finding and reading term
      5  * banks as well as searching through lexed tokens should be
      6  * implemented elsewhere.
      7  */
      8 
      9 #define ul unsigned long
     10 
     11 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
     12 
     13 typedef enum {
     14 	YOMI_UNDEF = 0,
     15 	YOMI_ENTRY = 1,
     16 	YOMI_ARRAY = 2,
     17 	YOMI_STR = 4,
     18 	YOMI_NUM = 8
     19 } YomiType;
     20 
     21 typedef struct {
     22 	unsigned long start;
     23 	unsigned long end;
     24 	unsigned long len;
     25 	int parent; /* parent tok number */
     26 	YomiType type;
     27 } YomiTok;
     28 
     29 typedef struct {
     30 	const char *data;
     31 	ul len;
     32 	ul pos; /* offset in yomi bank */
     33 	ul toknext;
     34 	int parent; /* parent tok of current element */
     35 } YomiScanner;
     36 
     37 enum {
     38 	YOMI_ERROR_NOMEM = -1,
     39 	YOMI_ERROR_INVAL = -2,
     40 	YOMI_ERROR_MALFO = -3
     41 };
     42 
     43 static void
     44 yomi_scanner_init(YomiScanner *s, const char *data, ul datalen)
     45 {
     46 	s->data = data;
     47 	s->len = datalen;
     48 	s->pos = 0;
     49 	s->toknext = 0;
     50 	s->parent = -1;
     51 }
     52 
     53 static YomiTok *
     54 alloctok(YomiScanner *s, YomiTok *toks, ul ntoks)
     55 {
     56 	YomiTok *t;
     57 
     58 	if (ntoks <= s->toknext)
     59 		return NULL;
     60 
     61 	t = &toks[s->toknext++];
     62 	t->parent = -1;
     63 	t->start = -1;
     64 	t->end = -1;
     65 	t->len = 0;
     66 
     67 	return t;
     68 }
     69 
     70 static int
     71 string(YomiScanner *s, YomiTok *t)
     72 {
     73 	const char *d = s->data;
     74 	ul start = s->pos++;
     75 
     76 	for (; s->pos < s->len; s->pos++) {
     77 		/* skip over escaped " */
     78 		if (d[s->pos] == '\\' && s->pos + 1 < s->len && d[s->pos + 1] == '\"') {
     79 			s->pos++;
     80 			continue;
     81 		}
     82 
     83 		/* end of str */
     84 		if (d[s->pos] == '\"') {
     85 			t->start = start + 1;
     86 			t->end = s->pos;
     87 			t->parent = s->parent;
     88 			t->type = YOMI_STR;
     89 			return 0;
     90 		}
     91 	}
     92 
     93 	s->pos = start;
     94 	return YOMI_ERROR_MALFO;
     95 }
     96 
     97 static int
     98 number(YomiScanner *s, YomiTok *t)
     99 {
    100 	const char *d = s->data;
    101 	ul start = s->pos;
    102 
    103 	for (; s->pos < s->len; s->pos++) {
    104 		switch (d[s->pos]) {
    105 		case ' ':
    106 		case ',':
    107 		case '\n':
    108 		case '\r':
    109 		case '\t':
    110 		case ']':
    111 			t->parent = s->parent;
    112 			t->start = start;
    113 			t->end = s->pos;
    114 			t->type = YOMI_NUM;
    115 			s->pos--;
    116 			return 0;
    117 		}
    118 		if (!ISDIGIT(d[s->pos])) {
    119 			s->pos = start;
    120 			return YOMI_ERROR_INVAL;
    121 		}
    122 	}
    123 	s->pos = start;
    124 	return YOMI_ERROR_MALFO;
    125 }
    126 
    127 static int
    128 yomi_scan(YomiScanner *s, YomiTok *toks, ul ntoks)
    129 {
    130 	YomiTok *tok;
    131 	int r, count = s->toknext;
    132 
    133 	if (toks == NULL)
    134 		return -1;
    135 
    136 	for (; s->pos < s->len; s->pos++) {
    137 		switch (s->data[s->pos]) {
    138 		case '[': /* YOMI_ARRAY || YOMI_ENTRY */
    139 			count++;
    140 
    141 			tok = alloctok(s, toks, ntoks);
    142 			if (!tok)
    143 				return YOMI_ERROR_NOMEM;
    144 
    145 			if (s->parent == -1 || toks[s->parent].type != YOMI_ARRAY) {
    146 				tok->type = YOMI_ARRAY;
    147 			} else {
    148 				tok->type = YOMI_ENTRY;
    149 				toks[s->parent].len++;
    150 			}
    151 
    152 			tok->start = s->pos;
    153 			tok->parent = s->parent;
    154 			s->parent = s->toknext - 1; /* the current tok */
    155 			break;
    156 
    157 		case ']':
    158 			if (s->toknext < 1 || s->parent == -1)
    159 				return YOMI_ERROR_INVAL;
    160 
    161 			tok = &toks[s->parent];
    162 			for (;;) {
    163 				if (tok->start != (ul)-1 && tok->end == (ul)-1) {
    164 					/* inside unfinished tok */
    165 					tok->end = s->pos + 1;
    166 					s->parent = tok->parent;
    167 					break;
    168 				} else if (tok->parent == -1) {
    169 					 /* this is the super tok */
    170 					break;
    171 				} else {
    172 					tok = &toks[tok->parent];
    173 				}
    174 			}
    175 			break;
    176 
    177 		case ',':
    178 			if (s->parent != -1 &&
    179 			    toks[s->parent].type != YOMI_ARRAY &&
    180 			    toks[s->parent].type != YOMI_ENTRY)
    181 				s->parent = toks[s->parent].parent;
    182 			break;
    183 
    184 		case '\"':
    185 			tok = alloctok(s, toks, ntoks);
    186 			if (tok == NULL)
    187 				return YOMI_ERROR_NOMEM;
    188 
    189 			r = string(s, tok);
    190 			if (r != 0)
    191 				return r;
    192 
    193 			count++;
    194 			if (s->parent != -1)
    195 				toks[s->parent].len++;
    196 			else
    197 				toks[0].len++;
    198 
    199 		case ' ': /* FALLTHROUGH */
    200 		case '\n':
    201 		case '\r':
    202 		case '\t':
    203 			break;
    204 
    205 		default:
    206 			tok = alloctok(s, toks, ntoks);
    207 			if (tok == NULL)
    208 				return YOMI_ERROR_NOMEM;
    209 
    210 			r = number(s, tok);
    211 			if (r != 0)
    212 				return r;
    213 
    214 			count++;
    215 			if (s->parent != -1)
    216 				toks[s->parent].len++;
    217 			else
    218 				toks[0].len++;
    219 		}
    220 	}
    221 	return count;
    222 }