jdict

command line tool for looking up terms in yomidict dictionaries
git clone anongit@rnpnr.xyz:jdict.git
Log | Files | Refs | Feed | README | LICENSE

yomidict.c (4024B)


      1 /* See LICENSE for license details.
      2  *
      3  * yomidict.c implements a simple lexer for yomichan dictionary
      4  * text. This is all it knows how to do. Finding and reading term
      5  * banks as well as searching through lexed tokens should be
      6  * implemented elsewhere.
      7  */
      8 #include <stddef.h>
      9 #include <stdlib.h>
     10 
     11 #define ul unsigned long
     12 
     13 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
     14 
     15 typedef enum {
     16 	YOMI_UNDEF = 0,
     17 	YOMI_ENTRY = 1,
     18 	YOMI_ARRAY = 2,
     19 	YOMI_STR = 4,
     20 	YOMI_NUM = 8
     21 } YomiType;
     22 
     23 typedef struct {
     24 	unsigned long start;
     25 	unsigned long end;
     26 	unsigned long len;
     27 	int parent; /* parent tok number */
     28 	YomiType type;
     29 } YomiTok;
     30 
     31 typedef struct {
     32 	const char *data;
     33 	ul len;
     34 	ul pos; /* offset in yomi bank */
     35 	ul toknext;
     36 	int parent; /* parent tok of current element */
     37 } YomiScanner;
     38 
     39 enum {
     40 	YOMI_ERROR_NOMEM = -1,
     41 	YOMI_ERROR_INVAL = -2,
     42 	YOMI_ERROR_MALFO = -3
     43 };
     44 
     45 static void
     46 yomi_scanner_init(YomiScanner *s, const char *data, ul datalen)
     47 {
     48 	s->data = data;
     49 	s->len = datalen;
     50 	s->pos = 0;
     51 	s->toknext = 0;
     52 	s->parent = -1;
     53 }
     54 
     55 static YomiTok *
     56 alloctok(YomiScanner *s, YomiTok *toks, ul ntoks)
     57 {
     58 	YomiTok *t;
     59 
     60 	if (ntoks <= s->toknext)
     61 		return NULL;
     62 
     63 	t = &toks[s->toknext++];
     64 	t->parent = -1;
     65 	t->start = -1;
     66 	t->end = -1;
     67 	t->len = 0;
     68 
     69 	return t;
     70 }
     71 
     72 static int
     73 string(YomiScanner *s, YomiTok *t)
     74 {
     75 	const char *d = s->data;
     76 	ul start = s->pos++;
     77 
     78 	for (; s->pos < s->len; s->pos++) {
     79 		/* skip over escaped " */
     80 		if (d[s->pos] == '\\' && s->pos + 1 < s->len && d[s->pos + 1] == '\"') {
     81 			s->pos++;
     82 			continue;
     83 		}
     84 
     85 		/* end of str */
     86 		if (d[s->pos] == '\"') {
     87 			t->start = start + 1;
     88 			t->end = s->pos;
     89 			t->parent = s->parent;
     90 			t->type = YOMI_STR;
     91 			return 0;
     92 		}
     93 	}
     94 
     95 	s->pos = start;
     96 	return YOMI_ERROR_MALFO;
     97 }
     98 
     99 static int
    100 number(YomiScanner *s, YomiTok *t)
    101 {
    102 	const char *d = s->data;
    103 	ul start = s->pos;
    104 
    105 	for (; s->pos < s->len; s->pos++) {
    106 		switch (d[s->pos]) {
    107 		case ' ':
    108 		case ',':
    109 		case '\n':
    110 		case '\r':
    111 		case '\t':
    112 		case ']':
    113 			t->parent = s->parent;
    114 			t->start = start;
    115 			t->end = s->pos;
    116 			t->type = YOMI_NUM;
    117 			s->pos--;
    118 			return 0;
    119 		}
    120 		if (!ISDIGIT(d[s->pos])) {
    121 			s->pos = start;
    122 			return YOMI_ERROR_INVAL;
    123 		}
    124 	}
    125 	s->pos = start;
    126 	return YOMI_ERROR_MALFO;
    127 }
    128 
    129 static int
    130 yomi_scan(YomiScanner *s, YomiTok *toks, ul ntoks)
    131 {
    132 	YomiTok *tok;
    133 	int r, count = s->toknext;
    134 
    135 	if (toks == NULL)
    136 		return -1;
    137 
    138 	for (; s->pos < s->len; s->pos++) {
    139 		switch (s->data[s->pos]) {
    140 		case '[': /* YOMI_ARRAY || YOMI_ENTRY */
    141 			count++;
    142 
    143 			tok = alloctok(s, toks, ntoks);
    144 			if (!tok)
    145 				return YOMI_ERROR_NOMEM;
    146 
    147 			if (s->parent == -1 || toks[s->parent].type != YOMI_ARRAY) {
    148 				tok->type = YOMI_ARRAY;
    149 			} else {
    150 				tok->type = YOMI_ENTRY;
    151 				toks[s->parent].len++;
    152 			}
    153 
    154 			tok->start = s->pos;
    155 			tok->parent = s->parent;
    156 			s->parent = s->toknext - 1; /* the current tok */
    157 			break;
    158 
    159 		case ']':
    160 			if (s->toknext < 1 || s->parent == -1)
    161 				return YOMI_ERROR_INVAL;
    162 
    163 			tok = &toks[s->parent];
    164 			for (;;) {
    165 				if (tok->start != (ul)-1 && tok->end == (ul)-1) {
    166 					/* inside unfinished tok */
    167 					tok->end = s->pos + 1;
    168 					s->parent = tok->parent;
    169 					break;
    170 				} else if (tok->parent == -1) {
    171 					 /* this is the super tok */
    172 					break;
    173 				} else {
    174 					tok = &toks[tok->parent];
    175 				}
    176 			}
    177 			break;
    178 
    179 		case ',':
    180 			if (s->parent != -1 &&
    181 			    toks[s->parent].type != YOMI_ARRAY &&
    182 			    toks[s->parent].type != YOMI_ENTRY)
    183 				s->parent = toks[s->parent].parent;
    184 			break;
    185 
    186 		case '\"':
    187 			tok = alloctok(s, toks, ntoks);
    188 			if (tok == NULL)
    189 				return YOMI_ERROR_NOMEM;
    190 
    191 			r = string(s, tok);
    192 			if (r != 0)
    193 				return r;
    194 
    195 			count++;
    196 			if (s->parent != -1)
    197 				toks[s->parent].len++;
    198 			else
    199 				toks[0].len++;
    200 
    201 		case ' ': /* FALLTHROUGH */
    202 		case '\n':
    203 		case '\r':
    204 		case '\t':
    205 			break;
    206 
    207 		default:
    208 			tok = alloctok(s, toks, ntoks);
    209 			if (tok == NULL)
    210 				return YOMI_ERROR_NOMEM;
    211 
    212 			r = number(s, tok);
    213 			if (r != 0)
    214 				return r;
    215 
    216 			count++;
    217 			if (s->parent != -1)
    218 				toks[s->parent].len++;
    219 			else
    220 				toks[0].len++;
    221 		}
    222 	}
    223 	return count;
    224 }