Commit: 55fba3d1594a3b06f14eadeb46ac261d7a6d5f9c
Parent: a7c87daaeaef492de6690dacb07fdf348c758b78
Author: Randy Palamar
Date: Sat, 22 Oct 2022 13:31:59 -0600
properly merge and remove duplicate entries
the method used causes higher peak memory usage in both modes but lowers
runtime memory usage in interactive mode.
Diffstat:
M | jdict.c | | | 58 | ++++++++++++++++++++++++++++++++++++++-------------------- |
1 file changed, 38 insertions(+), 20 deletions(-)
diff --git a/jdict.c b/jdict.c
@@ -49,38 +49,55 @@ free_ents(DictEnt *ents, size_t nents)
ents = NULL;
}
-/* FIXME: this isn't the best, we are modifying the value of const ptrs
- * and wasting some memory by not freeing b.
- */
static int
+entcmp(const void *va, const void *vb)
+{
+ const DictEnt *a = va, *b = vb;
+ return strcmp(a->term, b->term);
+}
+
+
+static void
merge_ents(DictEnt *a, DictEnt *b)
{
size_t i, nlen = a->ndefs + b->ndefs;
a->defs = xreallocarray(a->defs, nlen, sizeof(char *));
- for (i = 0; i < b->ndefs; i++) {
+ for (i = 0; i < b->ndefs; i++)
a->defs[a->ndefs + i] = b->defs[i];
- b->defs[i] = NULL;
- }
a->ndefs = nlen;
-
- free(b->defs);
- b->defs = NULL;
- b->ndefs = 0;
-
- return 1;
}
-static int
-entcmp(const void *va, const void *vb)
+static DictEnt *
+dedup(DictEnt *ents, size_t *nents)
{
- int r;
- const DictEnt *a = va, *b = vb;
-
- if (!(r = strcmp(a->term, b->term)))
- return merge_ents((DictEnt *)a, (DictEnt *)b);
- return r;
+ size_t i, len = 0;
+ DictEnt *dents = xreallocarray(NULL, *nents, sizeof(DictEnt));
+
+ for (i = 0; i < *nents - 1; i++) {
+ if (!entcmp(&ents[i], &ents[i+1])) {
+ /* merge and copy then skip the next ent */
+ merge_ents(&ents[i], &ents[i+1]);
+ memcpy(&dents[len++], &ents[i++], sizeof(DictEnt));
+ /* don't leak memory after merging */
+ free(ents[i].term);
+ free(ents[i].defs);
+ } else {
+ memcpy(&dents[len++], &ents[i], sizeof(DictEnt));
+ }
+ }
+ /* move last ent if it wasn't a duplicate */
+ if (i + 1 < *nents)
+ memcpy(&dents[len++], &ents[i+1], sizeof(DictEnt));
+
+ /* all entries were copied to dents so old ents can be freed.
+ * the term and defs ptrs shouldn't be removed since they still
+ * to their respective data. the duplicate ones are freed above
+ */
+ free(ents);
+ *nents = len;
+ return dents;
}
/* takes a token of type YOMI_ENTRY and creates a DictEnt */
@@ -217,6 +234,7 @@ make_dict(struct Dict *dict, size_t *nents)
return NULL;
}
qsort(ents, *nents, sizeof(DictEnt), entcmp);
+ ents = dedup(ents, nents);
return ents;
}