Commit: 73e7573e49b63dbf70c55427fa3dfb5df6102554
Parent: c20911f58d50f6a446c0c4959fc36a7b97018483
Author: Randy Palamar
Date: Wed, 30 Oct 2024 06:10:46 -0600
simd mem_copy
Diffstat:
3 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/extern/stb_truetype.h b/extern/stb_truetype.h
@@ -1467,10 +1467,8 @@ static int stbtt__GetGlyphShapeTT(Arena *a, const stbtt_fontinfo *info, int glyp
// Append vertices.
tmp = alloc(a, stbtt_vertex, num_vertices + comp_num_verts);
if (num_vertices > 0 && vertices)
- mem_copy((s8){.len = num_vertices * sizeof(stbtt_vertex), .data = (u8 *)vertices},
- (s8){.len = (num_vertices + comp_num_verts) * sizeof(stbtt_vertex), .data = (u8 *)tmp});
- mem_copy((s8){.len = comp_num_verts * sizeof(stbtt_vertex), .data = (u8 *)comp_verts},
- (s8){.len = comp_num_verts * sizeof(stbtt_vertex), .data = (u8 *)(tmp + num_vertices)});
+ mem_copy(vertices, tmp, num_vertices * sizeof(stbtt_vertex));
+ mem_copy(comp_verts, tmp + num_vertices, comp_num_verts * sizeof(stbtt_vertex));
vertices = tmp;
num_vertices += comp_num_verts;
}
diff --git a/terminal.c b/terminal.c
@@ -1469,7 +1469,7 @@ handle_input(Term *t, Arena a, s8 raw)
feed_line(&tv->lines, old, t->cursor.style);
TermView *nv = t->views + t->view_idx;
size nstart = nv->log.widx;
- mem_copy(raw, (s8){nv->log.cap, nv->log.buf + nstart});
+ mem_copy(raw.data, nv->log.buf + nstart, raw.len);
commit_to_rb(tv, -raw.len);
commit_to_rb(nv, raw.len);
raw.data = nv->log.buf + nstart;
diff --git a/util.c b/util.c
@@ -53,12 +53,18 @@ normalize_range(Range r)
}
static void
-mem_copy(s8 src, s8 dest)
+mem_copy(void *src, void *dest, size len)
{
- ASSERT(src.len > 0 && dest.len > 0);
- ASSERT(dest.len >= src.len);
- for (size i = 0; i < src.len; i++)
- dest.data[i] = src.data[i];
+ ASSERT(len >= 0);
+ u8 *s = src, *d = dest;
+#if defined(__AVX512BW__)
+ /* TODO: aligned load/store and comparison */
+ for (; len >= 64; len -= 64, s += 64, d += 64)
+ _mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
+#endif
+ for (; len >= 16; len -= 16, s += 16, d += 16)
+ _mm_storeu_si128((__m128i *)d, _mm_loadu_si128((__m128i*)s));
+ for (; len; len--) *d++ = *s++;
}
static void *