vtgl

terminal emulator implemented in OpenGL
git clone anongit@rnpnr.xyz:vtgl.git
Log | Files | Refs | Feed | LICENSE

Commit: 73e7573e49b63dbf70c55427fa3dfb5df6102554
Parent: c20911f58d50f6a446c0c4959fc36a7b97018483
Author: Randy Palamar
Date:   Wed, 30 Oct 2024 06:10:46 -0600

simd mem_copy

Diffstat:
M extern/stb_truetype.h | 6 ++----
M terminal.c | 2 +-
M util.c | 16 +++++++++++-----
3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/extern/stb_truetype.h b/extern/stb_truetype.h
@@ -1467,10 +1467,8 @@ static int stbtt__GetGlyphShapeTT(Arena *a, const stbtt_fontinfo *info, int glyp
          // Append vertices.
          tmp = alloc(a, stbtt_vertex, num_vertices + comp_num_verts);
          if (num_vertices > 0 && vertices)
-            mem_copy((s8){.len = num_vertices * sizeof(stbtt_vertex), .data = (u8 *)vertices},
-                     (s8){.len = (num_vertices + comp_num_verts) * sizeof(stbtt_vertex), .data = (u8 *)tmp});
-         mem_copy((s8){.len = comp_num_verts * sizeof(stbtt_vertex), .data = (u8 *)comp_verts},
-                  (s8){.len = comp_num_verts * sizeof(stbtt_vertex), .data = (u8 *)(tmp + num_vertices)});
+            mem_copy(vertices, tmp, num_vertices * sizeof(stbtt_vertex));
+         mem_copy(comp_verts, tmp + num_vertices, comp_num_verts * sizeof(stbtt_vertex));
          vertices = tmp;
          num_vertices += comp_num_verts;
       }
diff --git a/terminal.c b/terminal.c
@@ -1469,7 +1469,7 @@ handle_input(Term *t, Arena a, s8 raw)
 		feed_line(&tv->lines, old, t->cursor.style);
 		TermView *nv = t->views + t->view_idx;
 		size nstart = nv->log.widx;
-		mem_copy(raw, (s8){nv->log.cap, nv->log.buf + nstart});
+		mem_copy(raw.data, nv->log.buf + nstart, raw.len);
 		commit_to_rb(tv, -raw.len);
 		commit_to_rb(nv, raw.len);
 		raw.data = nv->log.buf + nstart;
diff --git a/util.c b/util.c
@@ -53,12 +53,18 @@ normalize_range(Range r)
 }
 
 static void
-mem_copy(s8 src, s8 dest)
+mem_copy(void *src, void *dest, size len)
 {
-	ASSERT(src.len > 0 && dest.len > 0);
-	ASSERT(dest.len >= src.len);
-	for (size i = 0; i < src.len; i++)
-		dest.data[i] = src.data[i];
+	ASSERT(len >= 0);
+	u8 *s = src, *d = dest;
+#if defined(__AVX512BW__)
+	/* TODO: aligned load/store and comparison */
+	for (; len >= 64; len -= 64, s += 64, d += 64)
+		_mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
+#endif
+	for (; len >= 16; len -= 16, s += 16, d += 16)
+		_mm_storeu_si128((__m128i *)d, _mm_loadu_si128((__m128i*)s));
+	for (; len; len--) *d++ = *s++;
 }
 
 static void *