Commit: beda44ae53194d3ff72eae4d43397b11cf89c36c
Parent: 25eaa2baec4e1090349f5da57bdf2968411b6859
Author: Randy Palamar
Date: Sun, 4 Jan 2026 10:27:18 -0700
util: avx512 memory copy
If the CPU being targeted supports AVX512BW then we might as well
use 64-byte-wide instructions. Masked loads/stores make it trivial
to clean up the remainder.
Diffstat:
1 file changed, 9 insertions(+), 0 deletions(-)
diff --git a/util.c b/util.c
@@ -12,7 +12,16 @@ function void
mem_copy(void *restrict dest, void *restrict src, uz n)
{
	/* Copy n bytes from src to dest. Both pointers are restrict
	 * qualified, so the caller promises the regions do not overlap. */
	u8 *s = src, *d = dest;
	#ifdef __AVX512BW__
	/* bulk: one unaligned 64 byte load/store pair per iteration */
	for (; n >= 64; n -= 64, s += 64, d += 64)
		_mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
	if (n > 0) {
		/* tail: n is in [1, 63] here. Set the low n bits of the byte
		 * mask so that only the remaining bytes are loaded and stored.
		 * The mask is built with a plain shift rather than _bzhi_u64():
		 * that intrinsic needs BMI2, which the __AVX512BW__ check above
		 * does not guarantee is enabled. The shift count (64 - n) is in
		 * [1, 63], so the shift is well defined. */
		__mmask64 k = _cvtu64_mask64(~0ULL >> (64 - n));
		_mm512_mask_storeu_epi8(d, k, _mm512_maskz_loadu_epi8(k, s));
	}
	#else
	/* portable fallback: one byte at a time */
	for (; n; n--) *d++ = *s++;
	#endif
}
function void