oblique_mc

Monte Carlo in Single Layer Biological Tissue
git clone anongit@rnpnr.xyz:oblique_mc.git
Log | Files | Refs | Feed | README | LICENSE

Commit: c20f629f18ae50d1e53bea9d3153ee594703f485
Parent: 8bec6828f49cd283ce9f341af87903d4adba4c81
Author: Randy Palamar
Date:   Thu,  4 Apr 2024 06:30:51 -0600

use AVX for adding matrices

Diffstat:
M mc.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/mc.c b/mc.c
@@ -11,6 +11,7 @@
  * plane. Initial launch direction is always towards origin.
  */
 
+#include <immintrin.h>
 #include <math.h>
 #include <pthread.h>
 #include <stdarg.h>
@@ -229,17 +230,22 @@ alloc_mat2(u32 x, u32 y)
 static void
 sum_mat2(Mat2 m1, Mat2 m2)
 {
-	if (m1.Nx != m2.Nx || m1.Ny != m2.Ny)
-		die("sum_mat2: matrix sizes incompatible\n");
-	/* TODO: Vectorize this */
+	u64 N_total = m1.Nx * m1.Ny;
 	f64 *b1 = m1.b;
 	f64 *b2 = m2.b;
-	for (u32 i = 0; i < m1.Nx; i++) {
-		for (u32 j = 0; j < m1.Ny; j++)
-			b1[j] += b2[j];
-		b1 += m1.Ny;
-		b2 += m1.Ny;
+
+#if defined(__AVX__)
+	while (N_total >= 4) {
+		__m256d v1 = _mm256_load_pd(b1);
+		__m256d v2 = _mm256_load_pd(b2);
+		_mm256_store_pd(b1, _mm256_add_pd(v1, v2));
+		N_total -= 4;
+		b1 += 4;
+		b2 += 4;
 	}
+#endif
+	for (u64 i = 0; i < N_total; i++)
+		b1[i] += b2[i];
 }
 
 static Vec3