/* util.c (10599B) */
1 /* See LICENSE for copyright details */ 2 #ifndef _UTIL_C_ 3 #define _UTIL_C_ 4 #include <emmintrin.h> 5 #include <immintrin.h> 6 7 #include <stddef.h> 8 #include <stdint.h> 9 10 #include "shader_inc.h" 11 #include "lora_sb_0_inc.h" 12 #include "lora_sb_1_inc.h" 13 #include "config.h" 14 15 #ifndef asm 16 #define asm __asm__ 17 #endif 18 19 #ifdef _DEBUG 20 #define ASSERT(c) do { if (!(c)) asm("int3; nop"); } while (0); 21 #define DEBUG_EXPORT 22 #else 23 #define ASSERT(c) 24 #define DEBUG_EXPORT static 25 #endif 26 27 typedef uint8_t u8; 28 typedef int32_t i32; 29 typedef uint32_t u32; 30 typedef uint32_t b32; 31 typedef int64_t i64; 32 typedef uint64_t u64; 33 typedef float f32; 34 typedef double f64; 35 typedef ptrdiff_t size; 36 37 typedef struct { size len; u8 *data; } s8; 38 #define s8(s) (s8){.len = sizeof(s) - 1, .data = (u8 *)s} 39 40 typedef struct { 41 u8 *data; 42 u32 cap; 43 u32 widx; 44 i32 fd; 45 b32 errors; 46 } Stream; 47 48 typedef union { 49 struct { u32 w, h; }; 50 struct { u32 x, y; }; 51 u32 E[2]; 52 } uv2; 53 54 typedef union { 55 struct { f32 x, y; }; 56 struct { f32 w, h; }; 57 Vector2 rv; 58 f32 E[2]; 59 } v2; 60 61 typedef union { 62 struct { f32 x, y, z, w; }; 63 struct { f32 r, g, b, a; }; 64 Vector4 rv; 65 f32 E[4]; 66 } v4; 67 68 typedef union { 69 struct { v2 pos, size; }; 70 Rectangle rr; 71 } Rect; 72 73 enum colour_mode { 74 CM_RGB = 0, 75 CM_HSV = 1, 76 CM_LAST 77 }; 78 79 enum colour_picker_mode { 80 CPM_PICKER = 0, 81 CPM_SLIDERS = 1, 82 CPM_LAST 83 }; 84 85 enum colour_picker_flags { 86 CPF_REFILL_TEXTURE = 1 << 0, 87 CPF_PRINT_DEBUG = 1 << 30, 88 }; 89 90 enum input_indices { 91 INPUT_HEX, 92 INPUT_R, 93 INPUT_G, 94 INPUT_B, 95 INPUT_A 96 }; 97 98 enum mouse_pressed { 99 MOUSE_NONE = 0 << 0, 100 MOUSE_LEFT = 1 << 0, 101 MOUSE_RIGHT = 1 << 1, 102 }; 103 104 enum cardinal_direction { NORTH, EAST, SOUTH, WEST }; 105 106 #define WINDOW_ASPECT_RATIO (4.3f/3.2f) 107 108 #define BUTTON_HOVER_SPEED 8.0f 109 110 #define 
SLIDER_BORDER_COLOUR (Color){.r = 0x00, .g = 0x00, .b = 0x00, .a = 0xCC} 111 #define SLIDER_BORDER_WIDTH 3.0f 112 #define SLIDER_ROUNDNESS 0.035f 113 #define SLIDER_SCALE_SPEED 8.0f 114 #define SLIDER_SCALE_TARGET 1.5f 115 #define SLIDER_TRI_SIZE (v2){.x = 6, .y = 8} 116 117 #define SELECTOR_BORDER_COLOUR SLIDER_BORDER_COLOUR 118 #define SELECTOR_BORDER_WIDTH SLIDER_BORDER_WIDTH 119 #define SELECTOR_ROUNDNESS 0.3f 120 121 #define RECT_BTN_BORDER_WIDTH (SLIDER_BORDER_WIDTH + 3.0f) 122 123 #define TEXT_HOVER_SPEED 5.0f 124 125 typedef struct { 126 f32 hover_t; 127 } ButtonState; 128 129 #define COLOUR_STACK_ITEMS 8 130 typedef struct { 131 ButtonState buttons[COLOUR_STACK_ITEMS]; 132 v4 items[COLOUR_STACK_ITEMS]; 133 v4 last; 134 i32 widx; 135 f32 fade_param; 136 f32 y_off_t; 137 ButtonState tri_btn; 138 } ColourStackState; 139 140 typedef struct { 141 f32 scale_t[4]; 142 f32 colour_t[4]; 143 } SliderState; 144 145 typedef struct { 146 f32 hex_hover_t; 147 ButtonState mode; 148 } StatusBarState; 149 150 typedef struct { 151 ButtonState buttons[CPM_LAST]; 152 f32 mode_visible_t; 153 i32 next_mode; 154 } ModeChangeState; 155 156 typedef struct { 157 f32 scale_t[3]; 158 f32 base_hue; 159 f32 fractional_hue; 160 } PickerModeState; 161 162 typedef struct { 163 i32 idx; 164 i32 cursor; 165 f32 cursor_hover_p; 166 f32 cursor_t; 167 f32 cursor_t_target; 168 i32 buf_len; 169 u8 buf[64]; 170 } InputState; 171 172 #ifdef _DEBUG 173 enum clock_counts { 174 CC_WHOLE_RUN, 175 CC_DO_PICKER, 176 CC_DO_SLIDER, 177 CC_UPPER, 178 CC_LOWER, 179 CC_TEMP, 180 CC_LAST 181 }; 182 183 static struct { 184 i64 cpu_cycles[CC_LAST]; 185 i64 total_cycles[CC_LAST]; 186 i64 hit_count[CC_LAST]; 187 } g_debug_clock_counts; 188 189 #define BEGIN_CYCLE_COUNT(cc_name) \ 190 g_debug_clock_counts.cpu_cycles[cc_name] = __rdtsc(); \ 191 g_debug_clock_counts.hit_count[cc_name]++ 192 193 #define END_CYCLE_COUNT(cc_name) \ 194 g_debug_clock_counts.cpu_cycles[cc_name] = __rdtsc() - 
g_debug_clock_counts.cpu_cycles[cc_name]; \ 195 g_debug_clock_counts.total_cycles[cc_name] += g_debug_clock_counts.cpu_cycles[cc_name] 196 197 #else 198 #define BEGIN_CYCLE_COUNT(a) 199 #define END_CYCLE_COUNT(a) 200 #endif 201 202 typedef struct { 203 v4 colour, previous_colour; 204 ColourStackState colour_stack; 205 206 uv2 window_size; 207 v2 window_pos; 208 v2 mouse_pos; 209 210 f32 dt; 211 212 Font font; 213 Color bg, fg; 214 215 InputState is; 216 ModeChangeState mcs; 217 PickerModeState pms; 218 SliderState ss; 219 StatusBarState sbs; 220 ButtonState buttons[2]; 221 222 i32 held_idx; 223 224 f32 selection_hover_t[2]; 225 v4 hover_colour; 226 v4 cursor_colour; 227 228 Shader picker_shader; 229 RenderTexture slider_texture; 230 RenderTexture picker_texture; 231 232 i32 mode_id, colour_mode_id, colours_id; 233 i32 regions_id, radius_id, border_thick_id; 234 235 u32 flags; 236 enum colour_mode colour_mode; 237 enum colour_picker_mode mode; 238 } ColourPickerCtx; 239 240 #define ARRAY_COUNT(a) (sizeof(a) / sizeof(*a)) 241 #define ABS(x) ((x) < 0 ? (-x) : (x)) 242 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 243 #define CLAMP(x, a, b) ((x) = (x) < (a) ? (a) : (x) > (b) ? 
(b) : (x)) 244 #define CLAMP01(a) CLAMP(a, 0, 1) 245 246 #define ISDIGIT(a) ((a) >= '0' && (a) <= '9') 247 #define ISHEX(a) (ISDIGIT(a) || ((a) >= 'a' && (a) <= 'f') || ((a) >= 'A' && (a) <= 'F')) 248 249 static Color 250 colour_from_normalized(v4 colour) 251 { 252 __m128 colour_v = _mm_loadu_ps(colour.E); 253 __m128 scale = _mm_set1_ps(255.0f); 254 __m128i result = _mm_cvtps_epi32(_mm_mul_ps(colour_v, scale)); 255 _Alignas(16) u32 outu[4]; 256 _mm_store_si128((__m128i *)outu, result); 257 return (Color){.r = outu[0] & 0xFF, .g = outu[1] & 0xFF, .b = outu[2] & 0xFF, .a = outu[3] & 0xFF }; 258 } 259 260 static v4 261 rgb_to_hsv(v4 rgb) 262 { 263 __m128 rgba = _mm_loadu_ps(rgb.E); 264 __m128 gbra = _mm_shuffle_ps(rgba, rgba, _MM_SHUFFLE(3, 0, 2, 1)); 265 __m128 brga = _mm_shuffle_ps(gbra, gbra, _MM_SHUFFLE(3, 0, 2, 1)); 266 267 __m128 Max = _mm_max_ps(rgba, _mm_max_ps(gbra, brga)); 268 __m128 Min = _mm_min_ps(rgba, _mm_min_ps(gbra, brga)); 269 __m128 C = _mm_sub_ps(Max, Min); 270 271 __m128 zero = _mm_set1_ps(0); 272 __m128 six = _mm_set1_ps(6); 273 274 _Alignas(16) f32 aval[4] = {0, 2, 4, 0}; 275 _Alignas(16) f32 scale[4] = {1.0/6.0f, 0, 0, 0}; 276 /* NOTE if C == 0 then take H as 0/1 (which are equivalent in HSV) */ 277 __m128 t = _mm_div_ps(_mm_sub_ps(gbra, brga), C); 278 t = _mm_and_ps(t, _mm_cmpneq_ps(zero, C)); 279 t = _mm_add_ps(t, _mm_load_ps(aval)); 280 /* TODO: does (G - B) / C ever exceed 6.0? 
*/ 281 /* NOTE: Compute fmodf on element [0] */ 282 t = _mm_sub_ps(t, _mm_mul_ps(_mm_floor_ps(_mm_mul_ps(t, _mm_load_ps(scale))), six)); 283 284 __m128 H = _mm_div_ps(_mm_and_ps(t, _mm_cmpeq_ps(rgba, Max)), six); 285 __m128 S = _mm_div_ps(C, Max); 286 287 /* NOTE: Make sure H & S are 0 instead of NaN when V == 0 */ 288 H = _mm_and_ps(H, _mm_cmpneq_ps(zero, Max)); 289 S = _mm_and_ps(S, _mm_cmpneq_ps(zero, Max)); 290 291 __m128 H0 = _mm_shuffle_ps(H, H, _MM_SHUFFLE(3, 0, 0, 0)); 292 __m128 H1 = _mm_shuffle_ps(H, H, _MM_SHUFFLE(3, 1, 1, 1)); 293 __m128 H2 = _mm_shuffle_ps(H, H, _MM_SHUFFLE(3, 2, 2, 2)); 294 H = _mm_or_ps(H0, _mm_or_ps(H1, H2)); 295 296 /* NOTE: keep only element [0] from H vector; Max contains V & A */ 297 __m128 hva = _mm_blend_ps(Max, H, 0x01); 298 __m128 hsva = _mm_blend_ps(hva, S, 0x02); 299 hsva = _mm_min_ps(hsva, _mm_set1_ps(1)); 300 301 v4 res; 302 _mm_storeu_ps(res.E, hsva); 303 return res; 304 } 305 306 static v4 307 hsv_to_rgb(v4 hsv) 308 { 309 /* f(k(n)) = V - V*S*max(0, min(k, min(4 - k, 1))) 310 * k(n) = fmod((n + H * 6), 6) 311 * (R, G, B) = (f(n = 5), f(n = 3), f(n = 1)) 312 */ 313 _Alignas(16) f32 nval[4] = {5.0f, 3.0f, 1.0f, 0.0f}; 314 __m128 n = _mm_load_ps(nval); 315 __m128 H = _mm_set1_ps(hsv.x); 316 __m128 S = _mm_set1_ps(hsv.y); 317 __m128 V = _mm_set1_ps(hsv.z); 318 __m128 six = _mm_set1_ps(6); 319 320 __m128 t = _mm_add_ps(n, _mm_mul_ps(six, H)); 321 __m128 rem = _mm_floor_ps(_mm_div_ps(t, six)); 322 __m128 k = _mm_sub_ps(t, _mm_mul_ps(rem, six)); 323 324 t = _mm_min_ps(_mm_sub_ps(_mm_set1_ps(4), k), _mm_set1_ps(1)); 325 t = _mm_max_ps(_mm_set1_ps(0), _mm_min_ps(k, t)); 326 t = _mm_mul_ps(t, _mm_mul_ps(S, V)); 327 328 v4 rgba; 329 _mm_storeu_ps(rgba.E, _mm_sub_ps(V, t)); 330 rgba.a = hsv.a; 331 332 return rgba; 333 } 334 335 static v4 336 normalize_colour(u32 rgba) 337 { 338 return (v4){ 339 .r = ((rgba >> 24) & 0xFF) / 255.0f, 340 .g = ((rgba >> 16) & 0xFF) / 255.0f, 341 .b = ((rgba >> 8) & 0xFF) / 255.0f, 342 .a = ((rgba >> 
0) & 0xFF) / 255.0f, 343 }; 344 } 345 346 static u32 347 pack_rl_colour(Color colour) 348 { 349 return colour.r << 24 | colour.g << 16 | colour.b << 8 | colour.a << 0; 350 } 351 352 static u32 353 parse_hex_u32(s8 s) 354 { 355 u32 res = 0; 356 357 /* NOTE: skip over '0x' or '0X' */ 358 if (s.len > 2 && s.data[0] == '0' && (s.data[1] == 'x' || s.data[1] == 'X')) { 359 s.data += 2; 360 s.len -= 2; 361 } 362 363 for (; s.len > 0; s.len--, s.data++) { 364 res <<= 4; 365 if (ISDIGIT(*s.data)) { 366 res |= *s.data - '0'; 367 } else if (ISHEX(*s.data)) { 368 /* NOTE: convert to lowercase first then convert to value */ 369 *s.data |= 0x20; 370 res |= *s.data - 0x57; 371 } else { 372 /* NOTE: do nothing (treat invalid value as 0) */ 373 } 374 } 375 return res; 376 } 377 378 static f64 379 parse_f64(s8 s) 380 { 381 f64 integral = 0, fractional = 0, sign = 1; 382 383 if (s.len && *s.data == '-') { 384 sign *= -1; 385 s.data++; 386 s.len--; 387 } 388 389 while (s.len && ISDIGIT(*s.data)) { 390 integral *= 10; 391 integral += *s.data - '0'; 392 s.data++; 393 s.len--; 394 } 395 396 if (s.len && *s.data == '.') { s.data++; s.len--; } 397 398 while (s.len) { 399 ASSERT(s.data[s.len - 1] != '.'); 400 fractional *= 0.1f; 401 fractional += (s.data[--s.len] - '0') * 0.1f; 402 } 403 404 f64 result = sign * (integral + fractional); 405 406 return result; 407 } 408 409 static s8 410 cstr_to_s8(char *s) 411 { 412 s8 result = {.data = (u8 *)s}; 413 if (s) while (*s) { result.len++; s++; } 414 return result; 415 } 416 417 static void 418 stream_append_byte(Stream *s, u8 b) 419 { 420 s->errors |= s->widx + 1 > s->cap; 421 if (!s->errors) 422 s->data[s->widx++] = b; 423 } 424 425 static void 426 stream_append_hex_u8(Stream *s, u32 n) 427 { 428 static u8 hex[16] = {"0123456789abcdef"}; 429 s->errors |= (s->cap - s->widx) < 2; 430 if (!s->errors) { 431 s->data[s->widx + 1] = hex[(n >> 0) & 0x0f]; 432 s->data[s->widx + 0] = hex[(n >> 4) & 0x0f]; 433 s->widx += 2; 434 } 435 } 436 437 static void 
438 stream_append_s8(Stream *s, s8 str) 439 { 440 s->errors |= (s->cap - s->widx) < str.len; 441 if (!s->errors) { 442 for (size i = 0; i < str.len; i++) 443 s->data[s->widx++] = str.data[i]; 444 } 445 } 446 447 static void 448 stream_append_u64(Stream *s, u64 n) 449 { 450 u8 tmp[64]; 451 u8 *end = tmp + sizeof(tmp); 452 u8 *beg = end; 453 do { *--beg = '0' + (n % 10); } while (n /= 10); 454 stream_append_s8(s, (s8){.len = end - beg, .data = beg}); 455 } 456 457 static void 458 stream_append_f64(Stream *s, f64 f, i64 prec) 459 { 460 if (f < 0) { 461 stream_append_byte(s, '-'); 462 f *= -1; 463 } 464 465 /* NOTE: round last digit */ 466 f += 0.5f / prec; 467 468 if (f >= (f64)(-1UL >> 1)) { 469 stream_append_s8(s, s8("inf")); 470 } else { 471 u64 integral = f; 472 u64 fraction = (f - integral) * prec; 473 stream_append_u64(s, integral); 474 stream_append_byte(s, '.'); 475 for (u64 i = prec / 10; i > 1; i /= 10) { 476 if (i > fraction) 477 stream_append_byte(s, '0'); 478 } 479 stream_append_u64(s, fraction); 480 } 481 } 482 483 static void 484 stream_append_colour(Stream *s, Color c) 485 { 486 stream_append_hex_u8(s, c.r); 487 stream_append_hex_u8(s, c.g); 488 stream_append_hex_u8(s, c.b); 489 stream_append_hex_u8(s, c.a); 490 } 491 492 #endif /* _UTIL_C_ */