/* os_linux_common.c (15765B) */
1 #define FUTEX_WAIT 0 2 #define FUTEX_WAKE 1 3 4 #define CLOCK_MONOTONIC 1 5 6 #define PR_SET_NAME 15 7 8 #define PROT_NONE 0x00 9 #define PROT_READ 0x01 10 #define PROT_RW 0x03 11 12 #define MFD_CLOEXEC 0x01 13 14 #define MAP_SHARED 0x01 15 #define MAP_PRIVATE 0x02 16 #define MAP_FIXED 0x10 17 #define MAP_ANON 0x20 18 19 #define MADV_FREE 8 20 #define MADV_HUGEPAGE 14 21 22 #define O_RDONLY 0x00000 23 #define O_WRONLY 0x00001 24 #define O_RDWR 0x00002 25 #define O_CREAT 0x00040 26 #define O_NOCTTY 0x00100 27 #define O_APPEND 0x00400 28 #define O_NONBLOCK 0x00800 29 #define O_CLOEXEC 0x80000 30 31 #define IN_CLOSE_WRITE 0x00000008 32 #define IN_CLOSE_NOWRITE 0x00000010 33 #define IN_MODIFY 0x00000002 34 35 #define AT_EMPTY_PATH 0x1000 36 #define AT_FDCWD (-100) 37 38 #define LINUX_INOTIFY_MASK (IN_CLOSE_WRITE|IN_CLOSE_NOWRITE|IN_MODIFY) 39 40 #define WNOHANG 1 41 #define W_IF_EXITED(s) (!((s) & 0x7F)) 42 43 /* TODO: glibc/gcc indirectly include sys/select.h if you include immintrin.h. If that 44 * header is removed this can also be removed */ 45 #undef FD_SET 46 #undef FD_ISSET 47 48 #define FD_SET(d, s) ((s)[(d) / (8 * sizeof(*(s)))] |= (1ULL << ((d) % (8 * sizeof(*(s)))))) 49 #define FD_ISSET(d, s) ((s)[(d) / (8 * sizeof(*(s)))] & (1ULL << ((d) % (8 * sizeof(*(s)))))) 50 51 typedef __attribute__((aligned(16))) u8 statx_buffer[256]; 52 #define STATX_BUF_MEMBER(sb, t, off) (*(t *)((u8 *)(sb) + off)) 53 #define STATX_INODE(sb) STATX_BUF_MEMBER(sb, u64, 32) 54 #define STATX_FILE_SIZE(sb) STATX_BUF_MEMBER(sb, u64, 40) 55 56 #define STATX_INO 0x00000100U 57 #define STATX_SIZE 0x00000200U 58 59 #define TIOCSCTTY 0x540E 60 #define TIOCSWINSZ 0x5414 61 #define TIOCSPTLCK 0x40045431 /* (un)lock pty */ 62 #define TIOCGPTN 0x80045430 /* get pty number */ 63 64 #define OS_PATH_SEPERATOR "/" 65 66 #define OS_MAP_READ PROT_READ 67 #define OS_MAP_PRIVATE MAP_PRIVATE 68 69 struct __attribute__((aligned(16))) stack_base { 70 void (*entry)(struct stack_base *stack); 71 OS *os; 72 
os_thread_entry_point_fn *user_entry_point; 73 iptr user_context; 74 char name[16]; 75 i32 futex; 76 }; 77 78 typedef struct { 79 os_file_watch_callback_fn *fn; 80 u8 *path; 81 void *user_ctx; 82 u64 inode; 83 i32 handle; 84 } linux_file_watch; 85 86 typedef struct linux_deferred_file_reload_queue { 87 struct linux_deferred_file_reload_queue *next; 88 struct linux_deferred_file_reload_queue *last; 89 i32 index; 90 i32 failures; 91 } linux_deferred_file_reload_queue; 92 93 typedef struct { 94 iptr handle; 95 iptr process_id; 96 } linux_platform_process; 97 98 typedef struct { 99 Arena platform_memory; 100 void *window; 101 102 OS os; 103 104 TerminalMemory memory; 105 TerminalInput input; 106 107 Stream char_stream; 108 109 linux_platform_process child; 110 i32 inotify_fd; 111 i32 win_fd; 112 113 linux_deferred_file_reload_queue file_reload_queue; 114 linux_deferred_file_reload_queue *file_reload_free_list; 115 linux_file_watch file_watches[32]; 116 i32 file_watch_count; 117 118 Stream error_stream; 119 120 DEBUG_DECL(void *library_handle); 121 } PlatformCtx; 122 global PlatformCtx linux_ctx; 123 124 function void 125 os_write_err_msg(s8 msg) 126 { 127 syscall3(SYS_write, 2, (iptr)msg.data, msg.len); 128 } 129 130 __attribute__((noreturn)) 131 function void 132 os_fatal(s8 msg) 133 { 134 os_write_err_msg(msg); 135 syscall1(SYS_exit_group, 1); 136 __builtin_unreachable(); 137 } 138 139 function u32 140 os_file_attribute_to_mode(u32 attr) 141 { 142 u32 result = O_CREAT; 143 if (attr & OS_FA_READ && attr & OS_FA_WRITE) { 144 result |= O_RDWR; 145 } else if (attr & OS_FA_READ) { 146 result |= O_RDONLY; 147 } else if (attr & OS_FA_WRITE) { 148 result |= O_WRONLY; 149 } 150 151 if (attr & OS_FA_APPEND) 152 result |= O_APPEND; 153 154 return result; 155 } 156 157 function iptr 158 os_open(u8 *name, u32 attr) 159 { 160 u64 result = syscall4(SYS_openat, AT_FDCWD, (iptr)name, os_file_attribute_to_mode(attr), 0660); 161 if (result > -4096UL) 162 result = INVALID_FILE; 163 
return result; 164 } 165 166 function b32 167 os_offset_write(iptr file, s8 raw, iz offset) 168 { 169 iz result = syscall4(SYS_pwrite64, file, (iptr)raw.data, raw.len, offset); 170 return result == raw.len; 171 } 172 173 function OS_WRITE_FN(os_write) 174 { 175 iz result = syscall3(SYS_write, file, (iptr)raw.data, raw.len); 176 return result == raw.len; 177 } 178 179 function void 180 os_close(iptr file) 181 { 182 syscall1(SYS_close, file); 183 } 184 185 function OS_READ_FN(os_read) 186 { 187 u64 r = 0, remaining = buffer.len, total_bytes_read = 0; 188 189 do { 190 remaining -= r; 191 total_bytes_read += r; 192 r = syscall3(SYS_read, file, (iptr)(buffer.data + total_bytes_read), remaining); 193 } while (r <= -4096UL && remaining != 0); 194 195 return total_bytes_read; 196 } 197 198 function OS_READ_FILE_FN(os_read_file) 199 { 200 s8 result = {0}; 201 202 statx_buffer sb; 203 u64 fd = syscall4(SYS_openat, AT_FDCWD, (iptr)path, O_RDONLY, 0); 204 u64 status = syscall5(SYS_statx, fd, 0, AT_EMPTY_PATH, STATX_SIZE, (iptr)sb); 205 206 if (fd <= -4096UL && status == 0) { 207 result = s8alloc(a, STATX_FILE_SIZE(sb)); 208 iz rlen = os_read(fd, result); 209 syscall1(SYS_close, fd); 210 if (result.len != rlen) 211 result.len = 0; 212 } 213 214 return result; 215 } 216 217 function OSMemoryBlock 218 os_block_alloc(iz requested_size) 219 { 220 OSMemoryBlock result = {0}; 221 222 /* TODO: query system for HUGETLB support and use those instead of page size */ 223 iz alloc_size = requested_size; 224 if (alloc_size % PAGE_SIZE != 0) 225 alloc_size += PAGE_SIZE - alloc_size % PAGE_SIZE; 226 227 u64 memory = syscall6(SYS_mmap, 0, alloc_size, PROT_RW, MAP_ANON|MAP_PRIVATE, -1, 0); 228 if (memory <= -4096UL) { 229 result.memory = (void *)memory; 230 result.size = alloc_size; 231 syscall3(SYS_madvise, memory, alloc_size, MADV_HUGEPAGE); 232 } 233 234 return result; 235 } 236 237 function void 238 os_release_memory_block(OSMemoryBlock memory) 239 { 240 syscall3(SYS_madvise, 
(iptr)memory.memory, memory.size, MADV_FREE); 241 syscall3(SYS_mprotect, (iptr)memory.memory, memory.size, PROT_NONE); 242 } 243 244 function void 245 os_release_ring_buffer(OSRingBuffer *rb) 246 { 247 syscall2(SYS_munmap, (iptr)(rb->data - rb->capacity), rb->capacity * 3); 248 } 249 250 function f64 251 os_get_time(void) 252 { 253 i64 timespec[2]; 254 syscall2(SYS_clock_gettime, CLOCK_MONOTONIC, (iptr)timespec); 255 f64 result = timespec[0] + ((f64)timespec[1]) * 1e-9; 256 return result; 257 } 258 259 function os_mapped_file 260 os_map_file(char *path, i32 mode, i32 perm) 261 { 262 os_mapped_file result = {0}; 263 264 i32 open_mode = 0; 265 switch (mode) { 266 case OS_MAP_READ: open_mode = O_RDONLY; break; 267 default: ASSERT(0); 268 } 269 270 statx_buffer sb; 271 u64 fd = syscall4(SYS_openat, AT_FDCWD, (iptr)path, open_mode, 0); 272 u64 status = syscall5(SYS_statx, fd, 0, AT_EMPTY_PATH, STATX_SIZE, (iptr)sb); 273 274 if (fd <= -4096UL && status == 0) { 275 u64 memory = syscall6(SYS_mmap, 0, STATX_FILE_SIZE(sb), mode, perm, fd, 0); 276 if (memory <= -4096UL) { 277 result.data = (u8 *)memory; 278 result.len = STATX_FILE_SIZE(sb); 279 } 280 syscall1(SYS_close, fd); 281 } 282 283 return result; 284 } 285 286 function OS_ALLOCATE_RING_BUFFER_FN(os_allocate_ring_buffer) 287 { 288 OSRingBuffer result = {0}; 289 /* TODO: query system for HUGETLB support and use those instead of page size */ 290 if (capacity % PAGE_SIZE != 0) 291 capacity += PAGE_SIZE - capacity % PAGE_SIZE; 292 ASSERT(capacity % PAGE_SIZE == 0); 293 294 u64 fd = syscall2(SYS_memfd_create, (iptr)"vtgl:rb", MFD_CLOEXEC); 295 if (fd > -4096UL) os_fatal(s8("os_alloc_ring_buffer: failed to open mem_fd\n")); 296 syscall2(SYS_ftruncate, fd, capacity); 297 298 result.capacity = capacity; 299 result.data = (u8 *)syscall6(SYS_mmap, 0, (iptr)(3 * capacity), 0, MAP_ANON|MAP_PRIVATE, -1, 0); 300 if ((u64)result.data > -4096UL) 301 os_fatal(s8("os_alloc_ring_buffer: initial mmap failed\n")); 302 syscall3(SYS_madvise, 
(iptr)result.data, 3 * capacity, MADV_HUGEPAGE); 303 304 for (i32 i = 0; i < 3; i++) { 305 u64 memory = syscall6(SYS_mmap, (iptr)(result.data + i * capacity), capacity, 306 PROT_RW, MAP_FIXED|MAP_SHARED, fd, 0); 307 if (memory > -4096UL) { 308 u8 buf[256]; 309 Stream err = {.data = buf, .capacity = sizeof(buf)}; 310 stream_push_s8(&err, s8("os_alloc_ring_buffer: mmap(")); 311 stream_push_u64(&err, i); 312 stream_push_s8(&err, s8(") failed\n")); 313 os_fatal(stream_to_s8(&err)); 314 } 315 } 316 syscall1(SYS_close, fd); 317 318 /* NOTE: start in the middle page */ 319 result.data += result.capacity; 320 321 return result; 322 } 323 324 function b32 325 os_child_exited(iptr pid) 326 { 327 i64 status; 328 i64 r = syscall4(SYS_wait4, pid, (iptr)&status, WNOHANG, 0); 329 return r == pid && W_IF_EXITED(status); 330 } 331 332 function linux_platform_process 333 os_fork_child(s8 cmd, c8 **envp) 334 { 335 i32 n = 0; 336 337 /* NOTE: we open in non-blocking mode so that we can try and fully drain the pipe 338 * before processing. 
Otherwise a single read will be limited to the page size */ 339 u64 m = syscall4(SYS_openat, AT_FDCWD, (iptr)"/dev/ptmx", O_RDWR|O_NOCTTY|O_NONBLOCK|O_CLOEXEC, 0); 340 if (m > -4096UL) os_fatal(s8("os_fork_child: failed to open master terminal\n")); 341 /* NOTE: first unlock the tty, then get a valid pty number */ 342 if (syscall3(SYS_ioctl, m, TIOCSPTLCK, (iptr)&n) || syscall3(SYS_ioctl, m, TIOCGPTN, (iptr)&n)) 343 os_fatal(s8("os_fork_child: failed to get a pty number\n")); 344 345 u8 buffer[20] = {"/dev/pts/"}; 346 Stream sbuf = {.data = buffer, .capacity = 20, .count = sizeof("/dev/pts/") - 1}; 347 stream_push_i64(&sbuf, n); 348 stream_push_byte(&sbuf, 0); 349 350 u64 s = syscall4(SYS_openat, AT_FDCWD, (iptr)sbuf.data, O_RDWR|O_NOCTTY, 0); 351 if (s > -4096UL) os_fatal(s8("os_fork_child: failed to open slave terminal\n")); 352 353 u64 pid = syscall2(SYS_clone, SIGCHLD, 0); 354 if (pid > -4096UL) os_fatal(s8("os_fork_child: failed to fork a child\n")); 355 356 if (pid == 0) { 357 syscall1(SYS_setsid, 0); 358 syscall3(SYS_dup3, s, 0, 0); 359 syscall3(SYS_dup3, s, 1, 0); 360 syscall3(SYS_dup3, s, 2, 0); 361 syscall3(SYS_ioctl, s, TIOCSCTTY, 0); 362 if (s > 2) syscall1(SYS_close, s); 363 ASSERT(cmd.data[cmd.len] == 0); 364 u8 *argv[] = {cmd.data, 0}; 365 syscall3(SYS_execve, (iptr)cmd.data, (iptr)argv, (iptr)envp); 366 __builtin_unreachable(); 367 os_fatal(s8("failed to exec child\n")); 368 } 369 syscall1(SYS_close, s); 370 371 return (linux_platform_process){.process_id = pid, .handle = m}; 372 } 373 374 function OS_SET_TERMINAL_SIZE_FN(os_set_terminal_size) 375 { 376 u16 win_size[4]; 377 win_size[0] = rows; 378 win_size[1] = columns; 379 win_size[2] = window_width; 380 win_size[3] = window_height; 381 if (syscall3(SYS_ioctl, child, TIOCSWINSZ, (iptr)win_size) > -4096UL) 382 os_write_err_msg(s8("os_set_term_size\n")); 383 } 384 385 function OS_ADD_FILE_WATCH_FN(linux_add_file_watch) 386 { 387 u64 wd = syscall3(SYS_inotify_add_watch, linux_ctx.inotify_fd, 
(iptr)path, LINUX_INOTIFY_MASK); 388 if (wd <= -4096UL) { 389 statx_buffer sb; 390 syscall5(SYS_statx, AT_FDCWD, (iptr)path, 0, STATX_INO, (iptr)sb); 391 392 i32 idx = linux_ctx.file_watch_count++; 393 ASSERT(idx < ARRAY_COUNT(linux_ctx.file_watches)); 394 linux_ctx.file_watches[idx].fn = fn; 395 linux_ctx.file_watches[idx].path = path; 396 linux_ctx.file_watches[idx].handle = wd; 397 linux_ctx.file_watches[idx].inode = STATX_INODE(sb); 398 linux_ctx.file_watches[idx].user_ctx = user_ctx; 399 } 400 } 401 402 function void 403 try_deferred_file_loads(PlatformCtx *ctx) 404 { 405 linux_deferred_file_reload_queue *file = ctx->file_reload_queue.next; 406 while (file) { 407 linux_file_watch *fw = ctx->file_watches + file->index; 408 409 statx_buffer sb; 410 syscall5(SYS_statx, AT_FDCWD, (iptr)fw->path, 0, STATX_INO, (iptr)sb); 411 412 fw->handle = syscall3(SYS_inotify_add_watch, ctx->inotify_fd, (iptr)fw->path, 413 LINUX_INOTIFY_MASK); 414 fw->inode = STATX_INODE(sb); 415 416 if ((u64)fw->handle <= -4096UL) { 417 fw->fn(fw->path, fw->user_ctx); 418 file->last->next = file->next; 419 file->next = ctx->file_reload_free_list; 420 ctx->file_reload_free_list = file; 421 file = file->last; 422 } else { 423 file->failures++; 424 #if 0 425 TODO 426 if (file->failures > MAX_FILE_RELOAD_TRIES) { 427 log 428 remove from list 429 } 430 #endif 431 } 432 file = file->next; 433 } 434 } 435 436 function b32 437 defer_file_reload(PlatformCtx *ctx, i32 file_watch_index, statx_buffer *sb) 438 { 439 b32 result = 1; 440 linux_file_watch *fw = ctx->file_watches + file_watch_index; 441 442 fw->inode = STATX_INODE(*sb); 443 fw->handle = syscall3(SYS_inotify_add_watch, ctx->inotify_fd, (iptr)fw->path, LINUX_INOTIFY_MASK); 444 445 if ((u64)fw->handle > -4096UL) { 446 result = 0; 447 448 linux_deferred_file_reload_queue *new = ctx->file_reload_free_list; 449 if (new) ctx->file_reload_free_list = new->next; 450 else new = push_struct(&ctx->platform_memory, typeof(*new)); 451 new->index = 
file_watch_index; 452 new->failures = 0; 453 DLLPushDown(&ctx->file_reload_queue, new); 454 } 455 456 return result; 457 } 458 459 function void 460 dispatch_file_watch_events(PlatformCtx *ctx) 461 { 462 struct { 463 i32 wd; 464 u32 mask, cookie, len; 465 c8 name[]; 466 } *ie; 467 468 u8 *mem = alloc_(&ctx->platform_memory, 4096, 64, 1); 469 s8 buf = {.len = 4096, .data = mem}; 470 471 for (;;) { 472 iz rlen = syscall3(SYS_read, ctx->inotify_fd, (iptr)buf.data, buf.len); 473 if (rlen <= 0) 474 break; 475 476 for (u8 *data = buf.data; data < buf.data + rlen; data += sizeof(*ie) + ie->len) { 477 ie = (void *)data; 478 for (i32 i = 0; i < ctx->file_watch_count; i++) { 479 linux_file_watch *fw = ctx->file_watches + i; 480 if (fw->handle != ie->wd) 481 continue; 482 483 b32 file_changed = (ie->mask & IN_CLOSE_WRITE) != 0; 484 file_changed |= (ie->mask & IN_MODIFY) != 0; 485 /* NOTE: some editors and the compiler will rewrite a file 486 * completely and thus the inode will change; here we 487 * detect that and restart the watch */ 488 statx_buffer sb; 489 u64 status = syscall5(SYS_statx, AT_FDCWD, (iptr)fw->path, 0, STATX_INO, (iptr)sb); 490 491 if (status > -4096UL || fw->inode != STATX_INODE(sb)) { 492 syscall2(SYS_inotify_rm_watch, ctx->inotify_fd, fw->handle); 493 fw->handle = INVALID_FILE; 494 file_changed = defer_file_reload(ctx, i, &sb); 495 } 496 if (file_changed) 497 fw->fn(fw->path, fw->user_ctx); 498 } 499 } 500 } 501 } 502 503 function struct stack_base * 504 new_stack(iz capacity) 505 { 506 u64 p = syscall6(SYS_mmap, 0, capacity, PROT_RW, MAP_ANON|MAP_PRIVATE, -1, 0); 507 if (p > -4096UL) 508 os_fatal(s8("new_stack: mmap failed\n")); 509 i64 count = capacity / sizeof(struct stack_base); 510 /* NOTE: remember the stack grows down; we want to start at the highest address */ 511 struct stack_base *result = (struct stack_base *)p + count - 1; 512 return result; 513 } 514 515 function OS_WAIT_ON_VALUE_FN(os_wait_on_value) 516 { 517 iptr timeout = 0; 518 i64 
timespec[2]; 519 if (timeout_ms != (u32)-1) { 520 timeout = (iptr)×pec; 521 timespec[0] = timeout_ms / 1000; 522 timespec[1] = (timeout_ms % 1000) * 1000000; 523 } 524 return syscall4(SYS_futex, (iptr)value, FUTEX_WAIT, current, timeout) == 0; 525 } 526 527 function OS_WAKE_WAITERS_FN(os_wake_waiters) 528 { 529 atomic_inc(sync, 1); 530 syscall4(SYS_futex, (iptr)sync, FUTEX_WAKE, I32_MAX, 0); 531 } 532 533 function void 534 thread_entry_point(struct stack_base *stack) 535 { 536 syscall2(SYS_prctl, PR_SET_NAME, (iptr)stack->name); 537 i64 result = stack->user_entry_point(stack->os, stack->user_context, &stack->futex); 538 syscall1(SYS_exit, result); 539 __builtin_unreachable(); 540 } 541 542 function OS_SPAWN_THREAD_FN(os_spawn_thread) 543 { 544 struct stack_base *stack = new_stack(KB(256)); 545 546 s8 name = c_str_to_s8(thread_name); 547 name.len = MIN(name.len, (iz)(sizeof(stack->name) - 1)); 548 mem_copy(stack->name, name.data, name.len); 549 550 stack->entry = thread_entry_point; 551 stack->user_entry_point = entry_point; 552 stack->user_context = user_context; 553 stack->os = os; 554 555 new_thread(stack); 556 557 return &stack->futex; 558 } 559 560 function void 561 usage(char *argv0, Stream *err) 562 { 563 stream_push_s8(err, s8("usage: ")); 564 stream_push_s8(err, c_str_to_s8(argv0)); 565 stream_push_s8(err, s8(" [-v] [-g COLxROW]\n")); 566 os_fatal(stream_to_s8(err)); 567 } 568 569 function s8 570 get_default_cmd(char **envp) 571 { 572 s8 result = envp_lookup(s8("SHELL="), envp); 573 if (result.len == 0) 574 result = s8("/bin/sh"); 575 return result; 576 } 577 578 function SLLVariableVector 579 parse_environment(Arena *a, char **envp) 580 { 581 SLLVariableVector env = {0}; 582 for (; *envp; envp++) { 583 s8 e = c_str_to_s8(*envp); 584 if (!s8_prefix_of(s8("TERM="), e)) { 585 Variable *var = push_struct(a, Variable); 586 var->type = VT_S8; 587 var->s8 = e; 588 SLLVariableVectorPush(a, &env, var); 589 } 590 } 591 592 Variable *var = push_struct(a, Variable); 
593 var->type = VT_S8; 594 /* TODO: don't pretend to be xterm ? */ 595 var->s8 = s8("TERM=xterm"); 596 SLLVariableVectorPush(a, &env, var); 597 598 return env; 599 }