Commit: 9434ba782ac9df4c7ddb2461b89a8b56812e4dd9
Parent: 904252067b7f7d1d79f4096b7f2363042d70dcd6
Author: Randy Palamar
Date: Fri, 27 Dec 2024 22:45:43 -0700
port to aarch64
There are a number of syscalls which don't exist on aarch64; they
needed to be replaced with their modern counterparts:
dup2 -> dup3
fork -> clone
open -> openat
stat -> statx
These all exist with the same semantics on amd64 so there
shouldn't be any problem with using them there as well.
Some intrisics needed to be replaced as well and they are now
defined in the intrisics.c file.
Diffstat:
9 files changed, 269 insertions(+), 63 deletions(-)
diff --git a/debug.h b/debug.h
@@ -108,7 +108,7 @@ static DebugTable g_debug_table;
u64 event_index = atomic_fetch_add(&g_debug_table.event_array_event_index, 1); \
ASSERT((event_index & 0xFFFFFFFF) < MAX_DEBUG_EVENT_COUNT); \
DebugEvent *event = g_debug_table.events[event_index >> 32] + (event_index & 0xFFFFFFFF); \
- event->clock = __rdtsc(); \
+ event->clock = rdtsc(); \
event->metadata_index = counter; \
event->type = event_type
diff --git a/intrinsics.c b/intrinsics.c
@@ -0,0 +1,24 @@
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+#define clz_u32(a) __builtin_clz(a)
+#define ctz_u32(a) __builtin_ctz(a)
+
+#ifdef __ARM_ARCH_ISA_A64
+/* TODO? debuggers just loop here forever and need a manual PC increment (jump +1 in gdb) */
+#define debugbreak() asm volatile ("brk 0xf000")
+
+static FORCE_INLINE u64
+rdtsc(void)
+{
+ register u64 cntvct asm("x0");
+ asm volatile ("mrs x0, cntvct_el0" : "=x"(cntvct));
+ return cntvct;
+}
+#elif __x86_64__
+#include <immintrin.h>
+
+#define debugbreak() asm volatile ("int3; nop")
+
+#define rdtsc() __rdtsc()
+
+#endif
diff --git a/platform_linux_aarch64.c b/platform_linux_aarch64.c
@@ -0,0 +1,170 @@
+/* TODO: generate this whole file with a metaprogram */
+
+/* See LICENSE for license details. */
+#ifndef asm
+#ifdef __asm
+#define asm __asm
+#else
+#define asm __asm__
+#endif
+#endif
+
+#define SYS_dup3 24
+#define SYS_inotify_init1 26
+#define SYS_inotify_add_watch 27
+#define SYS_inotify_rm_watch 28
+#define SYS_ioctl 29
+#define SYS_ftruncate 46
+#define SYS_openat 56
+#define SYS_close 57
+#define SYS_read 63
+#define SYS_write 64
+#define SYS_pwrite64 68
+#define SYS_pselect6 72
+#define SYS_exit_group 94
+#define SYS_futex 98
+#define SYS_clock_gettime 113
+#define SYS_setsid 157
+#define SYS_prctl 167
+#define SYS_clone 220
+#define SYS_execve 221
+#define SYS_mmap 222
+#define SYS_madvise 233
+#define SYS_wait4 260
+#define SYS_memfd_create 279
+#define SYS_statx 291
+
+#define SIGCHLD 17
+
+/* NOTE(rnp): technically arm64 can have 4K, 16K or 64K pages but we will just assume 64K */
+#define PAGE_SIZE 65536
+
+/* TODO: check that this is equivalent */
+typedef u64 sys_fd_set[16];
+
+static FORCE_INLINE i64
+syscall0(i64 n)
+{
+ register i64 x8 asm("x8") = n;
+ register i64 x0 asm("x0");
+ asm volatile ("svc 0"
+ : "=x"(x0)
+ : "x"(x8)
+ : "memory", "cc"
+ );
+ return x0;
+}
+
+static FORCE_INLINE i64
+syscall1(i64 n, i64 a1)
+{
+ register i64 x8 asm("x8") = n;
+ register i64 x0 asm("x0") = a1;
+ asm volatile ("svc 0"
+ : "+x"(x0)
+ : "x"(x8)
+ : "memory", "cc"
+ );
+ return x0;
+}
+
+static FORCE_INLINE i64
+syscall2(i64 n, i64 a1, i64 a2)
+{
+ register i64 x8 asm("x8") = n;
+ register i64 x0 asm("x0") = a1;
+ register i64 x1 asm("x1") = a2;
+ asm volatile ("svc 0"
+ : "+x"(x0)
+ : "x"(x8), "x"(x1)
+ : "memory", "cc"
+ );
+ return x0;
+}
+
+static FORCE_INLINE i64
+syscall3(i64 n, i64 a1, i64 a2, i64 a3)
+{
+ register i64 x8 asm("x8") = n;
+ register i64 x0 asm("x0") = a1;
+ register i64 x1 asm("x1") = a2;
+ register i64 x2 asm("x2") = a3;
+ asm volatile ("svc 0"
+ : "+x"(x0)
+ : "x"(x8), "x"(x1), "x"(x2)
+ : "memory", "cc"
+ );
+ return x0;
+}
+
+static FORCE_INLINE i64
+syscall4(i64 n, i64 a1, i64 a2, i64 a3, i64 a4)
+{
+ register i64 x8 asm("x8") = n;
+ register i64 x0 asm("x0") = a1;
+ register i64 x1 asm("x1") = a2;
+ register i64 x2 asm("x2") = a3;
+ register i64 x3 asm("x3") = a4;
+ asm volatile ("svc 0"
+ : "+x"(x0)
+ : "x"(x8), "x"(x1), "x"(x2), "x"(x3)
+ : "memory", "cc"
+ );
+ return x0;
+}
+
+static FORCE_INLINE i64
+syscall5(i64 n, i64 a1, i64 a2, i64 a3, i64 a4, i64 a5)
+{
+ register i64 x8 asm("x8") = n;
+ register i64 x0 asm("x0") = a1;
+ register i64 x1 asm("x1") = a2;
+ register i64 x2 asm("x2") = a3;
+ register i64 x3 asm("x3") = a4;
+ register i64 x4 asm("x4") = a5;
+ asm volatile ("svc 0"
+ : "+x"(x0)
+ : "x"(x8), "x"(x1), "x"(x2), "x"(x3), "x"(x4)
+ : "memory", "cc"
+ );
+ return x0;
+}
+
+static FORCE_INLINE i64
+syscall6(i64 n, i64 a1, i64 a2, i64 a3, i64 a4, i64 a5, i64 a6)
+{
+ register i64 x8 asm("x8") = n;
+ register i64 x0 asm("x0") = a1;
+ register i64 x1 asm("x1") = a2;
+ register i64 x2 asm("x2") = a3;
+ register i64 x3 asm("x3") = a4;
+ register i64 x4 asm("x4") = a5;
+ register i64 x5 asm("x5") = a6;
+ asm volatile ("svc 0"
+ : "+x"(x0)
+ : "x"(x8), "x"(x1), "x"(x2), "x"(x3), "x"(x4), "x"(x5)
+ : "memory", "cc"
+ );
+ return x0;
+}
+
+__attribute__((naked))
+static i64
+new_thread(void *stack_base)
+{
+ asm volatile (
+ "mov x8, #220\n" // SYS_clone
+ "mov x1, x0\n" // arg2 = new stack
+ "mov x0, #0xF00\n" // arg1 = clone flags (VM|FS|FILES|SIGHAND|THREAD|SYSVMEM)
+ "movk x0, #0x5, lsl #16\n" // no 32 bit immediates in general on arm
+ "svc 0\n"
+ "cbnz x0, 1f\n" // don't clobber syscall return in calling thread
+ "mov x0, sp\n"
+ "ldr x1, [sp]\n" // arm doesn't take the return address from the stack;
+ "blr x1\n" // we need to load it and branch to it
+ "1: ret"
+ ::: "x8", "x1", "memory", "cc"
+ );
+}
+
+#include "platform_linux_common.c"
diff --git a/platform_linux_amd64.c b/platform_linux_amd64.c
@@ -10,17 +10,12 @@
/* TODO: X macro that defines all of these with the appropriate function/macro */
#define SYS_read 0
#define SYS_write 1
-#define SYS_open 2
#define SYS_close 3
-#define SYS_stat 4
-#define SYS_fstat 5
#define SYS_mmap 9
#define SYS_ioctl 16
-#define SYS_pwrite 18
+#define SYS_pwrite64 18
#define SYS_madvise 28
-#define SYS_dup2 33
#define SYS_clone 56
-#define SYS_fork 57
#define SYS_execve 59
#define SYS_wait4 61
#define SYS_ftruncate 77
@@ -32,16 +27,16 @@
#define SYS_exit_group 231
#define SYS_inotify_add_watch 254
#define SYS_inotify_rm_watch 255
-#define SYS_pselect 270
+#define SYS_openat 257
+#define SYS_pselect6 270
+#define SYS_dup3 292
#define SYS_inotify_init1 294
#define SYS_memfd_create 319
+#define SYS_statx 332
-#define PAGE_SIZE 4096
+#define SIGCHLD 17
-typedef __attribute__((aligned(16))) u8 stat_buffer[144];
-#define STAT_BUF_MEMBER(sb, t, off) (*(t *)((u8 *)(sb) + off))
-#define STAT_INODE(sb) STAT_BUF_MEMBER(sb, i64, 8)
-#define STAT_FILE_SIZE(sb) STAT_BUF_MEMBER(sb, i64, 48)
+#define PAGE_SIZE 4096
typedef u64 sys_fd_set[16];
diff --git a/platform_linux_common.c b/platform_linux_common.c
@@ -30,6 +30,9 @@
#define IN_CLOSE_NOWRITE 0x00000010
#define IN_MODIFY 0x00000002
+#define AT_EMPTY_PATH 0x1000
+#define AT_FDCWD (-100)
+
#define LINUX_INOTIFY_MASK (IN_CLOSE_WRITE|IN_CLOSE_NOWRITE|IN_MODIFY)
#define WNOHANG 1
@@ -43,6 +46,14 @@
#define FD_SET(d, s) ((s)[(d) / (8 * sizeof(*(s)))] |= (1ULL << ((d) % (8 * sizeof(*(s))))))
#define FD_ISSET(d, s) ((s)[(d) / (8 * sizeof(*(s)))] & (1ULL << ((d) % (8 * sizeof(*(s))))))
+typedef __attribute__((aligned(16))) u8 statx_buffer[256];
+#define STATX_BUF_MEMBER(sb, t, off) (*(t *)((u8 *)(sb) + off))
+#define STATX_INODE(sb) STATX_BUF_MEMBER(sb, u64, 32)
+#define STATX_FILE_SIZE(sb) STATX_BUF_MEMBER(sb, u64, 40)
+
+#define STATX_INO 0x00000100U
+#define STATX_SIZE 0x00000200U
+
#define TIOCSCTTY 0x540E
#define TIOCSWINSZ 0x5414
#define TIOCSPTLCK 0x40045431 /* (un)lock pty */
@@ -69,7 +80,7 @@ typedef struct {
platform_file_watch_callback_fn *fn;
u8 *path;
void *user_ctx;
- i32 inode;
+ u64 inode;
i32 handle;
} linux_file_watch;
@@ -149,7 +160,7 @@ os_file_attribute_to_mode(u32 attr)
static iptr
os_open(u8 *name, u32 attr)
{
- iptr result = syscall3(SYS_open, (iptr)name, os_file_attribute_to_mode(attr), 0660);
+ iptr result = syscall4(SYS_openat, AT_FDCWD, (iptr)name, os_file_attribute_to_mode(attr), 0660);
if (result > -4096UL)
result = INVALID_FILE;
return result;
@@ -158,7 +169,7 @@ os_open(u8 *name, u32 attr)
static b32
os_offset_write(iptr file, s8 raw, size offset)
{
- size result = syscall4(SYS_pwrite, file, (iptr)raw.data, raw.len, offset);
+ size result = syscall4(SYS_pwrite64, file, (iptr)raw.data, raw.len, offset);
return result == raw.len;
}
@@ -191,12 +202,12 @@ static PLATFORM_READ_FILE_FN(os_read_file)
{
s8 result = {0};
- stat_buffer sb;
- i32 fd = syscall3(SYS_open, (iptr)path, O_RDONLY, 0);
- i64 status = syscall2(SYS_fstat, fd, (iptr)sb);
+ statx_buffer sb;
+ i32 fd = syscall4(SYS_openat, AT_FDCWD, (iptr)path, O_RDONLY, 0);
+ i64 status = syscall5(SYS_statx, fd, 0, AT_EMPTY_PATH, STATX_SIZE, (iptr)sb);
if (fd <= -4096UL && status == 0) {
- result = s8alloc(a, STAT_FILE_SIZE(sb));
+ result = s8alloc(a, STATX_FILE_SIZE(sb));
size rlen = os_read(fd, result);
syscall1(SYS_close, fd);
if (result.len != rlen)
@@ -246,15 +257,15 @@ os_map_file(char *path, i32 mode, i32 perm)
default: ASSERT(0);
}
- stat_buffer sb;
- i32 fd = syscall3(SYS_open, (iptr)path, open_mode, 0);
- i64 status = syscall2(SYS_fstat, fd, (iptr)sb);
+ statx_buffer sb;
+ i32 fd = syscall4(SYS_openat, AT_FDCWD, (iptr)path, open_mode, 0);
+ i64 status = syscall5(SYS_statx, fd, 0, AT_EMPTY_PATH, STATX_SIZE, (iptr)sb);
if (fd <= -4096UL && status == 0) {
- i64 memory = syscall6(SYS_mmap, 0, STAT_FILE_SIZE(sb), mode, perm, fd, 0);
+ i64 memory = syscall6(SYS_mmap, 0, STATX_FILE_SIZE(sb), mode, perm, fd, 0);
if (memory <= -4096UL) {
result.data = (u8 *)memory;
- result.len = STAT_FILE_SIZE(sb);
+ result.len = STATX_FILE_SIZE(sb);
}
syscall1(SYS_close, fd);
}
@@ -314,7 +325,7 @@ os_fork_child(s8 cmd, c8 **envp)
/* NOTE: we open in non-blocking mode so that we can try and fully drain the pipe
* before processing. Otherwise a single read will be limited to the page size */
- i64 m = syscall3(SYS_open, (iptr)"/dev/ptmx", O_RDWR|O_NOCTTY|O_NONBLOCK|O_CLOEXEC, 0);
+ i64 m = syscall4(SYS_openat, AT_FDCWD, (iptr)"/dev/ptmx", O_RDWR|O_NOCTTY|O_NONBLOCK|O_CLOEXEC, 0);
if (m > -4096UL) os_fatal(s8("os_fork_child: failed to open master terminal\n"));
/* NOTE: first unlock the tty, then get a valid pty number */
if (syscall3(SYS_ioctl, m, TIOCSPTLCK, (iptr)&n) || syscall3(SYS_ioctl, m, TIOCGPTN, (iptr)&n))
@@ -325,17 +336,17 @@ os_fork_child(s8 cmd, c8 **envp)
stream_push_i64(&sbuf, n);
stream_push_byte(&sbuf, 0);
- i64 s = syscall3(SYS_open, (iptr)sbuf.buf, O_RDWR|O_NOCTTY, 0);
+ i64 s = syscall4(SYS_openat, AT_FDCWD, (iptr)sbuf.buf, O_RDWR|O_NOCTTY, 0);
if (s > -4096UL) os_fatal(s8("os_fork_child: failed to open slave terminal\n"));
- i64 pid = syscall1(SYS_fork, 0);
+ i64 pid = syscall2(SYS_clone, SIGCHLD, 0);
if (pid > -4096UL) os_fatal(s8("os_fork_child: failed to fork a child\n"));
if (pid == 0) {
syscall1(SYS_setsid, 0);
- syscall2(SYS_dup2, s, 0);
- syscall2(SYS_dup2, s, 1);
- syscall2(SYS_dup2, s, 2);
+ syscall3(SYS_dup3, s, 0, 0);
+ syscall3(SYS_dup3, s, 1, 0);
+ syscall3(SYS_dup3, s, 2, 0);
syscall3(SYS_ioctl, s, TIOCSCTTY, 0);
if (s > 2) syscall1(SYS_close, s);
ASSERT(cmd.data[cmd.len] == 0);
@@ -362,17 +373,17 @@ static PLATFORM_SET_TERMINAL_SIZE_FN(os_set_terminal_size)
static PLATFORM_ADD_FILE_WATCH_FN(linux_add_file_watch)
{
- stat_buffer sb;
- syscall2(SYS_stat, (iptr)path, (iptr)sb);
-
i32 wd = syscall3(SYS_inotify_add_watch, linux_ctx.inotify_fd, (iptr)path, LINUX_INOTIFY_MASK);
- if (wd < 4096UL) {
+ if (wd <= -4096UL) {
+ statx_buffer sb;
+ syscall5(SYS_statx, AT_FDCWD, (iptr)path, 0, STATX_INO, (iptr)sb);
+
i32 idx = linux_ctx.file_watch_count++;
ASSERT(idx < ARRAY_COUNT(linux_ctx.file_watches));
linux_ctx.file_watches[idx].fn = fn;
linux_ctx.file_watches[idx].path = path;
linux_ctx.file_watches[idx].handle = wd;
- linux_ctx.file_watches[idx].inode = STAT_INODE(sb);
+ linux_ctx.file_watches[idx].inode = STATX_INODE(sb);
linux_ctx.file_watches[idx].user_ctx = user_ctx;
}
}
@@ -384,14 +395,14 @@ try_deferred_file_loads(PlatformCtx *ctx)
while (file) {
linux_file_watch *fw = ctx->file_watches + file->index;
- stat_buffer sb;
- syscall2(SYS_stat, (iptr)fw->path, (iptr)sb);
+ statx_buffer sb;
+ syscall5(SYS_statx, AT_FDCWD, (iptr)fw->path, 0, STATX_INO, (iptr)sb);
fw->handle = syscall3(SYS_inotify_add_watch, ctx->inotify_fd, (iptr)fw->path,
LINUX_INOTIFY_MASK);
- fw->inode = STAT_INODE(sb);
+ fw->inode = STATX_INODE(sb);
- if (fw->handle < -4096UL) {
+ if (fw->handle <= -4096UL) {
fw->fn(fw->path, fw->user_ctx);
file->last->next = file->next;
file->next = ctx->file_reload_free_list;
@@ -412,15 +423,15 @@ try_deferred_file_loads(PlatformCtx *ctx)
}
static b32
-defer_file_reload(PlatformCtx *ctx, i32 file_watch_index, stat_buffer *sb)
+defer_file_reload(PlatformCtx *ctx, i32 file_watch_index, statx_buffer *sb)
{
b32 result = 1;
linux_file_watch *fw = ctx->file_watches + file_watch_index;
- fw->inode = STAT_INODE(*sb);
+ fw->inode = STATX_INODE(*sb);
fw->handle = syscall3(SYS_inotify_add_watch, ctx->inotify_fd, (iptr)fw->path, LINUX_INOTIFY_MASK);
- if (fw->handle >= -4096UL) {
+ if (fw->handle > -4096UL) {
result = 0;
linux_deferred_file_reload_queue *new = ctx->file_reload_free_list;
@@ -463,9 +474,10 @@ dispatch_file_watch_events(PlatformCtx *ctx)
/* NOTE: some editors and the compiler will rewrite a file
* completely and thus the inode will change; here we
* detect that and restart the watch */
- stat_buffer sb = {0};
- syscall2(SYS_stat, (iptr)fw->path, (iptr)sb);
- if (fw->inode != STAT_INODE(sb)) {
+ statx_buffer sb;
+ i64 status = syscall5(SYS_statx, AT_FDCWD, (iptr)fw->path, 0, STATX_INO, (iptr)sb);
+
+ if (status > -4096UL || fw->inode != STATX_INODE(sb)) {
syscall2(SYS_inotify_rm_watch, ctx->inotify_fd, fw->handle);
fw->handle = INVALID_FILE;
file_changed = defer_file_reload(ctx, i, &sb);
diff --git a/platform_linux_x11.c b/platform_linux_x11.c
@@ -277,7 +277,7 @@ update_input(PlatformCtx *ctx)
i32 max_fd = MAX(ctx->inotify_fd, ctx->child.handle);
max_fd = MAX(max_fd, ctx->win_fd);
- syscall6(SYS_pselect, max_fd + 1, (iptr)rfd, 0, 0, (iptr)timeout, 0);
+ syscall6(SYS_pselect6, max_fd + 1, (iptr)rfd, 0, 0, (iptr)timeout, 0);
input->data_available = FD_ISSET(ctx->child.handle, rfd) != 0;
@@ -531,5 +531,8 @@ main(i32 argc, char *argv[], char *envp[])
}
}
+ syscall1(SYS_exit_group, 0);
+ __builtin_unreachable();
+
return 0;
}
diff --git a/terminal.c b/terminal.c
@@ -384,14 +384,14 @@ next_tab_position(Term *t, b32 backwards)
u32 result = 32 * idx;
if (backwards) {
- u32 zeroes = _lzcnt_u32(t->tabs[idx--] & mask);
+ u32 zeroes = clz_u32(t->tabs[idx--] & mask);
while (idx < ARRAY_COUNT(t->tabs) && zeroes == 32)
- zeroes = _lzcnt_u32(t->tabs[idx--]);
+ zeroes = clz_u32(t->tabs[idx--]);
result = 32 * (idx + 1) + 32 - zeroes;
} else {
- u32 zeroes = _tzcnt_u32(t->tabs[idx++] & ~mask);
+ u32 zeroes = ctz_u32(t->tabs[idx++] & ~mask);
while (idx < ARRAY_COUNT(t->tabs) && zeroes == 32)
- zeroes = _tzcnt_u32(t->tabs[idx++]);
+ zeroes = ctz_u32(t->tabs[idx++]);
result = 32 * (idx - 1) + zeroes + 1;
}
ASSERT(result < t->size.w);
diff --git a/util.c b/util.c
@@ -11,7 +11,7 @@ safe_left_shift(u32 n, u32 shift)
static u32
round_down_power_of_2(u32 a)
{
- u32 result = 0x80000000UL >> _lzcnt_u32(a);
+ u32 result = 0x80000000UL >> clz_u32(a);
return result;
}
diff --git a/vtgl.h b/vtgl.h
@@ -5,8 +5,6 @@
#include <stddef.h>
#include <stdint.h>
-#include <immintrin.h>
-
#ifndef asm
#define asm __asm__
#endif
@@ -19,14 +17,6 @@
#define static_assert _Static_assert
#endif
-#ifdef _DEBUG
-#define ASSERT(c) do { if (!(c)) asm("int3; nop"); } while(0)
-#define DEBUG_EXPORT
-#else
-#define ASSERT(c) do { (void)(c); } while(0)
-#define DEBUG_EXPORT static
-#endif
-
#define atomic_and(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_RELEASE);
#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_RELEASE);
#define atomic_load(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
@@ -82,6 +72,16 @@ typedef ptrdiff_t iptr;
typedef ptrdiff_t size;
typedef size_t usize;
+#include "intrinsics.c"
+
+#ifdef _DEBUG
+#define ASSERT(c) do { if (!(c)) debugbreak(); } while(0)
+#define DEBUG_EXPORT
+#else
+#define ASSERT(c) do { (void)(c); } while(0)
+#define DEBUG_EXPORT static
+#endif
+
typedef struct { void *memory; size size; } MemoryBlock;
typedef struct { u8 *beg, *end; } Arena;
@@ -393,7 +393,9 @@ typedef VTGL_HANDLE_KEYS_FN(vtgl_handle_keys_fn);
#include "debug.h"
-#if defined(__linux__) && (defined(__x86_64__) || defined(_M_X64))
+#ifdef __ARM_ARCH_ISA_A64
+#include "platform_linux_aarch64.c"
+#elif defined(__linux__) && (defined(__x86_64__) || defined(_M_X64))
#include "platform_linux_amd64.c"
#else
#error Unsupported Platform!