Commit: c5faa279e4cf4ae3c73ded7e7f4fa5f0d2e8b8cb
Parent: 2908510e6bf25dbad116105e3113fe6e490112b7
Author: Randy Palamar
Date: Tue, 31 Dec 2024 09:09:04 -0700
support libc-free aarch64 build
A few things needed to be tweaked:
SYS_open -> SYS_openat
SYS_stat -> SYS_fstat
O_DIRECTORY is a different constant on aarch64 for some reason.
Building without libc on gcc doesn't seem to reduce the binary
size by all that much, but with clang we go from 34.4K to 17.1K,
which is pretty significant.
Benchmarks: (./jdict -d koujien 驀進)
clang (freestanding):
Time (mean ± σ): 168.5 ms ± 3.7 ms [User: 124.2 ms, System: 43.0 ms]
Range (min … max): 164.4 ms … 177.4 ms 16 runs
clang (posix):
Time (mean ± σ): 171.5 ms ± 2.3 ms [User: 121.2 ms, System: 49.2 ms]
Range (min … max): 167.6 ms … 176.5 ms 17 runs
gcc (freestanding):
Time (mean ± σ): 185.2 ms ± 2.1 ms [User: 141.4 ms, System: 42.7 ms]
Range (min … max): 182.3 ms … 189.5 ms 16 runs
gcc (posix):
Time (mean ± σ): 186.6 ms ± 2.2 ms [User: 137.6 ms, System: 47.0 ms]
Range (min … max): 182.6 ms … 190.7 ms 16 runs
Diffstat:
5 files changed, 193 insertions(+), 53 deletions(-)
diff --git a/build.sh b/build.sh
@@ -14,6 +14,10 @@ src=platform_posix.c
[ ! $debug ] && ldflags="-s $ldflags"
case $(uname -sm) in
+"Linux aarch64")
+ src=platform_linux_aarch64.c
+ cflags="${cflags} -nostdlib -ffreestanding -fno-stack-protector -Wl,--gc-sections"
+ ;;
"Linux x86_64")
src=platform_linux_amd64.c
cflags="${cflags} -nostdinc -nostdlib -ffreestanding -fno-stack-protector -Wl,--gc-sections"
diff --git a/jdict.c b/jdict.c
@@ -1,6 +1,23 @@
/* See LICENSE for license details. */
+#ifndef asm
+#ifdef __asm
+#define asm __asm
+#else
+#define asm __asm__
+#endif
+#endif
+
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+#ifdef __ARM_ARCH_ISA_A64
+/* TODO? debuggers just loop here forever and need a manual PC increment (jump +1 in gdb) */
+#define debugbreak() asm volatile ("brk 0xf000")
+#elif __x86_64__
+#define debugbreak() asm volatile ("int3; nop")
+#endif
+
#ifdef _DEBUG
-#define ASSERT(c) do { __asm("int3; nop"); } while (0)
+#define ASSERT(c) do { if (!(c)) debugbreak(); } while (0) /* break into the debugger only when c is false */
#else
#define ASSERT(c) {}
#endif
@@ -73,7 +90,6 @@ typedef struct {
static void __attribute__((noreturn)) os_exit(i32);
static b32 os_write(iptr, s8);
-static s8 os_read_whole_file(char *, Arena *, u32);
static b32 os_read_stdin(u8 *, size);
static PathStream os_begin_path_stream(Stream *, Arena *, u32);
diff --git a/platform_linux.c b/platform_linux.c
@@ -7,17 +7,28 @@
#define PROT_READ 0x01
#define PROT_WRITE 0x02
+#define PROT_RW 0x03
#define MAP_PRIVATE 0x02
-#define MAP_ANONYMOUS 0x20
+#define MAP_ANON 0x20
+
+#define AT_FDCWD (-100)
#define O_RDONLY 0x00
-#define O_DIRECTORY 0x10000
#define DT_REGULAR_FILE 8
+typedef __attribute__((aligned(16))) u8 stat_buffer[144];
+#define STAT_BUF_MEMBER(sb, t, off) (*(t *)((u8 *)(sb) + off))
+#define STAT_FILE_SIZE(sb) STAT_BUF_MEMBER(sb, u64, 48)
+
+#define DIRENT_RECLEN_OFF 16
+#define DIRENT_TYPE_OFF 18
+#define DIRENT_NAME_OFF 19
+
static i64 syscall1(i64, i64);
static i64 syscall2(i64, i64, i64);
static i64 syscall3(i64, i64, i64, i64);
+static i64 syscall4(i64, i64, i64, i64, i64);
static i64 syscall6(i64, i64, i64, i64, i64, i64, i64);
typedef struct {
@@ -38,7 +49,7 @@ static void
os_exit(i32 code)
{
syscall1(SYS_exit, code);
- __builtin_unreachable();
+ unreachable();
}
static b32
@@ -60,25 +71,25 @@ os_read_stdin(u8 *buf, size count)
}
static s8
-os_read_whole_file(char *file, Arena *a, u32 arena_flags)
+os_read_whole_file_at(char *file, iptr dir_fd, Arena *a, u32 arena_flags)
{
- __attribute((aligned(16))) u8 stat_buf[STAT_BUF_SIZE];
- u64 status = syscall2(SYS_stat, (iptr)file, (iptr)stat_buf);
- if (status > -4096UL) {
- stream_append_s8(&error_stream, s8("failed to stat: "));
+ u64 fd = syscall4(SYS_openat, dir_fd, (iptr)file, O_RDONLY, 0);
+ if (fd > -4096UL) {
+ stream_append_s8(&error_stream, s8("failed to open: "));
stream_append_s8(&error_stream, cstr_to_s8(file));
die(&error_stream);
}
- u64 fd = syscall3(SYS_open, (iptr)file, O_RDONLY, 0);
- if (fd > -4096UL) {
- stream_append_s8(&error_stream, s8("failed to open: "));
+ stat_buffer sb;
+ u64 status = syscall2(SYS_fstat, fd, (iptr)sb);
+ if (status > -4096UL) {
+ stream_append_s8(&error_stream, s8("failed to stat: "));
stream_append_s8(&error_stream, cstr_to_s8(file));
die(&error_stream);
}
- i64 *file_size = (i64 *)(stat_buf + STAT_SIZE_OFF);
- s8 result = {.len = *file_size, .s = alloc(a, u8, *file_size, arena_flags|ARENA_NO_CLEAR)};
+ u64 file_size = STAT_FILE_SIZE(sb);
+ s8 result = {.len = file_size, .s = alloc(a, u8, file_size, arena_flags|ARENA_NO_CLEAR)};
size rlen = syscall3(SYS_read, fd, (iptr)result.s, result.len);
syscall1(SYS_close, fd);
@@ -92,33 +103,31 @@ os_read_whole_file(char *file, Arena *a, u32 arena_flags)
}
static Arena
-os_new_arena(size cap)
+os_new_arena(size requested_size)
{
- Arena a = {0};
+ Arena result = {0};
- size pagesize = PAGESIZE;
- if (cap % pagesize != 0)
- cap += pagesize - cap % pagesize;
+ size alloc_size = requested_size;
+ if (alloc_size % PAGESIZE != 0)
+ alloc_size += PAGESIZE - alloc_size % PAGESIZE;
- u64 p = syscall6(SYS_mmap, 0, cap, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- if (p > -4096UL) {
- return (Arena){0};
- } else {
- a.beg = (u8 *)p;
- }
- a.end = a.beg + cap;
+ u64 memory = syscall6(SYS_mmap, 0, alloc_size, PROT_RW, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if (memory <= -4096UL) {
+ result.beg = (void *)memory;
+ result.end = result.beg + alloc_size;
#ifdef _DEBUG_ARENA
- a.min_capacity_remaining = cap;
+ result.min_capacity_remaining = alloc_size;
#endif
- return a;
+ }
+
+ return result;
}
static PathStream
os_begin_path_stream(Stream *dir_name, Arena *a, u32 arena_flags)
{
stream_append_byte(dir_name, 0);
- u64 fd = syscall3(SYS_open, (iptr)dir_name->data, O_DIRECTORY|O_RDONLY, 0);
- dir_name->widx--;
+ u64 fd = syscall4(SYS_openat, AT_FDCWD, (iptr)dir_name->data, O_DIRECTORY|O_RDONLY, 0);
if (fd > -4096UL) {
stream_append_s8(&error_stream, s8("os_begin_path_stream: failed to open: "));
@@ -126,8 +135,6 @@ os_begin_path_stream(Stream *dir_name, Arena *a, u32 arena_flags)
die(&error_stream);
}
- stream_append_byte(dir_name, '/');
-
LinuxDirectoryStream *lds = alloc(a, LinuxDirectoryStream, 1, arena_flags);
lds->fd = fd;
return (PathStream){.dir_name = dir_name, .dirfd = lds};
@@ -176,10 +183,8 @@ os_get_valid_file(PathStream *ps, s8 match_prefix, Arena *a, u32 arena_flags)
}
}
if (valid) {
- Stream dir_name = *ps->dir_name;
- stream_append_s8(&dir_name, name);
- result = os_read_whole_file((char *)dir_name.data, a,
- arena_flags);
+ result = os_read_whole_file_at((char *)name.s, lds->fd,
+ a, arena_flags);
break;
}
}
diff --git a/platform_linux_aarch64.c b/platform_linux_aarch64.c
@@ -0,0 +1,115 @@
+/* See LICENSE for license details. */
+typedef unsigned char u8;
+typedef signed long i64;
+typedef unsigned long u64;
+typedef signed int i32;
+typedef unsigned int u32;
+typedef unsigned int b32;
+typedef unsigned short u16;
+typedef signed long size;
+typedef unsigned long usize;
+typedef signed long iptr;
+
+#define SYS_openat 56
+#define SYS_close 57
+#define SYS_getdents64 61
+#define SYS_read 63
+#define SYS_write 64
+#define SYS_fstat 80
+#define SYS_exit 93
+#define SYS_mmap 222
+
+/* NOTE(rnp): technically arm64 can have 4K, 16K or 64K pages but we will just assume 64K */
+#define PAGESIZE 65536
+
+#define O_DIRECTORY 0x4000
+
+#include "platform_linux.c"
+
+/* One-argument Linux syscall: number in x8, argument in x0, result returned from x0. */
+static FORCE_INLINE i64
+syscall1(i64 n, i64 a1)
+{
+	register i64 nr  asm("x8") = n;
+	register i64 ret asm("x0") = a1;
+
+	/* "+r": x0 carries the first argument in and the kernel's return value out */
+	asm volatile ("svc 0" : "+r"(ret) : "r"(nr) : "memory", "cc");
+
+	return ret;
+}
+
+/* Two-argument Linux syscall: number in x8, arguments in x0-x1, result returned from x0. */
+static FORCE_INLINE i64
+syscall2(i64 n, i64 a1, i64 a2)
+{
+	register i64 nr   asm("x8") = n;
+	register i64 ret  asm("x0") = a1;
+	register i64 arg2 asm("x1") = a2;
+
+	/* "+r": x0 carries the first argument in and the kernel's return value out */
+	asm volatile ("svc 0" : "+r"(ret) : "r"(nr), "r"(arg2) : "memory", "cc");
+
+	return ret;
+}
+
+/* Three-argument Linux syscall: number in x8, arguments in x0-x2, result returned from x0. */
+static FORCE_INLINE i64
+syscall3(i64 n, i64 a1, i64 a2, i64 a3)
+{
+	register i64 nr   asm("x8") = n;
+	register i64 ret  asm("x0") = a1;
+	register i64 arg2 asm("x1") = a2;
+	register i64 arg3 asm("x2") = a3;
+
+	/* "+r": x0 carries the first argument in and the kernel's return value out */
+	asm volatile ("svc 0" : "+r"(ret) : "r"(nr), "r"(arg2), "r"(arg3) : "memory", "cc");
+
+	return ret;
+}
+
+/* Four-argument Linux syscall: number in x8, arguments in x0-x3, result returned from x0. */
+static FORCE_INLINE i64
+syscall4(i64 n, i64 a1, i64 a2, i64 a3, i64 a4)
+{
+	register i64 nr   asm("x8") = n;
+	register i64 ret  asm("x0") = a1;
+	register i64 arg2 asm("x1") = a2;
+	register i64 arg3 asm("x2") = a3;
+	register i64 arg4 asm("x3") = a4;
+
+	/* "+r": x0 carries the first argument in and the kernel's return value out */
+	asm volatile ("svc 0" : "+r"(ret) : "r"(nr), "r"(arg2), "r"(arg3), "r"(arg4) : "memory", "cc");
+
+	return ret;
+}
+
+/* Six-argument Linux syscall: number in x8, arguments in x0-x5, result returned from x0. */
+static FORCE_INLINE i64
+syscall6(i64 n, i64 a1, i64 a2, i64 a3, i64 a4, i64 a5, i64 a6)
+{
+	register i64 nr   asm("x8") = n;
+	register i64 ret  asm("x0") = a1;
+	register i64 arg2 asm("x1") = a2;
+	register i64 arg3 asm("x2") = a3;
+	register i64 arg4 asm("x3") = a4;
+	register i64 arg5 asm("x4") = a5;
+	register i64 arg6 asm("x5") = a6;
+
+	/* "+r": x0 carries the first argument in and the kernel's return value out */
+	asm volatile ("svc 0" : "+r"(ret)
+		: "r"(nr), "r"(arg2), "r"(arg3), "r"(arg4), "r"(arg5), "r"(arg6) : "memory", "cc");
+	return ret;
+}
+
+asm ( /* process entry point: unpack the initial stack into x0-x2 and call linux_main */
+ ".global _start\n"
+ "_start:\n"
+ " ldr x0, [sp], #8\n" /* x0 = word at the top of the stack (argc at Linux process entry); sp += 8 */
+ " mov x1, sp\n" /* x1 = address right after that word (start of the argv array) */
+ " add x2, sp, x0, lsl #3\n" /* x2 = x1 + x0 * 8, i.e. just past the x0 argv pointers */
+ " add x2, x2, #8\n" /* skip one more slot (argv's NULL terminator); presumably envp - confirm against linux_main */
+ " sub sp, sp, #8\n" /* undo the post-increment so sp is back at its entry value before the call */
+ " bl linux_main\n" /* call linux_main with x0, x1, x2 as set up above */
+ " brk #0\n" /* trap if linux_main ever returns */
+);
diff --git a/platform_linux_amd64.c b/platform_linux_amd64.c
@@ -10,31 +10,18 @@ typedef signed long size;
typedef unsigned long usize;
typedef signed long iptr;
-#ifndef asm
-#ifdef __asm
-#define asm __asm
-#else
-#define asm __asm__
-#endif
-#endif
-
#define SYS_read 0
#define SYS_write 1
-#define SYS_open 2
#define SYS_close 3
-#define SYS_stat 4
+#define SYS_fstat 5
#define SYS_mmap 9
#define SYS_exit 60
#define SYS_getdents64 217
+#define SYS_openat 257
#define PAGESIZE 4096
-#define STAT_BUF_SIZE 144
-#define STAT_SIZE_OFF 48
-
-#define DIRENT_RECLEN_OFF 16
-#define DIRENT_TYPE_OFF 18
-#define DIRENT_NAME_OFF 19
+#define O_DIRECTORY 0x10000
#include "platform_linux.c"
@@ -75,6 +62,19 @@ syscall3(i64 n, i64 a1, i64 a2, i64 a3)
}
static i64
+syscall4(i64 n, i64 a1, i64 a2, i64 a3, i64 a4) /* raw 4-argument syscall; returns the value left in rax */
+{
+ i64 result;
+ register i64 r10 asm("r10") = a4; /* pin the 4th argument to r10 with a register variable */
+ asm volatile ("syscall"
+ : "=a"(result) /* output: rax */
+ : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10) /* rax = n, rdi = a1, rsi = a2, rdx = a3, r10 = a4 */
+ : "rcx", "r11", "memory" /* registers declared clobbered across the syscall, plus a memory barrier */
+ );
+ return result;
+}
+
+static i64
syscall6(i64 n, i64 a1, i64 a2, i64 a3, i64 a4, i64 a5, i64 a6)
{
i64 result;