jdict

command line tool for looking up terms in yomidict dictionaries
git clone anongit@rnpnr.xyz:jdict.git
Log | Files | Refs | Feed | README | LICENSE

Commit: cf9f08e76699db10a7556d17c1b41ea11cb57b00
Parent: 5742fd0f2ba55cdb382cc64eae2d0b130ec25ef0
Author: Randy Palamar
Date:   Wed, 16 Oct 2024 22:36:45 -0600

support building without libc on linux-x86_64

The binaries have no significant performance difference (maybe
the libc-free one is slightly faster), but the libc-free one is 23.6K
whereas the libc one statically linked against musl is 45.2K. This
size difference is way worse if the binaries are not stripped,
i.e. if debug symbols are included.

Diffstat:
Mbuild.sh | 7+++++++
Mjdict.c | 5+++--
Aplatform_linux.c | 209+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aplatform_linux_amd64.c | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mplatform_posix.c | 4+++-
5 files changed, 313 insertions(+), 3 deletions(-)

diff --git a/build.sh b/build.sh
@@ -12,4 +12,11 @@ src=platform_posix.c
 [ $debug ] && cflags="$cflags -O0 -ggdb -D_DEBUG"
 [ ! $debug ] && ldflags="-s $ldflags"
 
+case $(uname -sm) in
+"Linux x86_64")
+	src=platform_linux_amd64.c
+	cflags="${cflags} -nostdlib -ffreestanding -fno-stack-protector -Wl,--gc-sections"
+	;;
+esac
+
 ${cc} $cflags $ldflags $src -o jdict
diff --git a/jdict.c b/jdict.c
@@ -7,6 +7,7 @@ typedef uint64_t u64;
 typedef int32_t i32;
 typedef uint32_t u32;
 typedef uint32_t b32;
+typedef uint16_t u16;
 typedef ptrdiff_t size;
 typedef ptrdiff_t iptr;
 
@@ -90,7 +91,7 @@ static void os_write(iptr, s8);
 static s8 os_read_whole_file(char *, Arena *, u32);
 static b32 os_read_stdin(u8 *, size);
 
-static PathStream os_begin_path_stream(Stream *);
+static PathStream os_begin_path_stream(Stream *, Arena *, u32);
 static s8 os_get_valid_file(PathStream *, s8, Arena *, u32);
 static void os_end_path_stream(PathStream *);
 
@@ -421,7 +422,7 @@ make_dict(Arena *a, Dict *d)
 	stream_append_s8(&path, prefix);
 	stream_append_s8(&path, os_path_sep);
 	stream_append_s8(&path, d->rom);
-	PathStream ps = os_begin_path_stream(&path);
+	PathStream ps = os_begin_path_stream(&path, a, ARENA_ALLOC_END);
 
 	u8 *arena_end = a->end;
 	s8 fn_pre = s8("term");
diff --git a/platform_linux.c b/platform_linux.c
@@ -0,0 +1,209 @@
+/* See LICENSE for license details. */
+#define os_path_sep s8("/")
+
+#include "jdict.c"
+
+#define PROT_READ 0x01
+#define PROT_WRITE 0x02
+#define MAP_PRIVATE 0x02
+#define MAP_ANONYMOUS 0x20
+
+#define O_RDONLY 0x00
+#define O_DIRECTORY 0x10000
+
+#define DT_REGULAR_FILE 8 /* d_type value the kernel reports for a regular file (DT_REG) */
+
+static i64 syscall1(i64, i64);
+static i64 syscall2(i64, i64, i64);
+static i64 syscall3(i64, i64, i64, i64);
+static i64 syscall6(i64, i64, i64, i64, i64, i64, i64);
+
+typedef struct {
+	u8 buf[2048];
+	iptr fd;
+	i32 buf_pos;
+	i32 buf_end;
+} LinuxDirectoryStream;
+
+/* NOTE: necessary garbage required by GCC/CLANG even when -nostdlib is used */
+__attribute((section(".text.memset")))
+void *memset(void *d, int c, size_t n)
+{
+	u8 *bytes = d;
+	for (size_t i = 0; i < n; i++)
+		bytes[i] = c;
+	return d;
+}
+
+static void
+os_exit(i32 code)
+{
+	syscall1(SYS_exit, code);
+	__builtin_unreachable();
+}
+
+static void
+os_write(iptr fd, s8 raw)
+{
+	while (raw.len) {
+		size r = syscall3(SYS_write, fd, (i64)raw.s, raw.len);
+		if (r < 0) os_exit(1);
+		raw = s8_cut_head(raw, r);
+	}
+}
+
+static b32
+os_read_stdin(u8 *buf, size count)
+{
+	size rlen = syscall3(SYS_read, 0, (iptr)buf, count);
+	return rlen == count;
+}
+
+static s8
+os_read_whole_file(char *file, Arena *a, u32 arena_flags)
+{
+	__attribute((aligned(16))) u8 stat_buf[STAT_BUF_SIZE];
+	u64 status = syscall2(SYS_stat, (iptr)file, (iptr)stat_buf);
+	if (status > -4096UL) { /* raw syscalls report failure as -errno, i.e. a value in [-4095, -1] */
+		stream_append_s8(&error_stream, s8("failed to stat: "));
+		stream_append_s8(&error_stream, cstr_to_s8(file));
+		die(&error_stream);
+	}
+
+	u64 fd = syscall3(SYS_open, (iptr)file, O_RDONLY, 0);
+	if (fd > -4096UL) {
+		stream_append_s8(&error_stream, s8("failed to open: "));
+		stream_append_s8(&error_stream, cstr_to_s8(file));
+		die(&error_stream);
+	}
+
+	i64 *file_size = (i64 *)(stat_buf + STAT_SIZE_OFF);
+	s8 result = {.len = *file_size, .s = alloc(a, u8, *file_size, arena_flags|ARENA_NO_CLEAR)};
+	size rlen = syscall3(SYS_read, fd, (iptr)result.s, result.len);
+	syscall1(SYS_close, fd);
+
+	if (rlen != result.len) {
+		stream_append_s8(&error_stream, s8("failed to read whole file: "));
+		stream_append_s8(&error_stream, cstr_to_s8(file));
+		die(&error_stream);
+	}
+
+	return result;
+}
+
+static Arena
+os_new_arena(size cap)
+{
+	Arena a = {0};
+
+	size pagesize = PAGESIZE;
+	if (cap % pagesize != 0)
+		cap += pagesize - cap % pagesize;
+
+	u64 p = syscall6(SYS_mmap, 0, cap, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	if (p > -4096UL) {
+		return (Arena){0};
+	} else {
+		a.beg = (u8 *)p;
+	}
+	a.end = a.beg + cap;
+#ifdef _DEBUG_ARENA
+	a.min_capacity_remaining = cap;
+#endif
+	return a;
+}
+
+static PathStream
+os_begin_path_stream(Stream *dir_name, Arena *a, u32 arena_flags)
+{
+	stream_append_byte(dir_name, 0);
+	u64 fd = syscall3(SYS_open, (iptr)dir_name->data, O_DIRECTORY|O_RDONLY, 0);
+	dir_name->widx--;
+
+	if (fd > -4096UL) {
+		stream_append_s8(&error_stream, s8("os_begin_path_stream: failed to open: "));
+		stream_append_s8(&error_stream, (s8){.len = dir_name->widx, .s = dir_name->data});
+		die(&error_stream);
+	}
+
+	stream_append_byte(dir_name, '/');
+
+	LinuxDirectoryStream *lds = alloc(a, LinuxDirectoryStream, 1, arena_flags);
+	lds->fd = fd;
+	return (PathStream){.dir_name = dir_name, .dirfd = lds};
+}
+
+static void
+os_end_path_stream(PathStream *ps)
+{
+	LinuxDirectoryStream *lds = ps->dirfd;
+	syscall1(SYS_close, (iptr)lds->fd);
+	ps->dirfd = 0;
+}
+
+static s8
+os_get_valid_file(PathStream *ps, s8 match_prefix, Arena *a, u32 arena_flags)
+{
+	s8 result = {0};
+	LinuxDirectoryStream *lds = ps->dirfd;
+	if (lds) {
+		for (;;) {
+			if (lds->buf_pos >= lds->buf_end) {
+				u64 ret = syscall3(SYS_getdents, lds->fd, (iptr)lds->buf,
+				                   sizeof(lds->buf));
+				if (ret > -4096UL) {
+					stream_append_s8(&error_stream, s8("os_get_valid_file: SYS_getdents"));
+					die(&error_stream);
+				}
+				if (ret == 0)
+					break;
+				lds->buf_end = ret;
+				lds->buf_pos = 0;
+			}
+			u16 record_len = *(u16 *)(lds->buf + lds->buf_pos + DIRENT_RECLEN_OFF);
+			u8 type = lds->buf[lds->buf_pos + record_len - 1]; /* d_type is the last byte of the record */
+			/* NOTE: technically this contains extra NULs but it doesn't matter
+			 * for this purpose. We need NUL terminated to call SYS_read */
+			s8 name = {.len = record_len - 2 - DIRENT_NAME_OFF,
+			           .s = lds->buf + lds->buf_pos + DIRENT_NAME_OFF};
+			lds->buf_pos += record_len;
+			if (type == DT_REGULAR_FILE) {
+				b32 valid = 1;
+				for (size i = 0; i < match_prefix.len; i++) {
+					if (match_prefix.s[i] != name.s[i]) {
+						valid = 0;
+						break;
+					}
+				}
+				if (valid) {
+					Stream dir_name = *ps->dir_name;
+					stream_append_s8(&dir_name, name);
+					result = os_read_whole_file((char *)dir_name.data, a,
+					                            arena_flags);
+					break;
+				}
+			}
+		}
+	}
+	return result;
+}
+
+void
+linux_main(i32 argc, char *argv[], char *envp[])
+{
+	(void)envp;
+
+	Arena memory = os_new_arena(1024 * MEGABYTE);
+
+	error_stream.fd = 2;
+	error_stream.cap = 4096;
+	error_stream.data = alloc(&memory, u8, error_stream.cap, ARENA_NO_CLEAR);
+
+	stdout_stream.fd = 1;
+	stdout_stream.cap = 8 * MEGABYTE;
+	stdout_stream.data = alloc(&memory, u8, stdout_stream.cap, ARENA_NO_CLEAR); /* sized by its own cap, not error_stream's */
+
+	jdict(&memory, argc, argv);
+
+	os_exit(0);
+}
diff --git a/platform_linux_amd64.c b/platform_linux_amd64.c
@@ -0,0 +1,91 @@
+/* See LICENSE for license details. */
+
+#ifndef asm
+#ifdef __asm
+#define asm __asm
+#else
+#define asm __asm__
+#endif
+#endif
+
+#define SYS_read 0
+#define SYS_write 1
+#define SYS_open 2
+#define SYS_close 3
+#define SYS_stat 4
+#define SYS_mmap 9
+#define SYS_exit 60
+#define SYS_getdents 78
+
+#define PAGESIZE 4096
+
+#define STAT_BUF_SIZE 144
+#define STAT_SIZE_OFF 48
+
+#define DIRENT_RECLEN_OFF 16
+#define DIRENT_NAME_OFF 18
+
+#include "platform_linux.c"
+
+static i64
+syscall1(i64 n, i64 a1)
+{
+	i64 result;
+	asm volatile ("syscall" /* volatile: the call must not be dropped when the result is unused */
+		: "=a"(result)
+		: "a"(n), "D"(a1)
+		: "rcx", "r11", "memory"
+	);
+	return result;
+}
+
+static i64
+syscall2(i64 n, i64 a1, i64 a2)
+{
+	i64 result;
+	asm volatile ("syscall"
+		: "=a"(result)
+		: "a"(n), "D"(a1), "S"(a2)
+		: "rcx", "r11", "memory"
+	);
+	return result;
+}
+
+static i64
+syscall3(i64 n, i64 a1, i64 a2, i64 a3)
+{
+	i64 result;
+	asm volatile ("syscall"
+		: "=a"(result)
+		: "a"(n), "D"(a1), "S"(a2), "d"(a3)
+		: "rcx", "r11", "memory"
+	);
+	return result;
+}
+
+static i64
+syscall6(i64 n, i64 a1, i64 a2, i64 a3, i64 a4, i64 a5, i64 a6)
+{
+	i64 result;
+	register i64 r10 asm("r10") = a4;
+	register i64 r8 asm("r8") = a5;
+	register i64 r9 asm("r9") = a6;
+	asm volatile ("syscall"
+		: "=a"(result)
+		: "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9)
+		: "rcx", "r11", "memory"
+	);
+	return result;
+}
+
+asm ( /* process entry point: loads argc, argv and envp from the initial stack and calls linux_main */
+	".intel_syntax noprefix\n"
+	".global _start\n"
+	"_start:\n"
+	" mov edi, DWORD PTR [rsp]\n"
+	" lea rsi, [rsp+8]\n"
+	" lea rdx, [rsi+rdi*8+8]\n"
+	" call linux_main\n"
+	" ud2\n"
+	".att_syntax\n"
+);
diff --git a/platform_posix.c b/platform_posix.c
@@ -84,8 +84,10 @@ os_write(iptr file, s8 raw)
 }
 
 static PathStream
-os_begin_path_stream(Stream *dir_name)
+os_begin_path_stream(Stream *dir_name, Arena *a, u32 arena_flags)
 {
+	(void)a; (void)arena_flags;
+
 	stream_append_byte(dir_name, 0);
 	DIR *dir = opendir((char *)dir_name->data);
 	dir_name->widx--;