Commit: 50dbb69776976cf9b043a9c1187c0e0e5971b850
Parent: 845f8fab3fc9c2622c947a26d3398cb7972ce934
Author: Randy Palamar
Date: Mon, 17 Feb 2025 10:11:25 -0700
improve utf8 and osc parsing robustness and add llvm fuzzer
If the terminal comes across a byte that looks like it should be
UTF-8 but decoding fails even though at least 4 bytes (the most a
UTF-8 sequence can need) are available, the byte will now be
treated as an ASCII control char.
To aid in fuzz testing, the OSC parser no longer calls os_fatal,
even if the cmd is invalid. Instead it just fails and consumes
the available data, since we don't know where the sequence ends.
Finally, ASSERTs are meant to catch programming mistakes, not
unhandled input, so the ones that fired on unhandled input were removed.
With all of these changes the LLVM fuzzer is now usable (though it
can't find any issues at the moment).
Diffstat:
8 files changed, 175 insertions(+), 90 deletions(-)
diff --git a/build.sh b/build.sh
@@ -10,6 +10,7 @@ for arg in $@; do
debug) build=debug ;;
optimized_debug) build=optimized_debug ;;
fuzz) build=fuzz ;;
+ fuzz_llvm) build=fuzz_llvm ;;
fuzz_results) build=fuzz_results ;;
release) build=release ;;
*) echo "usage: $0 [release|debug|optimized_debug|fuzz] [gcc|clang]"; exit 1 ;;
@@ -28,6 +29,8 @@ testcflags="-march=native -Wall -ggdb -D_DEBUG -DVERSION=test -I."
testcflags="${testcflags} -Wno-unused-variable -Wno-unused-function -Wno-undefined-internal"
testcflags="${testcflags} -fsanitize=address,undefined"
+export ASAN_OPTIONS="abort_on_error=1:halt_on_error=1:symbolize=0:max_malloc_fill_size=$((1<<30))"
+
[ ! -s "./config.h" ] && cp config.def.h config.h
case ${build} in
@@ -43,13 +46,32 @@ release)
cflags="${cflags} -O3"
;;
fuzz)
+ input=tests/llvm_fuzz_corpus # prefer the libFuzzer corpus as the AFL seed directory
+ [ ! -d ${input} ] && input=tests/fuzz_in # fall back to tests/fuzz_in when that corpus is missing
afl-clang-fast ${testcflags} -O3 -o tests/test-fuzz tests/test-fuzz.c
- AFL_AUTORESUME=1 afl-fuzz -o fuzz_out -i tests/fuzz_in ./tests/test-fuzz
+ AFL_SKIP_CPUFREQ=1 AFL_AUTORESUME=1 afl-fuzz -o tests/fuzz_out -i ${input} ./tests/test-fuzz
+ exit 0
+ ;;
+fuzz_llvm)
+ clang ${testcflags} -O0 -fsanitize=fuzzer,address,undefined -o tests/test-fuzz tests/test-fuzz.c
+ input=tests/llvm_fuzz_corpus
+ if [ ! -d ${input} ]; then
+ mkdir -p ${input} # create the corpus only after the existence test so this seeding branch is reachable
+ ./tests/test-fuzz -merge=1 ${input} tests/fuzz_in # first run: seed the corpus from tests/fuzz_in
+ else
+ ./tests/test-fuzz ${input} # corpus already exists: fuzz with it
+ fi
exit 0
;;
fuzz_results)
- ${cc} ${testcflags} -O0 tests/test-fuzz-results.c -o tests/test-fuzz-results
+ ${cc} ${testcflags} -DFUZZ_RESULTS -O0 tests/test-fuzz.c -o tests/test-fuzz-results
+ set -e
for file in fuzz_out/default/crashes/id*; do
+ echo ${file}
+ ./tests/test-fuzz-results "${file}"
+ done
+ for file in tests/llvm_fuzz_corpus/*; do
+ echo ${file}
./tests/test-fuzz-results "${file}"
done
exit 0
diff --git a/platform_linux_x11.c b/platform_linux_x11.c
@@ -388,7 +388,7 @@ main(i32 argc, char *argv[], char *envp[])
cres = i32_from_cstr(argv[i + 1], 'x');
if (cres.status == CR_SUCCESS)
cells.w = cres.i;
- cres = i32_from_cstr(cres.unparsed, 0);
+ cres = i32_from_cstr(cres.unparsed.c_str, 0);
if (cres.status == CR_SUCCESS)
cells.h = cres.i;
if (cells.w <= 0 || cells.h <= 0) {
diff --git a/terminal.c b/terminal.c
@@ -413,7 +413,9 @@ next_tab_position(Term *t, b32 backwards)
zeroes = ctz_u32(t->tabs[idx++]);
result = 32 * (idx - 1) + zeroes + 1;
}
- ASSERT(result < t->size.w);
+ /* TODO(rnp): is clamping this correct? */
+ //ASSERT(result < t->size.w);
+ result = MIN(result, t->size.w - 1);
return result;
}
@@ -521,7 +523,7 @@ erase_in_display(Term *t, CSI *csi)
case 3: /* Erase Saved Lines (xterm) */
/* NOTE: ignored; we don't save lines in the way xterm does */
break;
- default: ASSERT(0);
+ default: /* TODO(rnp): warn about invalid argument */ ;
}
}
@@ -540,7 +542,7 @@ erase_in_line(Term *t, CSI *csi)
case 2: /* Erase All */
fb_clear_region(t, cpos.y, cpos.y, 0, t->size.w);
break;
- default: ASSERT(0);
+ default: /* TODO(rnp): warn about invalid argument */ ;
}
}
@@ -942,7 +944,7 @@ handle_csi(Term *t, CSI *csi)
BEGIN_TIMED_BLOCK();
s8 raw = csi->raw;
b32 ret = parse_csi(&raw, csi);
- ASSERT(ret);
+ if (!ret) goto unknown;
#define ORONE(x) ((x)? (x) : 1)
@@ -1019,51 +1021,48 @@ handle_csi(Term *t, CSI *csi)
END_TIMED_BLOCK();
}
-static i32
+static b32
parse_osc(s8 *raw, OSC *osc)
{
BEGIN_TIMED_BLOCK();
- i32 result = 0;
-
- *osc = (OSC){0};
+ b32 result = 1;
+ /* TODO(rnp): make this whole function re-entrant */
+ zero_struct(osc);
osc->raw.data = raw->data;
- /* NOTE: parse command then store the rest as a string */
- u32 cp;
- while (raw->len) {
- cp = get_ascii(raw);
- osc->raw.len++;
- if (!BETWEEN(cp, '0', '9'))
- break;
- osc->cmd *= 10;
- osc->cmd += cp - '0';
-
- /* TODO: Performance? */
- /* NOTE: The maximum OSC in xterm is 119 so if this
- * exceeds that the whole sequence is malformed */
- if (osc->cmd > 1000)
- break;
+ struct conversion_result cmd = s8_parse_i32_until(*raw, ';');
+ if (cmd.status != CR_FAILURE) {
+ osc->cmd = cmd.i;
+ osc->arg = cmd.unparsed.s8;
+ osc->raw.len = osc->arg.data - raw->data;
+ *raw = consume(*raw, osc->raw.len);
+ } else {
+ result = 0;
}
- if (cp != ';' || osc->cmd > 1000)
- os_fatal(s8("parse_osc: malformed\n"));
-
- osc->arg.data = raw->data;
- while (raw->len) {
- cp = get_ascii(raw);
+ if (osc->arg.len && peek(osc->arg, 0) == ';') {
+ osc->arg.data++;
+ osc->arg.len = 0;
osc->raw.len++;
- if (cp == '\a')
- goto end;
- if (cp == 0x1B && peek(*raw, 0) == '\\') {
- get_ascii(raw);
+ get_ascii(raw);
+ while (raw->len) {
+ u32 cp = get_ascii(raw);
osc->raw.len++;
- goto end;
+ if (cp == '\a')
+ goto end;
+ if (cp == 0x1B && (raw->len && peek(*raw, 0) == '\\')) {
+ get_ascii(raw);
+ osc->raw.len++;
+ goto end;
+ }
+ osc->arg.len++;
}
- osc->arg.len++;
+ /* NOTE: if we fell out of the loop then we ran out of characters */
+ result = 0;
+ } else {
+ result = 0;
}
- /* NOTE: if we fell out of the loop then we ran out of characters */
- result = -1;
end:
END_TIMED_BLOCK();
@@ -1110,8 +1109,8 @@ handle_osc(Term *t, s8 *raw, Arena a)
{
BEGIN_TIMED_BLOCK();
OSC osc;
- i32 ret = parse_osc(raw, &osc);
- ASSERT(ret != -1);
+ if (!parse_osc(raw, &osc))
+ goto unknown;
Stream buffer = arena_stream(a);
switch (osc.cmd) {
@@ -1119,6 +1118,7 @@ handle_osc(Term *t, s8 *raw, Arena a)
case 1: break; /* IGNORED: set icon name */
case 2: stream_push_s8(&buffer, osc.arg); t->platform->set_window_title(&buffer); break;
default:
+ unknown:
stream_push_s8(&t->error_stream, s8("unhandled osc cmd: "));
dump_osc(&osc, &t->error_stream);
break;
@@ -1377,11 +1377,14 @@ handle_input(Term *t, Arena a, s8 raw)
* terminal when not in UTF8 mode */
if (cp > 0x7F && (t->mode.term & TM_UTF8)) {
cp = get_utf8(&raw);
- tv->lines.buf[tv->lines.widx].has_unicode = 1;
- if (cp == (u32)-1) {
+ if (cp == (u32)-1 && start_len < 4) {
/* NOTE: Need More Bytes! */
- raw.len = start_len;
goto end;
+ } else if (cp == (u32)-1 && start_len >= 4) {
+ /* NOTE(rnp): invalid/garbage cp; treat as ASCII control char */
+ cp = get_ascii(&raw);
+ } else if (cp != (u32)-1) {
+ tv->lines.buf[tv->lines.widx].has_unicode = 1;
}
} else {
cp = get_ascii(&raw);
diff --git a/tests/test-fuzz-results.c b/tests/test-fuzz-results.c
@@ -1,30 +0,0 @@
-/* See LICENSE for copyright details */
-#define ASSERT(c) do { (void)(c); } while(0)
-#include "test-common.c"
-
-i32
-main(i32 argc, char *argv[])
-{
- if (argc != 2) {
- os_write_err_msg(s8("usage: test-fuzz-results crash_input\n"));
- return 1;
- }
-
- u8 buf[4096];
-
- Arena file_backing = arena_from_memory_block(os_block_alloc(MB(1)));
- s8 file_data = os_read_file((u8 *)argv[1], &file_backing);
-
- MemoryBlock term_backing = {.memory = malloc(MB(4)), .size = MB(4)};
- Term *term = place_term_into_memory(term_backing, 24, 80);
- term->error_stream = arena_stream(arena_from_memory_block(os_block_alloc(MB(4))));
- s8 raw = launder_static_string(term, file_data);
- handle_input(term, term->arena_for_frame, raw);
-
- if (term->error_stream.widx != 0)
- os_write_err_msg(stream_to_s8(&term->error_stream));
-
- release_term_memory(term_backing);
-
- return 0;
-}
diff --git a/tests/test-fuzz.c b/tests/test-fuzz.c
@@ -1,6 +1,29 @@
/* See LICENSE for copyright details */
-#define ASSERT(c) do { (void)(c); } while(0)
#include "test-common.c"
+
+static void /* Shared driver for the fuzz front ends in this file: builds a Term and runs data through handle_input(). */
+fuzz_entry_point(s8 data, Stream error_stream)
+{
+ MemoryBlock term_backing = {.memory = malloc(MB(4)), .size = MB(4)}; /* NOTE(review): malloc result is not checked */
+ Term *term = place_term_into_memory(term_backing, 24, 80); /* presumably 24 rows by 80 columns - confirm against place_term_into_memory() */
+ term->error_stream = error_stream; /* messages pushed during handling land in the caller supplied stream */
+ s8 raw = launder_static_string(term, data);
+ handle_input(term, term->arena_for_frame, raw);
+
+ if (term->error_stream.widx != 0) /* something was written to the error stream during this run */
+ os_write_err_msg(stream_to_s8(&term->error_stream));
+
+ release_term_memory(term_backing); /* presumably returns the malloc'd block - confirm against release_term_memory() */
+}
+
+i32 /* libFuzzer entry point: wraps the input bytes in an s8 and runs them through fuzz_entry_point(). */
+LLVMFuzzerTestOneInput(const u8 *data, size_t size)
+{
+ fuzz_entry_point((s8){.data = (u8 *)data, .len = size}, (Stream){0}); /* cast drops const; NOTE(review): assumes the callee never writes through data, and that a zeroed Stream is a valid sink - confirm */
+ return 0; /* always reports the input as handled */
+}
+
+#ifdef __AFL_FUZZ_TESTCASE_LEN
#include <unistd.h>
__AFL_FUZZ_INIT();
@@ -10,11 +33,26 @@ main(void)
__AFL_INIT();
u8 *buf = __AFL_FUZZ_TESTCASE_BUF;
while (__AFL_LOOP(10000)) {
- MemoryBlock term_backing = {.memory = malloc(MB(4)), .size = MB(4)};
- Term *term = place_term_into_memory(term_backing, 24, 80);
i32 len = __AFL_FUZZ_TESTCASE_LEN;
- s8 raw = launder_static_string(term, (s8){.data = buf, .len = len});
- handle_input(term, term->arena_for_frame, raw);
+ fuzz_entry_point((s8){.data = buf, .len = len}, (Stream){0});
}
return 0;
}
+#endif
+
+#ifdef FUZZ_RESULTS
+i32 /* FUZZ_RESULTS build only: replays a single saved input file through fuzz_entry_point(). */
+main(i32 argc, char *argv[])
+{
+ if (argc != 2) { /* exactly one argument is required: the input file to replay */
+ os_write_err_msg(s8("usage: test-fuzz-results crash_input\n"));
+ return 1;
+ }
+
+ Arena file_backing = arena_from_memory_block(os_block_alloc(MB(1))); /* arena handed to os_read_file() below */
+ s8 file_data = os_read_file((u8 *)argv[1], &file_backing); /* NOTE(review): no visible check that the read succeeded */
+ fuzz_entry_point(file_data, arena_stream(arena_from_memory_block(os_block_alloc(MB(4))))); /* unlike the fuzzer entry points, a real error stream is supplied here */
+
+ return 0;
+}
+#endif
diff --git a/util.c b/util.c
@@ -159,7 +159,7 @@ mem_copy(void *restrict src, void *restrict dest, size len)
for (; len; len--) *d++ = *s++;
}
-#define zero_struct(s) mem_clear(s, 0, sizeof(typeof(*s0)))
+#define zero_struct(s) mem_clear(s, 0, sizeof(typeof(*s)))
static void *
mem_clear(void *p_, u8 c, size len)
{
@@ -297,6 +297,62 @@ s8_prefix_of(s8 s, s8 match)
return result;
}
+static s8 /* Returns the leading part of raw up to, but not including, the first delim byte; all of raw when delim is absent. */
+s8_chop_at(s8 raw, u8 delim)
+{
+ size i;
+ for (i = 0; i < raw.len; i++) {
+ if (raw.data[i] == delim)
+ break; /* i is left indexing the first delim byte */
+ }
+ s8 result = {.len = i, .data = raw.data}; /* shares raw's storage; nothing is copied */
+ return result;
+}
+
+static struct conversion_result /* Parses an optional '-' followed by decimal digits from the start of raw. */
+s8_parse_i32(s8 raw)
+{
+ struct conversion_result result = {.status = CR_SUCCESS}; /* remaining members, including .i, start zeroed */
+ i32 scale = 1;
+
+ size i = 0;
+ if (raw.len && raw.data[0] == '-') {
+ scale = -1;
+ i = 1; /* digits start after the sign */
+ }
+
+ for (; i < raw.len; i++) {
+ i32 digit = (i32)raw.data[i] - '0';
+ if (BETWEEN(digit, 0, 9)) {
+ if (result.i > (I32_MAX - digit) / 10) { /* 10 * result.i + digit would exceed I32_MAX */
+ result.status = CR_OUT_OF_RANGE;
+ result.i = I32_MAX; /* saturate; any further digits take this branch again */
+ } else {
+ result.i = 10 * result.i + digit;
+ }
+ } else {
+ break; /* first non-digit byte ends the number */
+ }
+ }
+
+ if (i == 0 || (i == 1 && raw.data[0] == '-')) /* nothing was consumed, or only the '-' sign */
+ result.status = CR_FAILURE;
+
+ result.unparsed.s8 = (s8){.len = raw.len - i, .data = raw.data + i}; /* the bytes after the last consumed one, aliasing raw */
+ result.i *= scale; /* apply the sign */
+
+ return result;
+}
+
+static struct conversion_result /* Parses an i32 from the part of raw that precedes the first delim byte. */
+s8_parse_i32_until(s8 raw, u8 delim)
+{
+ s8 chopped = s8_chop_at(raw, delim); /* whole of raw when delim does not occur */
+ struct conversion_result result = s8_parse_i32(chopped);
+ result.unparsed.s8 = (s8){.data = raw.data + chopped.len, .len = raw.len - chopped.len}; /* restarts at the delim itself (empty when delim is absent), skipping any bytes between the digits and the delim */
+ return result;
+}
+
static struct conversion_result
i32_from_cstr(char *s, char delim)
{
@@ -313,7 +369,7 @@ i32_from_cstr(char *s, char delim)
for (; *s && *s != delim; s++) {
i32 digit = s[0] - '0';
- if (!BETWEEN(digit, '0', '9'))
+ if (!BETWEEN(digit, 0, 9))
return ret;
if (ret.i > (I32_MAX - digit) / 10) {
@@ -323,9 +379,9 @@ i32_from_cstr(char *s, char delim)
ret.i = 10 * ret.i + digit;
}
- ret.i *= scale;
- ret.status = CR_SUCCESS;
- ret.unparsed = (*s == delim) ? s + 1 : s;
+ ret.i *= scale;
+ ret.status = CR_SUCCESS;
+ ret.unparsed.c_str = (*s == delim) ? s + 1 : s;
return ret;
}
diff --git a/util.h b/util.h
@@ -279,7 +279,7 @@ typedef __attribute__((aligned(64))) struct {
typedef enum { CR_FAILURE, CR_SUCCESS, CR_OUT_OF_RANGE } conversion_status;
struct conversion_result { /* outcome of a text to value conversion */
conversion_status status;
- char *unparsed;
+ union { char *c_str; s8 s8; } unparsed; /* c_str is set by i32_from_cstr(); s8 is set by s8_parse_i32() and s8_parse_i32_until() */
union { i32 i; f32 f; Colour colour;}; /* converted value; which member is meaningful depends on the conversion that produced it */
};
diff --git a/vtgl.h b/vtgl.h
@@ -76,14 +76,10 @@ typedef size_t usize;
#include "intrinsics.c"
#ifdef _DEBUG
-#ifndef ASSERT
#define ASSERT(c) do { if (!(c)) debugbreak(); } while(0)
-#endif
#define DEBUG_EXPORT
#else
-#ifndef ASSERT
#define ASSERT(c) do { (void)(c); } while(0)
-#endif
#define DEBUG_EXPORT static
#endif