Commit: 928bfe25823c5b76deb653d39e492c9a5a2af354
Parent: adca726ddc7b32cf23f24d4292f55fc9492cadda
Author: Randy Palamar
Date: Mon, 26 Aug 2024 09:40:34 -0600
move utf8_decode to separate file
It's better to keep code that has been brought into the codebase separated.
Diffstat:
4 files changed, 44 insertions(+), 39 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -11,3 +11,9 @@ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+----------------------------------------------------------------------
+extern/utf8_decode.c has the following license:
+
+Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
diff --git a/extern/utf8_decode.c b/extern/utf8_decode.c
@@ -0,0 +1,36 @@
+static const u8 utf8d[] = {
+ /* Lookup data for utf8_decode(). The first part of the table
+ * (entries 0..255, indexed by the input byte) maps bytes to
+ * character classes in order to reduce the size of the transition
+ * table and create bitmasks. */
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+ /* The second part (entries from index 256 on) is a transition table
+ * that maps a combination of a state of the automaton and a character
+ * class to a state. States are stored as multiples of 12 (one slot
+ * per character class, classes being 0..11), which is why
+ * utf8_decode() indexes it as 256 + state + class. Each row below
+ * holds the slots for two consecutive states. */
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+/* Decoder states as stored in the transition part of utf8d. States are
+ * pre-multiplied by the number of character classes (12), so the error
+ * sink of this table is 12: state 0 moves to 12 on a stray continuation
+ * byte and every slot of row 12 leads back to 12. The value 1 is never
+ * produced by this table. */
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 12
+
+/* Feed one input byte to the UTF-8 decoding automaton.
+ *
+ * state: in/out automaton state; must be UTF8_ACCEPT before the first
+ *        byte of a sequence. Once it becomes UTF8_REJECT it stays
+ *        there until the caller resets it to UTF8_ACCEPT.
+ * cp:    in/out code point accumulator; only meaningful when the
+ *        returned state is UTF8_ACCEPT.
+ * byte:  the next byte of the input.
+ *
+ * Returns the new state: UTF8_ACCEPT when a complete code point has
+ * been stored in *cp, UTF8_REJECT when the input is not valid UTF-8,
+ * and any other value when more bytes are required. */
+static u32
+utf8_decode(u32 *state, u32 *cp, u8 byte)
+{
+ u8 type = utf8d[byte];
+
+ /* Mid-sequence: append the low six payload bits of a continuation
+ * byte. Start of sequence: the character class doubles as a shift
+ * count that masks the length marker bits off the lead byte. */
+ *cp = (*state != UTF8_ACCEPT) ?
+ (byte & 0x3fu) | (*cp << 6) :
+ (0xff >> type) & (byte);
+
+ /* The transition table starts right after the 256 class entries. */
+ *state = utf8d[256 + *state + type];
+ return *state;
+}
diff --git a/terminal.c b/terminal.c
@@ -78,7 +78,7 @@ get_utf8(s8 *raw)
u32 state = 0, cp;
size off = 0;
while (off < raw->len) {
- if (!utf8decode(&state, &cp, raw->data[off++])) {
+ if (!utf8_decode(&state, &cp, raw->data[off++])) {
*raw = consume(*raw, off);
return cp;
}
diff --git a/util.c b/util.c
@@ -155,41 +155,4 @@ utf8_encode(u32 cp)
return ret;
}
-/* Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
- * See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
-static const u8 utf8d[] = {
- /* The first part of the table maps bytes to character classes that
- * to reduce the size of the transition table and create bitmasks. */
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
- 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
- 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
-
- /* The second part is a transition table that maps a combination
- * of a state of the automaton and a character class to a state. */
- 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
- 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
- 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
- 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
- 12,36,12,12,12,12,12,12,12,12,12,12,
-};
-
-#define UTF8_ACCEPT 0
-#define UTF8_REJECT 1
-
-static u32
-utf8decode(u32 *state, u32 *cp, u8 byte)
-{
- u8 type = utf8d[byte];
-
- *cp = (*state != UTF8_ACCEPT) ?
- (byte & 0x3fu) | (*cp << 6) :
- (0xff >> type) & (byte);
-
- *state = utf8d[256 + *state + type];
- return *state;
-}
+#include "extern/utf8_decode.c"