0018-m4-Use-hand-written-lexer-to-avoid-cycle-in-bootstra.patch (7485B)
1 From edf250c633bef40e7e37dafc9fc393dd2ad9074f Mon Sep 17 00:00:00 2001 2 From: Michael Forney <mforney@mforney.org> 3 Date: Tue, 10 Apr 2018 13:37:14 -0700 4 Subject: [PATCH] m4: Use hand-written lexer to avoid cycle in bootstrap 5 6 --- 7 usr.bin/m4/tokenizer.c | 191 +++++++++++++++++++++++++++++++++++++++++ 8 usr.bin/m4/tokenizer.l | 109 ----------------------- 9 2 files changed, 191 insertions(+), 109 deletions(-) 10 create mode 100644 usr.bin/m4/tokenizer.c 11 delete mode 100644 usr.bin/m4/tokenizer.l 12 13 diff --git a/usr.bin/m4/tokenizer.c b/usr.bin/m4/tokenizer.c 14 new file mode 100644 15 index 00000000000..fa19fc65035 16 --- /dev/null 17 +++ b/usr.bin/m4/tokenizer.c 18 @@ -0,0 +1,191 @@ 19 +/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */ 20 +/* 21 + * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org> 22 + * 23 + * Permission to use, copy, modify, and distribute this software for any 24 + * purpose with or without fee is hereby granted, provided that the above 25 + * copyright notice and this permission notice appear in all copies. 26 + * 27 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 28 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 29 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 30 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 31 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 32 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 33 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 34 + */ 35 +#include "parser.tab.h" 36 +#include <assert.h> 37 +#include <ctype.h> 38 +#include <errno.h> 39 +#include <limits.h> 40 +#include <stdbool.h> 41 +#include <stdio.h> 42 +#include <stdlib.h> 43 +#include <stdint.h> 44 + 45 +extern void m4_warnx(const char *, ...); 46 +extern int mimic_gnu; 47 +extern int32_t yylval; 48 +static const char *yypos; 49 + 50 +void 51 +yy_scan_string(const char *s) 52 +{ 53 + yypos = s; 54 +} 55 + 56 +static int32_t 57 +number(const char *yytext, size_t yylen) 58 +{ 59 + long l; 60 + 61 + errno = 0; 62 + l = strtol(yytext, NULL, 0); 63 + if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) || 64 + l > INT32_MAX || l < INT32_MIN) 65 + m4_warnx("numeric overflow in expr: %.*s", (int)yylen, yytext); 66 + return l; 67 +} 68 + 69 +static int32_t 70 +parse_radix(const char *yytext, size_t yylen) 71 +{ 72 + long base; 73 + char *next; 74 + long l; 75 + int d; 76 + 77 + l = 0; 78 + base = strtol(yytext+2, &next, 0); 79 + if (base > 36 || next == NULL) { 80 + m4_warnx("error in number %.*s", (int)yylen, yytext); 81 + } else { 82 + next++; 83 + while (*next != 0) { 84 + if (*next >= '0' && *next <= '9') 85 + d = *next - '0'; 86 + else if (*next >= 'a' && *next <= 'z') 87 + d = *next - 'a' + 10; 88 + else { 89 + assert(*next >= 'A' && *next <= 'Z'); 90 + d = *next - 'A' + 10; 91 + } 92 + if (d >= base) { 93 + m4_warnx("error in number %.*s", (int)yylen, yytext); 94 + return 0; 95 + } 96 + l = base * l + d; 97 + next++; 98 + } 99 + } 100 + return l; 101 +} 102 + 103 +static int 104 +isodigit(int c) 105 +{ 106 + return c >= '0' && c <= '7'; 107 +} 108 + 109 +int yylex(void) 110 +{ 111 + const char *start; 112 + 113 +next: 114 + start = yypos; 115 + switch (*yypos) { 116 + case ' ': 117 + case '\t': 118 + case '\n': 119 + ++yypos; 120 + goto next; 121 + case '<': 122 + switch (yypos[1]) { 123 + case '=': 124 + yypos += 2; 125 + return LE; 126 + case '<': 127 + yypos += 2; 128 + return LSHIFT; 129 + } 130 + break; 131 + case '>': 132 + switch (yypos[1]) { 133 + case '=': 134 + yypos += 2; 135 + return GE; 136 + case '>': 137 + yypos += 2; 138 + return RSHIFT; 139 + } 140 + break; 141 + case '=': 142 + if (yypos[1] != '=') 143 + break; 144 + yypos += 2; 145 + return EQ; 146 + case '!': 147 + if (yypos[1] != '=') 148 + break; 149 + yypos += 2; 150 + return NE; 151 + case '&': 152 + if (yypos[1] != '&') 153 + break; 154 + yypos += 2; 155 + return LAND; 156 + case '|': 157 + if (yypos[1] != '|') 158 + break; 159 + yypos += 2; 160 + return LOR; 161 + case '*': 162 + if (!mimic_gnu || yypos[1] != '*') 163 + break; 164 + yypos += 2; 165 + return EXPONENT; 166 + case '0': 167 + switch (*++yypos) { 168 + case 'x': 169 + case 'X': 170 + if (!isxdigit(*++yypos)) 171 + return ERROR; 172 + do ++yypos; 173 + while (isxdigit(*yypos)); 174 + break; 175 + case 'r': 176 + case 'R': 177 + if (!mimic_gnu) 178 + break; 179 + if (!isdigit(*++yypos)) 180 + return ERROR; 181 + do ++yypos; 182 + while (isdigit(*yypos)); 183 + if (*yypos != ':') 184 + return ERROR; 185 + if (!isalnum(*++yypos)) 186 + return ERROR; 187 + do ++yypos; 188 + while (isalnum(*yypos)); 189 + yylval = parse_radix(start, yypos - start); 190 + return NUMBER; 191 + default: 192 + do ++yypos; 193 + while (isodigit(*yypos)); 194 + break; 195 + } 196 + yylval = number(start, yypos - start); 197 + return NUMBER; 198 + case '\0': 199 + return '\0'; 200 + } 201 + if (isdigit(*yypos)) { 202 + do ++yypos; 203 + while (isdigit(*yypos)); 204 + yylval = number(start, yypos - start); 205 + return NUMBER; 206 + } 207 + 208 + return *yypos++; 209 +} 210 diff --git a/usr.bin/m4/tokenizer.l b/usr.bin/m4/tokenizer.l 211 deleted file mode 100644 212 index 94f02fb6085..00000000000 213 --- a/usr.bin/m4/tokenizer.l 214 +++ /dev/null 215 @@ -1,109 +0,0 @@ 216 -%{ 217 -/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */ 218 -/* 219 - * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org> 220 - * 221 - * Permission to use, copy, modify, and distribute this software for any 222 - * purpose with or without fee is hereby granted, provided that the above 223 - * copyright notice and this permission notice appear in all copies. 224 - * 225 - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 226 - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 227 - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 228 - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 229 - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 230 - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 231 - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 232 - */ 233 -#include "parser.h" 234 -#include <assert.h> 235 -#include <stdlib.h> 236 -#include <errno.h> 237 -#include <stdint.h> 238 -#include <limits.h> 239 - 240 -extern void m4_warnx(const char *, ...); 241 -extern int mimic_gnu; 242 -extern int32_t yylval; 243 - 244 -int32_t number(void); 245 -int32_t parse_radix(void); 246 -%} 247 - 248 -delim [ \t\n] 249 -ws {delim}+ 250 -hex 0[xX][0-9a-fA-F]+ 251 -oct 0[0-7]* 252 -dec [1-9][0-9]* 253 -radix 0[rR][0-9]+:[0-9a-zA-Z]+ 254 - 255 -%option noyywrap 256 - 257 -%% 258 -{ws} {/* just skip it */} 259 -{hex}|{oct}|{dec} { yylval = number(); return(NUMBER); } 260 -{radix} { if (mimic_gnu) { 261 - yylval = parse_radix(); return(NUMBER); 262 - } else { 263 - return(ERROR); 264 - } 265 - } 266 -"<=" { return(LE); } 267 -">=" { return(GE); } 268 -"<<" { return(LSHIFT); } 269 -">>" { return(RSHIFT); } 270 -"==" { return(EQ); } 271 -"!=" { return(NE); } 272 -"&&" { return(LAND); } 273 -"||" { return(LOR); } 274 -"**" { if (mimic_gnu) { return (EXPONENT); } } 275 -. { return yytext[0]; } 276 -%% 277 - 278 -int32_t 279 -number() 280 -{ 281 - long l; 282 - 283 - errno = 0; 284 - l = strtol(yytext, NULL, 0); 285 - if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) || 286 - l > INT32_MAX || l < INT32_MIN) 287 - m4_warnx("numeric overflow in expr: %s", yytext); 288 - return l; 289 -} 290 - 291 -int32_t 292 -parse_radix() 293 -{ 294 - long base; 295 - char *next; 296 - long l; 297 - int d; 298 - 299 - l = 0; 300 - base = strtol(yytext+2, &next, 0); 301 - if (base > 36 || next == NULL) { 302 - m4_warnx("error in number %s", yytext); 303 - } else { 304 - next++; 305 - while (*next != 0) { 306 - if (*next >= '0' && *next <= '9') 307 - d = *next - '0'; 308 - else if (*next >= 'a' && *next <= 'z') 309 - d = *next - 'a' + 10; 310 - else { 311 - assert(*next >= 'A' && *next <= 'Z'); 312 - d = *next - 'A' + 10; 313 - } 314 - if (d >= base) { 315 - m4_warnx("error in number %s", yytext); 316 - return 0; 317 - } 318 - l = base * l + d; 319 - next++; 320 - } 321 - } 322 - return l; 323 -} 324 - 325 -- 326 2.17.0 327