url.c (30353B)
1 /* url.c 2 * (c) 2002 Mikulas Patocka 3 * This file is a part of the Links program, released under GPL. 4 */ 5 6 #include <string.h> 7 8 #include "links.h" 9 10 static const struct { 11 char *prot; 12 int port; 13 void (*func)(struct connection *); 14 void (*nc_func)(struct session *, unsigned char *); 15 int free_syntax; 16 int need_slashes; 17 int need_slash_after_host; 18 int allow_post; 19 int bypasses_socks; 20 } protocols[] = { 21 {"data", 0, data_func, NULL, 1, 0, 0, 0, 0}, 22 { "file", 0, file_func, NULL, 1, 1, 0, 0, 1}, 23 { "https", 443, https_func, NULL, 0, 1, 1, 1, 0}, 24 { "http", 80, http_func, NULL, 0, 1, 1, 1, 0}, 25 { "proxy", 3128, proxy_func, NULL, 0, 1, 1, 1, 0}, 26 { NULL, 0, NULL, NULL, 0, 0, 0, 0, 0} 27 }; 28 29 static int 30 check_protocol(unsigned char *p, size_t l) 31 { 32 int i; 33 for (i = 0; protocols[i].prot; i++) 34 if (!casecmp(cast_uchar protocols[i].prot, p, l) 35 && strlen(protocols[i].prot) == l) 36 return i; 37 return -1; 38 } 39 40 static int 41 get_prot_info(unsigned char *prot, int *port, 42 void (**func)(struct connection *), 43 void (**nc_func)(struct session *ses, unsigned char *), 44 int *allow_post, int *bypasses_socks) 45 { 46 int i; 47 for (i = 0; protocols[i].prot; i++) 48 if (!casestrcmp(cast_uchar protocols[i].prot, prot)) { 49 if (port) 50 *port = protocols[i].port; 51 if (func) 52 *func = protocols[i].func; 53 if (nc_func) 54 *nc_func = protocols[i].nc_func; 55 if (allow_post) 56 *allow_post = protocols[i].allow_post; 57 if (bypasses_socks) 58 *bypasses_socks = protocols[i].bypasses_socks; 59 return 0; 60 } 61 return -1; 62 } 63 64 int 65 parse_url(unsigned char *url, int *prlen, unsigned char **user, int *uslen, 66 unsigned char **pass, int *palen, unsigned char **host, int *holen, 67 unsigned char **port, int *polen, unsigned char **data, int *dalen, 68 unsigned char **post) 69 { 70 unsigned char *p, *q; 71 unsigned char p_c[2]; 72 int a; 73 if (prlen) 74 *prlen = 0; 75 if (user) 76 *user = NULL; 77 if (uslen) 78 *uslen = 0; 79 if (pass) 80 *pass = NULL; 81 if (palen) 82 *palen = 0; 83 if (host) 84 *host = NULL; 85 if (holen) 86 *holen = 0; 87 if (port) 88 *port = NULL; 89 if (polen) 90 *polen = 0; 91 if (data) 92 *data = NULL; 93 if (dalen) 94 *dalen = 0; 95 if (post) 96 *post = NULL; 97 if (!url || !(p = cast_uchar strchr(cast_const_char url, ':'))) 98 return -1; 99 if (prlen) 100 *prlen = (int)(p - url); 101 if ((a = check_protocol(url, p - url)) == -1) 102 return -1; 103 if (p[1] != '/' || p[2] != '/') { 104 if (protocols[a].need_slashes) 105 return -1; 106 p -= 2; 107 } 108 if (protocols[a].free_syntax) { 109 if (data) 110 *data = p + 3; 111 if (dalen) 112 *dalen = strlen((char *)(p + 3)); 113 return 0; 114 } 115 p += 3; 116 q = p + strcspn(cast_const_char p, "@/?"); 117 if (!*q && protocols[a].need_slash_after_host) 118 return -1; 119 if (*q == '@') { 120 unsigned char *pp; 121 while (strcspn(cast_const_char(q + 1), "@") 122 < strcspn(cast_const_char(q + 1), "/?")) 123 q = q + 1 + strcspn(cast_const_char(q + 1), "@"); 124 pp = cast_uchar strchr(cast_const_char p, ':'); 125 if (!pp || pp > q) { 126 if (user) 127 *user = p; 128 if (uslen) 129 *uslen = (int)(q - p); 130 } else { 131 if (user) 132 *user = p; 133 if (uslen) 134 *uslen = (int)(pp - p); 135 if (pass) 136 *pass = pp + 1; 137 if (palen) 138 *palen = (int)(q - pp - 1); 139 } 140 p = q + 1; 141 } 142 if (p[0] == '[') { 143 q = cast_uchar strchr((char *)p, ']'); 144 if (q) { 145 q++; 146 goto have_host; 147 } 148 } 149 q = p + strcspn((char *)p, ":/?"); 150 have_host: 151 if (!*q && protocols[a].need_slash_after_host) 152 return -1; 153 if (host) 154 *host = p; 155 if (holen) 156 *holen = (int)(q - p); 157 if (*q == ':') { 158 unsigned char *pp = q + strcspn((char *)q, "/"); 159 int cc; 160 if (*pp != '/' && protocols[a].need_slash_after_host) 161 return -1; 162 if (port) 163 *port = q + 1; 164 if (polen) 165 *polen = (int)(pp - q - 1); 166 for (cc = 0; cc < pp - q - 1; cc++) 167 if (q[cc + 1] < '0' || q[cc + 1] > '9') 168 return -1; 169 q = pp; 170 } 171 if (*q && *q != '?') 172 q++; 173 p = q; 174 p_c[0] = POST_CHAR; 175 p_c[1] = 0; 176 q = p + strcspn((char *)p, (char *)p_c); 177 if (data) 178 *data = p; 179 if (dalen) 180 *dalen = (int)(q - p); 181 if (post) 182 *post = *q ? q + 1 : NULL; 183 return 0; 184 } 185 186 unsigned char * 187 get_protocol_name(unsigned char *url) 188 { 189 int l; 190 if (parse_url(url, &l, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 191 NULL, NULL, NULL)) 192 return NULL; 193 return memacpy(url, l); 194 } 195 196 unsigned char * 197 get_keepalive_id(unsigned char *url) 198 { 199 unsigned char *h, *p, *k, *d; 200 int hl, pl; 201 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &h, &hl, &p, &pl, &d, 202 NULL, NULL)) 203 return NULL; 204 if (is_proxy_url(url) && !casecmp(d, cast_uchar "https://", 8)) { 205 if (parse_url(d, NULL, NULL, NULL, NULL, NULL, &h, &hl, &p, &pl, 206 NULL, NULL, NULL)) 207 return NULL; 208 } 209 k = p ? p + pl : h ? h + hl : NULL; 210 if (!k) 211 return stracpy(cast_uchar ""); 212 return memacpy(url, k - url); 213 } 214 215 unsigned char * 216 get_host_name(unsigned char *url) 217 { 218 unsigned char *h; 219 int hl; 220 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &h, &hl, NULL, NULL, 221 NULL, NULL, NULL)) 222 return stracpy(cast_uchar ""); 223 return memacpy(h, hl); 224 } 225 226 unsigned char * 227 get_user_name(unsigned char *url) 228 { 229 unsigned char *h; 230 int hl; 231 if (parse_url(url, NULL, &h, &hl, NULL, NULL, NULL, NULL, NULL, NULL, 232 NULL, NULL, NULL)) 233 return NULL; 234 return memacpy(h, hl); 235 } 236 237 unsigned char * 238 get_pass(unsigned char *url) 239 { 240 unsigned char *h; 241 int hl; 242 if (parse_url(url, NULL, NULL, NULL, &h, &hl, NULL, NULL, NULL, NULL, 243 NULL, NULL, NULL)) 244 return NULL; 245 return memacpy(h, hl); 246 } 247 248 unsigned char * 249 get_port_str(unsigned char *url) 250 { 251 unsigned char *h; 252 int hl; 253 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &h, &hl, 254 NULL, NULL, NULL)) 255 return NULL; 256 return hl ? memacpy(h, hl) : NULL; 257 } 258 259 int 260 get_port(unsigned char *url) 261 { 262 unsigned char *h; 263 int hl; 264 long n = -1; 265 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &h, &hl, 266 NULL, NULL, NULL)) 267 return -1; 268 if (h) { 269 n = strtol((char *)h, NULL, 10); 270 if (n > 0 && n < 65536) 271 return (int)n; 272 return -1; 273 } 274 if ((h = get_protocol_name(url))) { 275 int nn = -1; 276 get_prot_info(h, &nn, NULL, NULL, NULL, NULL); 277 free(h); 278 n = nn; 279 } 280 return (int)n; 281 } 282 283 void (*get_protocol_handle(unsigned char *url))(struct connection *) 284 { 285 unsigned char *p; 286 void (*f)(struct connection *) = NULL; 287 int post = 0; 288 if (!(p = get_protocol_name(url))) 289 return NULL; 290 get_prot_info(p, NULL, &f, NULL, &post, NULL); 291 free(p); 292 if (!post && strchr(cast_const_char url, POST_CHAR)) 293 return NULL; 294 return f; 295 } 296 297 void (*get_external_protocol_function(unsigned char *url))(struct session *, 298 unsigned char *) 299 { 300 unsigned char *p; 301 void (*f)(struct session *, unsigned char *) = NULL; 302 int post = 0; 303 if (!(p = get_protocol_name(url))) 304 return NULL; 305 get_prot_info(p, NULL, NULL, &f, &post, NULL); 306 free(p); 307 if (!post && strchr(cast_const_char url, POST_CHAR)) 308 return NULL; 309 return f; 310 } 311 312 int 313 url_bypasses_socks(unsigned char *url) 314 { 315 int ret = 0; 316 unsigned char *p; 317 if (!(p = get_protocol_name(url))) 318 return 1; 319 get_prot_info(p, NULL, NULL, NULL, NULL, &ret); 320 free(p); 321 return ret; 322 } 323 324 unsigned char * 325 get_url_data(unsigned char *url) 326 { 327 unsigned char *d; 328 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 329 &d, NULL, NULL)) 330 return NULL; 331 return d; 332 } 333 334 #define dsep(x) (lo ? dir_sep(x) : (x) == '/') 335 336 static void 337 translate_directories(unsigned char *url) 338 { 339 unsigned char *dd = get_url_data(url); 340 unsigned char *s, *d; 341 int lo = !casecmp(url, cast_uchar "file://", 7); 342 if (!casecmp(url, cast_uchar "javascript:", 11)) 343 return; 344 if (!casecmp(url, cast_uchar "magnet:", 7)) 345 return; 346 if (!dd || dd == url) 347 return; 348 if (!dsep(*dd)) { 349 dd--; 350 if (!dsep(*dd)) { 351 dd++; 352 memmove(dd + 1, dd, strlen((char *)dd) + 1); 353 *dd = '/'; 354 } 355 } 356 s = dd; 357 d = dd; 358 r: 359 if (end_of_dir(url, s[0])) { 360 memmove(d, s, strlen((char *)s) + 1); 361 return; 362 } 363 if (dsep(s[0]) && s[1] == '.' 364 && (dsep(s[2]) || !s[2] || end_of_dir(url, s[2]))) { 365 if (!dsep(s[2])) 366 *d++ = *s; 367 s += 2; 368 goto r; 369 } 370 if (dsep(s[0]) && s[1] == '.' && s[2] == '.' 371 && (dsep(s[3]) || !s[3] || end_of_dir(url, s[3]))) { 372 while (d > dd) { 373 d--; 374 if (dsep(*d)) 375 goto b; 376 } 377 b: 378 if (!dsep(s[3])) 379 *d++ = *s; 380 s += 3; 381 goto r; 382 } 383 if ((*d++ = *s++)) 384 goto r; 385 } 386 387 static unsigned char * 388 translate_hashbang(unsigned char *up) 389 { 390 unsigned char *u, *p, *dp, *data, *post_seq; 391 int q; 392 unsigned char *r; 393 int rl; 394 if (!strstr((char *)up, "#!") && !strstr((char *)up, "#%21")) 395 return up; 396 u = stracpy(up); 397 p = extract_position(u); 398 if (!p) { 399 free_u_ret_up: 400 free(u); 401 return up; 402 } 403 if (p[0] == '!') 404 dp = p + 1; 405 else if (!casecmp(p, cast_uchar "%21", 3)) 406 dp = p + 3; 407 else { 408 free(p); 409 goto free_u_ret_up; 410 } 411 if (!(post_seq = cast_uchar strchr((char *)u, POST_CHAR))) 412 post_seq = cast_uchar strchr((char *)u, 0); 413 data = get_url_data(u); 414 if (!data) 415 data = u; 416 r = NULL; 417 rl = add_bytes_to_str(&r, 0, u, post_seq - u); 418 q = strlen((char *)data); 419 if (q && (data[q - 1] == '&' || data[q - 1] == '?')) 420 ; 421 else if (strchr((char *)data, '?')) 422 rl = add_chr_to_str(&r, rl, '&'); 423 else 424 rl = add_chr_to_str(&r, rl, '?'); 425 rl = add_to_str(&r, rl, cast_uchar "_escaped_fragment_="); 426 for (; *dp; dp++) { 427 unsigned char c = *dp; 428 if (c <= 0x20 || c == 0x23 || c == 0x25 || c == 0x26 429 || c == 0x2b || c >= 0x7f) { 430 unsigned char h[4]; 431 sprintf((char *)h, "%%%02X", c); 432 rl = add_to_str(&r, rl, h); 433 } else 434 rl = add_chr_to_str(&r, rl, c); 435 } 436 rl = add_to_str(&r, rl, post_seq); 437 free(u); 438 free(p); 439 free(up); 440 return r; 441 } 442 443 static unsigned char * 444 rewrite_url_google_docs(unsigned char *n) 445 { 446 int i; 447 unsigned char *id, *id_end, *url_end; 448 unsigned char *res; 449 size_t l; 450 struct { 451 const char *beginning; 452 const char *result1; 453 const char *result2; 454 } const patterns[] = { 455 {"https://docs.google.com/document/d/", 456 "https://docs.google.com/document/d/", "/export?format=pdf"}, 457 { "https://docs.google.com/document/u/", 458 "https://docs.google.com/document/u/", "/export?format=pdf"}, 459 { "https://docs.google.com/spreadsheets/d/", 460 "https://docs.google.com/spreadsheets/d/", "/export?format=pdf"}, 461 { "https://docs.google.com/spreadsheets/u/", 462 "https://docs.google.com/spreadsheets/u/", "/export?format=pdf"}, 463 { "https://docs.google.com/presentation/d/", 464 "https://docs.google.com/presentation/d/", "/export/pdf" }, 465 { "https://docs.google.com/presentation/u/", 466 "https://docs.google.com/presentation/u/", "/export/pdf" }, 467 { "https://drive.google.com/file/d/", 468 "https://drive.google.com/uc?export=download&id=", "" }, 469 { "https://drive.google.com/file/u/", 470 "https://drive.google.com/uc?export=download&id=", "" } 471 }; 472 for (i = 0; i < array_elements(patterns); i++) 473 if (!cmpbeg(n, cast_uchar patterns[i].beginning)) 474 goto match; 475 return n; 476 match: 477 id = n + strlen(patterns[i].beginning); 478 url_end = id + strcspn(cast_const_char id, "#" POST_CHAR_STRING); 479 id_end = memchr(id, '/', url_end - id); 480 if (!id_end) 481 return n; 482 if (!cmpbeg(id_end, cast_uchar "/export")) 483 return n; 484 if (!patterns[i].result2[0]) { 485 id = id_end; 486 while (id[-1] != '/') 487 id--; 488 } 489 res = NULL; 490 l = add_to_str(&res, 0, cast_uchar patterns[i].result1); 491 l = add_bytes_to_str(&res, l, id, id_end - id); 492 l = add_to_str(&res, l, cast_uchar patterns[i].result2); 493 free(n); 494 return res; 495 } 496 497 static unsigned char * 498 rewrite_url_mediawiki_svg(unsigned char *n) 499 { 500 const char u1[] = "/media/math/render/svg/"; 501 const char u2[] = "/media/math/render/png/"; 502 unsigned char *d, *s; 503 d = get_url_data(n); 504 if (!d) 505 return n; 506 s = cast_uchar strstr((char *)d, u1); 507 if (!s) 508 return n; 509 memcpy(s, u2, strlen(u2)); 510 return n; 511 } 512 513 static unsigned char * 514 rewrite_url(unsigned char *n) 515 { 516 extend_str(&n, 1); 517 translate_directories(n); 518 n = translate_hashbang(n); 519 n = rewrite_url_google_docs(n); 520 n = rewrite_url_mediawiki_svg(n); 521 return n; 522 } 523 524 static int 525 test_qualified_name(char *host, char *hostname) 526 { 527 char *c; 528 if (!strcasecmp(host, hostname)) 529 return 1; 530 c = strchr(hostname, '.'); 531 if (c) { 532 *c = 0; 533 if (!strcasecmp(host, hostname)) 534 return 1; 535 } 536 return 0; 537 } 538 539 static int 540 is_local_host(char *host) 541 { 542 if (!*host) 543 return 1; 544 if (!strcasecmp(host, "localhost")) 545 return 1; 546 { 547 int rs; 548 char n[4096]; 549 n[0] = 0; 550 EINTRLOOP(rs, gethostname(n, sizeof(n))); 551 n[sizeof(n) - 1] = 0; 552 if (!rs && strlen(n) < sizeof(n) - 1) { 553 if (test_qualified_name(host, n)) 554 return 1; 555 } 556 } 557 return 0; 558 } 559 560 static void 561 insert_wd(unsigned char **up, unsigned char *cwd) 562 { 563 unsigned char *u = *up; 564 unsigned char *cw; 565 unsigned char *url; 566 char *host; 567 size_t url_l; 568 int i; 569 if (!u || !cwd || !*cwd) 570 return; 571 if (casecmp(u, cast_uchar "file://", 7)) 572 return; 573 for (i = 7; u[i] && !dir_sep(u[i]); i++) 574 ; 575 host = cast_char memacpy(u + 7, i - 7); 576 if (is_local_host(host)) { 577 free(host); 578 memmove(u + 7, u + i, strlen(cast_const_char(u + i)) + 1); 579 return; 580 } 581 free(host); 582 url = NULL; 583 url_l = add_bytes_to_str(&url, 0, u, 7); 584 for (cw = cwd; *cw; cw++) { 585 unsigned char c = *cw; 586 if (c < ' ' || c == '%' || c >= 127) { 587 unsigned char h[4]; 588 sprintf((char *)h, "%%%02X", (unsigned)c & 0xff); 589 url_l = add_to_str(&url, url_l, h); 590 } else 591 url_l = add_chr_to_str(&url, url_l, c); 592 } 593 if (!dir_sep(cwd[strlen((char *)cwd) - 1])) 594 url_l = add_chr_to_str(&url, url_l, '/'); 595 url_l = add_to_str(&url, url_l, u + 7); 596 free(u); 597 *up = url; 598 } 599 600 int 601 url_non_ascii(unsigned char *url) 602 { 603 unsigned char *ch; 604 for (ch = url; *ch; ch++) 605 if (*ch >= 128) 606 return 1; 607 return 0; 608 } 609 610 static unsigned char * 611 translate_idn(unsigned char *nu, int canfail) 612 { 613 if (url_non_ascii(nu)) { 614 unsigned char *id = idn_encode_url(nu, 0); 615 if (!id) { 616 if (!canfail) 617 return nu; 618 free(nu); 619 return NULL; 620 } 621 free(nu); 622 return id; 623 } 624 return nu; 625 } 626 627 /* 628 * U funkce join_urls musi byt prvni url absolutni (takove, co projde funkci 629 * parse_url bez chyby --- pokud neni absolutni, tak to spatne na internal) a 630 * druhe url je relativni cesta vuci nemu nebo taky absolutni url. Pokud je 631 * druhe url absolutni, vrati se to; pokud je relativni, tak se spoji prvni a 632 * druhe url. 633 */ 634 unsigned char * 635 join_urls(unsigned char *base, unsigned char *rel) 636 { 637 unsigned char *p, *n, *pp, *ch; 638 int l; 639 int lo = !casecmp(base, cast_uchar "file://", 7); 640 int data = !casecmp(base, cast_uchar "data:", 5); 641 if (rel[0] == '#' || !rel[0]) { 642 n = stracpy(base); 643 for (p = n; *p && *p != POST_CHAR && *p != '#'; p++) 644 ; 645 *p = 0; 646 add_to_strn(&n, rel); 647 goto return_n; 648 } 649 if (rel[0] == '?' || rel[0] == '&') { 650 unsigned char rj[3]; 651 unsigned char *d = get_url_data(base); 652 if (!d) 653 goto bad_base; 654 rj[0] = rel[0]; 655 rj[1] = POST_CHAR; 656 rj[2] = 0; 657 d += strcspn((char *)d, (char *)rj); 658 n = memacpy(base, d - base); 659 add_to_strn(&n, rel); 660 goto return_n; 661 } 662 if (rel[0] == '/' && rel[1] == '/' && !data) { 663 unsigned char *s; 664 if (!(s = cast_uchar strstr(cast_const_char base, "//"))) { 665 if (!(s = cast_uchar strchr(cast_const_char base, 666 ':'))) { 667 bad_base: 668 internal("bad base url: %s", base); 669 return NULL; 670 } 671 s++; 672 } 673 n = memacpy(base, s - base); 674 add_to_strn(&n, rel); 675 if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 676 NULL, NULL, NULL, NULL, NULL)) 677 goto return_n; 678 add_to_strn(&n, cast_uchar "/"); 679 if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 680 NULL, NULL, NULL, NULL, NULL)) 681 goto return_n; 682 free(n); 683 } 684 if (is_proxy_url(rel)) 685 goto prx; 686 if (!parse_url(rel, &l, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 687 NULL, NULL, NULL)) { 688 n = stracpy(rel); 689 goto return_n; 690 } 691 n = stracpy(rel); 692 while (n[0] && n[strlen((char *)n) - 1] <= ' ') 693 n[strlen((char *)n) - 1] = 0; 694 extend_str(&n, 1); 695 ch = cast_uchar strrchr((char *)n, '#'); 696 if (!ch || strchr((char *)ch, '/')) 697 ch = n + strlen((char *)n); 698 memmove(ch + 1, ch, strlen((char *)ch) + 1); 699 *ch = '/'; 700 if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 701 NULL, NULL, NULL)) 702 goto return_n; 703 free(n); 704 prx: 705 if (parse_url(base, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 706 NULL, &p, NULL, NULL) 707 || !p) { 708 goto bad_base; 709 } 710 if (!dsep(*p)) 711 p--; 712 if (!data) { 713 if (end_of_dir(base, rel[0])) 714 for (; *p; p++) { 715 if (end_of_dir(base, *p)) 716 break; 717 } 718 else if (!dsep(rel[0])) 719 for (pp = p; *pp; pp++) { 720 if (end_of_dir(base, *pp)) 721 break; 722 if (dsep(*pp)) 723 p = pp + 1; 724 } 725 } 726 n = memacpy(base, p - base); 727 add_to_strn(&n, rel); 728 goto return_n; 729 730 return_n: 731 n = translate_idn(n, 0); 732 n = rewrite_url(n); 733 return n; 734 } 735 736 unsigned char * 737 translate_url(unsigned char *url, unsigned char *cwd) 738 { 739 unsigned char *ch; 740 unsigned char *nu, *da; 741 unsigned char *prefix; 742 int sl; 743 while (*url == ' ') 744 url++; 745 if (*url && url[strlen((char *)url) - 1] == ' ') { 746 nu = stracpy(url); 747 while (*nu && nu[strlen((char *)nu) - 1] == ' ') 748 nu[strlen((char *)nu) - 1] = 0; 749 ch = translate_url(nu, cwd); 750 free(nu); 751 return ch; 752 } 753 if (is_proxy_url(url)) 754 return NULL; 755 if (!parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 756 NULL, &da, NULL, NULL)) { 757 nu = stracpy(url); 758 goto return_nu; 759 } 760 if (strchr((char *)url, POST_CHAR)) 761 return NULL; 762 if (strstr((char *)url, "://")) { 763 nu = stracpy(url); 764 extend_str(&nu, 1); 765 ch = cast_uchar strrchr((char *)nu, '#'); 766 if (!ch || strchr((char *)ch, '/')) 767 ch = nu + strlen((char *)nu); 768 memmove(ch + 1, ch, strlen((char *)ch) + 1); 769 *ch = '/'; 770 if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 771 NULL, NULL, NULL, NULL, NULL)) 772 goto return_nu; 773 free(nu); 774 } 775 prefix = cast_uchar "file://"; 776 if (url[0] == '[' && strchr((char *)url, ']')) { 777 ch = url; 778 goto http; 779 } 780 ch = url + strcspn((char *)url, ".:/@"); 781 sl = 0; 782 if (*ch != ':' || *(url + strcspn((char *)url, "/@")) == '@') { 783 if (*url != '.' && *ch == '.') { 784 unsigned char *e, *f, *g; 785 int tl; 786 for (e = ch + 1; 787 *(f = e + strcspn((char *)e, ".:/")) == '.'; 788 e = f + 1) 789 ; 790 g = memacpy(e, f - e); 791 tl = is_tld(g); 792 free(g); 793 if (tl) { 794 http: 795 prefix = cast_uchar "http://"; 796 sl = 1; 797 } 798 } 799 if (*ch == '@' || *ch == ':' 800 || !cmpbeg(url, cast_uchar "ftp.")) { 801 prefix = cast_uchar "ftp://"; 802 sl = 1; 803 } 804 nu = stracpy(prefix); 805 add_to_strn(&nu, url); 806 if (sl && !strchr((char *)url, '/')) 807 add_to_strn(&nu, cast_uchar "/"); 808 if (parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 809 NULL, NULL, NULL, NULL, NULL)) { 810 free(nu); 811 return NULL; 812 } 813 goto return_nu; 814 } 815 nu = memacpy(url, ch - url + 1); 816 add_to_strn(&nu, cast_uchar "//"); 817 add_to_strn(&nu, ch + 1); 818 if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 819 NULL, NULL, NULL)) 820 goto return_nu; 821 add_to_strn(&nu, cast_uchar "/"); 822 if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 823 NULL, NULL, NULL)) 824 goto return_nu; 825 free(nu); 826 return NULL; 827 828 return_nu: 829 nu = translate_idn(nu, 1); 830 if (!nu) 831 return NULL; 832 insert_wd(&nu, cwd); 833 nu = rewrite_url(nu); 834 return nu; 835 } 836 837 unsigned char * 838 extract_position(unsigned char *url) 839 { 840 unsigned char *u, *uu, *r; 841 if ((u = get_url_data(url))) 842 url = u; 843 if (!(u = cast_uchar strchr((char *)url, POST_CHAR))) 844 u = cast_uchar strchr((char *)url, 0); 845 if (!(uu = memchr(url, '#', u - url))) 846 return NULL; 847 r = memacpy(uu + 1, u - uu - 1); 848 memmove(uu, u, strlen((char *)u) + 1); 849 return r; 850 } 851 852 int 853 url_not_saveable(unsigned char *url) 854 { 855 int p, palen; 856 unsigned char *u = translate_url(url, cast_uchar "/"); 857 if (!u) 858 return 1; 859 p = parse_url(u, NULL, NULL, NULL, NULL, &palen, NULL, NULL, NULL, NULL, 860 NULL, NULL, NULL); 861 free(u); 862 return p || palen; 863 } 864 865 #define accept_char(x) \ 866 ((x) != 10 && (x) != 13 && (x) != '"' && (x) != '\'' && (x) != '&' \ 867 && (x) != '<' && (x) != '>') 868 #define special_char(x) ((x) < ' ' || (x) == '%' || (x) == '#' || (x) >= 127) 869 870 /* 871 * -2 percent to raw 872 * -1 percent to html 873 * 0 raw to html 874 * 1 raw to percent 875 */ 876 877 size_t 878 add_conv_str(unsigned char **s, size_t l, unsigned char *b, int ll, 879 int encode_special) 880 { 881 for (; ll > 0; ll--, b++) { 882 unsigned char chr = *b; 883 if (!chr) 884 continue; 885 if (special_char(chr) && encode_special == 1) { 886 unsigned char h[4]; 887 sprintf((char *)h, "%%%02X", (unsigned)chr & 0xff); 888 l = add_to_str(s, l, h); 889 continue; 890 } 891 if (chr == '%' && encode_special <= -1 && ll > 2 892 && ((b[1] >= '0' && b[1] <= '9') 893 || (b[1] >= 'A' && b[1] <= 'F') 894 || (b[1] >= 'a' && b[1] <= 'f')) 895 && ((b[2] >= '0' && b[2] <= '9') 896 || (b[2] >= 'A' && b[2] <= 'F') 897 || (b[2] >= 'a' && b[2] <= 'f'))) { 898 int i; 899 chr = 0; 900 for (i = 1; i < 3; i++) { 901 if (b[i] >= '0' && b[i] <= '9') 902 chr = chr * 16 + b[i] - '0'; 903 if (b[i] >= 'A' && b[i] <= 'F') 904 chr = chr * 16 + b[i] - 'A' + 10; 905 if (b[i] >= 'a' && b[i] <= 'f') 906 chr = chr * 16 + b[i] - 'a' + 10; 907 } 908 ll -= 2; 909 b += 2; 910 if (!chr) 911 continue; 912 } 913 if (chr == ' ' && (!encode_special || encode_special == -1)) 914 l = add_to_str(s, l, cast_uchar " "); 915 else if (accept_char(chr) || encode_special == -2) 916 l = add_chr_to_str(s, l, chr); 917 else if (chr == 10 || chr == 13) { 918 continue; 919 } else { 920 l = add_to_str(s, l, cast_uchar "&#"); 921 l = add_num_to_str(s, l, (int)chr); 922 l = add_chr_to_str(s, l, ';'); 923 } 924 } 925 return l; 926 } 927 928 void 929 convert_file_charset(unsigned char **s, int *l, int start_l) 930 { 931 } 932 933 static const char xn[] = "xn--"; 934 static const size_t xn_l = sizeof(xn) - 1; 935 936 #define puny_max_length 63 937 #define puny_base 36 938 #define puny_tmin 1 939 #define puny_tmax 26 940 #define puny_skew 38 941 #define puny_damp 700 942 #define puny_init_bias 72 943 944 static int 945 ascii_allowed(unsigned c) 946 { 947 return c == '-' || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') 948 || (c >= 'a' && c <= 'z'); 949 } 950 951 static unsigned char 952 puny_chrenc(unsigned n) 953 { 954 return n + (n < 26 ? 'a' : '0' - 26); 955 } 956 957 static unsigned 958 puny_chrdec(unsigned char c) 959 { 960 if (c <= '9') 961 return c - '0' + 26; 962 if (c <= 'Z') 963 return c - 'A'; 964 return c - 'a'; 965 } 966 967 struct puny_state { 968 unsigned ascii_numpoints; 969 unsigned numpoints; 970 unsigned bias; 971 unsigned k; 972 }; 973 974 static void 975 puny_init(struct puny_state *st, unsigned numpoints) 976 { 977 st->ascii_numpoints = numpoints; 978 st->numpoints = numpoints; 979 st->bias = puny_init_bias; 980 st->k = puny_base; 981 } 982 983 static unsigned 984 puny_threshold(struct puny_state *st) 985 { 986 unsigned k = st->k; 987 st->k += puny_base; 988 if (k <= st->bias) 989 return puny_tmin; 990 if (k >= st->bias + puny_tmax) 991 return puny_tmax; 992 return k - st->bias; 993 } 994 995 static void 996 puny_adapt(struct puny_state *st, unsigned val) 997 { 998 unsigned k; 999 val = st->ascii_numpoints == st->numpoints ? val / puny_damp : val / 2; 1000 st->numpoints++; 1001 val += val / st->numpoints; 1002 k = 0; 1003 while (val > ((puny_base - puny_tmin) * puny_tmax) / 2) { 1004 val /= puny_base - puny_tmin; 1005 k += puny_base; 1006 } 1007 st->bias = 1008 k + (((puny_base - puny_tmin + 1) * val) / (val + puny_skew)); 1009 st->k = puny_base; 1010 } 1011 1012 static unsigned char * 1013 puny_encode(unsigned char *s, int len) 1014 { 1015 unsigned char *p; 1016 unsigned *uni; 1017 unsigned uni_l; 1018 unsigned char *res; 1019 size_t res_l; 1020 unsigned i; 1021 unsigned ni, cchar, skip; 1022 struct puny_state st; 1023 1024 if (len > 7 * puny_max_length) 1025 goto err; 1026 uni = xmalloc(len * sizeof(unsigned)); 1027 uni_l = 0; 1028 for (p = s; p < s + len;) { 1029 unsigned c; 1030 GET_UTF_8(p, c); 1031 c = uni_locase(c); 1032 if (c < 128 && !ascii_allowed(c)) 1033 goto err_free_uni; 1034 if (c > 0x10FFFF) 1035 goto err_free_uni; 1036 uni[uni_l++] = c; 1037 } 1038 if (uni_l > puny_max_length) 1039 goto err_free_uni; 1040 1041 res = NULL; 1042 res_l = add_to_str(&res, 0, cast_uchar xn); 1043 1044 ni = 0; 1045 for (i = 0; i < uni_l; i++) 1046 if (uni[i] < 128) { 1047 res_l = add_chr_to_str(&res, res_l, uni[i]); 1048 ni++; 1049 } 1050 1051 if (ni == uni_l) { 1052 memmove(res, res + xn_l, res_l - xn_l + 1); 1053 res_l -= 4; 1054 goto ret_free_uni; 1055 } 1056 1057 if (res_l != xn_l) 1058 res_l = add_chr_to_str(&res, res_l, '-'); 1059 1060 puny_init(&st, ni); 1061 1062 cchar = 128; 1063 skip = 0; 1064 1065 while (1) { 1066 unsigned dlen = 0; 1067 unsigned lchar = -1U; 1068 for (i = 0; i < uni_l; i++) { 1069 unsigned c = uni[i]; 1070 if (c < cchar) 1071 dlen++; 1072 else if (c < lchar) 1073 lchar = c; 1074 } 1075 if (lchar == -1U) 1076 break; 1077 skip += (lchar - cchar) * (dlen + 1); 1078 for (i = 0; i < uni_l; i++) { 1079 unsigned c = uni[i]; 1080 if (c < lchar) 1081 skip++; 1082 if (c == lchar) { 1083 unsigned n; 1084 n = skip; 1085 while (1) { 1086 unsigned t = puny_threshold(&st); 1087 if (n < t) { 1088 res_l = add_chr_to_str( 1089 &res, res_l, 1090 puny_chrenc(n)); 1091 break; 1092 } else { 1093 unsigned d = 1094 (n - t) % (puny_base - t); 1095 n = (n - t) / (puny_base - t); 1096 res_l = add_chr_to_str( 1097 &res, res_l, 1098 puny_chrenc(d + t)); 1099 } 1100 } 1101 puny_adapt(&st, skip); 1102 skip = 0; 1103 } 1104 } 1105 skip++; 1106 cchar = lchar + 1; 1107 } 1108 1109 ret_free_uni: 1110 free(uni); 1111 1112 if (res_l > puny_max_length) 1113 goto err; 1114 1115 return res; 1116 1117 err_free_uni: 1118 free(uni); 1119 err: 1120 return NULL; 1121 } 1122 1123 static unsigned char * 1124 puny_decode(unsigned char *s, int len) 1125 { 1126 unsigned char *p, *last_dash; 1127 unsigned *uni; 1128 unsigned uni_l; 1129 unsigned char *res; 1130 size_t res_l; 1131 unsigned i; 1132 unsigned cchar, pos; 1133 struct puny_state st; 1134 1135 if (!(len >= 4 && !casecmp(s, cast_uchar xn, xn_l))) 1136 return NULL; 1137 s += xn_l; 1138 len -= xn_l; 1139 1140 last_dash = NULL; 1141 for (p = s; p < s + len; p++) { 1142 unsigned char c = *p; 1143 if (!ascii_allowed(c)) 1144 goto err; 1145 if (c == '-') 1146 last_dash = p; 1147 } 1148 1149 if (len > puny_max_length) 1150 goto err; 1151 1152 uni = xmalloc(len * sizeof(unsigned)); 1153 uni_l = 0; 1154 1155 if (last_dash) { 1156 for (p = s; p < last_dash; p++) 1157 uni[uni_l++] = *p; 1158 p = last_dash + 1; 1159 } else 1160 p = s; 1161 1162 puny_init(&st, uni_l); 1163 1164 cchar = 128; 1165 pos = 0; 1166 1167 while (p < s + len) { 1168 unsigned w = 1; 1169 unsigned val = 0; 1170 while (1) { 1171 unsigned n, t, nv, nw; 1172 if (p >= s + len) 1173 goto err_free_uni; 1174 n = puny_chrdec(*p++); 1175 nw = n * w; 1176 if (nw / w != n) 1177 goto err_free_uni; 1178 nv = val + nw; 1179 if (nv < val) 1180 goto err_free_uni; 1181 val = nv; 1182 t = puny_threshold(&st); 1183 if (n < t) 1184 break; 1185 nw = w * (puny_base - t); 1186 if (nw / w != puny_base - t) 1187 goto err_free_uni; 1188 w = nw; 1189 } 1190 puny_adapt(&st, val); 1191 1192 if (val > uni_l - pos) { 1193 unsigned cp; 1194 val -= uni_l - pos + 1; 1195 pos = 0; 1196 cp = val / (uni_l + 1) + 1; 1197 val %= uni_l + 1; 1198 if (cchar + cp < cchar) 1199 goto err_free_uni; 1200 cchar += cp; 1201 if (cchar > 0x10FFFF) 1202 goto err_free_uni; 1203 } 1204 pos += val; 1205 memmove(uni + pos + 1, uni + pos, 1206 (uni_l - pos) * sizeof(unsigned)); 1207 uni[pos++] = cchar; 1208 uni_l++; 1209 } 1210 1211 res = NULL; 1212 res_l = 0; 1213 1214 for (i = 0; i < uni_l; i++) { 1215 unsigned char *us = encode_utf_8(uni[i]); 1216 res_l = add_to_str(&res, res_l, us); 1217 } 1218 1219 free(uni); 1220 1221 return res; 1222 1223 err_free_uni: 1224 free(uni); 1225 err: 1226 return NULL; 1227 } 1228 1229 unsigned char * 1230 idn_encode_host(unsigned char *host, int len, unsigned char *separator, 1231 int decode) 1232 { 1233 unsigned char *p, *s; 1234 int l, i; 1235 size_t pl; 1236 p = NULL; 1237 pl = 0; 1238 1239 next_host_elem: 1240 l = len; 1241 for (s = separator; *s; s++) { 1242 unsigned char *d = memchr(host, *s, l); 1243 if (d) 1244 l = (int)(d - host); 1245 } 1246 1247 if (!decode) { 1248 for (i = 0; i < l; i++) 1249 if (host[i] >= 0x80) { 1250 unsigned char *enc = puny_encode(host, l); 1251 if (!enc) 1252 goto err; 1253 pl = add_to_str(&p, pl, enc); 1254 free(enc); 1255 goto advance_host; 1256 } 1257 } else { 1258 unsigned char *dec = puny_decode(host, l); 1259 if (dec) { 1260 pl = add_to_str(&p, pl, dec); 1261 free(dec); 1262 goto advance_host; 1263 } 1264 } 1265 1266 pl = add_bytes_to_str(&p, pl, host, l); 1267 1268 advance_host: 1269 if (l != len) { 1270 pl = add_chr_to_str(&p, pl, host[l]); 1271 host += l + 1; 1272 len -= l + 1; 1273 goto next_host_elem; 1274 } 1275 return p; 1276 1277 err: 1278 free(p); 1279 return NULL; 1280 } 1281 1282 unsigned char * 1283 idn_encode_url(unsigned char *url, int decode) 1284 { 1285 unsigned char *host, *p, *h; 1286 int holen; 1287 size_t pl; 1288 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &host, &holen, NULL, 1289 NULL, NULL, NULL, NULL) 1290 || !host) { 1291 host = url; 1292 holen = 0; 1293 } 1294 1295 h = idn_encode_host(host, holen, cast_uchar ".", decode); 1296 if (!h) 1297 return NULL; 1298 1299 p = NULL; 1300 pl = add_bytes_to_str(&p, 0, url, host - url); 1301 pl = add_to_str(&p, pl, h); 1302 pl = add_to_str(&p, pl, host + holen); 1303 free(h); 1304 return p; 1305 } 1306 1307 static unsigned char * 1308 display_url_or_host(struct terminal *term, unsigned char *url, int warn_idn, 1309 int just_host, unsigned char *separator) 1310 { 1311 unsigned char *uu, *url_dec, *url_conv, *url_conv2, *url_enc, *ret; 1312 int is_idn; 1313 1314 if (!url) 1315 return stracpy(cast_uchar ""); 1316 1317 url = stracpy(url); 1318 if (!just_host) 1319 if ((uu = cast_uchar strchr((char *)url, POST_CHAR))) 1320 *uu = 0; 1321 1322 if (!url_non_ascii(url) && !strstr((char *)url, xn)) 1323 return url; 1324 1325 if (!just_host) 1326 url_dec = idn_encode_url(url, 1); 1327 else 1328 url_dec = idn_encode_host(url, (int)strlen((char *)url), 1329 separator, 1); 1330 is_idn = strcmp((char *)url_dec, (char *)url); 1331 url_conv = stracpy(url_dec); 1332 free(url_dec); 1333 url_conv2 = stracpy(url_conv); 1334 if (!just_host) 1335 url_enc = idn_encode_url(url_conv2, 0); 1336 else 1337 url_enc = idn_encode_host( 1338 url_conv2, (int)strlen((char *)url_conv2), separator, 0); 1339 if (!url_enc) { 1340 url_enc = stracpy(url_conv2); 1341 is_idn = 1; 1342 } 1343 free(url_conv2); 1344 if (!strcmp((char *)url_enc, (char *)url)) { 1345 if (is_idn && warn_idn) { 1346 ret = stracpy(cast_uchar "(IDN) "); 1347 add_to_strn(&ret, url_conv); 1348 } else { 1349 ret = url_conv; 1350 url_conv = NULL; 1351 } 1352 } else 1353 ret = stracpy(url); 1354 free(url); 1355 free(url_conv); 1356 free(url_enc); 1357 return ret; 1358 } 1359 1360 unsigned char * 1361 display_url(struct terminal *term, unsigned char *url, int warn_idn) 1362 { 1363 return display_url_or_host(term, url, warn_idn, 0, cast_uchar "."); 1364 } 1365 1366 unsigned char * 1367 display_host(struct terminal *term, unsigned char *host) 1368 { 1369 return display_url_or_host(term, host, 1, 1, cast_uchar "."); 1370 } 1371 1372 unsigned char * 1373 display_host_list(struct terminal *term, unsigned char *host) 1374 { 1375 return display_url_or_host(term, host, 0, 1, cast_uchar ".,"); 1376 }