http.c (32031B)
1 /* http.c 2 * HTTP protocol client implementation 3 * (c) 2002 Mikulas Patocka 4 * This file is a part of the Links program, released under GPL. 5 */ 6 7 #include <limits.h> 8 #include <stdlib.h> 9 #include <string.h> 10 11 #include "links.h" 12 13 struct http_connection_info { 14 int bl_flags; 15 int http10; 16 int https_forward; 17 int close; 18 int send_close; 19 off_t length; 20 int version; 21 int chunk_remaining; 22 }; 23 24 /* prototypes */ 25 static void http_send_header(struct connection *c); 26 static void http_get_header(struct connection *c); 27 static void test_restart(struct connection *c); 28 static size_t add_user_agent(unsigned char **, size_t, const char *); 29 static size_t add_referer(unsigned char **, size_t, unsigned char *, 30 unsigned char *); 31 static size_t add_accept(unsigned char **, size_t); 32 static size_t add_accept_encoding(unsigned char **, size_t, unsigned char *, 33 struct connection *); 34 static size_t add_accept_charset(unsigned char **, size_t, 35 struct http_connection_info *); 36 static size_t add_connection(unsigned char **, size_t, int, int, int); 37 static size_t add_upgrade(unsigned char **, size_t); 38 static size_t add_if_modified(unsigned char **, size_t, struct connection *); 39 static size_t add_range(unsigned char **, size_t, unsigned char *, 40 struct connection *); 41 static size_t add_pragma_no_cache(unsigned char **, size_t, int); 42 static size_t add_proxy_auth_string(unsigned char **, size_t, unsigned char *); 43 static size_t add_auth_string(unsigned char **, size_t, unsigned char *); 44 static size_t add_post_header(unsigned char **, size_t, unsigned char **); 45 static size_t add_extra_options(unsigned char **, size_t); 46 47 /* Returns a string pointer with value of the item. 48 * The string must be destroyed after usage with mem_free. 49 */ 50 unsigned char * 51 parse_http_header(unsigned char *head, unsigned char *item, unsigned char **ptr) 52 { 53 unsigned char *i, *f, *g, *h; 54 if (!head) 55 return NULL; 56 for (f = head; *f; f++) { 57 if (*f != 10) 58 continue; 59 f++; 60 for (i = item; *i && *f; i++, f++) 61 if (upcase(*i) != upcase(*f)) 62 goto cont; 63 if (!*f) 64 break; 65 if (f[0] == ':') { 66 while (f[1] == ' ') 67 f++; 68 for (g = ++f; *g >= ' '; g++) 69 ; 70 while (g > f && g[-1] == ' ') 71 g--; 72 h = memacpy(f, g - f); 73 if (ptr) 74 *ptr = f; 75 return h; 76 } 77 cont: 78 f--; 79 } 80 return NULL; 81 } 82 83 unsigned char * 84 parse_header_param(unsigned char *x, unsigned char *e, int all) 85 { 86 unsigned char u; 87 size_t le = strlen((char *)e); 88 int lp; 89 unsigned char *y = x; 90 if (!all) { 91 a: 92 if (!(y = cast_uchar strchr((char *)y, ';'))) 93 return NULL; 94 } 95 while (*y && (*y == ';' || *y <= ' ')) 96 y++; 97 if (strlen((char *)y) < le) 98 return NULL; 99 if (casecmp(y, e, le)) 100 goto a; 101 y += le; 102 while (*y && (*y <= ' ' || *y == '=')) 103 y++; 104 u = ';'; 105 if (*y == '\'' || *y == '"') 106 u = *y++; 107 lp = 0; 108 while (y[lp] >= ' ' && y[lp] != u) { 109 lp++; 110 if (lp == INT_MAX) 111 overalloc(); 112 } 113 return memacpy(y, lp); 114 } 115 116 int 117 get_http_code(unsigned char *head, int *code, int *version) 118 { 119 if (!head) 120 return -1; 121 while (head[0] == ' ') 122 head++; 123 if (upcase(head[0]) != 'H' || upcase(head[1]) != 'T' 124 || upcase(head[2]) != 'T' || upcase(head[3]) != 'P') 125 return -1; 126 if (head[4] == '/' && head[5] >= '0' && head[5] <= '9' && head[6] == '.' 127 && head[7] >= '0' && head[7] <= '9' && head[8] <= ' ') { 128 if (version) 129 *version = (head[5] - '0') * 10 + head[7] - '0'; 130 } else if (version) 131 *version = 0; 132 for (head += 4; *head > ' '; head++) 133 ; 134 if (*head++ != ' ') 135 return -1; 136 if (head[0] < '1' || head[0] > '9' || head[1] < '0' || head[1] > '9' 137 || head[2] < '0' || head[2] > '9') { 138 if (code) 139 *code = 200; 140 return 0; 141 } 142 if (code) 143 *code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2] 144 - '0'; 145 return 0; 146 } 147 148 static struct { 149 const char *name; 150 int bugs; 151 } buggy_servers[] = { 152 {"mod_czech/3.1.0", BL_HTTP10 }, 153 { "Purveyor", BL_HTTP10 }, 154 { "Netscape-Enterprise", BL_HTTP10 | BL_NO_ACCEPT_LANGUAGE}, 155 { "Apache Coyote", BL_HTTP10 }, 156 { "lighttpd", BL_HTTP10 }, 157 { "FORPSI", BL_NO_RANGE }, 158 { "Sausalito", BL_HTTP10 }, 159 { NULL, 0 } 160 }; 161 162 static int 163 check_http_server_bugs(unsigned char *url, struct http_connection_info *info, 164 unsigned char *head) 165 { 166 unsigned char *server; 167 int i, bugs; 168 if (!http_options.allow_blacklist || info->http10) 169 return 0; 170 if (!(server = parse_http_header(head, cast_uchar "Server", NULL))) 171 return 0; 172 bugs = 0; 173 for (i = 0; buggy_servers[i].name; i++) 174 if (strstr((char *)server, buggy_servers[i].name)) 175 bugs |= buggy_servers[i].bugs; 176 free(server); 177 if (bugs && (server = get_host_name(url))) { 178 add_blacklist_entry(server, bugs); 179 free(server); 180 return bugs & ~BL_NO_RANGE; 181 } 182 return 0; 183 } 184 185 static void 186 http_end_request(struct connection *c, int notrunc, int nokeepalive, int state) 187 { 188 struct http_connection_info *info = c->info; 189 if (state == S__OK && c->cache) { 190 if (!notrunc) 191 truncate_entry(c->cache, c->from, 1); 192 c->cache->incomplete = 0; 193 } 194 setcstate(c, state); 195 if (c->info && !info->close && !info->send_close && !nokeepalive) { 196 add_keepalive_socket(c, HTTP_KEEPALIVE_TIMEOUT, 0); 197 } else 198 abort_connection(c); 199 } 200 201 void 202 http_func(struct connection *c) 203 { 204 /*setcstate(c, S_CONN);*/ 205 /*set_connection_timeout(c);*/ 206 if (get_keepalive_socket(c, NULL)) { 207 int p; 208 if ((p = get_port(c->url)) < 0) { 209 setcstate(c, S_BAD_URL); 210 abort_connection(c); 211 return; 212 } 213 make_connection(c, p, &c->sock1, http_send_header); 214 } else 215 http_send_header(c); 216 } 217 218 void 219 proxy_func(struct connection *c) 220 { 221 http_func(c); 222 } 223 224 static size_t 225 add_url_to_str(unsigned char **str, size_t l, unsigned char *url) 226 { 227 unsigned char *sp; 228 for (sp = url; *sp && *sp != POST_CHAR; sp++) { 229 if (*sp <= ' ' || *sp >= 127) { 230 unsigned char esc[4]; 231 sprintf((char *)esc, "%%%02X", (int)*sp); 232 l = add_to_str(str, l, esc); 233 } else 234 l = add_chr_to_str(str, l, *sp); 235 } 236 return l; 237 } 238 239 static void 240 http_send_header(struct connection *c) 241 { 242 struct http_connection_info *info; 243 int http10 = http_options.http10; 244 int proxy; 245 unsigned char *hdr; 246 unsigned char *h, *u; 247 unsigned char *u2; 248 size_t l = 0; 249 unsigned char *post = NULL; 250 unsigned char *host; 251 252 if (!c->cache) 253 if (!find_in_cache(c->url, &c->cache)) 254 c->cache->refcount--; 255 256 proxy = is_proxy_url(c->url); 257 host = remove_proxy_prefix(c->url); 258 set_connection_timeout_keepal(c); 259 info = mem_calloc(sizeof(struct http_connection_info)); 260 c->info = info; 261 info->https_forward = !c->ssl && proxy && host 262 && !casecmp(host, cast_uchar "https://", 8); 263 if (c->ssl) 264 proxy = 0; 265 hdr = NULL; 266 if (!host) { 267 http_bad_url: 268 free(hdr); 269 http_end_request(c, 0, 1, S_BAD_URL); 270 return; 271 } 272 if (!info->https_forward && (h = get_host_name(host))) { 273 info->bl_flags = get_blacklist_flags(h); 274 free(h); 275 } 276 if (info->bl_flags & BL_HTTP10) 277 http10 = 1; 278 info->http10 = http10; 279 if (!info->https_forward) { 280 post = cast_uchar strchr((char *)host, POST_CHAR); 281 if (post) 282 post++; 283 } 284 info->send_close = info->https_forward || http10; 285 if (info->https_forward) { 286 l = add_to_str(&hdr, l, cast_uchar "CONNECT "); 287 h = get_host_name(host); 288 if (!h) 289 goto http_bad_url; 290 l = add_to_str(&hdr, l, h); 291 free(h); 292 h = get_port_str(host); 293 if (!h) 294 h = stracpy(cast_uchar "443"); 295 l = add_chr_to_str(&hdr, l, ':'); 296 l = add_to_str(&hdr, l, h); 297 free(h); 298 goto added_connect; 299 } else if (!post) 300 l = add_to_str(&hdr, l, cast_uchar "GET "); 301 else { 302 l = add_to_str(&hdr, l, cast_uchar "POST "); 303 c->unrestartable = 2; 304 } 305 if (!proxy) { 306 l = add_chr_to_str(&hdr, l, '/'); 307 u = get_url_data(host); 308 } else 309 u = host; 310 311 if (post && post < u) 312 goto http_bad_url; 313 314 u2 = u; 315 if (proxy && !*c->socks_proxy && *proxies.dns_append) { 316 unsigned char *u_host; 317 int u_host_len; 318 int u2_len = 0; 319 if (parse_url(u, NULL, NULL, NULL, NULL, NULL, &u_host, 320 &u_host_len, NULL, NULL, NULL, NULL, NULL)) 321 goto http_bad_url; 322 u2 = NULL; 323 u2_len = 324 add_bytes_to_str(&u2, u2_len, u, u_host + u_host_len - u); 325 u2_len = add_to_str(&u2, u2_len, proxies.dns_append); 326 u2_len = add_to_str(&u2, u2_len, u_host + u_host_len); 327 } 328 l = add_url_to_str(&hdr, l, u2); 329 if (u2 != u) 330 free(u2); 331 added_connect: 332 if (!http10) 333 l = add_to_str(&hdr, l, cast_uchar " HTTP/1.1\r\n"); 334 else 335 l = add_to_str(&hdr, l, cast_uchar " HTTP/1.0\r\n"); 336 if (!info->https_forward && (h = get_host_name(host))) { 337 l = add_to_str(&hdr, l, cast_uchar "Host: "); 338 if (*h && h[strlen((char *)h) - 1] == '.') { 339 h[strlen((char *)h) - 1] = 0; 340 } 341 if (h[0] == '[' && h[strlen((char *)h) - 1] == ']') { 342 char *pc = strchr((char *)h, '%'); 343 if (pc) { 344 pc[0] = ']'; 345 pc[1] = 0; 346 } 347 } 348 l = add_to_str(&hdr, l, h); 349 free(h); 350 if ((h = get_port_str(host))) { 351 if (strcmp(cast_char h, c->ssl ? "443" : "80")) { 352 l = add_chr_to_str(&hdr, l, ':'); 353 l = add_to_str(&hdr, l, h); 354 } 355 free(h); 356 } 357 l = add_to_str(&hdr, l, cast_uchar "\r\n"); 358 } 359 l = add_user_agent(&hdr, l, 360 info->https_forward ? NULL : cast_char host); 361 if (proxy) 362 l = add_proxy_auth_string(&hdr, l, c->url); 363 if (!info->https_forward) { 364 test_restart(c); 365 l = add_referer(&hdr, l, host, c->prev_url); 366 l = add_accept(&hdr, l); 367 l = add_accept_encoding(&hdr, l, host, c); 368 l = add_accept_charset(&hdr, l, info); 369 l = add_connection(&hdr, l, http10, proxy, !info->send_close); 370 l = add_upgrade(&hdr, l); 371 l = add_if_modified(&hdr, l, c); 372 l = add_range(&hdr, l, host, c); 373 l = add_pragma_no_cache(&hdr, l, c->no_cache); 374 l = add_auth_string(&hdr, l, host); 375 l = add_post_header(&hdr, l, &post); 376 l = add_cookies(&hdr, l, host); 377 l = add_extra_options(&hdr, l); 378 } 379 l = add_to_str(&hdr, l, cast_uchar "\r\n"); 380 if (post) { 381 while (post[0] && post[1]) { 382 int h1, h2; 383 h1 = post[0] <= '9' ? (unsigned)post[0] - '0' 384 : post[0] >= 'A' ? upcase(post[0]) - 'A' + 10 385 : 0; 386 if (h1 < 0 || h1 >= 16) 387 h1 = 0; 388 h2 = post[1] <= '9' ? (unsigned)post[1] - '0' 389 : post[1] >= 'A' ? upcase(post[1]) - 'A' + 10 390 : 0; 391 if (h2 < 0 || h2 >= 16) 392 h2 = 0; 393 l = add_chr_to_str(&hdr, l, h1 * 16 + h2); 394 post += 2; 395 } 396 } 397 write_to_socket(c, c->sock1, hdr, l, http_get_header); 398 free(hdr); 399 setcstate(c, S_SENT); 400 } 401 402 static void 403 test_restart(struct connection *c) 404 { 405 /* If the cached entity is compressed, request the whole file and turn 406 * off compression */ 407 if (c->cache && c->from) { 408 unsigned char *d; 409 if ((d = parse_http_header(c->cache->head, 410 cast_uchar "Content-Encoding", 411 NULL))) { 412 free(d); 413 c->from = 0; 414 c->no_compress = 1; 415 if (c->tries >= 1) { 416 unsigned char *h; 417 if ((h = get_host_name(c->url))) { 418 add_blacklist_entry(h, 419 BL_NO_COMPRESSION); 420 free(h); 421 } 422 } 423 } 424 } 425 } 426 427 static size_t 428 add_user_agent(unsigned char **hdr, size_t l, const char *url) 429 { 430 l = add_to_str(hdr, l, cast_uchar "User-Agent: "); 431 if (SCRUB_HEADERS) 432 l = add_to_str(hdr, l, 433 cast_uchar 434 "Mozilla/5.0 (Windows NT 6.1; rv:60.0) " 435 "Gecko/20100101 Firefox/60.0\r\n"); 436 else if (!(*http_options.header.fake_useragent)) { 437 /* 438 * Google started to return css-styled page for searches. 439 * It returns non-css page if the user agent begins with Lynx. 440 */ 441 if (url 442 && (casestrstr(cast_uchar url, cast_uchar "/www.google.") 443 || casestrstr(cast_uchar url, cast_uchar "/google.")) 444 && strstr(url, "/search?") 445 && (strstr(url, "?q=") || strstr(url, "&q=")) 446 && !strstr(url, "?tbm=isch") && !strstr(url, "&tbm=isch")) 447 l = add_to_str(hdr, l, cast_uchar("Lynx/")); 448 449 l = add_to_str(hdr, l, cast_uchar("Links (" VERSION "; ")); 450 l = add_to_str(hdr, l, system_name); 451 l = add_to_str(hdr, l, cast_uchar "; "); 452 if (!list_empty(terminals)) { 453 unsigned char *t = cast_uchar "text"; 454 l = add_to_str(hdr, l, t); 455 } else { 456 l = add_to_str(hdr, l, cast_uchar "dump"); 457 } 458 l = add_to_str(hdr, l, cast_uchar ")\r\n"); 459 } else { 460 l = add_to_str(hdr, l, http_options.header.fake_useragent); 461 l = add_to_str(hdr, l, cast_uchar "\r\n"); 462 } 463 return l; 464 } 465 466 static size_t 467 add_referer(unsigned char **hdr, size_t l, unsigned char *url, 468 unsigned char *prev_url) 469 { 470 l = add_to_str(hdr, l, cast_uchar "Referer: "); 471 l = add_url_to_str(hdr, l, url); 472 return add_to_str(hdr, l, cast_uchar "\r\n"); 473 } 474 475 static size_t 476 add_accept(unsigned char **hdr, size_t l) 477 { 478 if (SCRUB_HEADERS) 479 return add_to_str(hdr, l, 480 cast_uchar 481 "Accept: " 482 "text/html,application/xhtml+xml,application/" 483 "xml;q=0.9,*/*;q=0.8\r\n"); 484 return add_to_str(hdr, l, cast_uchar "Accept: */*\r\n"); 485 } 486 487 static int 488 advertise_compression(unsigned char *url, struct connection *c) 489 { 490 struct http_connection_info *info = c->info; 491 char *extd; 492 if (c->no_compress || http_options.no_compression 493 || info->bl_flags & BL_NO_COMPRESSION) 494 return 0; 495 496 /* Fix for bugzilla. The attachment may be compressed and if the server 497 compresses it again, we can't decompress the inner compression */ 498 if (strstr((char *)url, "/attachment.cgi?")) 499 return 0; 500 501 extd = strrchr((char *)url, '.'); 502 if (extd && get_compress_by_extension(extd + 1, strchr((extd + 1), 0))) 503 return 0; 504 return 1; 505 } 506 507 static size_t 508 add_accept_encoding(unsigned char **hdr, size_t l, unsigned char *url, 509 struct connection *c) 510 { 511 if (advertise_compression(url, c)) { 512 size_t orig_l = l; 513 size_t l1; 514 l = add_to_str(hdr, l, cast_uchar "Accept-Encoding: "); 515 l1 = l; 516 l = add_to_str(hdr, l, cast_uchar "gzip, deflate"); 517 if (l != l1) 518 l = add_to_str(hdr, l, cast_uchar "\r\n"); 519 else 520 l = orig_l; 521 } 522 523 return l; 524 } 525 526 static size_t 527 add_accept_charset(unsigned char **hdr, size_t l, 528 struct http_connection_info *info) 529 { 530 static unsigned char *accept_charset = NULL; 531 532 if (SCRUB_HEADERS || info->bl_flags & BL_NO_CHARSET 533 || http_options.no_accept_charset) 534 return l; 535 536 if (!accept_charset) { 537 unsigned char *cs, *ac; 538 size_t aclen = 0; 539 ac = NULL; 540 cs = get_cp_mime_name(0); 541 if (aclen) 542 aclen = add_chr_to_str(&ac, aclen, ','); 543 else 544 aclen = add_to_str(&ac, aclen, 545 cast_uchar "Accept-Charset: "); 546 aclen = add_to_str(&ac, aclen, cs); 547 if (aclen) 548 aclen = add_to_str(&ac, aclen, cast_uchar "\r\n"); 549 if (!(accept_charset = cast_uchar strdup((char *)ac))) { 550 l = add_to_str(hdr, l, ac); 551 free(ac); 552 return l; 553 } 554 free(ac); 555 } 556 return add_to_str(hdr, l, accept_charset); 557 } 558 559 static size_t 560 add_connection(unsigned char **hdr, size_t l, int http10, int proxy, int alive) 561 { 562 if (!http10) { 563 if (!proxy) 564 l = add_to_str(hdr, l, cast_uchar "Connection: "); 565 else 566 l = add_to_str(hdr, l, cast_uchar "Proxy-Connection: "); 567 if (alive) 568 l = add_to_str(hdr, l, cast_uchar "keep-alive\r\n"); 569 else 570 l = add_to_str(hdr, l, cast_uchar "close\r\n"); 571 } 572 return l; 573 } 574 575 static size_t 576 add_upgrade(unsigned char **hdr, size_t l) 577 { 578 if (proxies.only_proxies) 579 return add_to_str( 580 hdr, l, cast_uchar "Upgrade-Insecure-Requests: 1\r\n"); 581 return l; 582 } 583 584 static size_t 585 add_if_modified(unsigned char **hdr, size_t l, struct connection *c) 586 { 587 struct cache_entry *e; 588 if ((e = c->cache)) { 589 int code = 0; 590 if (get_http_code(e->head, &code, NULL) || code >= 400) 591 return l; 592 if (!e->incomplete && e->head && c->no_cache <= NC_IF_MOD) { 593 unsigned char *m; 594 if (e->last_modified) 595 m = stracpy(e->last_modified); 596 else if ((m = parse_http_header( 597 e->head, cast_uchar "Date", NULL))) 598 ; 599 else if ((m = parse_http_header( 600 e->head, cast_uchar "Expires", NULL))) 601 ; 602 else 603 return l; 604 l = add_to_str(hdr, l, 605 cast_uchar "If-Modified-Since: "); 606 l = add_to_str(hdr, l, m); 607 l = add_to_str(hdr, l, cast_uchar "\r\n"); 608 free(m); 609 } 610 } 611 612 return l; 613 } 614 615 static size_t 616 add_range(unsigned char **hdr, size_t l, unsigned char *url, 617 struct connection *c) 618 { 619 struct cache_entry *e; 620 struct http_connection_info *info = c->info; 621 if ((e = c->cache)) { 622 int code = 0; 623 if (!get_http_code(e->head, &code, NULL) && code >= 300) 624 return l; 625 } 626 if (c->from && c->no_cache < NC_IF_MOD 627 && !(info->bl_flags & BL_NO_RANGE)) { 628 l = add_to_str(hdr, l, cast_uchar "Range: bytes="); 629 l = add_num_to_str(hdr, l, c->from); 630 l = add_to_str(hdr, l, cast_uchar "-\r\n"); 631 } 632 return l; 633 } 634 635 static size_t 636 add_pragma_no_cache(unsigned char **hdr, size_t l, int no_cache) 637 { 638 if (no_cache >= NC_PR_NO_CACHE) 639 return add_to_str( 640 hdr, l, 641 cast_uchar 642 "Pragma: no-cache\r\nCache-Control: no-cache\r\n"); 643 return l; 644 } 645 646 static size_t 647 add_proxy_auth_string(unsigned char **hdr, size_t l, unsigned char *url) 648 { 649 unsigned char *h; 650 if ((h = get_auth_string(url, 1))) { 651 l = add_to_str(hdr, l, h); 652 free(h); 653 } 654 return l; 655 } 656 657 static size_t 658 add_auth_string(unsigned char **hdr, size_t l, unsigned char *url) 659 { 660 unsigned char *h; 661 if ((h = get_auth_string(url, 0))) { 662 l = add_to_str(hdr, l, h); 663 free(h); 664 } 665 return l; 666 } 667 668 static size_t 669 add_post_header(unsigned char **hdr, size_t l, unsigned char **post) 670 { 671 if (!*post) 672 return l; 673 674 unsigned char *pd = cast_uchar strchr((char *)*post, '\n'); 675 if (pd) { 676 l = add_to_str(hdr, l, cast_uchar "Content-Type: "); 677 l = add_bytes_to_str(hdr, l, *post, pd - *post); 678 l = add_to_str(hdr, l, cast_uchar "\r\n"); 679 *post = pd + 1; 680 } 681 l = add_to_str(hdr, l, cast_uchar "Content-Length: "); 682 l = add_num_to_str(hdr, l, strlen((char *)*post) / 2); 683 return add_to_str(hdr, l, cast_uchar "\r\n"); 684 } 685 686 static size_t 687 add_extra_options(unsigned char **hdr, size_t l) 688 { 689 unsigned char *p = http_options.header.extra_header; 690 while (1) { 691 unsigned char *q = p + strcspn((char *)p, "\\"); 692 if (p != q) { 693 unsigned char *c; 694 unsigned char *s = memacpy(p, q - p); 695 c = cast_uchar strchr((char *)s, ':'); 696 if (c && casecmp(s, cast_uchar "Cookie:", 7)) { 697 unsigned char *v = NULL; 698 unsigned char *cc = memacpy(s, c - s); 699 unsigned char *x = 700 parse_http_header(*hdr, cc, &v); 701 free(cc); 702 if (x) { 703 unsigned char *new_hdr; 704 size_t new_l; 705 free(x); 706 new_hdr = NULL; 707 new_l = add_bytes_to_str( 708 &new_hdr, 0, *hdr, v - *hdr); 709 while (*++c == ' ') 710 ; 711 new_l = add_to_str(&new_hdr, new_l, c); 712 new_l = add_to_str( 713 &new_hdr, new_l, 714 v + strcspn((char *)v, "\r\n")); 715 free(*hdr); 716 *hdr = new_hdr; 717 l = new_l; 718 goto already_added; 719 } 720 } 721 l = add_to_str(hdr, l, s); 722 l = add_to_str(hdr, l, cast_uchar "\r\n"); 723 already_added: 724 free(s); 725 } 726 if (!*q) 727 break; 728 p = q + 1; 729 } 730 731 return l; 732 } 733 734 static int 735 is_line_in_buffer(struct read_buffer *rb) 736 { 737 int l; 738 for (l = 0; l < rb->len; l++) { 739 if (rb->data[l] == 10) 740 return l + 1; 741 if (l < rb->len - 1 && rb->data[l] == 13 742 && rb->data[l + 1] == 10) 743 return l + 2; 744 if (l == rb->len - 1 && rb->data[l] == 13) 745 return 0; 746 if (rb->data[l] < ' ') 747 return -1; 748 } 749 return 0; 750 } 751 752 static void 753 read_http_data(struct connection *c, struct read_buffer *rb) 754 { 755 struct http_connection_info *info = c->info; 756 int a; 757 set_connection_timeout(c); 758 if (rb->close == 2) { 759 http_end_request(c, 0, 0, S__OK); 760 return; 761 } 762 if (info->length != -2) { 763 int l = rb->len; 764 if (info->length >= 0 && info->length < l) 765 l = (int)info->length; 766 if ((off_t)(0UL + c->from + l) < 0) { 767 setcstate(c, S_LARGE_FILE); 768 abort_connection(c); 769 return; 770 } 771 c->received += l; 772 a = add_fragment(c->cache, c->from, rb->data, l); 773 if (a < 0) { 774 setcstate(c, a); 775 abort_connection(c); 776 return; 777 } 778 if (a == 1) 779 c->tries = 0; 780 if (info->length >= 0) 781 info->length -= l; 782 c->from += l; 783 kill_buffer_data(rb, l); 784 if (!info->length) { 785 http_end_request(c, 0, 0, S__OK); 786 return; 787 } 788 } else { 789 next_chunk: 790 if (info->chunk_remaining == -2) { 791 int l; 792 if ((l = is_line_in_buffer(rb))) { 793 if (l == -1) { 794 setcstate(c, S_HTTP_ERROR); 795 abort_connection(c); 796 return; 797 } 798 kill_buffer_data(rb, l); 799 if (l <= 2) { 800 http_end_request(c, 0, 0, S__OK); 801 return; 802 } 803 goto next_chunk; 804 } 805 } else if (info->chunk_remaining == -1) { 806 int l; 807 if ((l = is_line_in_buffer(rb))) { 808 char *end; 809 long n = 0; 810 if (l != -1) 811 n = strtol(cast_const_char rb->data, 812 &end, 16); 813 if (l == -1 || n < 0 || n >= INT_MAX 814 || cast_uchar end == rb->data) { 815 setcstate(c, S_HTTP_ERROR); 816 abort_connection(c); 817 return; 818 } 819 kill_buffer_data(rb, l); 820 if (!(info->chunk_remaining = (int)n)) 821 info->chunk_remaining = -2; 822 goto next_chunk; 823 } 824 } else { 825 int l = info->chunk_remaining; 826 if (l > rb->len) 827 l = rb->len; 828 if ((off_t)(0UL + c->from + l) < 0) { 829 setcstate(c, S_LARGE_FILE); 830 abort_connection(c); 831 return; 832 } 833 c->received += l; 834 a = add_fragment(c->cache, c->from, rb->data, l); 835 if (a < 0) { 836 setcstate(c, a); 837 abort_connection(c); 838 return; 839 } 840 if (a == 1) 841 c->tries = 0; 842 info->chunk_remaining -= l; 843 c->from += l; 844 kill_buffer_data(rb, l); 845 if (!info->chunk_remaining && rb->len >= 1) { 846 if (rb->data[0] == 10) 847 kill_buffer_data(rb, 1); 848 else { 849 if (rb->data[0] != 13 850 || (rb->len >= 2 851 && ((unsigned char *) 852 rb->data)[1] 853 != 10)) { 854 setcstate(c, S_HTTP_ERROR); 855 abort_connection(c); 856 return; 857 } 858 if (rb->len < 2) 859 goto read_more; 860 kill_buffer_data(rb, 2); 861 } 862 info->chunk_remaining = -1; 863 goto next_chunk; 864 } 865 } 866 } 867 read_more: 868 read_from_socket(c, c->sock1, rb, read_http_data); 869 setcstate(c, S_TRANS); 870 } 871 872 static int 873 get_header(struct read_buffer *rb) 874 { 875 int i; 876 if (rb->len <= 0) 877 return 0; 878 if (rb->data[0] != 'H') 879 return -2; 880 if (rb->len <= 1) 881 return 0; 882 if (((unsigned char *)rb->data)[1] != 'T') 883 return -2; 884 if (rb->len <= 2) 885 return 0; 886 if (((unsigned char *)rb->data)[2] != 'T') 887 return -2; 888 if (rb->len <= 3) 889 return 0; 890 if (((unsigned char *)rb->data)[3] != 'P') 891 return -2; 892 for (i = 0; i < rb->len; i++) { 893 unsigned char a = rb->data[i]; 894 if (!a) 895 return -1; 896 if (i < rb->len - 1 && a == 10 && rb->data[i + 1] == 10) 897 return i + 2; 898 if (i < rb->len - 3 && a == 13) { 899 if (rb->data[i + 1] != 10) 900 return -1; 901 if (rb->data[i + 2] == 13) { 902 if (rb->data[i + 3] != 10) 903 return -1; 904 return i + 4; 905 } 906 } 907 } 908 return 0; 909 } 910 911 static void 912 http_got_header(struct connection *c, struct read_buffer *rb) 913 { 914 off_t cf; 915 int state = c->state != S_PROC ? S_GETH : S_PROC; 916 unsigned char *head; 917 int a, h = 0, version = 0; 918 unsigned char *d; 919 struct cache_entry *e; 920 int previous_http_code; 921 struct http_connection_info *info; 922 unsigned char *host = remove_proxy_prefix(c->url); 923 set_connection_timeout(c); 924 info = c->info; 925 if (rb->close == 2) { 926 unsigned char *hs; 927 if (!c->tries && (hs = get_host_name(host))) { 928 if (info->bl_flags & BL_NO_CHARSET) 929 del_blacklist_entry(hs, BL_NO_CHARSET); 930 else { 931 add_blacklist_entry(hs, BL_NO_CHARSET); 932 c->tries = -1; 933 } 934 free(hs); 935 } 936 setcstate(c, S_CANT_READ); 937 retry_connection(c); 938 return; 939 } 940 rb->close = 0; 941 again: 942 if ((a = get_header(rb)) == -1) { 943 setcstate(c, S_HTTP_ERROR); 944 abort_connection(c); 945 return; 946 } 947 if (!a) { 948 read_from_socket(c, c->sock1, rb, http_got_header); 949 setcstate(c, state); 950 return; 951 } 952 if (a != -2) { 953 head = memacpy(rb->data, a); 954 kill_buffer_data(rb, a); 955 } else 956 head = stracpy( 957 cast_uchar 958 "HTTP/0.9 200 OK\r\nContent-Type: text/html\r\n\r\n"); 959 if (get_http_code(head, &h, &version) || h == 101) { 960 free(head); 961 setcstate(c, S_HTTP_ERROR); 962 abort_connection(c); 963 return; 964 } 965 if (check_http_server_bugs(host, c->info, head) 966 && is_connection_restartable(c)) { 967 free(head); 968 setcstate(c, S_RESTART); 969 retry_connection(c); 970 return; 971 } 972 if (h == 100) { 973 free(head); 974 state = S_PROC; 975 goto again; 976 } 977 if (h < 200) { 978 free(head); 979 setcstate(c, S_HTTP_ERROR); 980 abort_connection(c); 981 return; 982 } 983 if (info->https_forward && h >= 200 && h < 300) { 984 free(head); 985 free(c->info); 986 c->info = 0; 987 c->ssl = NULL; 988 continue_connection(c, &c->sock1, http_send_header); 989 return; 990 } 991 if (info->https_forward && h != 407) { 992 free(head); 993 setcstate(c, S_HTTPS_FWD_ERROR); 994 abort_connection(c); 995 return; 996 } 997 if (h != 401 && h != 407) { 998 unsigned char *cookie; 999 unsigned char *ch = head; 1000 while ((cookie = parse_http_header(ch, cast_uchar "Set-Cookie", 1001 &ch))) { 1002 set_cookie(NULL, host, cookie); 1003 free(cookie); 1004 } 1005 } 1006 if (h == 204) { 1007 free(head); 1008 http_end_request(c, 0, 0, S_HTTP_204); 1009 return; 1010 } 1011 if (h == 304) { 1012 free(head); 1013 http_end_request(c, 1, 0, S__OK); 1014 return; 1015 } 1016 if (h == 416 && c->from) { 1017 free(head); 1018 http_end_request(c, 0, 1, S__OK); 1019 return; 1020 } 1021 if (h == 431) { 1022 unsigned char *hs; 1023 if (!(info->bl_flags & BL_NO_CHARSET) 1024 && (hs = get_host_name(host))) { 1025 free(head); 1026 add_blacklist_entry(hs, BL_NO_CHARSET); 1027 free(hs); 1028 c->tries = -1; 1029 setcstate(c, S_RESTART); 1030 retry_connection(c); 1031 return; 1032 } 1033 } 1034 if ((h == 500 || h == 502 || h == 503 || h == 504) 1035 && http_options.retry_internal_errors 1036 && is_connection_restartable(c)) { 1037 /* !!! FIXME: wait some time ... */ 1038 if (is_last_try(c)) { 1039 unsigned char *h; 1040 if ((h = get_host_name(host))) { 1041 add_blacklist_entry(h, BL_NO_BZIP2); 1042 free(h); 1043 } 1044 } 1045 free(head); 1046 setcstate(c, S_RESTART); 1047 retry_connection(c); 1048 return; 1049 } 1050 if (!c->cache) { 1051 if (get_connection_cache_entry(c)) { 1052 free(head); 1053 setcstate(c, S_OUT_OF_MEM); 1054 abort_connection(c); 1055 return; 1056 } 1057 c->cache->refcount--; 1058 } 1059 e = c->cache; 1060 previous_http_code = e->http_code; 1061 e->http_code = h; 1062 free(e->head); 1063 e->head = head; 1064 if ((d = parse_http_header(head, cast_uchar "Expires", NULL))) { 1065 time_t t = parse_http_date(d); 1066 if (t && e->expire_time != 1) 1067 e->expire_time = t; 1068 free(d); 1069 } 1070 if ((d = parse_http_header(head, cast_uchar "Pragma", NULL))) { 1071 if (!casecmp(d, cast_uchar "no-cache", 8)) 1072 e->expire_time = 1; 1073 free(d); 1074 } 1075 if ((d = parse_http_header(head, cast_uchar "Cache-Control", NULL))) { 1076 unsigned char *f = d; 1077 while (1) { 1078 while (*f && (*f == ' ' || *f == ',')) 1079 f++; 1080 if (!*f) 1081 break; 1082 if (!casecmp(f, cast_uchar "no-cache", 8) 1083 || !casecmp(f, cast_uchar "must-revalidate", 15)) 1084 e->expire_time = 1; 1085 if (!casecmp(f, cast_uchar "max-age=", 8)) { 1086 if (e->expire_time != 1) { 1087 errno = 0; 1088 EINTRLOOPX(e->expire_time, time(NULL), 1089 (time_t)-1); 1090 e->expire_time += atoi((char *)(f + 8)); 1091 } 1092 } 1093 while (*f && *f != ',') 1094 f++; 1095 } 1096 free(d); 1097 } 1098 if (c->ssl) { 1099 free(e->ssl_info); 1100 e->ssl_info = get_cipher_string(c->ssl); 1101 free(e->ssl_authority); 1102 e->ssl_authority = stracpy(c->ssl->ca); 1103 } 1104 free(e->redirect); 1105 e->redirect = NULL; 1106 if ((h == 302 || h == 303 || h == 307 || h == 511) && !e->expire_time) 1107 e->expire_time = 1; 1108 if (h == 301 || h == 302 || h == 303 || h == 307 || h == 308) { 1109 if ((d = parse_http_header(e->head, cast_uchar "Location", 1110 NULL))) { 1111 unsigned char *user, *ins; 1112 unsigned char *newuser, *newpassword; 1113 if (!parse_url(d, NULL, &user, NULL, NULL, NULL, &ins, 1114 NULL, NULL, NULL, NULL, NULL, NULL) 1115 && !user && ins 1116 && (newuser = get_user_name(host))) { 1117 if (*newuser) { 1118 int ins_off = (int)(ins - d); 1119 newpassword = get_pass(host); 1120 if (!newpassword) 1121 newpassword = 1122 stracpy(cast_uchar ""); 1123 add_to_strn(&newuser, cast_uchar ":"); 1124 add_to_strn(&newuser, newpassword); 1125 add_to_strn(&newuser, cast_uchar "@"); 1126 extend_str(&d, strlen((char *)newuser)); 1127 ins = d + ins_off; 1128 memmove(ins + strlen((char *)newuser), 1129 ins, strlen((char *)ins) + 1); 1130 memcpy(ins, newuser, 1131 strlen((char *)newuser)); 1132 free(newpassword); 1133 } 1134 free(newuser); 1135 } 1136 free(e->redirect); 1137 e->redirect = d; 1138 if (h == 307 || h == 308) { 1139 unsigned char *p; 1140 if ((p = cast_uchar strchr(cast_const_char host, 1141 POST_CHAR))) 1142 add_to_strn(&e->redirect, p); 1143 } 1144 } 1145 } 1146 if (!e->expire_time && strchr((char *)c->url, POST_CHAR)) 1147 e->expire_time = 1; 1148 info->close = 0; 1149 info->length = -1; 1150 info->version = version; 1151 if ((d = parse_http_header(e->head, cast_uchar "Connection", NULL)) 1152 || (d = parse_http_header(e->head, cast_uchar "Proxy-Connection", 1153 NULL))) { 1154 if (!casestrcmp(d, cast_uchar "close")) 1155 info->close = 1; 1156 free(d); 1157 } else if (version < 11) 1158 info->close = 1; 1159 cf = c->from; 1160 c->from = 0; 1161 if ((d = parse_http_header(e->head, cast_uchar "Content-Range", 1162 NULL))) { 1163 if (strlen((char *)d) > 6) { 1164 d[5] = 0; 1165 if (!(casestrcmp(d, cast_uchar "bytes")) && d[6] >= '0' 1166 && d[6] <= '9') { 1167 long f = strtol((char *)(d + 6), NULL, 10); 1168 if (f >= 0 && (off_t)f >= 0 && (off_t)f == f) 1169 c->from = f; 1170 } 1171 } 1172 free(d); 1173 } else if (h == 206) { 1174 /* Hmm ... some servers send 206 partial but don't send 1175 * Content-Range */ 1176 c->from = cf; 1177 } 1178 if (cf && !c->from && !c->unrestartable) 1179 c->unrestartable = 1; 1180 if (c->from > cf || c->from < 0) { 1181 setcstate(c, S_HTTP_ERROR); 1182 abort_connection(c); 1183 return; 1184 } 1185 if ((d = parse_http_header(e->head, cast_uchar "Content-Length", 1186 NULL))) { 1187 char *ep; 1188 long l = strtol((char *)d, &ep, 10); 1189 if (!*ep && l >= 0 && (off_t)l >= 0 && (off_t)l == l) { 1190 if (!info->close || version >= 11 || h / 100 == 3) 1191 info->length = l; 1192 if (c->from + l >= 0) 1193 c->est_length = c->from + l; 1194 } 1195 free(d); 1196 } 1197 if ((d = parse_http_header(e->head, cast_uchar "Accept-Ranges", 1198 NULL))) { 1199 if (!casestrcmp(d, cast_uchar "none") && !c->unrestartable) 1200 c->unrestartable = 1; 1201 free(d); 1202 } else if (!c->unrestartable && !c->from) 1203 c->unrestartable = 1; 1204 if (info->bl_flags & BL_NO_RANGE && !c->unrestartable) 1205 c->unrestartable = 1; 1206 if ((d = parse_http_header(e->head, cast_uchar "Transfer-Encoding", 1207 NULL))) { 1208 if (!casestrcmp(d, cast_uchar "chunked")) { 1209 info->length = -2; 1210 info->chunk_remaining = -1; 1211 } 1212 free(d); 1213 } 1214 if (!info->close && info->length == -1) 1215 info->close = 1; 1216 if ((d = parse_http_header(e->head, cast_uchar "Last-Modified", 1217 NULL))) { 1218 if (e->last_modified && casestrcmp(e->last_modified, d)) { 1219 delete_entry_content(e); 1220 if (c->from) { 1221 c->from = 0; 1222 free(d); 1223 setcstate(c, S_MODIFIED); 1224 retry_connection(c); 1225 return; 1226 } 1227 } 1228 if (!e->last_modified) 1229 e->last_modified = d; 1230 else 1231 free(d); 1232 } 1233 if (!e->last_modified 1234 && (d = parse_http_header(e->head, cast_uchar "Date", NULL))) 1235 e->last_modified = d; 1236 if (info->length == -1 || (version < 11 && info->close)) 1237 rb->close = 1; 1238 1239 /* 1240 * Truncate entry if: 1241 * - it is compressed (the mix of an old and new document 1242 * would likely produce decompression error). 1243 * - it was http authentication (the user doesn't need to see the 1244 * authentication message). 1245 */ 1246 if ((d = parse_http_header(e->head, cast_uchar "Content-Encoding", 1247 NULL))) { 1248 free(d); 1249 truncate_entry(e, c->from, 0); 1250 } else if (previous_http_code == 401 || previous_http_code == 407) 1251 truncate_entry(e, c->from, 0); 1252 1253 if (info->https_forward && h == 407) { 1254 http_end_request(c, 0, 1, S__OK); 1255 return; 1256 } 1257 1258 read_http_data(c, rb); 1259 } 1260 1261 static void 1262 http_get_header(struct connection *c) 1263 { 1264 struct read_buffer *rb; 1265 set_connection_timeout_keepal(c); 1266 rb = alloc_read_buffer(); 1267 rb->close = 1; 1268 read_from_socket(c, c->sock1, rb, http_got_header); 1269 }