links

lynx-like text mode web browser
git clone anongit@rnpnr.xyz:links.git
Log | Files | Refs | Feed | README | LICENSE

http.c (32031B)


      1 /* http.c
      2  * HTTP protocol client implementation
      3  * (c) 2002 Mikulas Patocka
      4  * This file is a part of the Links program, released under GPL.
      5  */
      6 
      7 #include <limits.h>
      8 #include <stdlib.h>
      9 #include <string.h>
     10 
     11 #include "links.h"
     12 
/* Per-request HTTP state.  http_send_header() allocates one of these
 * (zero-filled) and stores it in connection->info. */
struct http_connection_info {
	/* Blacklist flags (BL_*) looked up for the target host. */
	int bl_flags;
	/* Nonzero when the request line is sent as HTTP/1.0. */
	int http10;
	/* Nonzero when this request is a CONNECT sent to a proxy for an
	 * https:// URL. */
	int https_forward;
	/* Read by http_end_request(): nonzero prevents the socket from being
	 * kept alive.  NOTE(review): where this is set is not visible in this
	 * part of the file. */
	int close;
	/* Set by http_send_header() when https_forward or http10 is in
	 * effect; also prevents keep-alive in http_end_request(). */
	int send_close;
	/* Body length bookkeeping used by read_http_data(): -2 selects the
	 * chunked decoder, a value >= 0 is the number of body bytes still
	 * expected, any other negative value is not counted down. */
	off_t length;
	/* NOTE(review): presumably the HTTP version reported by the server;
	 * its use is not visible in this part of the file. */
	int version;
	/* Chunked decoder state used by read_http_data(): -1 means a
	 * chunk-size line is expected, -2 means the lines after the final
	 * (zero-sized) chunk are being consumed, any other value is the
	 * number of bytes left in the current chunk. */
	int chunk_remaining;
};
     23 
/* Forward declarations for the file-local helpers.  The add_* functions
 * share one convention: they take the address of the header buffer and its
 * current length, and return the new length. */
static void http_send_header(struct connection *c);
static void http_get_header(struct connection *c);
static void test_restart(struct connection *c);
static size_t add_user_agent(unsigned char **, size_t, const char *);
static size_t add_referer(unsigned char **, size_t, unsigned char *,
                          unsigned char *);
static size_t add_accept(unsigned char **, size_t);
static size_t add_accept_encoding(unsigned char **, size_t, unsigned char *,
                                  struct connection *);
static size_t add_accept_charset(unsigned char **, size_t,
                                 struct http_connection_info *);
static size_t add_connection(unsigned char **, size_t, int, int, int);
static size_t add_upgrade(unsigned char **, size_t);
static size_t add_if_modified(unsigned char **, size_t, struct connection *);
static size_t add_range(unsigned char **, size_t, unsigned char *,
                        struct connection *);
static size_t add_pragma_no_cache(unsigned char **, size_t, int);
static size_t add_proxy_auth_string(unsigned char **, size_t, unsigned char *);
static size_t add_auth_string(unsigned char **, size_t, unsigned char *);
static size_t add_post_header(unsigned char **, size_t, unsigned char **);
static size_t add_extra_options(unsigned char **, size_t);
     46 
     47 /* Returns a string pointer with value of the item.
     48  * The string must be destroyed after usage with mem_free.
     49  */
     50 unsigned char *
     51 parse_http_header(unsigned char *head, unsigned char *item, unsigned char **ptr)
     52 {
     53 	unsigned char *i, *f, *g, *h;
     54 	if (!head)
     55 		return NULL;
     56 	for (f = head; *f; f++) {
     57 		if (*f != 10)
     58 			continue;
     59 		f++;
     60 		for (i = item; *i && *f; i++, f++)
     61 			if (upcase(*i) != upcase(*f))
     62 				goto cont;
     63 		if (!*f)
     64 			break;
     65 		if (f[0] == ':') {
     66 			while (f[1] == ' ')
     67 				f++;
     68 			for (g = ++f; *g >= ' '; g++)
     69 				;
     70 			while (g > f && g[-1] == ' ')
     71 				g--;
     72 			h = memacpy(f, g - f);
     73 			if (ptr)
     74 				*ptr = f;
     75 			return h;
     76 		}
     77 cont:
     78 		f--;
     79 	}
     80 	return NULL;
     81 }
     82 
     83 unsigned char *
     84 parse_header_param(unsigned char *x, unsigned char *e, int all)
     85 {
     86 	unsigned char u;
     87 	size_t le = strlen((char *)e);
     88 	int lp;
     89 	unsigned char *y = x;
     90 	if (!all) {
     91 a:
     92 		if (!(y = cast_uchar strchr((char *)y, ';')))
     93 			return NULL;
     94 	}
     95 	while (*y && (*y == ';' || *y <= ' '))
     96 		y++;
     97 	if (strlen((char *)y) < le)
     98 		return NULL;
     99 	if (casecmp(y, e, le))
    100 		goto a;
    101 	y += le;
    102 	while (*y && (*y <= ' ' || *y == '='))
    103 		y++;
    104 	u = ';';
    105 	if (*y == '\'' || *y == '"')
    106 		u = *y++;
    107 	lp = 0;
    108 	while (y[lp] >= ' ' && y[lp] != u) {
    109 		lp++;
    110 		if (lp == INT_MAX)
    111 			overalloc();
    112 	}
    113 	return memacpy(y, lp);
    114 }
    115 
    116 int
    117 get_http_code(unsigned char *head, int *code, int *version)
    118 {
    119 	if (!head)
    120 		return -1;
    121 	while (head[0] == ' ')
    122 		head++;
    123 	if (upcase(head[0]) != 'H' || upcase(head[1]) != 'T'
    124 	    || upcase(head[2]) != 'T' || upcase(head[3]) != 'P')
    125 		return -1;
    126 	if (head[4] == '/' && head[5] >= '0' && head[5] <= '9' && head[6] == '.'
    127 	    && head[7] >= '0' && head[7] <= '9' && head[8] <= ' ') {
    128 		if (version)
    129 			*version = (head[5] - '0') * 10 + head[7] - '0';
    130 	} else if (version)
    131 		*version = 0;
    132 	for (head += 4; *head > ' '; head++)
    133 		;
    134 	if (*head++ != ' ')
    135 		return -1;
    136 	if (head[0] < '1' || head[0] > '9' || head[1] < '0' || head[1] > '9'
    137 	    || head[2] < '0' || head[2] > '9') {
    138 		if (code)
    139 			*code = 200;
    140 		return 0;
    141 	}
    142 	if (code)
    143 		*code = (head[0] - '0') * 100 + (head[1] - '0') * 10 + head[2]
    144 		        - '0';
    145 	return 0;
    146 }
    147 
    148 static struct {
    149 	const char *name;
    150 	int bugs;
    151 } buggy_servers[] = {
    152 	{"mod_czech/3.1.0",      BL_HTTP10                        },
    153 	{ "Purveyor",            BL_HTTP10                        },
    154 	{ "Netscape-Enterprise", BL_HTTP10 | BL_NO_ACCEPT_LANGUAGE},
    155 	{ "Apache Coyote",       BL_HTTP10                        },
    156 	{ "lighttpd",            BL_HTTP10                        },
    157 	{ "FORPSI",              BL_NO_RANGE                      },
    158 	{ "Sausalito",           BL_HTTP10                        },
    159 	{ NULL,		  0				}
    160 };
    161 
    162 static int
    163 check_http_server_bugs(unsigned char *url, struct http_connection_info *info,
    164                        unsigned char *head)
    165 {
    166 	unsigned char *server;
    167 	int i, bugs;
    168 	if (!http_options.allow_blacklist || info->http10)
    169 		return 0;
    170 	if (!(server = parse_http_header(head, cast_uchar "Server", NULL)))
    171 		return 0;
    172 	bugs = 0;
    173 	for (i = 0; buggy_servers[i].name; i++)
    174 		if (strstr((char *)server, buggy_servers[i].name))
    175 			bugs |= buggy_servers[i].bugs;
    176 	free(server);
    177 	if (bugs && (server = get_host_name(url))) {
    178 		add_blacklist_entry(server, bugs);
    179 		free(server);
    180 		return bugs & ~BL_NO_RANGE;
    181 	}
    182 	return 0;
    183 }
    184 
    185 static void
    186 http_end_request(struct connection *c, int notrunc, int nokeepalive, int state)
    187 {
    188 	struct http_connection_info *info = c->info;
    189 	if (state == S__OK && c->cache) {
    190 		if (!notrunc)
    191 			truncate_entry(c->cache, c->from, 1);
    192 		c->cache->incomplete = 0;
    193 	}
    194 	setcstate(c, state);
    195 	if (c->info && !info->close && !info->send_close && !nokeepalive) {
    196 		add_keepalive_socket(c, HTTP_KEEPALIVE_TIMEOUT, 0);
    197 	} else
    198 		abort_connection(c);
    199 }
    200 
    201 void
    202 http_func(struct connection *c)
    203 {
    204 	/*setcstate(c, S_CONN);*/
    205 	/*set_connection_timeout(c);*/
    206 	if (get_keepalive_socket(c, NULL)) {
    207 		int p;
    208 		if ((p = get_port(c->url)) < 0) {
    209 			setcstate(c, S_BAD_URL);
    210 			abort_connection(c);
    211 			return;
    212 		}
    213 		make_connection(c, p, &c->sock1, http_send_header);
    214 	} else
    215 		http_send_header(c);
    216 }
    217 
    218 void
    219 proxy_func(struct connection *c)
    220 {
    221 	http_func(c);
    222 }
    223 
    224 static size_t
    225 add_url_to_str(unsigned char **str, size_t l, unsigned char *url)
    226 {
    227 	unsigned char *sp;
    228 	for (sp = url; *sp && *sp != POST_CHAR; sp++) {
    229 		if (*sp <= ' ' || *sp >= 127) {
    230 			unsigned char esc[4];
    231 			sprintf((char *)esc, "%%%02X", (int)*sp);
    232 			l = add_to_str(str, l, esc);
    233 		} else
    234 			l = add_chr_to_str(str, l, *sp);
    235 	}
    236 	return l;
    237 }
    238 
/* Build the request for connection `c` and pass it to write_to_socket()
 * with http_get_header as the continuation.
 *
 * A fresh http_connection_info is allocated and stored in c->info.  The
 * request is one of:
 *   - CONNECT host:port   (plain connection to a proxy for an https:// URL),
 *   - POST                (the URL contains POST_CHAR),
 *   - GET                 (everything else).
 * For POST the hex-encoded body that follows POST_CHAR in the URL is decoded
 * and appended after the header.  A URL that cannot be parsed ends the
 * request with S_BAD_URL.
 */
static void
http_send_header(struct connection *c)
{
	struct http_connection_info *info;
	int http10 = http_options.http10;
	int proxy;
	unsigned char *hdr;
	unsigned char *h, *u;
	unsigned char *u2;
	size_t l = 0;
	unsigned char *post = NULL;
	unsigned char *host;

	/* Attach a cache entry if there is none yet; when find_in_cache()
	 * returns 0 the reference it took is dropped again. */
	if (!c->cache)
		if (!find_in_cache(c->url, &c->cache))
			c->cache->refcount--;

	proxy = is_proxy_url(c->url);
	host = remove_proxy_prefix(c->url);
	set_connection_timeout_keepal(c);
	info = mem_calloc(sizeof(struct http_connection_info));
	c->info = info;
	/* CONNECT is needed when talking in the clear to a proxy about an
	 * https:// URL. */
	info->https_forward = !c->ssl && proxy && host
	                      && !casecmp(host, cast_uchar "https://", 8);
	/* With SSL already established the request is written in origin
	 * form, as if there were no proxy. */
	if (c->ssl)
		proxy = 0;
	hdr = NULL;
	if (!host) {
http_bad_url:
		/* Shared error exit: hdr may be NULL or partially built. */
		free(hdr);
		http_end_request(c, 0, 1, S_BAD_URL);
		return;
	}
	if (!info->https_forward && (h = get_host_name(host))) {
		info->bl_flags = get_blacklist_flags(h);
		free(h);
	}
	if (info->bl_flags & BL_HTTP10)
		http10 = 1;
	info->http10 = http10;
	if (!info->https_forward) {
		/* POST data, if any, follows POST_CHAR inside the URL. */
		post = cast_uchar strchr((char *)host, POST_CHAR);
		if (post)
			post++;
	}
	info->send_close = info->https_forward || http10;
	if (info->https_forward) {
		/* Request line target for CONNECT is "host:port"; the port
		 * defaults to 443. */
		l = add_to_str(&hdr, l, cast_uchar "CONNECT ");
		h = get_host_name(host);
		if (!h)
			goto http_bad_url;
		l = add_to_str(&hdr, l, h);
		free(h);
		h = get_port_str(host);
		if (!h)
			h = stracpy(cast_uchar "443");
		l = add_chr_to_str(&hdr, l, ':');
		l = add_to_str(&hdr, l, h);
		free(h);
		goto added_connect;
	} else if (!post)
		l = add_to_str(&hdr, l, cast_uchar "GET ");
	else {
		l = add_to_str(&hdr, l, cast_uchar "POST ");
		c->unrestartable = 2;
	}
	/* Direct requests send "/" plus the URL data; proxied requests send
	 * the whole URL. */
	if (!proxy) {
		l = add_chr_to_str(&hdr, l, '/');
		u = get_url_data(host);
	} else
		u = host;

	/* POST_CHAR found before the request target starts: reject. */
	if (post && post < u)
		goto http_bad_url;

	u2 = u;
	if (proxy && !*c->socks_proxy && *proxies.dns_append) {
		/* Build a copy of the URL with proxies.dns_append inserted
		 * right after the host part. */
		unsigned char *u_host;
		int u_host_len;
		int u2_len = 0;
		if (parse_url(u, NULL, NULL, NULL, NULL, NULL, &u_host,
		              &u_host_len, NULL, NULL, NULL, NULL, NULL))
			goto http_bad_url;
		u2 = NULL;
		u2_len =
		    add_bytes_to_str(&u2, u2_len, u, u_host + u_host_len - u);
		u2_len = add_to_str(&u2, u2_len, proxies.dns_append);
		u2_len = add_to_str(&u2, u2_len, u_host + u_host_len);
	}
	l = add_url_to_str(&hdr, l, u2);
	if (u2 != u)
		free(u2);
added_connect:
	if (!http10)
		l = add_to_str(&hdr, l, cast_uchar " HTTP/1.1\r\n");
	else
		l = add_to_str(&hdr, l, cast_uchar " HTTP/1.0\r\n");
	if (!info->https_forward && (h = get_host_name(host))) {
		l = add_to_str(&hdr, l, cast_uchar "Host: ");
		/* Drop a single trailing dot from the host name. */
		if (*h && h[strlen((char *)h) - 1] == '.') {
			h[strlen((char *)h) - 1] = 0;
		}
		/* For a bracketed host, cut everything from the first '%'
		 * and close the bracket again (presumably an IPv6 zone
		 * identifier -- confirm). */
		if (h[0] == '[' && h[strlen((char *)h) - 1] == ']') {
			char *pc = strchr((char *)h, '%');
			if (pc) {
				pc[0] = ']';
				pc[1] = 0;
			}
		}
		l = add_to_str(&hdr, l, h);
		free(h);
		/* The port is only added when it differs from the default
		 * for the scheme in use. */
		if ((h = get_port_str(host))) {
			if (strcmp(cast_char h, c->ssl ? "443" : "80")) {
				l = add_chr_to_str(&hdr, l, ':');
				l = add_to_str(&hdr, l, h);
			}
			free(h);
		}
		l = add_to_str(&hdr, l, cast_uchar "\r\n");
	}
	l = add_user_agent(&hdr, l,
	                   info->https_forward ? NULL : cast_char host);
	if (proxy)
		l = add_proxy_auth_string(&hdr, l, c->url);
	if (!info->https_forward) {
		/* test_restart() may reset c->from and c->no_compress, which
		 * the header builders below read. */
		test_restart(c);
		l = add_referer(&hdr, l, host, c->prev_url);
		l = add_accept(&hdr, l);
		l = add_accept_encoding(&hdr, l, host, c);
		l = add_accept_charset(&hdr, l, info);
		l = add_connection(&hdr, l, http10, proxy, !info->send_close);
		l = add_upgrade(&hdr, l);
		l = add_if_modified(&hdr, l, c);
		l = add_range(&hdr, l, host, c);
		l = add_pragma_no_cache(&hdr, l, c->no_cache);
		l = add_auth_string(&hdr, l, host);
		/* add_post_header() advances `post` past the content-type
		 * line, leaving it at the hex-encoded body. */
		l = add_post_header(&hdr, l, &post);
		l = add_cookies(&hdr, l, host);
		l = add_extra_options(&hdr, l);
	}
	/* Empty line terminating the header. */
	l = add_to_str(&hdr, l, cast_uchar "\r\n");
	if (post) {
		/* Decode the body: every two hex digits become one byte;
		 * characters that are not hex digits count as 0. */
		while (post[0] && post[1]) {
			int h1, h2;
			h1 = post[0] <= '9'   ? (unsigned)post[0] - '0'
			     : post[0] >= 'A' ? upcase(post[0]) - 'A' + 10
			                      : 0;
			if (h1 < 0 || h1 >= 16)
				h1 = 0;
			h2 = post[1] <= '9'   ? (unsigned)post[1] - '0'
			     : post[1] >= 'A' ? upcase(post[1]) - 'A' + 10
			                      : 0;
			if (h2 < 0 || h2 >= 16)
				h2 = 0;
			l = add_chr_to_str(&hdr, l, h1 * 16 + h2);
			post += 2;
		}
	}
	write_to_socket(c, c->sock1, hdr, l, http_get_header);
	free(hdr);
	setcstate(c, S_SENT);
}
    401 
    402 static void
    403 test_restart(struct connection *c)
    404 {
    405 	/* If the cached entity is compressed, request the whole file and turn
    406 	 * off compression */
    407 	if (c->cache && c->from) {
    408 		unsigned char *d;
    409 		if ((d = parse_http_header(c->cache->head,
    410 		                           cast_uchar "Content-Encoding",
    411 		                           NULL))) {
    412 			free(d);
    413 			c->from = 0;
    414 			c->no_compress = 1;
    415 			if (c->tries >= 1) {
    416 				unsigned char *h;
    417 				if ((h = get_host_name(c->url))) {
    418 					add_blacklist_entry(h,
    419 					                    BL_NO_COMPRESSION);
    420 					free(h);
    421 				}
    422 			}
    423 		}
    424 	}
    425 }
    426 
    427 static size_t
    428 add_user_agent(unsigned char **hdr, size_t l, const char *url)
    429 {
    430 	l = add_to_str(hdr, l, cast_uchar "User-Agent: ");
    431 	if (SCRUB_HEADERS)
    432 		l = add_to_str(hdr, l,
    433 		               cast_uchar
    434 		               "Mozilla/5.0 (Windows NT 6.1; rv:60.0) "
    435 		               "Gecko/20100101 Firefox/60.0\r\n");
    436 	else if (!(*http_options.header.fake_useragent)) {
    437 		/*
    438 		 * Google started to return css-styled page for searches.
    439 		 * It returns non-css page if the user agent begins with Lynx.
    440 		 */
    441 		if (url
    442 		    && (casestrstr(cast_uchar url, cast_uchar "/www.google.")
    443 		        || casestrstr(cast_uchar url, cast_uchar "/google."))
    444 		    && strstr(url, "/search?")
    445 		    && (strstr(url, "?q=") || strstr(url, "&q="))
    446 		    && !strstr(url, "?tbm=isch") && !strstr(url, "&tbm=isch"))
    447 			l = add_to_str(hdr, l, cast_uchar("Lynx/"));
    448 
    449 		l = add_to_str(hdr, l, cast_uchar("Links (" VERSION "; "));
    450 		l = add_to_str(hdr, l, system_name);
    451 		l = add_to_str(hdr, l, cast_uchar "; ");
    452 		if (!list_empty(terminals)) {
    453 			unsigned char *t = cast_uchar "text";
    454 			l = add_to_str(hdr, l, t);
    455 		} else {
    456 			l = add_to_str(hdr, l, cast_uchar "dump");
    457 		}
    458 		l = add_to_str(hdr, l, cast_uchar ")\r\n");
    459 	} else {
    460 		l = add_to_str(hdr, l, http_options.header.fake_useragent);
    461 		l = add_to_str(hdr, l, cast_uchar "\r\n");
    462 	}
    463 	return l;
    464 }
    465 
    466 static size_t
    467 add_referer(unsigned char **hdr, size_t l, unsigned char *url,
    468             unsigned char *prev_url)
    469 {
    470 	l = add_to_str(hdr, l, cast_uchar "Referer: ");
    471 	l = add_url_to_str(hdr, l, url);
    472 	return add_to_str(hdr, l, cast_uchar "\r\n");
    473 }
    474 
    475 static size_t
    476 add_accept(unsigned char **hdr, size_t l)
    477 {
    478 	if (SCRUB_HEADERS)
    479 		return add_to_str(hdr, l,
    480 		                  cast_uchar
    481 		                  "Accept: "
    482 		                  "text/html,application/xhtml+xml,application/"
    483 		                  "xml;q=0.9,*/*;q=0.8\r\n");
    484 	return add_to_str(hdr, l, cast_uchar "Accept: */*\r\n");
    485 }
    486 
    487 static int
    488 advertise_compression(unsigned char *url, struct connection *c)
    489 {
    490 	struct http_connection_info *info = c->info;
    491 	char *extd;
    492 	if (c->no_compress || http_options.no_compression
    493 	    || info->bl_flags & BL_NO_COMPRESSION)
    494 		return 0;
    495 
    496 	/* Fix for bugzilla. The attachment may be compressed and if the server
    497 	   compresses it again, we can't decompress the inner compression */
    498 	if (strstr((char *)url, "/attachment.cgi?"))
    499 		return 0;
    500 
    501 	extd = strrchr((char *)url, '.');
    502 	if (extd && get_compress_by_extension(extd + 1, strchr((extd + 1), 0)))
    503 		return 0;
    504 	return 1;
    505 }
    506 
    507 static size_t
    508 add_accept_encoding(unsigned char **hdr, size_t l, unsigned char *url,
    509                     struct connection *c)
    510 {
    511 	if (advertise_compression(url, c)) {
    512 		size_t orig_l = l;
    513 		size_t l1;
    514 		l = add_to_str(hdr, l, cast_uchar "Accept-Encoding: ");
    515 		l1 = l;
    516 		l = add_to_str(hdr, l, cast_uchar "gzip, deflate");
    517 		if (l != l1)
    518 			l = add_to_str(hdr, l, cast_uchar "\r\n");
    519 		else
    520 			l = orig_l;
    521 	}
    522 
    523 	return l;
    524 }
    525 
    526 static size_t
    527 add_accept_charset(unsigned char **hdr, size_t l,
    528                    struct http_connection_info *info)
    529 {
    530 	static unsigned char *accept_charset = NULL;
    531 
    532 	if (SCRUB_HEADERS || info->bl_flags & BL_NO_CHARSET
    533 	    || http_options.no_accept_charset)
    534 		return l;
    535 
    536 	if (!accept_charset) {
    537 		unsigned char *cs, *ac;
    538 		size_t aclen = 0;
    539 		ac = NULL;
    540 		cs = get_cp_mime_name(0);
    541 		if (aclen)
    542 			aclen = add_chr_to_str(&ac, aclen, ',');
    543 		else
    544 			aclen = add_to_str(&ac, aclen,
    545 			                   cast_uchar "Accept-Charset: ");
    546 		aclen = add_to_str(&ac, aclen, cs);
    547 		if (aclen)
    548 			aclen = add_to_str(&ac, aclen, cast_uchar "\r\n");
    549 		if (!(accept_charset = cast_uchar strdup((char *)ac))) {
    550 			l = add_to_str(hdr, l, ac);
    551 			free(ac);
    552 			return l;
    553 		}
    554 		free(ac);
    555 	}
    556 	return add_to_str(hdr, l, accept_charset);
    557 }
    558 
    559 static size_t
    560 add_connection(unsigned char **hdr, size_t l, int http10, int proxy, int alive)
    561 {
    562 	if (!http10) {
    563 		if (!proxy)
    564 			l = add_to_str(hdr, l, cast_uchar "Connection: ");
    565 		else
    566 			l = add_to_str(hdr, l, cast_uchar "Proxy-Connection: ");
    567 		if (alive)
    568 			l = add_to_str(hdr, l, cast_uchar "keep-alive\r\n");
    569 		else
    570 			l = add_to_str(hdr, l, cast_uchar "close\r\n");
    571 	}
    572 	return l;
    573 }
    574 
    575 static size_t
    576 add_upgrade(unsigned char **hdr, size_t l)
    577 {
    578 	if (proxies.only_proxies)
    579 		return add_to_str(
    580 		    hdr, l, cast_uchar "Upgrade-Insecure-Requests: 1\r\n");
    581 	return l;
    582 }
    583 
    584 static size_t
    585 add_if_modified(unsigned char **hdr, size_t l, struct connection *c)
    586 {
    587 	struct cache_entry *e;
    588 	if ((e = c->cache)) {
    589 		int code = 0;
    590 		if (get_http_code(e->head, &code, NULL) || code >= 400)
    591 			return l;
    592 		if (!e->incomplete && e->head && c->no_cache <= NC_IF_MOD) {
    593 			unsigned char *m;
    594 			if (e->last_modified)
    595 				m = stracpy(e->last_modified);
    596 			else if ((m = parse_http_header(
    597 				      e->head, cast_uchar "Date", NULL)))
    598 				;
    599 			else if ((m = parse_http_header(
    600 				      e->head, cast_uchar "Expires", NULL)))
    601 				;
    602 			else
    603 				return l;
    604 			l = add_to_str(hdr, l,
    605 			               cast_uchar "If-Modified-Since: ");
    606 			l = add_to_str(hdr, l, m);
    607 			l = add_to_str(hdr, l, cast_uchar "\r\n");
    608 			free(m);
    609 		}
    610 	}
    611 
    612 	return l;
    613 }
    614 
    615 static size_t
    616 add_range(unsigned char **hdr, size_t l, unsigned char *url,
    617           struct connection *c)
    618 {
    619 	struct cache_entry *e;
    620 	struct http_connection_info *info = c->info;
    621 	if ((e = c->cache)) {
    622 		int code = 0;
    623 		if (!get_http_code(e->head, &code, NULL) && code >= 300)
    624 			return l;
    625 	}
    626 	if (c->from && c->no_cache < NC_IF_MOD
    627 	    && !(info->bl_flags & BL_NO_RANGE)) {
    628 		l = add_to_str(hdr, l, cast_uchar "Range: bytes=");
    629 		l = add_num_to_str(hdr, l, c->from);
    630 		l = add_to_str(hdr, l, cast_uchar "-\r\n");
    631 	}
    632 	return l;
    633 }
    634 
    635 static size_t
    636 add_pragma_no_cache(unsigned char **hdr, size_t l, int no_cache)
    637 {
    638 	if (no_cache >= NC_PR_NO_CACHE)
    639 		return add_to_str(
    640 		    hdr, l,
    641 		    cast_uchar
    642 		    "Pragma: no-cache\r\nCache-Control: no-cache\r\n");
    643 	return l;
    644 }
    645 
    646 static size_t
    647 add_proxy_auth_string(unsigned char **hdr, size_t l, unsigned char *url)
    648 {
    649 	unsigned char *h;
    650 	if ((h = get_auth_string(url, 1))) {
    651 		l = add_to_str(hdr, l, h);
    652 		free(h);
    653 	}
    654 	return l;
    655 }
    656 
    657 static size_t
    658 add_auth_string(unsigned char **hdr, size_t l, unsigned char *url)
    659 {
    660 	unsigned char *h;
    661 	if ((h = get_auth_string(url, 0))) {
    662 		l = add_to_str(hdr, l, h);
    663 		free(h);
    664 	}
    665 	return l;
    666 }
    667 
    668 static size_t
    669 add_post_header(unsigned char **hdr, size_t l, unsigned char **post)
    670 {
    671 	if (!*post)
    672 		return l;
    673 
    674 	unsigned char *pd = cast_uchar strchr((char *)*post, '\n');
    675 	if (pd) {
    676 		l = add_to_str(hdr, l, cast_uchar "Content-Type: ");
    677 		l = add_bytes_to_str(hdr, l, *post, pd - *post);
    678 		l = add_to_str(hdr, l, cast_uchar "\r\n");
    679 		*post = pd + 1;
    680 	}
    681 	l = add_to_str(hdr, l, cast_uchar "Content-Length: ");
    682 	l = add_num_to_str(hdr, l, strlen((char *)*post) / 2);
    683 	return add_to_str(hdr, l, cast_uchar "\r\n");
    684 }
    685 
/* Apply the user-configured extra header lines to *hdr and return the new
 * length.
 *
 * http_options.header.extra_header holds header lines separated by
 * backslashes; empty pieces are ignored.  A line whose field name is
 * already present in *hdr replaces the value of that existing field
 * instead of being appended, except for lines starting with "Cookie:",
 * which are always appended.  Lines without a ':' are appended as they
 * are.
 */
static size_t
add_extra_options(unsigned char **hdr, size_t l)
{
	unsigned char *p = http_options.header.extra_header;
	while (1) {
		/* q marks the end of the current piece (backslash or NUL). */
		unsigned char *q = p + strcspn((char *)p, "\\");
		if (p != q) {
			unsigned char *c;
			unsigned char *s = memacpy(p, q - p);
			c = cast_uchar strchr((char *)s, ':');
			if (c && casecmp(s, cast_uchar "Cookie:", 7)) {
				unsigned char *v = NULL;
				/* cc is the field name alone, used to look
				 * for the same field in the header built so
				 * far; v receives the position of its value
				 * inside *hdr. */
				unsigned char *cc = memacpy(s, c - s);
				unsigned char *x =
				    parse_http_header(*hdr, cc, &v);
				free(cc);
				if (x) {
					unsigned char *new_hdr;
					size_t new_l;
					free(x);
					/* Rebuild the header as: everything
					 * up to the old value, the new
					 * value, then everything from the
					 * end of the old line onward. */
					new_hdr = NULL;
					new_l = add_bytes_to_str(
					    &new_hdr, 0, *hdr, v - *hdr);
					/* Skip the colon and the spaces
					 * that follow it. */
					while (*++c == ' ')
						;
					new_l = add_to_str(&new_hdr, new_l, c);
					new_l = add_to_str(
					    &new_hdr, new_l,
					    v + strcspn((char *)v, "\r\n"));
					free(*hdr);
					*hdr = new_hdr;
					l = new_l;
					goto already_added;
				}
			}
			l = add_to_str(hdr, l, s);
			l = add_to_str(hdr, l, cast_uchar "\r\n");
already_added:
			free(s);
		}
		if (!*q)
			break;
		p = q + 1;
	}

	return l;
}
    733 
    734 static int
    735 is_line_in_buffer(struct read_buffer *rb)
    736 {
    737 	int l;
    738 	for (l = 0; l < rb->len; l++) {
    739 		if (rb->data[l] == 10)
    740 			return l + 1;
    741 		if (l < rb->len - 1 && rb->data[l] == 13
    742 		    && rb->data[l + 1] == 10)
    743 			return l + 2;
    744 		if (l == rb->len - 1 && rb->data[l] == 13)
    745 			return 0;
    746 		if (rb->data[l] < ' ')
    747 			return -1;
    748 	}
    749 	return 0;
    750 }
    751 
/* Socket read callback for the response body.
 *
 * Moves the data available in `rb` into the cache entry of `c`, either as
 * a plain body (info->length != -2) or by decoding chunked transfer
 * encoding (info->length == -2), then asks for more data unless the
 * response is complete or an error occurred.
 */
static void
read_http_data(struct connection *c, struct read_buffer *rb)
{
	struct http_connection_info *info = c->info;
	int a;
	set_connection_timeout(c);
	/* NOTE(review): rb->close == 2 is treated as normal end of data
	 * here; presumably it means the peer closed the connection. */
	if (rb->close == 2) {
		http_end_request(c, 0, 0, S__OK);
		return;
	}
	if (info->length != -2) {
		/* Plain body: take everything buffered, limited to the
		 * remaining length when that is known. */
		int l = rb->len;
		if (info->length >= 0 && info->length < l)
			l = (int)info->length;
		/* Refuse data that would push the offset past what off_t
		 * can represent. */
		if ((off_t)(0UL + c->from + l) < 0) {
			setcstate(c, S_LARGE_FILE);
			abort_connection(c);
			return;
		}
		c->received += l;
		a = add_fragment(c->cache, c->from, rb->data, l);
		if (a < 0) {
			setcstate(c, a);
			abort_connection(c);
			return;
		}
		if (a == 1)
			c->tries = 0;
		if (info->length >= 0)
			info->length -= l;
		c->from += l;
		kill_buffer_data(rb, l);
		if (!info->length) {
			http_end_request(c, 0, 0, S__OK);
			return;
		}
	} else {
next_chunk:
		if (info->chunk_remaining == -2) {
			/* After the last chunk: consume lines until an empty
			 * one (at most two bytes long) ends the response. */
			int l;
			if ((l = is_line_in_buffer(rb))) {
				if (l == -1) {
					setcstate(c, S_HTTP_ERROR);
					abort_connection(c);
					return;
				}
				kill_buffer_data(rb, l);
				if (l <= 2) {
					http_end_request(c, 0, 0, S__OK);
					return;
				}
				goto next_chunk;
			}
		} else if (info->chunk_remaining == -1) {
			/* Expecting a chunk-size line: a hexadecimal number
			 * at the start of the line. */
			int l;
			if ((l = is_line_in_buffer(rb))) {
				char *end;
				long n = 0;
				if (l != -1)
					n = strtol(cast_const_char rb->data,
					           &end, 16);
				if (l == -1 || n < 0 || n >= INT_MAX
				    || cast_uchar end == rb->data) {
					setcstate(c, S_HTTP_ERROR);
					abort_connection(c);
					return;
				}
				kill_buffer_data(rb, l);
				/* A chunk size of zero marks the last
				 * chunk. */
				if (!(info->chunk_remaining = (int)n))
					info->chunk_remaining = -2;
				goto next_chunk;
			}
		} else {
			/* Inside a chunk: store as much of it as is
			 * buffered. */
			int l = info->chunk_remaining;
			if (l > rb->len)
				l = rb->len;
			if ((off_t)(0UL + c->from + l) < 0) {
				setcstate(c, S_LARGE_FILE);
				abort_connection(c);
				return;
			}
			c->received += l;
			a = add_fragment(c->cache, c->from, rb->data, l);
			if (a < 0) {
				setcstate(c, a);
				abort_connection(c);
				return;
			}
			if (a == 1)
				c->tries = 0;
			info->chunk_remaining -= l;
			c->from += l;
			kill_buffer_data(rb, l);
			/* The chunk data must be followed by LF or CR LF
			 * before the next chunk-size line. */
			if (!info->chunk_remaining && rb->len >= 1) {
				if (rb->data[0] == 10)
					kill_buffer_data(rb, 1);
				else {
					if (rb->data[0] != 13
					    || (rb->len >= 2
					        && ((unsigned char *)
					                rb->data)[1]
					               != 10)) {
						setcstate(c, S_HTTP_ERROR);
						abort_connection(c);
						return;
					}
					/* Only the CR has arrived so far;
					 * wait for the LF. */
					if (rb->len < 2)
						goto read_more;
					kill_buffer_data(rb, 2);
				}
				info->chunk_remaining = -1;
				goto next_chunk;
			}
		}
	}
read_more:
	read_from_socket(c, c->sock1, rb, read_http_data);
	setcstate(c, S_TRANS);
}
    871 
    872 static int
    873 get_header(struct read_buffer *rb)
    874 {
    875 	int i;
    876 	if (rb->len <= 0)
    877 		return 0;
    878 	if (rb->data[0] != 'H')
    879 		return -2;
    880 	if (rb->len <= 1)
    881 		return 0;
    882 	if (((unsigned char *)rb->data)[1] != 'T')
    883 		return -2;
    884 	if (rb->len <= 2)
    885 		return 0;
    886 	if (((unsigned char *)rb->data)[2] != 'T')
    887 		return -2;
    888 	if (rb->len <= 3)
    889 		return 0;
    890 	if (((unsigned char *)rb->data)[3] != 'P')
    891 		return -2;
    892 	for (i = 0; i < rb->len; i++) {
    893 		unsigned char a = rb->data[i];
    894 		if (!a)
    895 			return -1;
    896 		if (i < rb->len - 1 && a == 10 && rb->data[i + 1] == 10)
    897 			return i + 2;
    898 		if (i < rb->len - 3 && a == 13) {
    899 			if (rb->data[i + 1] != 10)
    900 				return -1;
    901 			if (rb->data[i + 2] == 13) {
    902 				if (rb->data[i + 3] != 10)
    903 					return -1;
    904 				return i + 4;
    905 			}
    906 		}
    907 	}
    908 	return 0;
    909 }
    910 
    911 static void
    912 http_got_header(struct connection *c, struct read_buffer *rb)
    913 {
        	/*
        	 * Read callback for the HTTP response header: it is handed to
        	 * read_from_socket() both here (when the header is still
        	 * incomplete) and in http_get_header().
        	 *
        	 * It extracts the header from rb, reacts to the status code,
        	 * records header fields in the cache entry (c->cache) and in the
        	 * per-connection info (c->info), and finally calls
        	 * read_http_data().
        	 *
        	 * Every early return is preceded by a call to one of
        	 * abort_connection(), retry_connection(), continue_connection(),
        	 * http_end_request() or read_from_socket().
        	 *
        	 * Ownership of head: it is freed on every early-return path up to
        	 * the point where it is stored in e->head; after that this
        	 * function no longer frees it.
        	 */
    914 	off_t cf;
        	/* State reported while more header bytes are awaited (see the !a
        	 * branch): S_PROC if c is already in it or once a 100 response
        	 * has been consumed, otherwise S_GETH. */
    915 	int state = c->state != S_PROC ? S_GETH : S_PROC;
    916 	unsigned char *head;
    917 	int a, h = 0, version = 0;
    918 	unsigned char *d;
    919 	struct cache_entry *e;
    920 	int previous_http_code;
    921 	struct http_connection_info *info;
        	/* Despite its name, host is c->url after remove_proxy_prefix();
        	 * it is later passed to get_host_name(), get_user_name() and
        	 * get_pass() and searched for POST_CHAR. */
    922 	unsigned char *host = remove_proxy_prefix(c->url);
    923 	set_connection_timeout(c);
    924 	info = c->info;
        	/*
        	 * rb->close == 2 is assigned outside this chunk -- presumably by
        	 * the socket reader when the peer closed before a header
        	 * arrived; TODO confirm.
        	 * On the first attempt (c->tries == 0) the BL_NO_CHARSET
        	 * blacklist flag of the host is toggled: removed if this request
        	 * already ran with it, otherwise added with c->tries set to -1.
        	 * In every case the connection is retried with S_CANT_READ.
        	 */
    925 	if (rb->close == 2) {
    926 		unsigned char *hs;
    927 		if (!c->tries && (hs = get_host_name(host))) {
    928 			if (info->bl_flags & BL_NO_CHARSET)
    929 				del_blacklist_entry(hs, BL_NO_CHARSET);
    930 			else {
    931 				add_blacklist_entry(hs, BL_NO_CHARSET);
    932 				c->tries = -1;
    933 			}
    934 			free(hs);
    935 		}
    936 		setcstate(c, S_CANT_READ);
    937 		retry_connection(c);
    938 		return;
    939 	}
    940 	rb->close = 0;
        	/*
        	 * get_header() result, as used below:
        	 *   -1    abort with S_HTTP_ERROR
        	 *    0    schedule another read with this function as callback
        	 *   -2    substitute a synthetic HTTP/0.9 header, rb untouched
        	 *   else  byte count copied from rb->data into head and then
        	 *         passed to kill_buffer_data()
        	 * The label is re-entered after a 100 response to parse the next
        	 * header that follows it in rb.
        	 */
    941 again:
    942 	if ((a = get_header(rb)) == -1) {
    943 		setcstate(c, S_HTTP_ERROR);
    944 		abort_connection(c);
    945 		return;
    946 	}
    947 	if (!a) {
    948 		read_from_socket(c, c->sock1, rb, http_got_header);
    949 		setcstate(c, state);
    950 		return;
    951 	}
    952 	if (a != -2) {
    953 		head = memacpy(rb->data, a);
    954 		kill_buffer_data(rb, a);
    955 	} else
    956 		head = stracpy(
    957 		    cast_uchar
    958 		    "HTTP/0.9 200 OK\r\nContent-Type: text/html\r\n\r\n");
        	/* A non-zero result from get_http_code() or status 101 is
        	 * treated as a protocol error. */
    959 	if (get_http_code(head, &h, &version) || h == 101) {
    960 		free(head);
    961 		setcstate(c, S_HTTP_ERROR);
    962 		abort_connection(c);
    963 		return;
    964 	}
        	/* check_http_server_bugs() reported something and the connection
        	 * can be restarted: retry with S_RESTART. */
    965 	if (check_http_server_bugs(host, c->info, head)
    966 	    && is_connection_restartable(c)) {
    967 		free(head);
    968 		setcstate(c, S_RESTART);
    969 		retry_connection(c);
    970 		return;
    971 	}
        	/* 100: drop this header and parse the one that follows. */
    972 	if (h == 100) {
    973 		free(head);
    974 		state = S_PROC;
    975 		goto again;
    976 	}
        	/* Any other status below 200 is an error. */
    977 	if (h < 200) {
    978 		free(head);
    979 		setcstate(c, S_HTTP_ERROR);
    980 		abort_connection(c);
    981 		return;
    982 	}
        	/* https_forward with a 2xx answer: discard c->info, clear c->ssl
        	 * and continue on the same socket with http_send_header as the
        	 * next step (presumably the proxy tunnel is now established --
        	 * TODO confirm). */
    983 	if (info->https_forward && h >= 200 && h < 300) {
    984 		free(head);
    985 		free(c->info);
    986 		c->info = 0;
    987 		c->ssl = NULL;
    988 		continue_connection(c, &c->sock1, http_send_header);
    989 		return;
    990 	}
        	/* https_forward with anything else but 407 fails the request;
        	 * 407 falls through and is finished at the very end. */
    991 	if (info->https_forward && h != 407) {
    992 		free(head);
    993 		setcstate(c, S_HTTPS_FWD_ERROR);
    994 		abort_connection(c);
    995 		return;
    996 	}
        	/* Store every Set-Cookie header, except on 401/407 responses.
        	 * ch is passed to parse_http_header() as an in/out cursor so each
        	 * call continues after the previous match. */
    997 	if (h != 401 && h != 407) {
    998 		unsigned char *cookie;
    999 		unsigned char *ch = head;
   1000 		while ((cookie = parse_http_header(ch, cast_uchar "Set-Cookie",
   1001 		                                   &ch))) {
   1002 			set_cookie(NULL, host, cookie);
   1003 			free(cookie);
   1004 		}
   1005 	}
        	/* 204, 304 and (for a request with a non-zero offset) 416 end the
        	 * request immediately; only the http_end_request() arguments
        	 * differ. */
   1006 	if (h == 204) {
   1007 		free(head);
   1008 		http_end_request(c, 0, 0, S_HTTP_204);
   1009 		return;
   1010 	}
   1011 	if (h == 304) {
   1012 		free(head);
   1013 		http_end_request(c, 1, 0, S__OK);
   1014 		return;
   1015 	}
   1016 	if (h == 416 && c->from) {
   1017 		free(head);
   1018 		http_end_request(c, 0, 1, S__OK);
   1019 		return;
   1020 	}
        	/* 431: if this request was not already made under BL_NO_CHARSET,
        	 * blacklist the host with that flag and restart.  Otherwise (or
        	 * if get_host_name() fails) the response is processed like any
        	 * other. */
   1021 	if (h == 431) {
   1022 		unsigned char *hs;
   1023 		if (!(info->bl_flags & BL_NO_CHARSET)
   1024 		    && (hs = get_host_name(host))) {
   1025 			free(head);
   1026 			add_blacklist_entry(hs, BL_NO_CHARSET);
   1027 			free(hs);
   1028 			c->tries = -1;
   1029 			setcstate(c, S_RESTART);
   1030 			retry_connection(c);
   1031 			return;
   1032 		}
   1033 	}
        	/* 500/502/503/504 are retried when the option is enabled and the
        	 * connection is restartable; on the last try the host is also
        	 * blacklisted with BL_NO_BZIP2. */
   1034 	if ((h == 500 || h == 502 || h == 503 || h == 504)
   1035 	    && http_options.retry_internal_errors
   1036 	    && is_connection_restartable(c)) {
   1037 		/* !!! FIXME: wait some time ... */
   1038 		if (is_last_try(c)) {
        			/* NOTE(review): this pointer shadows the status
        			 * code `h` of the enclosing scope; the status is
        			 * not read inside this block, but -Wshadow will
        			 * flag it. */
   1039 			unsigned char *h;
   1040 			if ((h = get_host_name(host))) {
   1041 				add_blacklist_entry(h, BL_NO_BZIP2);
   1042 				free(h);
   1043 			}
   1044 		}
   1045 		free(head);
   1046 		setcstate(c, S_RESTART);
   1047 		retry_connection(c);
   1048 		return;
   1049 	}
        	/* No cache entry yet: obtain one, reporting failure as
        	 * S_OUT_OF_MEM.  The refcount is decremented right away --
        	 * presumably get_connection_cache_entry() returns the entry with
        	 * a reference this function does not keep; TODO confirm. */
   1050 	if (!c->cache) {
   1051 		if (get_connection_cache_entry(c)) {
   1052 			free(head);
   1053 			setcstate(c, S_OUT_OF_MEM);
   1054 			abort_connection(c);
   1055 			return;
   1056 		}
   1057 		c->cache->refcount--;
   1058 	}
        	/* Remember the previous status (needed for the 401/407 truncation
        	 * near the end), store the new one, and hand head over to the
        	 * cache entry. */
   1059 	e = c->cache;
   1060 	previous_http_code = e->http_code;
   1061 	e->http_code = h;
   1062 	free(e->head);
   1063 	e->head = head;
        	/* expire_time == 1 is the value this function assigns for
        	 * no-cache, must-revalidate, some redirects and POST URLs;
        	 * neither Expires nor max-age overwrites it. */
   1064 	if ((d = parse_http_header(head, cast_uchar "Expires", NULL))) {
   1065 		time_t t = parse_http_date(d);
   1066 		if (t && e->expire_time != 1)
   1067 			e->expire_time = t;
   1068 		free(d);
   1069 	}
        	/* casecmp() is given length 8, so only the leading 8 bytes of the
        	 * Pragma value are compared. */
   1070 	if ((d = parse_http_header(head, cast_uchar "Pragma", NULL))) {
   1071 		if (!casecmp(d, cast_uchar "no-cache", 8))
   1072 			e->expire_time = 1;
   1073 		free(d);
   1074 	}
        	/* Cache-Control: walk the directives, which are separated by
        	 * commas and/or spaces. */
   1075 	if ((d = parse_http_header(head, cast_uchar "Cache-Control", NULL))) {
   1076 		unsigned char *f = d;
   1077 		while (1) {
        			/* skip separators in front of the directive */
   1078 			while (*f && (*f == ' ' || *f == ','))
   1079 				f++;
   1080 			if (!*f)
   1081 				break;
   1082 			if (!casecmp(f, cast_uchar "no-cache", 8)
   1083 			    || !casecmp(f, cast_uchar "must-revalidate", 15))
   1084 				e->expire_time = 1;
        			/* max-age: expire_time = current time + value.
        			 * EINTRLOOPX is defined elsewhere; presumably it
        			 * repeats time() while it fails with EINTR --
        			 * TODO confirm.
        			 * NOTE(review): atoi() reports no errors, so a
        			 * negative or out-of-range value is added as is. */
   1085 			if (!casecmp(f, cast_uchar "max-age=", 8)) {
   1086 				if (e->expire_time != 1) {
   1087 					errno = 0;
   1088 					EINTRLOOPX(e->expire_time, time(NULL),
   1089 					           (time_t)-1);
   1090 					e->expire_time += atoi((char *)(f + 8));
   1091 				}
   1092 			}
        			/* advance to the next comma (or the end) */
   1093 			while (*f && *f != ',')
   1094 				f++;
   1095 		}
   1096 		free(d);
   1097 	}
        	/* With SSL in use, refresh the cipher string and CA name kept in
        	 * the cache entry. */
   1098 	if (c->ssl) {
   1099 		free(e->ssl_info);
   1100 		e->ssl_info = get_cipher_string(c->ssl);
   1101 		free(e->ssl_authority);
   1102 		e->ssl_authority = stracpy(c->ssl->ca);
   1103 	}
   1104 	free(e->redirect);
   1105 	e->redirect = NULL;
        	/* 302/303/307/511 with no expire time so far get expire_time 1. */
   1106 	if ((h == 302 || h == 303 || h == 307 || h == 511) && !e->expire_time)
   1107 		e->expire_time = 1;
        	/* Redirects: the Location value becomes e->redirect. */
   1108 	if (h == 301 || h == 302 || h == 303 || h == 307 || h == 308) {
   1109 		if ((d = parse_http_header(e->head, cast_uchar "Location",
   1110 		                           NULL))) {
   1111 			unsigned char *user, *ins;
   1112 			unsigned char *newuser, *newpassword;
        			/*
        			 * If the Location URL parses, has no user part
        			 * but parse_url() set ins, and the current URL
        			 * yields a user name, "user:password@" is spliced
        			 * into the Location at ins (presumably the spot
        			 * where credentials belong -- TODO confirm).
        			 */
   1113 			if (!parse_url(d, NULL, &user, NULL, NULL, NULL, &ins,
   1114 			               NULL, NULL, NULL, NULL, NULL, NULL)
   1115 			    && !user && ins
   1116 			    && (newuser = get_user_name(host))) {
   1117 				if (*newuser) {
        					/* extend_str() receives &d and may
        					 * change d, so ins is saved as an
        					 * offset and recomputed afterwards. */
   1118 					int ins_off = (int)(ins - d);
   1119 					newpassword = get_pass(host);
   1120 					if (!newpassword)
   1121 						newpassword =
   1122 						    stracpy(cast_uchar "");
   1123 					add_to_strn(&newuser, cast_uchar ":");
   1124 					add_to_strn(&newuser, newpassword);
   1125 					add_to_strn(&newuser, cast_uchar "@");
   1126 					extend_str(&d, strlen((char *)newuser));
   1127 					ins = d + ins_off;
        					/* shift the tail (including its NUL)
        					 * right, then copy the credentials
        					 * into the gap */
   1128 					memmove(ins + strlen((char *)newuser),
   1129 					        ins, strlen((char *)ins) + 1);
   1130 					memcpy(ins, newuser,
   1131 					       strlen((char *)newuser));
   1132 					free(newpassword);
   1133 				}
   1134 				free(newuser);
   1135 			}
        			/* e->redirect was set to NULL above, so this
        			 * free() is a no-op. */
   1136 			free(e->redirect);
   1137 			e->redirect = d;
        			/* 307/308: append the part of the original URL
        			 * that starts at POST_CHAR, if there is one. */
   1138 			if (h == 307 || h == 308) {
   1139 				unsigned char *p;
   1140 				if ((p = cast_uchar strchr(cast_const_char host,
   1141 				                           POST_CHAR)))
   1142 					add_to_strn(&e->redirect, p);
   1143 			}
   1144 		}
   1145 	}
        	/* URLs containing POST_CHAR get expire_time 1 unless an expire
        	 * time is already set. */
   1146 	if (!e->expire_time && strchr((char *)c->url, POST_CHAR))
   1147 		e->expire_time = 1;
        	/* Reset the per-response fields of info before reading the
        	 * framing headers. */
   1148 	info->close = 0;
   1149 	info->length = -1;
   1150 	info->version = version;
        	/* "close" in Connection (or, if that is absent, in
        	 * Proxy-Connection) sets info->close; with neither header
        	 * present, version < 11 sets it too (11 presumably meaning
        	 * HTTP/1.1 -- TODO confirm against get_http_code()). */
   1151 	if ((d = parse_http_header(e->head, cast_uchar "Connection", NULL))
   1152 	    || (d = parse_http_header(e->head, cast_uchar "Proxy-Connection",
   1153 	                              NULL))) {
   1154 		if (!casestrcmp(d, cast_uchar "close"))
   1155 			info->close = 1;
   1156 		free(d);
   1157 	} else if (version < 11)
   1158 		info->close = 1;
        	/* cf keeps the offset c->from had on entry; c->from is then
        	 * recomputed from what the server reports. */
   1159 	cf = c->from;
   1160 	c->from = 0;
   1161 	if ((d = parse_http_header(e->head, cast_uchar "Content-Range",
   1162 	                           NULL))) {
        		/* d[5] is overwritten with NUL so the first five bytes can
        		 * be compared with "bytes"; the start offset is read from
        		 * d + 6 and accepted only if it is non-negative and
        		 * survives the conversion to off_t.
        		 * NOTE(review): strtol() range errors are not checked. */
   1163 		if (strlen((char *)d) > 6) {
   1164 			d[5] = 0;
   1165 			if (!(casestrcmp(d, cast_uchar "bytes")) && d[6] >= '0'
   1166 			    && d[6] <= '9') {
   1167 				long f = strtol((char *)(d + 6), NULL, 10);
   1168 				if (f >= 0 && (off_t)f >= 0 && (off_t)f == f)
   1169 					c->from = f;
   1170 			}
   1171 		}
   1172 		free(d);
   1173 	} else if (h == 206) {
   1174 		/* Hmm ... some servers send 206 partial but don't send
   1175 		 * Content-Range */
   1176 		c->from = cf;
   1177 	}
        	/* The entry offset was non-zero but the response starts at 0:
        	 * set c->unrestartable if it is not set yet. */
   1178 	if (cf && !c->from && !c->unrestartable)
   1179 		c->unrestartable = 1;
        	/* A reported offset beyond the entry offset (or a negative one)
        	 * is an error.  head is owned by e->head here, so it is not
        	 * freed. */
   1180 	if (c->from > cf || c->from < 0) {
   1181 		setcstate(c, S_HTTP_ERROR);
   1182 		abort_connection(c);
   1183 		return;
   1184 	}
   1185 	if ((d = parse_http_header(e->head, cast_uchar "Content-Length",
   1186 	                           NULL))) {
        		/* The value is used only if strtol() consumed the whole
        		 * string and the result is non-negative and fits off_t.
        		 * info->length is set when the connection is not marked
        		 * close, or version >= 11, or the status is 3xx;
        		 * c->est_length is set when offset + length is
        		 * non-negative.
        		 * NOTE(review): strtol() range errors are not checked, and
        		 * c->from + l is evaluated before its sign is tested, so
        		 * the sum itself may overflow. */
   1187 		char *ep;
   1188 		long l = strtol((char *)d, &ep, 10);
   1189 		if (!*ep && l >= 0 && (off_t)l >= 0 && (off_t)l == l) {
   1190 			if (!info->close || version >= 11 || h / 100 == 3)
   1191 				info->length = l;
   1192 			if (c->from + l >= 0)
   1193 				c->est_length = c->from + l;
   1194 		}
   1195 		free(d);
   1196 	}
        	/* c->unrestartable is set when Accept-Ranges is "none", when the
        	 * header is missing and the response starts at offset 0, or
        	 * (below) when the request ran under BL_NO_RANGE.  A value that
        	 * is already non-zero is never changed. */
   1197 	if ((d = parse_http_header(e->head, cast_uchar "Accept-Ranges",
   1198 	                           NULL))) {
   1199 		if (!casestrcmp(d, cast_uchar "none") && !c->unrestartable)
   1200 			c->unrestartable = 1;
   1201 		free(d);
   1202 	} else if (!c->unrestartable && !c->from)
   1203 		c->unrestartable = 1;
   1204 	if (info->bl_flags & BL_NO_RANGE && !c->unrestartable)
   1205 		c->unrestartable = 1;
        	/* Chunked transfer: info->length = -2 and chunk_remaining = -1
        	 * are the values set here; they are interpreted by code outside
        	 * this function. */
   1206 	if ((d = parse_http_header(e->head, cast_uchar "Transfer-Encoding",
   1207 	                           NULL))) {
   1208 		if (!casestrcmp(d, cast_uchar "chunked")) {
   1209 			info->length = -2;
   1210 			info->chunk_remaining = -1;
   1211 		}
   1212 		free(d);
   1213 	}
        	/* Length still -1 (no usable Content-Length, not chunked): mark
        	 * the connection close -- presumably because the body can then
        	 * only end when the connection does. */
   1214 	if (!info->close && info->length == -1)
   1215 		info->close = 1;
        	/* Last-Modified differs from the value stored in the cache
        	 * entry: drop the cached content, and if this response starts at
        	 * a non-zero offset, restart from 0 with S_MODIFIED.  d is kept
        	 * as e->last_modified only when none was stored, otherwise it is
        	 * freed. */
   1216 	if ((d = parse_http_header(e->head, cast_uchar "Last-Modified",
   1217 	                           NULL))) {
   1218 		if (e->last_modified && casestrcmp(e->last_modified, d)) {
   1219 			delete_entry_content(e);
   1220 			if (c->from) {
   1221 				c->from = 0;
   1222 				free(d);
   1223 				setcstate(c, S_MODIFIED);
   1224 				retry_connection(c);
   1225 				return;
   1226 			}
   1227 		}
   1228 		if (!e->last_modified)
   1229 			e->last_modified = d;
   1230 		else
   1231 			free(d);
   1232 	}
        	/* Without any Last-Modified value, the Date header is stored in
        	 * its place. */
   1233 	if (!e->last_modified
   1234 	    && (d = parse_http_header(e->head, cast_uchar "Date", NULL)))
   1235 		e->last_modified = d;
        	/* rb->close = 1 when the length is unknown, or when version < 11
        	 * and the connection is marked close. */
   1236 	if (info->length == -1 || (version < 11 && info->close))
   1237 		rb->close = 1;
   1238 
   1239 	/*
   1240 	 * Truncate entry if:
   1241 	 *	- it is compressed (the mix of an old and new document
   1242 	 *	  would likely produce decompression error).
   1243 	 *	- it was http authentication (the user doesn't need to see the
   1244 	 *	  authentication message).
   1245 	 */
   1246 	if ((d = parse_http_header(e->head, cast_uchar "Content-Encoding",
   1247 	                           NULL))) {
   1248 		free(d);
   1249 		truncate_entry(e, c->from, 0);
   1250 	} else if (previous_http_code == 401 || previous_http_code == 407)
   1251 		truncate_entry(e, c->from, 0);
   1252 
        	/* Every other https_forward response has already returned above,
        	 * so this is the 407 answer to a forwarded request: end the
        	 * request here instead of reading a body. */
   1253 	if (info->https_forward && h == 407) {
   1254 		http_end_request(c, 0, 1, S__OK);
   1255 		return;
   1256 	}
   1257 
   1258 	read_http_data(c, rb);
   1259 }
   1260 
   1261 static void
   1262 http_get_header(struct connection *c)
   1263 {
        	/*
        	 * Starts reading the response header on c->sock1: calls
        	 * set_connection_timeout_keepal(), allocates a fresh read buffer
        	 * and registers http_got_header() as the read callback.
        	 */
   1264 	struct read_buffer *rb;
   1265 	set_connection_timeout_keepal(c);
        	/* NOTE(review): rb is dereferenced without a NULL check; this is
        	 * fine only if alloc_read_buffer() cannot return NULL -- TODO
        	 * confirm. */
   1266 	rb = alloc_read_buffer();
        	/* rb->close starts at 1; http_got_header() resets it to 0 when
        	 * it runs, unless it finds the value 2. */
   1267 	rb->close = 1;
   1268 	read_from_socket(c, c->sock1, rb, http_got_header);
   1269 }