source: EcnlProtoTool/trunk/ntshell/webserver/http_parser.c@ 331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago

prototoolに関連するプロジェクトをnewlibからmuslを使うよう変更・更新
ntshellをnewlibの下位の実装から、muslのsyscallの実装に変更・更新
以下のOSSをアップデート
・mruby-1.3.0
・musl-1.1.18
・onigmo-6.1.3
・tcc-0.9.27
以下のOSSを追加
・openssl-1.1.0e
・curl-7.57.0
・zlib-1.2.11
以下のmrbgemsを追加
・iij/mruby-digest
・iij/mruby-env
・iij/mruby-errno
・iij/mruby-iijson
・iij/mruby-ipaddr
・iij/mruby-mock
・iij/mruby-require
・iij/mruby-tls-openssl

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 68.0 KB
Line 
/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
 *
 * Additional changes are licensed under the same terms as NGINX and
 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24#include "http_parser.h"
25//#include <assert.h>
26#include <stddef.h>
27#include <ctype.h>
28#include <stdlib.h>
29#include <string.h>
30#include <limits.h>
31#include "core/ntlibc.h"
32
/* Fallback for toolchains whose <limits.h> does not provide ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of the two arguments. Each argument may be evaluated twice, so
 * do not pass expressions with side effects.
 */
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Element count of a real array. Meaningless when `a` is a pointer. */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* 1 when bit `i` of the byte array `a` is set, 0 otherwise. Bit i lives in
 * element i / 8 at bit position i % 8 (least significant bit first).
 */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
   (1 << ((unsigned int) (i) & 7))))
#endif

/* a[i] when `i` is inside the array bounds, otherwise the fallback `v`. */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif
54
/* Record error code `e` on the `parser` variable that is in scope. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while(0)

/* The state machine works on a local copy of the state (`p_state`) and
 * writes it back to parser->state only when control leaves the function
 * or a callback is about to run.
 */
#define CURRENT_STATE() p_state

/* NOTE: the expansion already ends in ';'. Callers still write
 * `UPDATE_STATE(x);`, which yields a harmless empty statement, but the
 * macro must not be used as the unbraced body of an if/else.
 */
#define UPDATE_STATE(V) p_state = (enum state) (V);

/* Flush the local state to the parser and return V. NOTE: as with
 * UPDATE_STATE, the expansion carries its own trailing ';'.
 */
#define RETURN(V)                                                    \
do {                                                                 \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0);

/* Step `p` back by one and `break` out of the enclosing switch, so that the
 * byte loop's `p++` lands on the same byte again and it is processed in the
 * state that was just selected. Expands to TWO statements: only use it
 * inside a braced block.
 */
#define REEXECUTE()                                                  \
  --p;                                                               \
  break;
70
71
/* Branch-prediction hints. With GCC-compatible compilers the condition is
 * normalised to 0/1 and passed to __builtin_expect; elsewhere the macros
 * are plain pass-throughs.
 */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
79
80
/* Run the notify callback FOR, returning ER if it fails.
 *
 * The local state is written to parser->state before the callback runs and
 * read back afterwards, so a callback may inspect or change it. A non-zero
 * callback result is turned into HPE_CB_<FOR>; any error present after the
 * callback makes the enclosing function return ER.
 *
 * NOTE(review): relies on assert() although the <assert.h> include in this
 * file is commented out -- presumably "core/ntlibc.h" supplies it; confirm.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##FOR)) {                                  \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* We either errored above or got paused; get out */             \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Run the notify callback FOR and consume the current byte */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Run the notify callback FOR and don't consume the current byte */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Run data callback FOR with LEN bytes, returning ER if it fails.
 *
 * Nothing happens unless <FOR>_mark is set. The callback receives the
 * mark as the start of the data; the mark is cleared afterwards whether or
 * not a callback is installed.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (LIKELY(settings->on_##FOR)) {                                \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(0 !=                                              \
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* We either errored above or got paused; get out */           \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Run the data callback FOR and consume the current byte */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Run the data callback FOR and don't consume the current byte */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Set the mark FOR; non-destructive if mark is already set */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
144
/* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
 * embedders against denial-of-service attacks where the attacker feeds
 * us a never-ending header that the embedder keeps buffering.
 *
 * This check is arguably the responsibility of embedders but we're doing
 * it on the embedder's behalf because most won't bother and this way we
 * make the web a little safer.  HTTP_MAX_HEADER_SIZE is still far bigger
 * than any reasonable request or response so this should never affect
 * day-to-day operation.
 *
 * Adds V to parser->nread; on overflow sets HPE_HEADER_OVERFLOW and jumps
 * to the `error` label of the enclosing function.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  parser->nread += (V);                                              \
  if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {            \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
164
165
/* Lower-case spellings of the header names and header values that the
 * parser matches against.
 */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"
174
175
176static const char *method_strings[] =
177 {
178#define XX(num, name, string) #string,
179 HTTP_METHOD_MAP(XX)
180#undef XX
181 };
182
183
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * A zero entry means "not a token character". Entries 128..255 are not
 * listed and are therefore zero-initialised, i.e. invalid.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
224
225
/* Value of a hexadecimal digit character, or -1 when the byte is not a hex
 * digit. Indexed by the byte value (0..255).
 *
 * All 256 entries are spelled out: leaving the upper half to implicit
 * zero-initialisation would make every byte >= 0x80 look like the valid
 * digit 0 instead of being rejected.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 .. 0xFF: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
236
237
/* T(v) marks bits that are only set in non-strict builds: it yields 0 when
 * HTTP_PARSER_STRICT is enabled and v otherwise.
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of the bytes 0..127 that are accepted as ordinary URL characters.
 * One bit per byte value, eight values per array element, least significant
 * bit first (the layout BIT_AT() reads). Space, '#', '?', DEL and the
 * control characters are excluded; tab and form feed are accepted only in
 * non-strict builds.
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };

#undef T
280
/* States of the main message parser. The declaration order is significant:
 * PARSING_HEADER() relies on every header-phase state being numerically
 * <= s_headers_done.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_line_almost_done

  , s_header_field_start
  , s_header_field
  , s_header_value_discard_ws
  , s_header_value_discard_ws_almost_done
  , s_header_value_discard_lws
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* True while `state` belongs to the header phase (see the ordering note in
 * enum state).
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
363
364
/* Sub-states used while a header field name and its value are being matched
 * against the handful of headers the parser treats specially. h_general
 * means "no special header".
 */
enum header_states
  { h_general = 0
  , h_C
  , h_CO
  , h_CON

  , h_matching_connection
  , h_matching_proxy_connection
  , h_matching_content_length
  , h_matching_transfer_encoding
  , h_matching_upgrade

  , h_connection
  , h_content_length
  , h_transfer_encoding
  , h_upgrade

  , h_matching_transfer_encoding_chunked
  , h_matching_connection_token_start
  , h_matching_connection_keep_alive
  , h_matching_connection_close
  , h_matching_connection_upgrade
  , h_matching_connection_token

  , h_transfer_encoding_chunked
  , h_connection_keep_alive
  , h_connection_close
  , h_connection_upgrade
  };
394
/* States for walking the authority part of a URL (userinfo, host, IPv6
 * literal, port). Numbering starts at 1, like enum state.
 */
enum http_host_state
  {
    s_http_host_dead = 1
  , s_http_userinfo_start
  , s_http_userinfo
  , s_http_host_start
  , s_http_host_v6_start
  , s_http_host
  , s_http_host_v6
  , s_http_host_v6_end
  , s_http_host_port_start
  , s_http_host_port
};
408
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument so that compound expressions
 * (e.g. a conditional) classify correctly. Arguments may be evaluated more
 * than once, so do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Force the ASCII lower-case bit; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that never admits a space, regardless of strict mode. */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode additionally accepts a space inside tokens, any byte with
 * the high bit set inside URLs, and '_' inside host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
437
438
/* Initial state for the kind of message this parser was set up for. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


/* STRICT_CHECK(cond): in strict builds, fail with HPE_STRICT (jumping to the
 * `error` label) when cond is true; in non-strict builds it expands to
 * nothing, so cond is not even evaluated.
 *
 * NEW_MESSAGE(): state to enter after a complete message. Strict builds go
 * to s_dead unless http_should_keep_alive() says the connection stays open;
 * non-strict builds always restart at start_state.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
455
456
457/* Map errno values to strings for human-readable output */
458#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
459static struct {
460 const char *name;
461 const char *description;
462} http_strerror_tab[] = {
463 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
464};
465#undef HTTP_STRERROR_GEN
466
467int http_message_needs_eof(const http_parser *parser);
468
469/* Our URL parser.
470 *
471 * This is designed to be shared by http_parser_execute() for URL validation,
472 * hence it has a state transition + byte-for-byte interface. In addition, it
473 * is meant to be embedded in http_parser_parse_url(), which does the dirty
474 * work of turning state transitions URL components for its API.
475 *
476 * This function should only be invoked with non-space characters. It is
477 * assumed that the caller cares about (and can detect) the transition between
478 * URL and non-URL states by looking for these.
479 */
480static enum state
481parse_url_char(enum state s, const char ch)
482{
483 if (ch == ' ' || ch == '\r' || ch == '\n') {
484 return s_dead;
485 }
486
487#if HTTP_PARSER_STRICT
488 if (ch == '\t' || ch == '\f') {
489 return s_dead;
490 }
491#endif
492
493 switch (s) {
494 case s_req_spaces_before_url:
495 /* Proxied requests are followed by scheme of an absolute URI (alpha).
496 * All methods except CONNECT are followed by '/' or '*'.
497 */
498
499 if (ch == '/' || ch == '*') {
500 return s_req_path;
501 }
502
503 if (IS_ALPHA(ch)) {
504 return s_req_schema;
505 }
506
507 break;
508
509 case s_req_schema:
510 if (IS_ALPHA(ch)) {
511 return s;
512 }
513
514 if (ch == ':') {
515 return s_req_schema_slash;
516 }
517
518 break;
519
520 case s_req_schema_slash:
521 if (ch == '/') {
522 return s_req_schema_slash_slash;
523 }
524
525 break;
526
527 case s_req_schema_slash_slash:
528 if (ch == '/') {
529 return s_req_server_start;
530 }
531
532 break;
533
534 case s_req_server_with_at:
535 if (ch == '@') {
536 return s_dead;
537 }
538
539 /* FALLTHROUGH */
540 case s_req_server_start:
541 case s_req_server:
542 if (ch == '/') {
543 return s_req_path;
544 }
545
546 if (ch == '?') {
547 return s_req_query_string_start;
548 }
549
550 if (ch == '@') {
551 return s_req_server_with_at;
552 }
553
554 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
555 return s_req_server;
556 }
557
558 break;
559
560 case s_req_path:
561 if (IS_URL_CHAR(ch)) {
562 return s;
563 }
564
565 switch (ch) {
566 case '?':
567 return s_req_query_string_start;
568
569 case '#':
570 return s_req_fragment_start;
571 }
572
573 break;
574
575 case s_req_query_string_start:
576 case s_req_query_string:
577 if (IS_URL_CHAR(ch)) {
578 return s_req_query_string;
579 }
580
581 switch (ch) {
582 case '?':
583 /* allow extra '?' in query string */
584 return s_req_query_string;
585
586 case '#':
587 return s_req_fragment_start;
588 }
589
590 break;
591
592 case s_req_fragment_start:
593 if (IS_URL_CHAR(ch)) {
594 return s_req_fragment;
595 }
596
597 switch (ch) {
598 case '?':
599 return s_req_fragment;
600
601 case '#':
602 return s;
603 }
604
605 break;
606
607 case s_req_fragment:
608 if (IS_URL_CHAR(ch)) {
609 return s;
610 }
611
612 switch (ch) {
613 case '?':
614 case '#':
615 return s;
616 }
617
618 break;
619
620 default:
621 break;
622 }
623
624 /* We should never fall out of the switch above unless there's an error */
625 return s_dead;
626}
627
628size_t http_parser_execute (http_parser *parser,
629 const http_parser_settings *settings,
630 const char *data,
631 size_t len)
632{
633 char c, ch;
634 int8_t unhex_val;
635 const char *p = data;
636 const char *header_field_mark = 0;
637 const char *header_value_mark = 0;
638 const char *url_mark = 0;
639 const char *body_mark = 0;
640 const char *status_mark = 0;
641 enum state p_state = (enum state) parser->state;
642
643 /* We're in an error state. Don't bother doing anything. */
644 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
645 return 0;
646 }
647
648 if (len == 0) {
649 switch (CURRENT_STATE()) {
650 case s_body_identity_eof:
651 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
652 * we got paused.
653 */
654 CALLBACK_NOTIFY_NOADVANCE(message_complete);
655 return 0;
656
657 case s_dead:
658 case s_start_req_or_res:
659 case s_start_res:
660 case s_start_req:
661 return 0;
662
663 default:
664 SET_ERRNO(HPE_INVALID_EOF_STATE);
665 return 1;
666 }
667 }
668
669
670 if (CURRENT_STATE() == s_header_field)
671 header_field_mark = data;
672 if (CURRENT_STATE() == s_header_value)
673 header_value_mark = data;
674 switch (CURRENT_STATE()) {
675 case s_req_path:
676 case s_req_schema:
677 case s_req_schema_slash:
678 case s_req_schema_slash_slash:
679 case s_req_server_start:
680 case s_req_server:
681 case s_req_server_with_at:
682 case s_req_query_string_start:
683 case s_req_query_string:
684 case s_req_fragment_start:
685 case s_req_fragment:
686 url_mark = data;
687 break;
688 case s_res_status:
689 status_mark = data;
690 break;
691 default:
692 break;
693 }
694
695 for (p=data; p != data + len; p++) {
696 ch = *p;
697
698 if (PARSING_HEADER(CURRENT_STATE()))
699 COUNT_HEADER_SIZE(1);
700
701 switch (CURRENT_STATE()) {
702
703 case s_dead:
704 /* this state is used after a 'Connection: close' message
705 * the parser will error out if it reads another message
706 */
707 if (LIKELY(ch == CR || ch == LF))
708 break;
709
710 SET_ERRNO(HPE_CLOSED_CONNECTION);
711 goto error;
712
713 case s_start_req_or_res:
714 {
715 if (ch == CR || ch == LF)
716 break;
717 parser->flags = 0;
718 parser->content_length = ULLONG_MAX;
719
720 if (ch == 'H') {
721 UPDATE_STATE(s_res_or_resp_H);
722
723 CALLBACK_NOTIFY(message_begin);
724 } else {
725 parser->type = HTTP_REQUEST;
726 UPDATE_STATE(s_start_req);
727 REEXECUTE();
728 }
729
730 break;
731 }
732
733 case s_res_or_resp_H:
734 if (ch == 'T') {
735 parser->type = HTTP_RESPONSE;
736 UPDATE_STATE(s_res_HT);
737 } else {
738 if (UNLIKELY(ch != 'E')) {
739 SET_ERRNO(HPE_INVALID_CONSTANT);
740 goto error;
741 }
742
743 parser->type = HTTP_REQUEST;
744 parser->method = HTTP_HEAD;
745 parser->index = 2;
746 UPDATE_STATE(s_req_method);
747 }
748 break;
749
750 case s_start_res:
751 {
752 parser->flags = 0;
753 parser->content_length = ULLONG_MAX;
754
755 switch (ch) {
756 case 'H':
757 UPDATE_STATE(s_res_H);
758 break;
759
760 case CR:
761 case LF:
762 break;
763
764 default:
765 SET_ERRNO(HPE_INVALID_CONSTANT);
766 goto error;
767 }
768
769 CALLBACK_NOTIFY(message_begin);
770 break;
771 }
772
773 case s_res_H:
774 STRICT_CHECK(ch != 'T');
775 UPDATE_STATE(s_res_HT);
776 break;
777
778 case s_res_HT:
779 STRICT_CHECK(ch != 'T');
780 UPDATE_STATE(s_res_HTT);
781 break;
782
783 case s_res_HTT:
784 STRICT_CHECK(ch != 'P');
785 UPDATE_STATE(s_res_HTTP);
786 break;
787
788 case s_res_HTTP:
789 STRICT_CHECK(ch != '/');
790 UPDATE_STATE(s_res_first_http_major);
791 break;
792
793 case s_res_first_http_major:
794 if (UNLIKELY(ch < '0' || ch > '9')) {
795 SET_ERRNO(HPE_INVALID_VERSION);
796 goto error;
797 }
798
799 parser->http_major = ch - '0';
800 UPDATE_STATE(s_res_http_major);
801 break;
802
803 /* major HTTP version or dot */
804 case s_res_http_major:
805 {
806 if (ch == '.') {
807 UPDATE_STATE(s_res_first_http_minor);
808 break;
809 }
810
811 if (!IS_NUM(ch)) {
812 SET_ERRNO(HPE_INVALID_VERSION);
813 goto error;
814 }
815
816 parser->http_major *= 10;
817 parser->http_major += ch - '0';
818
819 if (UNLIKELY(parser->http_major > 999)) {
820 SET_ERRNO(HPE_INVALID_VERSION);
821 goto error;
822 }
823
824 break;
825 }
826
827 /* first digit of minor HTTP version */
828 case s_res_first_http_minor:
829 if (UNLIKELY(!IS_NUM(ch))) {
830 SET_ERRNO(HPE_INVALID_VERSION);
831 goto error;
832 }
833
834 parser->http_minor = ch - '0';
835 UPDATE_STATE(s_res_http_minor);
836 break;
837
838 /* minor HTTP version or end of request line */
839 case s_res_http_minor:
840 {
841 if (ch == ' ') {
842 UPDATE_STATE(s_res_first_status_code);
843 break;
844 }
845
846 if (UNLIKELY(!IS_NUM(ch))) {
847 SET_ERRNO(HPE_INVALID_VERSION);
848 goto error;
849 }
850
851 parser->http_minor *= 10;
852 parser->http_minor += ch - '0';
853
854 if (UNLIKELY(parser->http_minor > 999)) {
855 SET_ERRNO(HPE_INVALID_VERSION);
856 goto error;
857 }
858
859 break;
860 }
861
862 case s_res_first_status_code:
863 {
864 if (!IS_NUM(ch)) {
865 if (ch == ' ') {
866 break;
867 }
868
869 SET_ERRNO(HPE_INVALID_STATUS);
870 goto error;
871 }
872 parser->status_code = ch - '0';
873 UPDATE_STATE(s_res_status_code);
874 break;
875 }
876
877 case s_res_status_code:
878 {
879 if (!IS_NUM(ch)) {
880 switch (ch) {
881 case ' ':
882 UPDATE_STATE(s_res_status_start);
883 break;
884 case CR:
885 UPDATE_STATE(s_res_line_almost_done);
886 break;
887 case LF:
888 UPDATE_STATE(s_header_field_start);
889 break;
890 default:
891 SET_ERRNO(HPE_INVALID_STATUS);
892 goto error;
893 }
894 break;
895 }
896
897 parser->status_code *= 10;
898 parser->status_code += ch - '0';
899
900 if (UNLIKELY(parser->status_code > 999)) {
901 SET_ERRNO(HPE_INVALID_STATUS);
902 goto error;
903 }
904
905 break;
906 }
907
908 case s_res_status_start:
909 {
910 if (ch == CR) {
911 UPDATE_STATE(s_res_line_almost_done);
912 break;
913 }
914
915 if (ch == LF) {
916 UPDATE_STATE(s_header_field_start);
917 break;
918 }
919
920 MARK(status);
921 UPDATE_STATE(s_res_status);
922 parser->index = 0;
923 break;
924 }
925
926 case s_res_status:
927 if (ch == CR) {
928 UPDATE_STATE(s_res_line_almost_done);
929 CALLBACK_DATA(status);
930 break;
931 }
932
933 if (ch == LF) {
934 UPDATE_STATE(s_header_field_start);
935 CALLBACK_DATA(status);
936 break;
937 }
938
939 break;
940
941 case s_res_line_almost_done:
942 STRICT_CHECK(ch != LF);
943 UPDATE_STATE(s_header_field_start);
944 break;
945
946 case s_start_req:
947 {
948 if (ch == CR || ch == LF)
949 break;
950 parser->flags = 0;
951 parser->content_length = ULLONG_MAX;
952
953 if (UNLIKELY(!IS_ALPHA(ch))) {
954 SET_ERRNO(HPE_INVALID_METHOD);
955 goto error;
956 }
957
958 parser->method = (enum http_method) 0;
959 parser->index = 1;
960 switch (ch) {
961 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
962 case 'D': parser->method = HTTP_DELETE; break;
963 case 'G': parser->method = HTTP_GET; break;
964 case 'H': parser->method = HTTP_HEAD; break;
965 case 'L': parser->method = HTTP_LOCK; break;
966 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
967 case 'N': parser->method = HTTP_NOTIFY; break;
968 case 'O': parser->method = HTTP_OPTIONS; break;
969 case 'P': parser->method = HTTP_POST;
970 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
971 break;
972 case 'R': parser->method = HTTP_REPORT; break;
973 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
974 case 'T': parser->method = HTTP_TRACE; break;
975 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
976 default:
977 SET_ERRNO(HPE_INVALID_METHOD);
978 goto error;
979 }
980 UPDATE_STATE(s_req_method);
981
982 CALLBACK_NOTIFY(message_begin);
983
984 break;
985 }
986
987 case s_req_method:
988 {
989 const char *matcher;
990 if (UNLIKELY(ch == '\0')) {
991 SET_ERRNO(HPE_INVALID_METHOD);
992 goto error;
993 }
994
995 matcher = method_strings[parser->method];
996 if (ch == ' ' && matcher[parser->index] == '\0') {
997 UPDATE_STATE(s_req_spaces_before_url);
998 } else if (ch == matcher[parser->index]) {
999 ; /* nada */
1000 } else if (parser->method == HTTP_CONNECT) {
1001 if (parser->index == 1 && ch == 'H') {
1002 parser->method = HTTP_CHECKOUT;
1003 } else if (parser->index == 2 && ch == 'P') {
1004 parser->method = HTTP_COPY;
1005 } else {
1006 SET_ERRNO(HPE_INVALID_METHOD);
1007 goto error;
1008 }
1009 } else if (parser->method == HTTP_MKCOL) {
1010 if (parser->index == 1 && ch == 'O') {
1011 parser->method = HTTP_MOVE;
1012 } else if (parser->index == 1 && ch == 'E') {
1013 parser->method = HTTP_MERGE;
1014 } else if (parser->index == 1 && ch == '-') {
1015 parser->method = HTTP_MSEARCH;
1016 } else if (parser->index == 2 && ch == 'A') {
1017 parser->method = HTTP_MKACTIVITY;
1018 } else if (parser->index == 3 && ch == 'A') {
1019 parser->method = HTTP_MKCALENDAR;
1020 } else {
1021 SET_ERRNO(HPE_INVALID_METHOD);
1022 goto error;
1023 }
1024 } else if (parser->method == HTTP_SUBSCRIBE) {
1025 if (parser->index == 1 && ch == 'E') {
1026 parser->method = HTTP_SEARCH;
1027 } else {
1028 SET_ERRNO(HPE_INVALID_METHOD);
1029 goto error;
1030 }
1031 } else if (parser->index == 1 && parser->method == HTTP_POST) {
1032 if (ch == 'R') {
1033 parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
1034 } else if (ch == 'U') {
1035 parser->method = HTTP_PUT; /* or HTTP_PURGE */
1036 } else if (ch == 'A') {
1037 parser->method = HTTP_PATCH;
1038 } else {
1039 SET_ERRNO(HPE_INVALID_METHOD);
1040 goto error;
1041 }
1042 } else if (parser->index == 2) {
1043 if (parser->method == HTTP_PUT) {
1044 if (ch == 'R') {
1045 parser->method = HTTP_PURGE;
1046 } else {
1047 SET_ERRNO(HPE_INVALID_METHOD);
1048 goto error;
1049 }
1050 } else if (parser->method == HTTP_UNLOCK) {
1051 if (ch == 'S') {
1052 parser->method = HTTP_UNSUBSCRIBE;
1053 } else {
1054 SET_ERRNO(HPE_INVALID_METHOD);
1055 goto error;
1056 }
1057 } else {
1058 SET_ERRNO(HPE_INVALID_METHOD);
1059 goto error;
1060 }
1061 } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1062 parser->method = HTTP_PROPPATCH;
1063 } else {
1064 SET_ERRNO(HPE_INVALID_METHOD);
1065 goto error;
1066 }
1067
1068 ++parser->index;
1069 break;
1070 }
1071
1072 case s_req_spaces_before_url:
1073 {
1074 if (ch == ' ') break;
1075
1076 MARK(url);
1077 if (parser->method == HTTP_CONNECT) {
1078 UPDATE_STATE(s_req_server_start);
1079 }
1080
1081 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1082 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1083 SET_ERRNO(HPE_INVALID_URL);
1084 goto error;
1085 }
1086
1087 break;
1088 }
1089
1090 case s_req_schema:
1091 case s_req_schema_slash:
1092 case s_req_schema_slash_slash:
1093 case s_req_server_start:
1094 {
1095 switch (ch) {
1096 /* No whitespace allowed here */
1097 case ' ':
1098 case CR:
1099 case LF:
1100 SET_ERRNO(HPE_INVALID_URL);
1101 goto error;
1102 default:
1103 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1104 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1105 SET_ERRNO(HPE_INVALID_URL);
1106 goto error;
1107 }
1108 }
1109
1110 break;
1111 }
1112
1113 case s_req_server:
1114 case s_req_server_with_at:
1115 case s_req_path:
1116 case s_req_query_string_start:
1117 case s_req_query_string:
1118 case s_req_fragment_start:
1119 case s_req_fragment:
1120 {
1121 switch (ch) {
1122 case ' ':
1123 UPDATE_STATE(s_req_http_start);
1124 CALLBACK_DATA(url);
1125 break;
1126 case CR:
1127 case LF:
1128 parser->http_major = 0;
1129 parser->http_minor = 9;
1130 UPDATE_STATE((ch == CR) ?
1131 s_req_line_almost_done :
1132 s_header_field_start);
1133 CALLBACK_DATA(url);
1134 break;
1135 default:
1136 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1137 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1138 SET_ERRNO(HPE_INVALID_URL);
1139 goto error;
1140 }
1141 }
1142 break;
1143 }
1144
1145 case s_req_http_start:
1146 switch (ch) {
1147 case 'H':
1148 UPDATE_STATE(s_req_http_H);
1149 break;
1150 case ' ':
1151 break;
1152 default:
1153 SET_ERRNO(HPE_INVALID_CONSTANT);
1154 goto error;
1155 }
1156 break;
1157
1158 case s_req_http_H:
1159 STRICT_CHECK(ch != 'T');
1160 UPDATE_STATE(s_req_http_HT);
1161 break;
1162
1163 case s_req_http_HT:
1164 STRICT_CHECK(ch != 'T');
1165 UPDATE_STATE(s_req_http_HTT);
1166 break;
1167
1168 case s_req_http_HTT:
1169 STRICT_CHECK(ch != 'P');
1170 UPDATE_STATE(s_req_http_HTTP);
1171 break;
1172
1173 case s_req_http_HTTP:
1174 STRICT_CHECK(ch != '/');
1175 UPDATE_STATE(s_req_first_http_major);
1176 break;
1177
1178 /* first digit of major HTTP version */
1179 case s_req_first_http_major:
1180 if (UNLIKELY(ch < '1' || ch > '9')) {
1181 SET_ERRNO(HPE_INVALID_VERSION);
1182 goto error;
1183 }
1184
1185 parser->http_major = ch - '0';
1186 UPDATE_STATE(s_req_http_major);
1187 break;
1188
1189 /* major HTTP version or dot */
1190 case s_req_http_major:
1191 {
1192 if (ch == '.') {
1193 UPDATE_STATE(s_req_first_http_minor);
1194 break;
1195 }
1196
1197 if (UNLIKELY(!IS_NUM(ch))) {
1198 SET_ERRNO(HPE_INVALID_VERSION);
1199 goto error;
1200 }
1201
1202 parser->http_major *= 10;
1203 parser->http_major += ch - '0';
1204
1205 if (UNLIKELY(parser->http_major > 999)) {
1206 SET_ERRNO(HPE_INVALID_VERSION);
1207 goto error;
1208 }
1209
1210 break;
1211 }
1212
1213 /* first digit of minor HTTP version */
1214 case s_req_first_http_minor:
1215 if (UNLIKELY(!IS_NUM(ch))) {
1216 SET_ERRNO(HPE_INVALID_VERSION);
1217 goto error;
1218 }
1219
1220 parser->http_minor = ch - '0';
1221 UPDATE_STATE(s_req_http_minor);
1222 break;
1223
1224 /* minor HTTP version or end of request line */
1225 case s_req_http_minor:
1226 {
1227 if (ch == CR) {
1228 UPDATE_STATE(s_req_line_almost_done);
1229 break;
1230 }
1231
1232 if (ch == LF) {
1233 UPDATE_STATE(s_header_field_start);
1234 break;
1235 }
1236
1237 /* XXX allow spaces after digit? */
1238
1239 if (UNLIKELY(!IS_NUM(ch))) {
1240 SET_ERRNO(HPE_INVALID_VERSION);
1241 goto error;
1242 }
1243
1244 parser->http_minor *= 10;
1245 parser->http_minor += ch - '0';
1246
1247 if (UNLIKELY(parser->http_minor > 999)) {
1248 SET_ERRNO(HPE_INVALID_VERSION);
1249 goto error;
1250 }
1251
1252 break;
1253 }
1254
1255 /* end of request line */
1256 case s_req_line_almost_done:
1257 {
1258 if (UNLIKELY(ch != LF)) {
1259 SET_ERRNO(HPE_LF_EXPECTED);
1260 goto error;
1261 }
1262
1263 UPDATE_STATE(s_header_field_start);
1264 break;
1265 }
1266
1267 case s_header_field_start:
1268 {
1269 if (ch == CR) {
1270 UPDATE_STATE(s_headers_almost_done);
1271 break;
1272 }
1273
1274 if (ch == LF) {
1275 /* they might be just sending \n instead of \r\n so this would be
1276 * the second \n to denote the end of headers*/
1277 UPDATE_STATE(s_headers_almost_done);
1278 REEXECUTE();
1279 }
1280
1281 c = TOKEN(ch);
1282
1283 if (UNLIKELY(!c)) {
1284 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1285 goto error;
1286 }
1287
1288 MARK(header_field);
1289
1290 parser->index = 0;
1291 UPDATE_STATE(s_header_field);
1292
1293 switch (c) {
1294 case 'c':
1295 parser->header_state = h_C;
1296 break;
1297
1298 case 'p':
1299 parser->header_state = h_matching_proxy_connection;
1300 break;
1301
1302 case 't':
1303 parser->header_state = h_matching_transfer_encoding;
1304 break;
1305
1306 case 'u':
1307 parser->header_state = h_matching_upgrade;
1308 break;
1309
1310 default:
1311 parser->header_state = h_general;
1312 break;
1313 }
1314 break;
1315 }
1316
1317 case s_header_field:
1318 {
1319 const char* start = p;
1320 for (; p != data + len; p++) {
1321 ch = *p;
1322 c = TOKEN(ch);
1323
1324 if (!c)
1325 break;
1326
1327 switch (parser->header_state) {
1328 case h_general:
1329 break;
1330
1331 case h_C:
1332 parser->index++;
1333 parser->header_state = (c == 'o' ? h_CO : h_general);
1334 break;
1335
1336 case h_CO:
1337 parser->index++;
1338 parser->header_state = (c == 'n' ? h_CON : h_general);
1339 break;
1340
1341 case h_CON:
1342 parser->index++;
1343 switch (c) {
1344 case 'n':
1345 parser->header_state = h_matching_connection;
1346 break;
1347 case 't':
1348 parser->header_state = h_matching_content_length;
1349 break;
1350 default:
1351 parser->header_state = h_general;
1352 break;
1353 }
1354 break;
1355
1356 /* connection */
1357
1358 case h_matching_connection:
1359 parser->index++;
1360 if (parser->index > sizeof(CONNECTION)-1
1361 || c != CONNECTION[parser->index]) {
1362 parser->header_state = h_general;
1363 } else if (parser->index == sizeof(CONNECTION)-2) {
1364 parser->header_state = h_connection;
1365 }
1366 break;
1367
1368 /* proxy-connection */
1369
1370 case h_matching_proxy_connection:
1371 parser->index++;
1372 if (parser->index > sizeof(PROXY_CONNECTION)-1
1373 || c != PROXY_CONNECTION[parser->index]) {
1374 parser->header_state = h_general;
1375 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1376 parser->header_state = h_connection;
1377 }
1378 break;
1379
1380 /* content-length */
1381
1382 case h_matching_content_length:
1383 parser->index++;
1384 if (parser->index > sizeof(CONTENT_LENGTH)-1
1385 || c != CONTENT_LENGTH[parser->index]) {
1386 parser->header_state = h_general;
1387 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1388 parser->header_state = h_content_length;
1389 }
1390 break;
1391
1392 /* transfer-encoding */
1393
1394 case h_matching_transfer_encoding:
1395 parser->index++;
1396 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1397 || c != TRANSFER_ENCODING[parser->index]) {
1398 parser->header_state = h_general;
1399 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1400 parser->header_state = h_transfer_encoding;
1401 }
1402 break;
1403
1404 /* upgrade */
1405
1406 case h_matching_upgrade:
1407 parser->index++;
1408 if (parser->index > sizeof(UPGRADE)-1
1409 || c != UPGRADE[parser->index]) {
1410 parser->header_state = h_general;
1411 } else if (parser->index == sizeof(UPGRADE)-2) {
1412 parser->header_state = h_upgrade;
1413 }
1414 break;
1415
1416 case h_connection:
1417 case h_content_length:
1418 case h_transfer_encoding:
1419 case h_upgrade:
1420 if (ch != ' ') parser->header_state = h_general;
1421 break;
1422
1423 default:
1424 assert(0 && "Unknown header_state");
1425 break;
1426 }
1427 }
1428
1429 COUNT_HEADER_SIZE(p - start);
1430
1431 if (p == data + len) {
1432 --p;
1433 break;
1434 }
1435
1436 if (ch == ':') {
1437 UPDATE_STATE(s_header_value_discard_ws);
1438 CALLBACK_DATA(header_field);
1439 break;
1440 }
1441
1442 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1443 goto error;
1444 }
1445
1446 case s_header_value_discard_ws:
1447 if (ch == ' ' || ch == '\t') break;
1448
1449 if (ch == CR) {
1450 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1451 break;
1452 }
1453
1454 if (ch == LF) {
1455 UPDATE_STATE(s_header_value_discard_lws);
1456 break;
1457 }
1458
1459 /* FALLTHROUGH */
1460
1461 case s_header_value_start:
1462 {
1463 MARK(header_value);
1464
1465 UPDATE_STATE(s_header_value);
1466 parser->index = 0;
1467
1468 c = LOWER(ch);
1469
1470 switch (parser->header_state) {
1471 case h_upgrade:
1472 parser->flags |= F_UPGRADE;
1473 parser->header_state = h_general;
1474 break;
1475
1476 case h_transfer_encoding:
1477 /* looking for 'Transfer-Encoding: chunked' */
1478 if ('c' == c) {
1479 parser->header_state = h_matching_transfer_encoding_chunked;
1480 } else {
1481 parser->header_state = h_general;
1482 }
1483 break;
1484
1485 case h_content_length:
1486 if (UNLIKELY(!IS_NUM(ch))) {
1487 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1488 goto error;
1489 }
1490
1491 parser->content_length = ch - '0';
1492 break;
1493
1494 case h_connection:
1495 /* looking for 'Connection: keep-alive' */
1496 if (c == 'k') {
1497 parser->header_state = h_matching_connection_keep_alive;
1498 /* looking for 'Connection: close' */
1499 } else if (c == 'c') {
1500 parser->header_state = h_matching_connection_close;
1501 } else if (c == 'u') {
1502 parser->header_state = h_matching_connection_upgrade;
1503 } else {
1504 parser->header_state = h_matching_connection_token;
1505 }
1506 break;
1507
1508 /* Multi-value `Connection` header */
1509 case h_matching_connection_token_start:
1510 break;
1511
1512 default:
1513 parser->header_state = h_general;
1514 break;
1515 }
1516 break;
1517 }
1518
1519 case s_header_value:
1520 {
1521 const char* start = p;
1522 enum header_states h_state = (enum header_states) parser->header_state;
1523 for (; p != data + len; p++) {
1524 ch = *p;
1525 if (ch == CR) {
1526 UPDATE_STATE(s_header_almost_done);
1527 parser->header_state = h_state;
1528 CALLBACK_DATA(header_value);
1529 break;
1530 }
1531
1532 if (ch == LF) {
1533 UPDATE_STATE(s_header_almost_done);
1534 COUNT_HEADER_SIZE(p - start);
1535 parser->header_state = h_state;
1536 CALLBACK_DATA_NOADVANCE(header_value);
1537 REEXECUTE();
1538 }
1539
1540 c = LOWER(ch);
1541
1542 switch (h_state) {
1543 case h_general:
1544 {
1545 const char* p_cr;
1546 const char* p_lf;
1547 size_t limit = data + len - p;
1548
1549 limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1550
1551 p_cr = (const char*) memchr(p, CR, limit);
1552 p_lf = (const char*) memchr(p, LF, limit);
1553 if (p_cr != NULL) {
1554 if (p_lf != NULL && p_cr >= p_lf)
1555 p = p_lf;
1556 else
1557 p = p_cr;
1558 } else if (UNLIKELY(p_lf != NULL)) {
1559 p = p_lf;
1560 } else {
1561 p = data + len;
1562 }
1563 --p;
1564
1565 break;
1566 }
1567
1568 case h_connection:
1569 case h_transfer_encoding:
1570 assert(0 && "Shouldn't get here.");
1571 break;
1572
1573 case h_content_length:
1574 {
1575 uint64_t t;
1576
1577 if (ch == ' ') break;
1578
1579 if (UNLIKELY(!IS_NUM(ch))) {
1580 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1581 parser->header_state = h_state;
1582 goto error;
1583 }
1584
1585 t = parser->content_length;
1586 t *= 10;
1587 t += ch - '0';
1588
1589 /* Overflow? Test against a conservative limit for simplicity. */
1590 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1591 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1592 parser->header_state = h_state;
1593 goto error;
1594 }
1595
1596 parser->content_length = t;
1597 break;
1598 }
1599
1600 /* Transfer-Encoding: chunked */
1601 case h_matching_transfer_encoding_chunked:
1602 parser->index++;
1603 if (parser->index > sizeof(CHUNKED)-1
1604 || c != CHUNKED[parser->index]) {
1605 h_state = h_general;
1606 } else if (parser->index == sizeof(CHUNKED)-2) {
1607 h_state = h_transfer_encoding_chunked;
1608 }
1609 break;
1610
1611 case h_matching_connection_token_start:
1612 /* looking for 'Connection: keep-alive' */
1613 if (c == 'k') {
1614 h_state = h_matching_connection_keep_alive;
1615 /* looking for 'Connection: close' */
1616 } else if (c == 'c') {
1617 h_state = h_matching_connection_close;
1618 } else if (c == 'u') {
1619 h_state = h_matching_connection_upgrade;
1620 } else if (STRICT_TOKEN(c)) {
1621 h_state = h_matching_connection_token;
1622 } else {
1623 h_state = h_general;
1624 }
1625 break;
1626
1627 /* looking for 'Connection: keep-alive' */
1628 case h_matching_connection_keep_alive:
1629 parser->index++;
1630 if (parser->index > sizeof(KEEP_ALIVE)-1
1631 || c != KEEP_ALIVE[parser->index]) {
1632 h_state = h_matching_connection_token;
1633 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1634 h_state = h_connection_keep_alive;
1635 }
1636 break;
1637
1638 /* looking for 'Connection: close' */
1639 case h_matching_connection_close:
1640 parser->index++;
1641 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1642 h_state = h_matching_connection_token;
1643 } else if (parser->index == sizeof(CLOSE)-2) {
1644 h_state = h_connection_close;
1645 }
1646 break;
1647
1648 /* looking for 'Connection: upgrade' */
1649 case h_matching_connection_upgrade:
1650 parser->index++;
1651 if (parser->index > sizeof(UPGRADE) - 1 ||
1652 c != UPGRADE[parser->index]) {
1653 h_state = h_matching_connection_token;
1654 } else if (parser->index == sizeof(UPGRADE)-2) {
1655 h_state = h_connection_upgrade;
1656 }
1657 break;
1658
1659 case h_matching_connection_token:
1660 if (ch == ',') {
1661 h_state = h_matching_connection_token_start;
1662 parser->index = 0;
1663 }
1664 break;
1665
1666 case h_transfer_encoding_chunked:
1667 if (ch != ' ') h_state = h_general;
1668 break;
1669
1670 case h_connection_keep_alive:
1671 case h_connection_close:
1672 case h_connection_upgrade:
1673 if (ch == ',') {
1674 if (h_state == h_connection_keep_alive) {
1675 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1676 } else if (h_state == h_connection_close) {
1677 parser->flags |= F_CONNECTION_CLOSE;
1678 } else if (h_state == h_connection_upgrade) {
1679 parser->flags |= F_CONNECTION_UPGRADE;
1680 }
1681 h_state = h_matching_connection_token_start;
1682 parser->index = 0;
1683 } else if (ch != ' ') {
1684 h_state = h_matching_connection_token;
1685 }
1686 break;
1687
1688 default:
1689 UPDATE_STATE(s_header_value);
1690 h_state = h_general;
1691 break;
1692 }
1693 }
1694 parser->header_state = h_state;
1695
1696 COUNT_HEADER_SIZE(p - start);
1697
1698 if (p == data + len)
1699 --p;
1700 break;
1701 }
1702
1703 case s_header_almost_done:
1704 {
1705 STRICT_CHECK(ch != LF);
1706
1707 UPDATE_STATE(s_header_value_lws);
1708 break;
1709 }
1710
1711 case s_header_value_lws:
1712 {
1713 if (ch == ' ' || ch == '\t') {
1714 UPDATE_STATE(s_header_value_start);
1715 REEXECUTE();
1716 }
1717
1718 /* finished the header */
1719 switch (parser->header_state) {
1720 case h_connection_keep_alive:
1721 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1722 break;
1723 case h_connection_close:
1724 parser->flags |= F_CONNECTION_CLOSE;
1725 break;
1726 case h_transfer_encoding_chunked:
1727 parser->flags |= F_CHUNKED;
1728 break;
1729 case h_connection_upgrade:
1730 parser->flags |= F_CONNECTION_UPGRADE;
1731 break;
1732 default:
1733 break;
1734 }
1735
1736 UPDATE_STATE(s_header_field_start);
1737 REEXECUTE();
1738 }
1739
1740 case s_header_value_discard_ws_almost_done:
1741 {
1742 STRICT_CHECK(ch != LF);
1743 UPDATE_STATE(s_header_value_discard_lws);
1744 break;
1745 }
1746
1747 case s_header_value_discard_lws:
1748 {
1749 if (ch == ' ' || ch == '\t') {
1750 UPDATE_STATE(s_header_value_discard_ws);
1751 break;
1752 } else {
1753 switch (parser->header_state) {
1754 case h_connection_keep_alive:
1755 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1756 break;
1757 case h_connection_close:
1758 parser->flags |= F_CONNECTION_CLOSE;
1759 break;
1760 case h_connection_upgrade:
1761 parser->flags |= F_CONNECTION_UPGRADE;
1762 break;
1763 case h_transfer_encoding_chunked:
1764 parser->flags |= F_CHUNKED;
1765 break;
1766 default:
1767 break;
1768 }
1769
1770 /* header value was empty */
1771 MARK(header_value);
1772 UPDATE_STATE(s_header_field_start);
1773 CALLBACK_DATA_NOADVANCE(header_value);
1774 REEXECUTE();
1775 }
1776 }
1777
1778 case s_headers_almost_done:
1779 {
1780 STRICT_CHECK(ch != LF);
1781
1782 if (parser->flags & F_TRAILING) {
1783 /* End of a chunked request */
1784 UPDATE_STATE(NEW_MESSAGE());
1785 CALLBACK_NOTIFY(message_complete);
1786 break;
1787 }
1788
1789 UPDATE_STATE(s_headers_done);
1790
1791 /* Set this here so that on_headers_complete() callbacks can see it */
1792 parser->upgrade =
1793 ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1794 (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1795 parser->method == HTTP_CONNECT);
1796
1797 /* Here we call the headers_complete callback. This is somewhat
1798 * different than other callbacks because if the user returns 1, we
1799 * will interpret that as saying that this message has no body. This
1800 * is needed for the annoying case of recieving a response to a HEAD
1801 * request.
1802 *
1803 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1804 * we have to simulate it by handling a change in errno below.
1805 */
1806 if (settings->on_headers_complete) {
1807 switch (settings->on_headers_complete(parser)) {
1808 case 0:
1809 break;
1810
1811 case 1:
1812 parser->flags |= F_SKIPBODY;
1813 break;
1814
1815 default:
1816 SET_ERRNO(HPE_CB_headers_complete);
1817 RETURN(p - data); /* Error */
1818 }
1819 }
1820
1821 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1822 RETURN(p - data);
1823 }
1824
1825 REEXECUTE();
1826 }
1827
1828 case s_headers_done:
1829 {
1830 STRICT_CHECK(ch != LF);
1831
1832 parser->nread = 0;
1833
1834 /* Exit, the rest of the connect is in a different protocol. */
1835 if (parser->upgrade) {
1836 UPDATE_STATE(NEW_MESSAGE());
1837 CALLBACK_NOTIFY(message_complete);
1838 RETURN((p - data) + 1);
1839 }
1840
1841 if (parser->flags & F_SKIPBODY) {
1842 UPDATE_STATE(NEW_MESSAGE());
1843 CALLBACK_NOTIFY(message_complete);
1844 } else if (parser->flags & F_CHUNKED) {
1845 /* chunked encoding - ignore Content-Length header */
1846 UPDATE_STATE(s_chunk_size_start);
1847 } else {
1848 if (parser->content_length == 0) {
1849 /* Content-Length header given but zero: Content-Length: 0\r\n */
1850 UPDATE_STATE(NEW_MESSAGE());
1851 CALLBACK_NOTIFY(message_complete);
1852 } else if (parser->content_length != ULLONG_MAX) {
1853 /* Content-Length header given and non-zero */
1854 UPDATE_STATE(s_body_identity);
1855 } else {
1856 if (parser->type == HTTP_REQUEST ||
1857 !http_message_needs_eof(parser)) {
1858 /* Assume content-length 0 - read the next */
1859 UPDATE_STATE(NEW_MESSAGE());
1860 CALLBACK_NOTIFY(message_complete);
1861 } else {
1862 /* Read body until EOF */
1863 UPDATE_STATE(s_body_identity_eof);
1864 }
1865 }
1866 }
1867
1868 break;
1869 }
1870
1871 case s_body_identity:
1872 {
1873 uint64_t to_read = MIN(parser->content_length,
1874 (uint64_t) ((data + len) - p));
1875
1876 assert(parser->content_length != 0
1877 && parser->content_length != ULLONG_MAX);
1878
1879 /* The difference between advancing content_length and p is because
1880 * the latter will automaticaly advance on the next loop iteration.
1881 * Further, if content_length ends up at 0, we want to see the last
1882 * byte again for our message complete callback.
1883 */
1884 MARK(body);
1885 parser->content_length -= to_read;
1886 p += to_read - 1;
1887
1888 if (parser->content_length == 0) {
1889 UPDATE_STATE(s_message_done);
1890
1891 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1892 *
1893 * The alternative to doing this is to wait for the next byte to
1894 * trigger the data callback, just as in every other case. The
1895 * problem with this is that this makes it difficult for the test
1896 * harness to distinguish between complete-on-EOF and
1897 * complete-on-length. It's not clear that this distinction is
1898 * important for applications, but let's keep it for now.
1899 */
1900 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1901 REEXECUTE();
1902 }
1903
1904 break;
1905 }
1906
1907 /* read until EOF */
1908 case s_body_identity_eof:
1909 MARK(body);
1910 p = data + len - 1;
1911
1912 break;
1913
1914 case s_message_done:
1915 UPDATE_STATE(NEW_MESSAGE());
1916 CALLBACK_NOTIFY(message_complete);
1917 break;
1918
1919 case s_chunk_size_start:
1920 {
1921 assert(parser->nread == 1);
1922 assert(parser->flags & F_CHUNKED);
1923
1924 unhex_val = unhex[(unsigned char)ch];
1925 if (UNLIKELY(unhex_val == -1)) {
1926 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1927 goto error;
1928 }
1929
1930 parser->content_length = unhex_val;
1931 UPDATE_STATE(s_chunk_size);
1932 break;
1933 }
1934
1935 case s_chunk_size:
1936 {
1937 uint64_t t;
1938
1939 assert(parser->flags & F_CHUNKED);
1940
1941 if (ch == CR) {
1942 UPDATE_STATE(s_chunk_size_almost_done);
1943 break;
1944 }
1945
1946 unhex_val = unhex[(unsigned char)ch];
1947
1948 if (unhex_val == -1) {
1949 if (ch == ';' || ch == ' ') {
1950 UPDATE_STATE(s_chunk_parameters);
1951 break;
1952 }
1953
1954 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1955 goto error;
1956 }
1957
1958 t = parser->content_length;
1959 t *= 16;
1960 t += unhex_val;
1961
1962 /* Overflow? Test against a conservative limit for simplicity. */
1963 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1964 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1965 goto error;
1966 }
1967
1968 parser->content_length = t;
1969 break;
1970 }
1971
1972 case s_chunk_parameters:
1973 {
1974 assert(parser->flags & F_CHUNKED);
1975 /* just ignore this shit. TODO check for overflow */
1976 if (ch == CR) {
1977 UPDATE_STATE(s_chunk_size_almost_done);
1978 break;
1979 }
1980 break;
1981 }
1982
1983 case s_chunk_size_almost_done:
1984 {
1985 assert(parser->flags & F_CHUNKED);
1986 STRICT_CHECK(ch != LF);
1987
1988 parser->nread = 0;
1989
1990 if (parser->content_length == 0) {
1991 parser->flags |= F_TRAILING;
1992 UPDATE_STATE(s_header_field_start);
1993 } else {
1994 UPDATE_STATE(s_chunk_data);
1995 }
1996 break;
1997 }
1998
1999 case s_chunk_data:
2000 {
2001 uint64_t to_read = MIN(parser->content_length,
2002 (uint64_t) ((data + len) - p));
2003
2004 assert(parser->flags & F_CHUNKED);
2005 assert(parser->content_length != 0
2006 && parser->content_length != ULLONG_MAX);
2007
2008 /* See the explanation in s_body_identity for why the content
2009 * length and data pointers are managed this way.
2010 */
2011 MARK(body);
2012 parser->content_length -= to_read;
2013 p += to_read - 1;
2014
2015 if (parser->content_length == 0) {
2016 UPDATE_STATE(s_chunk_data_almost_done);
2017 }
2018
2019 break;
2020 }
2021
2022 case s_chunk_data_almost_done:
2023 assert(parser->flags & F_CHUNKED);
2024 assert(parser->content_length == 0);
2025 STRICT_CHECK(ch != CR);
2026 UPDATE_STATE(s_chunk_data_done);
2027 CALLBACK_DATA(body);
2028 break;
2029
2030 case s_chunk_data_done:
2031 assert(parser->flags & F_CHUNKED);
2032 STRICT_CHECK(ch != LF);
2033 parser->nread = 0;
2034 UPDATE_STATE(s_chunk_size_start);
2035 break;
2036
2037 default:
2038 assert(0 && "unhandled state");
2039 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2040 goto error;
2041 }
2042 }
2043
2044 /* Run callbacks for any marks that we have leftover after we ran our of
2045 * bytes. There should be at most one of these set, so it's OK to invoke
2046 * them in series (unset marks will not result in callbacks).
2047 *
2048 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2049 * overflowed 'data' and this allows us to correct for the off-by-one that
2050 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2051 * value that's in-bounds).
2052 */
2053
2054 assert(((header_field_mark ? 1 : 0) +
2055 (header_value_mark ? 1 : 0) +
2056 (url_mark ? 1 : 0) +
2057 (body_mark ? 1 : 0) +
2058 (status_mark ? 1 : 0)) <= 1);
2059
2060 CALLBACK_DATA_NOADVANCE(header_field);
2061 CALLBACK_DATA_NOADVANCE(header_value);
2062 CALLBACK_DATA_NOADVANCE(url);
2063 CALLBACK_DATA_NOADVANCE(body);
2064 CALLBACK_DATA_NOADVANCE(status);
2065
2066 RETURN(len);
2067
2068error:
2069 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2070 SET_ERRNO(HPE_UNKNOWN);
2071 }
2072
2073 RETURN(p - data);
2074}
2075
2076
2077/* Does the parser need to see an EOF to find the end of the message? */
2078int
2079http_message_needs_eof (const http_parser *parser)
2080{
2081 if (parser->type == HTTP_REQUEST) {
2082 return 0;
2083 }
2084
2085 /* See RFC 2616 section 4.4 */
2086 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2087 parser->status_code == 204 || /* No Content */
2088 parser->status_code == 304 || /* Not Modified */
2089 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2090 return 0;
2091 }
2092
2093 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2094 return 0;
2095 }
2096
2097 return 1;
2098}
2099
2100
2101int
2102http_should_keep_alive (const http_parser *parser)
2103{
2104 if (parser->http_major > 0 && parser->http_minor > 0) {
2105 /* HTTP/1.1 */
2106 if (parser->flags & F_CONNECTION_CLOSE) {
2107 return 0;
2108 }
2109 } else {
2110 /* HTTP/1.0 or earlier */
2111 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2112 return 0;
2113 }
2114 }
2115
2116 return !http_message_needs_eof(parser);
2117}
2118
2119
2120const char *
2121http_method_str (enum http_method m)
2122{
2123 return ELEM_AT(method_strings, m, "<unknown>");
2124}
2125
2126
2127void
2128http_parser_init (http_parser *parser, enum http_parser_type t)
2129{
2130 void *data = parser->data; /* preserve application data */
2131 memset(parser, 0, sizeof(*parser));
2132 parser->data = data;
2133 parser->type = t;
2134 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2135 parser->http_errno = HPE_OK;
2136}
2137
2138const char *
2139http_errno_name(enum http_errno err) {
2140 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2141 return http_strerror_tab[err].name;
2142}
2143
2144const char *
2145http_errno_description(enum http_errno err) {
2146 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2147 return http_strerror_tab[err].description;
2148}
2149
2150static enum http_host_state
2151http_parse_host_char(enum http_host_state s, const char ch) {
2152 switch(s) {
2153 case s_http_userinfo:
2154 case s_http_userinfo_start:
2155 if (ch == '@') {
2156 return s_http_host_start;
2157 }
2158
2159 if (IS_USERINFO_CHAR(ch)) {
2160 return s_http_userinfo;
2161 }
2162 break;
2163
2164 case s_http_host_start:
2165 if (ch == '[') {
2166 return s_http_host_v6_start;
2167 }
2168
2169 if (IS_HOST_CHAR(ch)) {
2170 return s_http_host;
2171 }
2172
2173 break;
2174
2175 case s_http_host:
2176 if (IS_HOST_CHAR(ch)) {
2177 return s_http_host;
2178 }
2179
2180 /* FALLTHROUGH */
2181 case s_http_host_v6_end:
2182 if (ch == ':') {
2183 return s_http_host_port_start;
2184 }
2185
2186 break;
2187
2188 case s_http_host_v6:
2189 if (ch == ']') {
2190 return s_http_host_v6_end;
2191 }
2192
2193 /* FALLTHROUGH */
2194 case s_http_host_v6_start:
2195 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2196 return s_http_host_v6;
2197 }
2198
2199 break;
2200
2201 case s_http_host_port:
2202 case s_http_host_port_start:
2203 if (IS_NUM(ch)) {
2204 return s_http_host_port;
2205 }
2206
2207 break;
2208
2209 default:
2210 break;
2211 }
2212 return s_http_host_dead;
2213}
2214
2215static int
2216http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2217 enum http_host_state s;
2218
2219 const char *p;
2220 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2221
2222 u->field_data[UF_HOST].len = 0;
2223
2224 s = found_at ? s_http_userinfo_start : s_http_host_start;
2225
2226 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2227 enum http_host_state new_s = http_parse_host_char(s, *p);
2228
2229 if (new_s == s_http_host_dead) {
2230 return 1;
2231 }
2232
2233 switch(new_s) {
2234 case s_http_host:
2235 if (s != s_http_host) {
2236 u->field_data[UF_HOST].off = p - buf;
2237 }
2238 u->field_data[UF_HOST].len++;
2239 break;
2240
2241 case s_http_host_v6:
2242 if (s != s_http_host_v6) {
2243 u->field_data[UF_HOST].off = p - buf;
2244 }
2245 u->field_data[UF_HOST].len++;
2246 break;
2247
2248 case s_http_host_port:
2249 if (s != s_http_host_port) {
2250 u->field_data[UF_PORT].off = p - buf;
2251 u->field_data[UF_PORT].len = 0;
2252 u->field_set |= (1 << UF_PORT);
2253 }
2254 u->field_data[UF_PORT].len++;
2255 break;
2256
2257 case s_http_userinfo:
2258 if (s != s_http_userinfo) {
2259 u->field_data[UF_USERINFO].off = p - buf ;
2260 u->field_data[UF_USERINFO].len = 0;
2261 u->field_set |= (1 << UF_USERINFO);
2262 }
2263 u->field_data[UF_USERINFO].len++;
2264 break;
2265
2266 default:
2267 break;
2268 }
2269 s = new_s;
2270 }
2271
2272 /* Make sure we don't end somewhere unexpected */
2273 switch (s) {
2274 case s_http_host_start:
2275 case s_http_host_v6_start:
2276 case s_http_host_v6:
2277 case s_http_host_port_start:
2278 case s_http_userinfo:
2279 case s_http_userinfo_start:
2280 return 1;
2281 default:
2282 break;
2283 }
2284
2285 return 0;
2286}
2287
2288int
2289http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2290 struct http_parser_url *u)
2291{
2292 enum state s;
2293 const char *p;
2294 enum http_parser_url_fields uf, old_uf;
2295 int found_at = 0;
2296
2297 u->port = u->field_set = 0;
2298 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2299 old_uf = UF_MAX;
2300
2301 for (p = buf; p < buf + buflen; p++) {
2302 s = parse_url_char(s, *p);
2303
2304 /* Figure out the next field that we're operating on */
2305 switch (s) {
2306 case s_dead:
2307 return 1;
2308
2309 /* Skip delimeters */
2310 case s_req_schema_slash:
2311 case s_req_schema_slash_slash:
2312 case s_req_server_start:
2313 case s_req_query_string_start:
2314 case s_req_fragment_start:
2315 continue;
2316
2317 case s_req_schema:
2318 uf = UF_SCHEMA;
2319 break;
2320
2321 case s_req_server_with_at:
2322 found_at = 1;
2323
2324 /* FALLTROUGH */
2325 case s_req_server:
2326 uf = UF_HOST;
2327 break;
2328
2329 case s_req_path:
2330 uf = UF_PATH;
2331 break;
2332
2333 case s_req_query_string:
2334 uf = UF_QUERY;
2335 break;
2336
2337 case s_req_fragment:
2338 uf = UF_FRAGMENT;
2339 break;
2340
2341 default:
2342 assert(!"Unexpected state");
2343 return 1;
2344 }
2345
2346 /* Nothing's changed; soldier on */
2347 if (uf == old_uf) {
2348 u->field_data[uf].len++;
2349 continue;
2350 }
2351
2352 u->field_data[uf].off = p - buf;
2353 u->field_data[uf].len = 1;
2354
2355 u->field_set |= (1 << uf);
2356 old_uf = uf;
2357 }
2358
2359 /* host must be present if there is a schema */
2360 /* parsing http:///toto will fail */
2361 if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2362 if (http_parse_host(buf, u, found_at) != 0) {
2363 return 1;
2364 }
2365 }
2366
2367 /* CONNECT requests can only contain "hostname:port" */
2368 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2369 return 1;
2370 }
2371
2372 if (u->field_set & (1 << UF_PORT)) {
2373 /* Don't bother with endp; we've already validated the string */
2374 unsigned long v = ntlibc_strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2375
2376 /* Ports have a max value of 2^16 */
2377 if (v > 0xffff) {
2378 return 1;
2379 }
2380
2381 u->port = (uint16_t) v;
2382 }
2383
2384 return 0;
2385}
2386
2387void
2388http_parser_pause(http_parser *parser, int paused) {
2389 /* Users should only be pausing/unpausing a parser that is not in an error
2390 * state. In non-debug builds, there's not much that we can do about this
2391 * other than ignore it.
2392 */
2393 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2394 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2395 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2396 } else {
2397 assert(0 && "Attempting to pause parser in error state");
2398 }
2399}
2400
2401int
2402http_body_is_final(const struct http_parser *parser) {
2403 return parser->state == s_message_done;
2404}
2405
2406unsigned long
2407http_parser_version(void) {
2408 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2409 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2410 HTTP_PARSER_VERSION_PATCH * 0x00001;
2411}
Note: See TracBrowser for help on using the repository browser.