diff --git a/fw/http_parser.c b/fw/http_parser.c index 8980734b4..5f53214f4 100644 --- a/fw/http_parser.c +++ b/fw/http_parser.c @@ -380,31 +380,31 @@ do { \ __FSM_I_MATCH_MOVE_fixup_finish(alphabet, to, flag, {}) /* Conditional transition from state @st to @st_next. */ -#define __FSM_TX_COND(st, condition, st_next, field) \ -__FSM_STATE(st) { \ +#define __FSM_TX_COND(st, condition, st_next, field, ...) \ +__FSM_STATE(st, __VA_ARGS__) { \ if (likely(condition)) \ __FSM_MOVE_f(st_next, field); \ TFW_PARSER_BLOCK(st); \ } -#define __FSM_TX_COND_nofixup(st, condition, st_next) \ -__FSM_STATE(st) { \ +#define __FSM_TX_COND_nofixup(st, condition, st_next, ...) \ +__FSM_STATE(st, __VA_ARGS__) { \ if (likely(condition)) \ __FSM_MOVE_nofixup(st_next); \ TFW_PARSER_BLOCK(st); \ } /* Automaton transition from state @st to @st_next on character @ch. */ -#define __FSM_TX(st, ch, st_next) \ - __FSM_TX_COND(st, c == (ch), st_next, &parser->hdr) -#define __FSM_TX_nofixup(st, ch, st_next) \ - __FSM_TX_COND_nofixup(st, c == (ch), st_next) +#define __FSM_TX(st, ch, st_next, ...) \ + __FSM_TX_COND(st, c == (ch), st_next, &parser->hdr, __VA_ARGS__) +#define __FSM_TX_nofixup(st, ch, st_next, ...) \ + __FSM_TX_COND_nofixup(st, c == (ch), st_next, __VA_ARGS__) /* Case-insensitive version of __FSM_TX(). */ -#define __FSM_TX_LC(st, ch, st_next, field) \ - __FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field) -#define __FSM_TX_LC_nofixup(st, ch, st_next) \ - __FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next) +#define __FSM_TX_LC(st, ch, st_next, field, ...) \ + __FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field, __VA_ARGS__) +#define __FSM_TX_LC_nofixup(st, ch, st_next, ...) \ + __FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next, __VA_ARGS__) /* * Automaton transition with alphabet checking and fallback state. 
@@ -3837,119 +3837,7 @@ tfw_http_parse_req(void *req_data, unsigned char *data, size_t len, __FSM_STATE(Req_Uri, hot) { if (likely(c == '/')) __FSM_JMP(Req_UriMark); - - if (likely(__data_available(p, 7) - && C4_INT_LCM(p, 'h', 't', 't', 'p') - && *(p + 4) == ':' && *(p + 5) == '/' - && *(p + 6) == '/')) - __FSM_MOVE_nofixup_n(Req_UriAuthorityStart, 7); - - /* "http://" slow path - step char-by-char. */ - if (likely(TFW_LC(c) == 'h')) - __FSM_MOVE_nofixup(Req_UriSchH); - - TFW_PARSER_BLOCK(Req_Uri); - } - - /* - * URI host part. - * RFC 3986 chapter 3.2: authority = [userinfo@]host[:port] - * - * Authority parsing: it can be "host" or "userinfo@host" (port is - * parsed later). At the beginning we don't know, which of variants we - * have. So we fill req->host, and if we get '@', we copy host to - * req->userinfo, reset req->host and fill it. - */ - __FSM_STATE(Req_UriAuthorityStart) { - if (likely(isalnum(c) || c == '.' || c == '-')) { - __msg_field_open(&req->host, p); - __FSM_MOVE_f(Req_UriAuthority, &req->host); - } else if (likely(c == '/')) { - /* - * The case where "Host:" header value is empty. - * A special TfwStr{} string is created that has - * a valid pointer and the length of zero. - */ - T_DBG3("Handling http:///path\n"); - tfw_http_msg_set_str_data(msg, &req->host, p); - req->host.flags |= TFW_STR_COMPLETE; - __FSM_JMP(Req_UriMark); - } else if (c == '[') { - __msg_field_open(&req->host, p); - __FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host); - } - TFW_PARSER_BLOCK(Req_UriAuthorityStart); - } - - __FSM_STATE(Req_UriAuthority) { - if (likely(isalnum(c) || c == '.' 
|| c == '-' || c == '@')) { - if (unlikely(c == '@')) { - if (!TFW_STR_EMPTY(&req->userinfo)) { - T_DBG("Second '@' in authority\n"); - TFW_PARSER_BLOCK(Req_UriAuthority); - } - T_DBG3("Authority contains userinfo\n"); - /* copy current host to userinfo */ - req->userinfo = req->host; - __msg_field_finish(&req->userinfo, p); - TFW_STR_INIT(&req->host); - - __FSM_MOVE_nofixup(Req_UriAuthorityResetHost); - } - - __FSM_MOVE_f(Req_UriAuthority, &req->host); - } - __FSM_JMP(Req_UriAuthorityEnd); - } - - __FSM_STATE(Req_UriAuthorityIPv6) { - if (likely(isxdigit(c) || c == ':')) { - __FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host); - } else if(c == ']') { - __FSM_MOVE_f(Req_UriAuthorityEnd, &req->host); - } - TFW_PARSER_BLOCK(Req_UriAuthorityIPv6); - } - - __FSM_STATE(Req_UriAuthorityResetHost) { - if (likely(isalnum(c) || c == '.' || c == '-')) { - __msg_field_open(&req->host, p); - __FSM_MOVE_f(Req_UriAuthority, &req->host); - } else if (c == '[') { - __msg_field_open(&req->host, p); - __FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host); - } - __FSM_JMP(Req_UriAuthorityEnd); - } - - __FSM_STATE(Req_UriAuthorityEnd) { - if (c == ':') - __FSM_MOVE_f(Req_UriPort, &req->host); - /* Authority End */ - __msg_field_finish(&req->host, p); - T_DBG3("Userinfo len = %i, host len = %i\n", - (int)req->userinfo.len, (int)req->host.len); - if (likely(c == '/')) { - __FSM_JMP(Req_UriMark); - } - else if (c == ' ') { - __FSM_MOVE_nofixup(Req_HttpVer); - } - TFW_PARSER_BLOCK(Req_UriAuthorityEnd); - } - - /* Host port in URI */ - __FSM_STATE(Req_UriPort) { - if (likely(isdigit(c))) - __FSM_MOVE_f(Req_UriPort, &req->host); - __msg_field_finish(&req->host, p); - if (likely(c == '/')) { - __FSM_JMP(Req_UriMark); - } - else if (c == ' ') { - __FSM_MOVE_nofixup(Req_HttpVer); - } - TFW_PARSER_BLOCK(Req_UriPort); + __FSM_JMP(Req_UriRareForms); } __FSM_STATE(Req_UriMark, hot) { @@ -4735,13 +4623,174 @@ Req_Method_1CharStep: __attribute__((cold)) __FSM_MOVE_nofixup_n(Req_MUSpace, 0); } - /* process URI 
scheme: "http://" */ - __FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt); - __FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt); - __FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp); - __FSM_TX_nofixup(Req_UriSchHttp, ':', Req_UriSchHttpColon); - __FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash); - __FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart); + __FSM_STATE(Req_UriRareForms, cold) { + /* There is also authority form as in RFC7230#section-5.3.3, + * but it is only used with CONNECT, which is not supported */ + /* Asterisk form as in RFC7230#section-5.3.4 */ + if (req->method == TFW_HTTP_METH_OPTIONS && c == '*') + __FSM_MOVE_nofixup(Req_UriMarkEnd); + /* Absolute form as in RFC7230#section-5.3.2 */ + __FSM_JMP(Req_UriAbsoluteForm); + } + + __FSM_STATE(Req_UriAbsoluteForm, cold) { + /* Rare form, so there is no need to speed up matching with + * fast path prefixing */ + if (likely(TFW_LC(c) == 'h')) + __FSM_MOVE_nofixup(Req_UriSchH); + else if (TFW_LC(c) == 'w') + __FSM_MOVE_nofixup(Req_UriSchW); + + TFW_PARSER_BLOCK(Req_UriAbsoluteForm); + } + + /* process URI scheme */ + /* path for 'http://' and 'https://' */ + __FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt, cold); + __FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt, cold); + __FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp, cold); + __FSM_STATE(Req_UriSchHttp, cold) { + switch (TFW_LC(c)) { + case ':': + __FSM_MOVE_nofixup(Req_UriSchHttpColon); + case 's': + __FSM_MOVE_nofixup(Req_UriSchHttps); + } + TFW_PARSER_BLOCK(Req_UriSchHttp); + } + /* http */ + __FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash, + cold); + __FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart, + cold); + /* https */ + __FSM_TX_nofixup(Req_UriSchHttps, ':', Req_UriSchHttpsColon, cold); + __FSM_TX_nofixup(Req_UriSchHttpsColon, '/', Req_UriSchHttpsColonSlash, + cold); + __FSM_TX_nofixup(Req_UriSchHttpsColonSlash, '/', Req_UriAuthorityStart, + cold); 
+ /* path for 'ws://' and 'wss://' */ + __FSM_TX_LC_nofixup(Req_UriSchW, 's', Req_UriSchWs, cold); + __FSM_STATE(Req_UriSchWs, cold) { + switch (TFW_LC(c)) { + case ':': + __FSM_MOVE_nofixup(Req_UriSchWsColon); + case 's': + __FSM_MOVE_nofixup(Req_UriSchWss); + } + TFW_PARSER_BLOCK(Req_UriSchWs); + } + /* ws */ + __FSM_TX_nofixup(Req_UriSchWsColon, '/', Req_UriSchWsColonSlash, cold); + __FSM_TX_nofixup(Req_UriSchWsColonSlash, '/', Req_UriAuthorityStart, + cold); + /* wss */ + __FSM_TX_nofixup(Req_UriSchWss, ':', Req_UriSchWssColon, cold); + __FSM_TX_nofixup(Req_UriSchWssColon, '/', Req_UriSchWssColonSlash, + cold); + __FSM_TX_nofixup(Req_UriSchWssColonSlash, '/', Req_UriAuthorityStart, + cold); + + /* + * URI host part. + * RFC 3986 chapter 3.2: authority = [userinfo@]host[:port] + * + * Authority parsing: it can be "host" or "userinfo@host" (port is + * parsed later). At the beginning we don't know, which of variants we + * have. So we fill req->host, and if we get '@', we copy host to + * req->userinfo, reset req->host and fill it. + */ + __FSM_STATE(Req_UriAuthorityStart, cold) { + if (likely(isalnum(c) || c == '.' || c == '-')) { + __msg_field_open(&req->host, p); + __FSM_MOVE_f(Req_UriAuthority, &req->host); + } else if (likely(c == '/')) { + /* + * The case where "Host:" header value is empty. + * A special TfwStr{} string is created that has + * a valid pointer and the length of zero. + */ + T_DBG3("Handling http:///path\n"); + tfw_http_msg_set_str_data(msg, &req->host, p); + req->host.flags |= TFW_STR_COMPLETE; + __FSM_JMP(Req_UriMark); + } else if (c == '[') { + __msg_field_open(&req->host, p); + __FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host); + } + TFW_PARSER_BLOCK(Req_UriAuthorityStart); + } + + __FSM_STATE(Req_UriAuthority, cold) { + if (likely(isalnum(c) || c == '.' 
|| c == '-' || c == '@')) { + if (unlikely(c == '@')) { + if (!TFW_STR_EMPTY(&req->userinfo)) { + T_DBG("Second '@' in authority\n"); + TFW_PARSER_BLOCK(Req_UriAuthority); + } + T_DBG3("Authority contains userinfo\n"); + /* copy current host to userinfo */ + req->userinfo = req->host; + __msg_field_finish(&req->userinfo, p); + TFW_STR_INIT(&req->host); + + __FSM_MOVE_nofixup(Req_UriAuthorityResetHost); + } + + __FSM_MOVE_f(Req_UriAuthority, &req->host); + } + __FSM_JMP(Req_UriAuthorityEnd); + } + + __FSM_STATE(Req_UriAuthorityIPv6, cold) { + if (likely(isxdigit(c) || c == ':')) { + __FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host); + } else if(c == ']') { + __FSM_MOVE_f(Req_UriAuthorityEnd, &req->host); + } + TFW_PARSER_BLOCK(Req_UriAuthorityIPv6); + } + + __FSM_STATE(Req_UriAuthorityResetHost, cold) { + if (likely(isalnum(c) || c == '.' || c == '-')) { + __msg_field_open(&req->host, p); + __FSM_MOVE_f(Req_UriAuthority, &req->host); + } else if (c == '[') { + __msg_field_open(&req->host, p); + __FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host); + } + __FSM_JMP(Req_UriAuthorityEnd); + } + + __FSM_STATE(Req_UriAuthorityEnd, cold) { + if (c == ':') + __FSM_MOVE_f(Req_UriPort, &req->host); + /* Authority End */ + __msg_field_finish(&req->host, p); + T_DBG3("Userinfo len = %i, host len = %i\n", + (int)req->userinfo.len, (int)req->host.len); + if (likely(c == '/')) { + __FSM_JMP(Req_UriMark); + } + else if (c == ' ') { + __FSM_MOVE_nofixup(Req_HttpVer); + } + TFW_PARSER_BLOCK(Req_UriAuthorityEnd); + } + + /* Host port in URI */ + __FSM_STATE(Req_UriPort, cold) { + if (likely(isdigit(c))) + __FSM_MOVE_f(Req_UriPort, &req->host); + __msg_field_finish(&req->host, p); + if (likely(c == '/')) { + __FSM_JMP(Req_UriMark); + } + else if (c == ' ') { + __FSM_MOVE_nofixup(Req_HttpVer); + } + TFW_PARSER_BLOCK(Req_UriPort); + } /* Parse HTTP version (1.1 and 1.0 are supported). 
*/ __FSM_TX_nofixup(Req_HttpVerT1, 'T', Req_HttpVerT2); @@ -5731,7 +5780,7 @@ do { \ __FSM_I_field_chunk_flags(fld, TFW_STR_HDR_VALUE); \ __FSM_EXIT(CSTR_POSTPONE); \ } - + #define H2_TRY_STR_LAMBDA_fixup(str, fld, lambda, curr_st, next_st) \ H2_TRY_STR_2LAMBDA_fixup(str, fld, {}, lambda, curr_st, next_st) diff --git a/fw/t/unit/test_http_parser.c b/fw/t/unit/test_http_parser.c index 2e6515a77..141524062 100644 --- a/fw/t/unit/test_http_parser.c +++ b/fw/t/unit/test_http_parser.c @@ -657,6 +657,25 @@ TEST(http_parser, parses_req_uri) EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com"); } + FOR_REQ("GET https://userame@natsys-lab.com HTTP/1.1\r\n\r\n") + { + EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com"); + } + + FOR_REQ("GET ws://userame@natsys-lab.com HTTP/1.1\r\n\r\n") + { + EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com"); + } + + FOR_REQ("GET wss://userame@natsys-lab.com HTTP/1.1\r\n\r\n") + { + EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com"); + } + + FOR_REQ("OPTIONS * HTTP/1.1\r\n\r\n"); + + EXPECT_BLOCK_REQ("GET sch://userame@natsys-lab.com HTTP/1.1\r\n\r\n"); + EXPECT_BLOCK_REQ("GET \x7f HTTP/1.1\r\n" "Host: test\r\n" "\r\n");