From b2052a433fd151d7d745ee9c5b27a2031db1dc32 Mon Sep 17 00:00:00 2001 From: "Adam C. Foltzer" Date: Wed, 8 Jun 2022 15:57:33 -0700 Subject: [PATCH] feat(ext): support non-canonical HTTP/1 reason phrases (#2792) Add a new extension type `hyper::ext::ReasonPhrase` gated by either the `ffi` or `http1` Cargo features. When enabled, store any non-canonical reason phrases in this extension when parsing responses, and write this reason phrase instead of the canonical reason phrase when emitting responses. Reason phrases are a disused corner of the spec that implementations ought to treat as opaque blobs of bytes. Unfortunately, real-world traffic sometimes does depend on being able to inspect and manipulate them. Non-canonical reason phrases are checked for validity at runtime to prevent invalid and dangerous characters from being emitted when writing responses. An `unsafe` escape hatch is present for hyper itself to create reason phrases that have been parsed (and therefore implicitly validated) by httparse. --- src/ext.rs | 5 + src/ext/h1_reason_phrase.rs | 221 ++++++++++++++++++++++++++++++++++++ src/ffi/http_types.rs | 7 +- src/proto/h1/role.rs | 51 +++++---- tests/client.rs | 56 +++++++++ tests/server.rs | 43 +++++++ 6 files changed, 354 insertions(+), 29 deletions(-) create mode 100644 src/ext/h1_reason_phrase.rs diff --git a/src/ext.rs b/src/ext.rs index 0e257a61b7..6c54821e9d 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -11,6 +11,11 @@ use std::collections::HashMap; #[cfg(feature = "http2")] use std::fmt; +#[cfg(any(feature = "http1", feature = "ffi"))] +mod h1_reason_phrase; +#[cfg(any(feature = "http1", feature = "ffi"))] +pub use h1_reason_phrase::ReasonPhrase; + #[cfg(feature = "http2")] /// Represents the `:protocol` pseudo-header used by /// the [Extended CONNECT Protocol]. diff --git a/src/ext/h1_reason_phrase.rs b/src/ext/h1_reason_phrase.rs new file mode 100644 index 0000000000..021b632b6d --- /dev/null +++ b/src/ext/h1_reason_phrase.rs @@ -0,0 +1,221 @@ +use std::convert::TryFrom; + +use bytes::Bytes; + +/// A reason phrase in an HTTP/1 response. +/// +/// # Clients +/// +/// For clients, a `ReasonPhrase` will be present in the extensions of the `http::Response` returned +/// for a request if the reason phrase is different from the canonical reason phrase for the +/// response's status code. For example, if a server returns `HTTP/1.1 200 Awesome`, the +/// `ReasonPhrase` will be present and contain `Awesome`, but if a server returns `HTTP/1.1 200 OK`, +/// the response will not contain a `ReasonPhrase`. +/// +/// ```no_run +/// # #[cfg(all(feature = "tcp", feature = "client", feature = "http1"))] +/// # async fn fake_fetch() -> hyper::Result<()> { +/// use hyper::{Client, Uri}; +/// use hyper::ext::ReasonPhrase; +/// +/// let res = Client::new().get(Uri::from_static("http://example.com/non_canonical_reason")).await?; +/// +/// // Print out the non-canonical reason phrase, if it has one... +/// if let Some(reason) = res.extensions().get::() { +/// println!("non-canonical reason: {}", std::str::from_utf8(reason.as_bytes()).unwrap()); +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// # Servers +/// +/// When a `ReasonPhrase` is present in the extensions of the `http::Response` written by a server, +/// its contents will be written in place of the canonical reason phrase when responding via HTTP/1. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ReasonPhrase(Bytes); + +impl ReasonPhrase { + /// Gets the reason phrase as bytes. + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + /// Converts a static byte slice to a reason phrase. + pub fn from_static(reason: &'static [u8]) -> Self { + // TODO: this can be made const once MSRV is >= 1.57.0 + if find_invalid_byte(reason).is_some() { + panic!("invalid byte in static reason phrase"); + } + Self(Bytes::from_static(reason)) + } + + /// Converts a `Bytes` directly into a `ReasonPhrase` without validating. + /// + /// Use with care; invalid bytes in a reason phrase can cause serious security problems if + /// emitted in a response. + pub unsafe fn from_bytes_unchecked(reason: Bytes) -> Self { + Self(reason) + } +} + +impl TryFrom<&[u8]> for ReasonPhrase { + type Error = InvalidReasonPhrase; + + fn try_from(reason: &[u8]) -> Result { + if let Some(bad_byte) = find_invalid_byte(reason) { + Err(InvalidReasonPhrase { bad_byte }) + } else { + Ok(Self(Bytes::copy_from_slice(reason))) + } + } +} + +impl TryFrom> for ReasonPhrase { + type Error = InvalidReasonPhrase; + + fn try_from(reason: Vec) -> Result { + if let Some(bad_byte) = find_invalid_byte(&reason) { + Err(InvalidReasonPhrase { bad_byte }) + } else { + Ok(Self(Bytes::from(reason))) + } + } +} + +impl TryFrom for ReasonPhrase { + type Error = InvalidReasonPhrase; + + fn try_from(reason: String) -> Result { + if let Some(bad_byte) = find_invalid_byte(reason.as_bytes()) { + Err(InvalidReasonPhrase { bad_byte }) + } else { + Ok(Self(Bytes::from(reason))) + } + } +} + +impl TryFrom for ReasonPhrase { + type Error = InvalidReasonPhrase; + + fn try_from(reason: Bytes) -> Result { + if let Some(bad_byte) = find_invalid_byte(&reason) { + Err(InvalidReasonPhrase { bad_byte }) + } else { + Ok(Self(reason)) + } + } +} + +impl Into for ReasonPhrase { + fn into(self) -> Bytes { + self.0 + } +} + +impl AsRef<[u8]> for ReasonPhrase { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +/// Error indicating an invalid byte when constructing a `ReasonPhrase`. +/// +/// See [the spec][spec] for details on allowed bytes. +/// +/// [spec]: https://httpwg.org/http-core/draft-ietf-httpbis-messaging-latest.html#rfc.section.4.p.7 +#[derive(Debug)] +pub struct InvalidReasonPhrase { + bad_byte: u8, +} + +impl std::fmt::Display for InvalidReasonPhrase { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Invalid byte in reason phrase: {}", self.bad_byte) + } +} + +impl std::error::Error for InvalidReasonPhrase {} + +const fn is_valid_byte(b: u8) -> bool { + // See https://www.rfc-editor.org/rfc/rfc5234.html#appendix-B.1 + const fn is_vchar(b: u8) -> bool { + 0x21 <= b && b <= 0x7E + } + + // See https://httpwg.org/http-core/draft-ietf-httpbis-semantics-latest.html#fields.values + // + // The 0xFF comparison is technically redundant, but it matches the text of the spec more + // clearly and will be optimized away. + #[allow(unused_comparisons)] + const fn is_obs_text(b: u8) -> bool { + 0x80 <= b && b <= 0xFF + } + + // See https://httpwg.org/http-core/draft-ietf-httpbis-messaging-latest.html#rfc.section.4.p.7 + b == b'\t' || b == b' ' || is_vchar(b) || is_obs_text(b) +} + +const fn find_invalid_byte(bytes: &[u8]) -> Option { + let mut i = 0; + while i < bytes.len() { + let b = bytes[i]; + if !is_valid_byte(b) { + return Some(b); + } + i += 1; + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn basic_valid() { + const PHRASE: &'static [u8] = b"OK"; + assert_eq!(ReasonPhrase::from_static(PHRASE).as_bytes(), PHRASE); + assert_eq!(ReasonPhrase::try_from(PHRASE).unwrap().as_bytes(), PHRASE); + } + + #[test] + fn empty_valid() { + const PHRASE: &'static [u8] = b""; + assert_eq!(ReasonPhrase::from_static(PHRASE).as_bytes(), PHRASE); + assert_eq!(ReasonPhrase::try_from(PHRASE).unwrap().as_bytes(), PHRASE); + } + + #[test] + fn obs_text_valid() { + const PHRASE: &'static [u8] = b"hyp\xe9r"; + assert_eq!(ReasonPhrase::from_static(PHRASE).as_bytes(), PHRASE); + assert_eq!(ReasonPhrase::try_from(PHRASE).unwrap().as_bytes(), PHRASE); + } + + const NEWLINE_PHRASE: &'static [u8] = b"hyp\ner"; + + #[test] + #[should_panic] + fn newline_invalid_panic() { + ReasonPhrase::from_static(NEWLINE_PHRASE); + } + + #[test] + fn newline_invalid_err() { + assert!(ReasonPhrase::try_from(NEWLINE_PHRASE).is_err()); + } + + const CR_PHRASE: &'static [u8] = b"hyp\rer"; + + #[test] + #[should_panic] + fn cr_invalid_panic() { + ReasonPhrase::from_static(CR_PHRASE); + } + + #[test] + fn cr_invalid_err() { + assert!(ReasonPhrase::try_from(CR_PHRASE).is_err()); + } +} diff --git a/src/ffi/http_types.rs b/src/ffi/http_types.rs index 868d58e72f..ea10f139cb 100644 --- a/src/ffi/http_types.rs +++ b/src/ffi/http_types.rs @@ -6,7 +6,7 @@ use super::body::{hyper_body, hyper_buf}; use super::error::hyper_code; use super::task::{hyper_task_return_type, AsTaskType}; use super::{UserDataPointer, HYPER_ITER_CONTINUE}; -use crate::ext::{HeaderCaseMap, OriginalHeaderOrder}; +use crate::ext::{HeaderCaseMap, OriginalHeaderOrder, ReasonPhrase}; use crate::header::{HeaderName, HeaderValue}; use crate::{Body, HeaderMap, Method, Request, Response, Uri}; @@ -25,9 +25,6 @@ pub struct hyper_headers { orig_order: OriginalHeaderOrder, } -#[derive(Debug)] -pub(crate) struct ReasonPhrase(pub(crate) Bytes); - pub(crate) struct RawHeaders(pub(crate) hyper_buf); pub(crate) struct OnInformational { @@ -365,7 +362,7 @@ impl hyper_response { fn reason_phrase(&self) -> &[u8] { if let Some(reason) = self.0.extensions().get::() { - return &reason.0; + return reason.as_bytes(); } if let Some(reason) = self.0.status().canonical_reason() { diff --git a/src/proto/h1/role.rs b/src/proto/h1/role.rs index 76fc16d15e..d6272eeab2 100644 --- a/src/proto/h1/role.rs +++ b/src/proto/h1/role.rs @@ -1,7 +1,6 @@ use std::fmt::{self, Write}; use std::mem::MaybeUninit; -#[cfg(any(test, feature = "server", feature = "ffi"))] use bytes::Bytes; use bytes::BytesMut; #[cfg(feature = "server")] @@ -377,7 +376,13 @@ impl Http1Transaction for Server { let init_cap = 30 + msg.head.headers.len() * AVERAGE_HEADER_SIZE; dst.reserve(init_cap); - if msg.head.version == Version::HTTP_11 && msg.head.subject == StatusCode::OK { + + let custom_reason_phrase = msg.head.extensions.get::(); + + if msg.head.version == Version::HTTP_11 + && msg.head.subject == StatusCode::OK + && custom_reason_phrase.is_none() + { extend(dst, b"HTTP/1.1 200 OK\r\n"); } else { match msg.head.version { @@ -392,15 +397,21 @@ impl Http1Transaction for Server { extend(dst, msg.head.subject.as_str().as_bytes()); extend(dst, b" "); - // a reason MUST be written, as many parsers will expect it. - extend( - dst, - msg.head - .subject - .canonical_reason() - .unwrap_or("") - .as_bytes(), - ); + + if let Some(reason) = custom_reason_phrase { + extend(dst, reason.as_bytes()); + } else { + // a reason MUST be written, as many parsers will expect it. + extend( + dst, + msg.head + .subject + .canonical_reason() + .unwrap_or("") + .as_bytes(), + ); + } + extend(dst, b"\r\n"); } @@ -944,9 +955,6 @@ impl Http1Transaction for Client { trace!("Response.parse Complete({})", len); let status = StatusCode::from_u16(res.code.unwrap())?; - #[cfg(not(feature = "ffi"))] - let reason = (); - #[cfg(feature = "ffi")] let reason = { let reason = res.reason.unwrap(); // Only save the reason phrase if it isn't the canonical reason @@ -970,12 +978,7 @@ impl Http1Transaction for Client { Err(httparse::Error::Version) if ctx.h09_responses => { trace!("Response.parse accepted HTTP/0.9 response"); - #[cfg(not(feature = "ffi"))] - let reason = (); - #[cfg(feature = "ffi")] - let reason = None; - - (0, StatusCode::OK, reason, Version::HTTP_09, 0) + (0, StatusCode::OK, None, Version::HTTP_09, 0) } Err(e) => return Err(e.into()), } @@ -1058,12 +1061,12 @@ impl Http1Transaction for Client { extensions.insert(header_order); } - #[cfg(feature = "ffi")] if let Some(reason) = reason { - extensions.insert(crate::ffi::ReasonPhrase(reason)); + // Safety: httparse ensures that only valid reason phrase bytes are present in this + // field. + let reason = unsafe { crate::ext::ReasonPhrase::from_bytes_unchecked(reason) }; + extensions.insert(reason); } - #[cfg(not(feature = "ffi"))] - drop(reason); #[cfg(feature = "ffi")] if ctx.raw_headers { diff --git a/tests/client.rs b/tests/client.rs index 417e9bf2d9..88b3ee0d4f 100644 --- a/tests/client.rs +++ b/tests/client.rs @@ -2271,6 +2271,62 @@ mod conn { future::join(server, client).await; } + #[tokio::test] + async fn get_custom_reason_phrase() { + let _ = ::pretty_env_logger::try_init(); + let listener = TkTcpListener::bind(SocketAddr::from(([127, 0, 0, 1], 0))) + .await + .unwrap(); + let addr = listener.local_addr().unwrap(); + + let server = async move { + let mut sock = listener.accept().await.unwrap().0; + let mut buf = [0; 4096]; + let n = sock.read(&mut buf).await.expect("read 1"); + + // Notably: + // - Just a path, since just a path was set + // - No host, since no host was set + let expected = "GET /a HTTP/1.1\r\n\r\n"; + assert_eq!(s(&buf[..n]), expected); + + sock.write_all(b"HTTP/1.1 200 Alright\r\nContent-Length: 0\r\n\r\n") + .await + .unwrap(); + }; + + let client = async move { + let tcp = tcp_connect(&addr).await.expect("connect"); + let (mut client, conn) = conn::handshake(tcp).await.expect("handshake"); + + tokio::task::spawn(async move { + conn.await.expect("http conn"); + }); + + let req = Request::builder() + .uri("/a") + .body(Default::default()) + .unwrap(); + let mut res = client.send_request(req).await.expect("send_request"); + assert_eq!(res.status(), hyper::StatusCode::OK); + assert_eq!( + res.extensions() + .get::() + .expect("custom reason phrase is present") + .as_bytes(), + &b"Alright"[..] + ); + assert_eq!(res.headers().len(), 1); + assert_eq!( + res.headers().get(http::header::CONTENT_LENGTH).unwrap(), + "0" + ); + assert!(res.body_mut().next().await.is_none()); + }; + + future::join(server, client).await; + } + #[test] fn incoming_content_length() { use hyper::body::HttpBody; diff --git a/tests/server.rs b/tests/server.rs index 3362b51855..af5b5e9961 100644 --- a/tests/server.rs +++ b/tests/server.rs @@ -1,6 +1,7 @@ #![deny(warnings)] #![deny(rust_2018_idioms)] +use std::convert::TryInto; use std::future::Future; use std::io::{self, Read, Write}; use std::net::TcpListener as StdTcpListener; @@ -383,6 +384,33 @@ mod response_body_lengths { } } +#[test] +fn get_response_custom_reason_phrase() { + let _ = pretty_env_logger::try_init(); + let server = serve(); + server.reply().reason_phrase("Cool"); + let mut req = connect(server.addr()); + req.write_all( + b"\ + GET / HTTP/1.1\r\n\ + Host: example.domain\r\n\ + Connection: close\r\n\ + \r\n\ + ", + ) + .unwrap(); + + let mut response = String::new(); + req.read_to_string(&mut response).unwrap(); + + let mut lines = response.lines(); + assert_eq!(lines.next(), Some("HTTP/1.1 200 Cool")); + + let mut lines = lines.skip_while(|line| !line.is_empty()); + assert_eq!(lines.next(), Some("")); + assert_eq!(lines.next(), None); +} + #[test] fn get_chunked_response_with_ka() { let foo_bar = b"foo bar baz"; @@ -2671,6 +2699,17 @@ impl<'a> ReplyBuilder<'a> { self } + fn reason_phrase(self, reason: &str) -> Self { + self.tx + .lock() + .unwrap() + .send(Reply::ReasonPhrase( + reason.as_bytes().try_into().expect("reason phrase"), + )) + .unwrap(); + self + } + fn version(self, version: hyper::Version) -> Self { self.tx .lock() @@ -2744,6 +2783,7 @@ struct TestService { #[derive(Debug)] enum Reply { Status(hyper::StatusCode), + ReasonPhrase(hyper::ext::ReasonPhrase), Version(hyper::Version), Header(HeaderName, HeaderValue), Body(hyper::Body), @@ -2799,6 +2839,9 @@ impl TestService { Reply::Status(s) => { *res.status_mut() = s; } + Reply::ReasonPhrase(reason) => { + res.extensions_mut().insert(reason); + } Reply::Version(v) => { *res.version_mut() = v; }