Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refine and strengthen diagnostics for problematic URLs #1342

Merged
merged 25 commits into from Apr 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6cbe65d
Reorder tests and add username assertion placeholders
EliahKagan Apr 11, 2024
1bdfdd9
Prepare for adding Url::user_argument_safe
EliahKagan Apr 11, 2024
db40382
feat: Add `Url::user_argument_safe()`
EliahKagan Apr 11, 2024
169a698
Expand and introduce tests of more combinations
EliahKagan Apr 11, 2024
e8e2463
Clarify circumstances when *_argument_safe should be used
EliahKagan Apr 11, 2024
0356345
Fix ambiguous host journey test snapshot names
EliahKagan Apr 11, 2024
8c94fd4
Add ambiguous path journey tests
EliahKagan Apr 12, 2024
a38f646
Add expected snapshot for ambiguous path `receive`
EliahKagan Apr 12, 2024
d639b3b
Add ambiguous username journey tests
EliahKagan Apr 12, 2024
a51003d
Add expected snapshots for ambiguous username journey tests
EliahKagan Apr 12, 2024
5428609
Add ambiguous user unit tests, and more for hostname
EliahKagan Apr 12, 2024
f56ad39
fix: Prevent usernames with leading `-` from being passed to SSH
EliahKagan Apr 12, 2024
2e7517e
Comment gix_transport::client::blocking_io::ssh::connect
EliahKagan Apr 12, 2024
beef8d2
Give `looks_like_argument` a more accurate name
EliahKagan Apr 12, 2024
29941e6
Sketch *_as_argument methods and supporting enum
EliahKagan Apr 12, 2024
5457998
feat: Add `ArgumentSafety` and `Url::*_as_argument()` methods
EliahKagan Apr 12, 2024
1b0af07
Give `ArgumentSafety` traits; test `*_as_argument` methods
EliahKagan Apr 12, 2024
4dda375
Start on using {user,host}_as_argument in prepare_invocation
EliahKagan Apr 12, 2024
2911623
Reallow `user@-arg...` in prepare_invocation
EliahKagan Apr 12, 2024
524739b
Try, so far unsuccessfully, to add missing `-G` test
EliahKagan Apr 12, 2024
902367f
Test that leading-`-` host names aren't used in `-G` check
EliahKagan Apr 13, 2024
cf59f57
Use `Url::host_as_argument()` in `ssh::connect()`
EliahKagan Apr 12, 2024
03fb64a
(Re)add a short, more specific comment about user@
EliahKagan Apr 13, 2024
09311b0
refactor `gix-url`
Byron Apr 13, 2024
996310b
refactor `gix-transport` with minor edits to comments
Byron Apr 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
23 changes: 21 additions & 2 deletions gix-transport/src/client/blocking_io/file.rs
Expand Up @@ -291,7 +291,7 @@ pub fn connect(
mod tests {
mod ssh {
mod connect {
use crate::{client::blocking_io::ssh::connect, Protocol};
use crate::{client::blocking_io::ssh, Protocol};

#[test]
fn path() {
Expand All @@ -304,10 +304,29 @@ mod tests {
("user@host.xy:~/repo", "~/repo"),
] {
let url = gix_url::parse((*url).into()).expect("valid url");
let cmd = connect(url, Protocol::V1, Default::default(), false).expect("parse success");
let cmd = ssh::connect(url, Protocol::V1, Default::default(), false).expect("parse success");
assert_eq!(cmd.path, expected, "the path will be substituted by the remote shell");
}
}

#[test]
fn ambiguous_host_disallowed() {
for url in [
"ssh://-oProxyCommand=open$IFS-aCalculator/foo",
"user@-oProxyCommand=open$IFS-aCalculator:username/repo",
] {
let url = gix_url::parse((*url).into()).expect("valid url");
let options = ssh::connect::Options {
command: Some("unrecognized".into()),
disallow_shell: false,
kind: None,
};
assert!(matches!(
ssh::connect(url, Protocol::V1, options, false),
Err(ssh::Error::AmbiguousHostName { host }) if host == "-oProxyCommand=open$IFS-aCalculator",
));
}
}
}
}
}
12 changes: 9 additions & 3 deletions gix-transport/src/client/blocking_io/ssh/mod.rs
@@ -1,5 +1,7 @@
use std::process::Stdio;

use gix_url::ArgumentSafety::*;

use crate::{client::blocking_io, Protocol};

/// The error used in [`connect()`].
Expand Down Expand Up @@ -42,6 +44,8 @@ pub mod invocation {
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("Username '{user}' could be mistaken for a command-line argument")]
AmbiguousUserName { user: String },
#[error("Host name '{host}' could be mistaken for a command-line argument")]
AmbiguousHostName { host: String },
#[error("The 'Simple' ssh variant doesn't support {function}")]
Expand Down Expand Up @@ -116,9 +120,11 @@ pub fn connect(
.stdin(Stdio::null())
.with_shell()
.arg("-G")
.arg(url.host_argument_safe().ok_or_else(|| Error::AmbiguousHostName {
host: url.host().expect("set in ssh urls").into(),
})?),
.arg(match url.host_as_argument() {
Usable(host) => host,
Dangerous(host) => Err(Error::AmbiguousHostName { host: host.into() })?,
Absent => panic!("BUG: host should always be present in SSH URLs"),
}),
);
gix_features::trace::debug!(cmd = ?cmd, "invoking `ssh` for feature check");
kind = if cmd.status().ok().map_or(false, |status| status.success()) {
Expand Down
30 changes: 15 additions & 15 deletions gix-transport/src/client/blocking_io/ssh/program_kind.rs
Expand Up @@ -2,6 +2,8 @@ use std::{ffi::OsStr, io::ErrorKind};

use bstr::{BString, ByteSlice, ByteVec};

use gix_url::ArgumentSafety::*;

use crate::{
client::{ssh, ssh::ProgramKind},
Protocol,
Expand Down Expand Up @@ -60,23 +62,21 @@ impl ProgramKind {
}
}
};
let host_as_ssh_arg = match url.user() {
Some(user) => {
let host = url.host().expect("present in ssh urls");
format!("{user}@{host}")
}
None => {
let host = url
.host_argument_safe()
.ok_or_else(|| ssh::invocation::Error::AmbiguousHostName {
host: url.host().expect("ssh host always set").into(),
})?;
host.into()
}

let host_maybe_with_user_as_ssh_arg = match (url.user_as_argument(), url.host_as_argument()) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a great use of match and one of the main reasons Rust is so powerful! Never again can one miss handling a possible program state, as long as one remembers to match on them that is :).

(Usable(user), Usable(host)) => format!("{user}@{host}"),
(Usable(user), Dangerous(host)) => format!("{user}@{host}"), // The `user@` makes it safe.
(Absent, Usable(host)) => host.into(),
(Dangerous(user), _) => Err(ssh::invocation::Error::AmbiguousUserName { user: user.into() })?,
(_, Dangerous(host)) => Err(ssh::invocation::Error::AmbiguousHostName { host: host.into() })?,
(_, Absent) => panic!("BUG: host should always be present in SSH URLs"),
};

// Try to force ssh to yield english messages (for parsing later)
Ok(prepare.arg(host_as_ssh_arg).env("LANG", "C").env("LC_ALL", "C"))
Ok(prepare
.arg(host_maybe_with_user_as_ssh_arg)
// Try to force ssh to yield English messages (for parsing later).
.env("LANG", "C")
.env("LC_ALL", "C"))
}

/// Note that the caller has to assure that the ssh program is launched in English by setting the locale.
Expand Down
45 changes: 43 additions & 2 deletions gix-transport/src/client/blocking_io/ssh/tests.rs
Expand Up @@ -144,22 +144,63 @@ mod program_kind {
assert!(call_args(kind, "ssh://user@host:43/p", Protocol::V2).ends_with("-P 43 user@host"));
}
}

#[test]
fn ambiguous_host_is_allowed_with_user() {
fn ambiguous_user_is_disallowed_explicit_ssh() {
assert!(matches!(
try_call(ProgramKind::Ssh, "ssh://-arg@host/p", Protocol::V2),
Err(ssh::invocation::Error::AmbiguousUserName { user }) if user == "-arg"
))
}

#[test]
fn ambiguous_user_is_disallowed_implicit_ssh() {
assert!(matches!(
try_call(ProgramKind::Ssh, "-arg@host:p/q", Protocol::V2),
Err(ssh::invocation::Error::AmbiguousUserName { user }) if user == "-arg"
))
}

#[test]
fn ambiguous_host_is_allowed_with_user_explicit_ssh() {
assert_eq!(
call_args(ProgramKind::Ssh, "ssh://user@-arg/p", Protocol::V2),
joined(&["ssh", "-o", "SendEnv=GIT_PROTOCOL", "user@-arg"])
);
}

#[test]
fn ambiguous_host_is_disallowed() {
fn ambiguous_host_is_allowed_with_user_implicit_ssh() {
assert_eq!(
call_args(ProgramKind::Ssh, "user@-arg:p/q", Protocol::V2),
joined(&["ssh", "-o", "SendEnv=GIT_PROTOCOL", "user@-arg"])
);
}

#[test]
fn ambiguous_host_is_disallowed_without_user() {
assert!(matches!(
try_call(ProgramKind::Ssh, "ssh://-arg/p", Protocol::V2),
Err(ssh::invocation::Error::AmbiguousHostName { host }) if host == "-arg"
));
}

#[test]
fn ambiguous_user_and_host_remain_disallowed_together_explicit_ssh() {
assert!(matches!(
try_call(ProgramKind::Ssh, "ssh://-arg@host/p", Protocol::V2),
Err(ssh::invocation::Error::AmbiguousUserName { user }) if user == "-arg"
));
}

#[test]
fn ambiguous_user_and_host_remain_disallowed_together_implicit_ssh() {
assert!(matches!(
try_call(ProgramKind::Ssh, "-userarg@-hostarg:p/q", Protocol::V2),
Err(ssh::invocation::Error::AmbiguousUserName { user }) if user == "-userarg"
));
}

#[test]
fn simple_cannot_handle_any_arguments() {
assert!(matches!(
Expand Down
108 changes: 91 additions & 17 deletions gix-url/src/lib.rs
Expand Up @@ -55,6 +55,27 @@ pub fn expand_path(user: Option<&expand_path::ForUser>, path: &BStr) -> Result<P
})
}

/// Classification of a portion of a URL by whether it is *syntactically* safe to pass as an argument to a command-line program.
///
/// Various parts of URLs can be specified to begin with `-`. If they are used as options to a command-line application
/// such as an SSH client, they will be treated as options rather than as non-option arguments as the developer intended.
/// This is a security risk, because URLs are not always trusted and can often be composed or influenced by an attacker.
/// See <https://secure.phabricator.com/T12961> for details.
///
/// # Security Warning
///
/// This type only expresses known *syntactic* risk. It does not cover other risks, such as passing a personal access
/// token as a username rather than a password in an application that logs usernames.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum ArgumentSafety<'a> {
/// May be safe. There is nothing to pass, so there is nothing dangerous.
Absent,
/// May be safe. The argument does not begin with a `-` and so will not be confused as an option.
Usable(&'a str),
/// Dangerous! Begins with `-` and could be treated as an option. Use the value in error messages only.
Dangerous(&'a str),
}

/// A URL with support for specialized git related capabilities.
///
/// Additionally there is support for [deserialization](Url::from_bytes()) and [serialization](Url::to_bstring()).
Expand Down Expand Up @@ -85,12 +106,12 @@ pub struct Url {
pub port: Option<u16>,
/// The path portion of the URL, usually the location of the git repository.
///
/// # Security-Warning
/// # Security Warning
///
/// URLs allow paths to start with `-` which makes it possible to mask command-line arguments as path which then leads to
/// the invocation of programs from an attacker controlled URL. See <https://secure.phabricator.com/T12961> for details.
///
/// If this value is going to be used in a command-line application, call [Self::path_argument_safe()] instead.
/// If this value is ever going to be passed to a command-line application, call [Self::path_argument_safe()] instead.
pub path: BString,
}

Expand Down Expand Up @@ -164,48 +185,101 @@ impl Url {

/// Access
impl Url {
/// Returns the user mentioned in the url, if present.
/// Return the username mentioned in the URL, if present.
///
/// # Security Warning
///
/// URLs allow usernames to start with `-` which makes it possible to mask command-line arguments as username which then leads to
/// the invocation of programs from an attacker controlled URL. See <https://secure.phabricator.com/T12961> for details.
///
/// If this value is ever going to be passed to a command-line application, call [Self::user_argument_safe()] instead.
pub fn user(&self) -> Option<&str> {
self.user.as_deref()
}
/// Returns the password mentioned in the url, if present.

/// Classify the username of this URL by whether it is safe to pass as a command-line argument.
///
/// Use this method instead of [Self::user()] if the host is going to be passed to a command-line application.
/// If the unsafe and absent cases need not be distinguished, [Self::user_argument_safe()] may also be used.
pub fn user_as_argument(&self) -> ArgumentSafety<'_> {
match self.user() {
Some(user) if looks_like_command_line_option(user.as_bytes()) => ArgumentSafety::Dangerous(user),
Some(user) => ArgumentSafety::Usable(user),
None => ArgumentSafety::Absent,
}
}

/// Return the username of this URL if present *and* if it can't be mistaken for a command-line argument.
///
/// Use this method or [Self::user_as_argument()] instead of [Self::user()] if the host is going to be
/// passed to a command-line application. Prefer [Self::user_as_argument()] unless the unsafe and absent
/// cases need not be distinguished from each other.
pub fn user_argument_safe(&self) -> Option<&str> {
match self.user_as_argument() {
ArgumentSafety::Usable(user) => Some(user),
_ => None,
}
}

/// Return the password mentioned in the url, if present.
pub fn password(&self) -> Option<&str> {
self.password.as_deref()
}
/// Returns the host mentioned in the url, if present.

/// Return the host mentioned in the URL, if present.
///
/// # Security-Warning
/// # Security Warning
///
/// URLs allow hosts to start with `-` which makes it possible to mask command-line arguments as host which then leads to
/// the invocation of programs from an attacker controlled URL. See <https://secure.phabricator.com/T12961> for details.
///
/// If this value is going to be used in a command-line application, call [Self::host_argument_safe()] instead.
/// If this value is ever going to be passed to a command-line application, call [Self::host_as_argument()]
/// or [Self::host_argument_safe()] instead.
pub fn host(&self) -> Option<&str> {
self.host.as_deref()
}

/// Classify the host of this URL by whether it is safe to pass as a command-line argument.
///
/// Use this method instead of [Self::host()] if the host is going to be passed to a command-line application.
/// If the unsafe and absent cases need not be distinguished, [Self::host_argument_safe()] may also be used.
pub fn host_as_argument(&self) -> ArgumentSafety<'_> {
match self.host() {
Some(host) if looks_like_command_line_option(host.as_bytes()) => ArgumentSafety::Dangerous(host),
Some(host) => ArgumentSafety::Usable(host),
None => ArgumentSafety::Absent,
}
}

/// Return the host of this URL if present *and* if it can't be mistaken for a command-line argument.
///
/// Use this method if the host is going to be passed to a command-line application.
/// Use this method or [Self::host_as_argument()] instead of [Self::host()] if the host is going to be
/// passed to a command-line application. Prefer [Self::host_as_argument()] unless the unsafe and absent
/// cases need not be distinguished from each other.
pub fn host_argument_safe(&self) -> Option<&str> {
self.host().filter(|host| !looks_like_argument(host.as_bytes()))
match self.host_as_argument() {
ArgumentSafety::Usable(host) => Some(host),
_ => None,
}
}

/// Return the path of this URL *and* if it can't be mistaken for a command-line argument.
/// Return the path of this URL *if* it can't be mistaken for a command-line argument.
/// Note that it always begins with a slash, which is ignored for this comparison.
///
/// Use this method if the path is going to be passed to a command-line application.
/// Use this method instead of accessing [Self::path] directly if the path is going to be passed to a
/// command-line application, unless it is certain that the leading `/` will always be included.
pub fn path_argument_safe(&self) -> Option<&BStr> {
self.path
.get(1..)
.and_then(|truncated| (!looks_like_argument(truncated)).then_some(self.path.as_ref()))
.and_then(|truncated| (!looks_like_command_line_option(truncated)).then_some(self.path.as_ref()))
}

/// Returns true if the path portion of the url is `/`.
/// Return true if the path portion of the URL is `/`.
pub fn path_is_root(&self) -> bool {
self.path == "/"
}
/// Returns the actual or default port for use according to the url scheme.

/// Return the actual or default port for use according to the URL scheme.
/// Note that there may be no default port either.
pub fn port_or_default(&self) -> Option<u16> {
self.port.or_else(|| {
Expand All @@ -221,13 +295,13 @@ impl Url {
}
}

fn looks_like_argument(b: &[u8]) -> bool {
fn looks_like_command_line_option(b: &[u8]) -> bool {
b.first() == Some(&b'-')
}

/// Transformation
impl Url {
/// Turn a file url like `file://relative` into `file:///root/relative`, hence it assures the url's path component is absolute, using
/// Turn a file URL like `file://relative` into `file:///root/relative`, hence it assures the URL's path component is absolute, using
/// `current_dir` if necessary.
pub fn canonicalized(&self, current_dir: &std::path::Path) -> Result<Self, gix_path::realpath::Error> {
let mut res = self.clone();
Expand Down Expand Up @@ -287,7 +361,7 @@ impl Url {

/// Deserialization
impl Url {
/// Parse a URL from `bytes`
/// Parse a URL from `bytes`.
pub fn from_bytes(bytes: &BStr) -> Result<Self, parse::Error> {
parse(bytes)
}
Expand Down