diff --git a/Cargo.lock b/Cargo.lock index 0997c92e7..69639c7f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -375,6 +375,8 @@ dependencies = [ "palette", "pathdiff", "regex", + "serde", + "serde_json", "shell-words", "smol_str", "structopt", @@ -945,6 +947,9 @@ name = "serde" version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800" +dependencies = [ + "serde_derive", +] [[package]] name = "serde_derive" @@ -959,9 +964,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.61" +version = "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" +checksum = "e277c495ac6cd1a01a58d0a0c574568b4d1ddf14f59965c6a58b8d96400b54f3" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index 9ea4d2119..8f1f8602a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,8 @@ lazy_static = "1.4" palette = "0.6.0" pathdiff = "0.2.1" regex = "1.4.6" +serde = { version = "1.0.118", features = ["derive"] } +serde_json = "1.0.70" shell-words = "1.0.0" smol_str = "0.1.18" structopt = "0.3.25" diff --git a/README.md b/README.md index 8cc6b0a09..8a2aa1f41 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ show = delta log = delta blame = delta + grep = delta reflog = delta [interactive] @@ -64,6 +65,7 @@ Code evolves, and we all spend time studying diffs. 
Delta aims to make this both - [Choosing colors (styles)](#choosing-colors-styles) - [Line numbers](#line-numbers) - [Side-by-side view](#side-by-side-view) + - [Grep](#grep) - ["Features": named groups of settings](#features-named-groups-of-settings) - [Custom themes](#custom-themes) - [diff-highlight and diff-so-fancy emulation](#diff-highlight-and-diff-so-fancy-emulation) @@ -151,6 +153,7 @@ Here's what `git show` can look like with git configured to use delta: - Git style strings (foreground color, background color, font attributes) are supported for >20 stylable elements - Side-by-side view with line-wrapping - Line numbering +- Handles grep output with file paths from `rg`, `git grep`, `grep`, etc - `diff-highlight` and `diff-so-fancy` emulation modes - Stylable box/line decorations to draw attention to commit, file and hunk header sections. - Support for Git's `--color-moved` feature. @@ -410,6 +413,15 @@ In contrast, the long replacement line in the right panel overflows by almost an For control over the details of line wrapping, see `--wrap-max-lines`, `--wrap-left-symbol`, `--wrap-right-symbol`, `--wrap-right-percent`, `--wrap-right-prefix-symbol`, `--inline-hint-style`. Line wrapping was implemented by @th1000s. +### Grep + +Delta applies syntax-highlighting and other enhancements to standard grep output such as from `git grep`, [ripgrep](https://github.com/BurntSushi/ripgrep/) (aka `rg`), grep, etc. +To use with `git grep`, set delta as the pager for `grep` in the `[pager]` section of your gitconfig. See the example at the [top of the page](#get-started). +Output from other grep tools can be piped to delta: e.g. `rg -Hn --color=always`, `grep -Hn --color=always`, etc. +To customize the colors and syntax highlighting, see `grep-match-line-style`, `grep-match-word-style`, `grep-context-line-style`, `grep-file-style`, `grep-line-number-style`. 
+Ripgrep's `rg --json` output format is supported; this avoids certain file name parsing ambiguities that are inevitable with the standard grep output formats. +Note that `git grep` can display the "function context" for matches and that delta handles this output specially: see the `-p` and `-W` options of `git grep`. + ### "Features": named groups of settings All delta options can go under the `[delta]` section in your git config file. However, you can also use named "features" to keep things organized: these are sections in git config like `[delta "my-feature"]`. Here's an example using two custom features: diff --git a/src/cli.rs b/src/cli.rs index 558d21416..fdf541383 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -445,6 +445,40 @@ pub struct Opt { )] pub blame_timestamp_format: String, + #[structopt(long = "grep-match-line-style")] + /// Style (foreground, background, attributes) for matching lines of code in + /// grep output. See STYLES section. Defaults to plus-style. + pub grep_match_line_style: Option, + + #[structopt(long = "grep-match-word-style")] + /// Style (foreground, background, attributes) for the specific matching + /// substrings within a matching line of code in grep output. See STYLES + /// section. Defaults to plus-style. + pub grep_match_word_style: Option, + + #[structopt(long = "grep-context-line-style")] + /// Style (foreground, background, attributes) for non-matching lines of + /// code in grep output. See STYLES section. Defaults to zero-style. + pub grep_context_line_style: Option, + + #[structopt(long = "grep-file-style")] + /// Style (foreground, background, attributes) for file paths in grep + /// output. See STYLES section. Defaults to hunk-header-file-path-style. + pub grep_file_style: Option, + + #[structopt(long = "grep-line-number-style")] + /// Style (foreground, background, attributes) for line numbers in grep + /// output. See STYLES section. Defaults to hunk-header-line-number-style. 
+ pub grep_line_number_style: Option, + + #[structopt(long = "grep-separator-symbol", default_value = ":")] + /// Symbol used in grep output to separate file path (and line number) from + /// the line of file contents. Defaults to ":" for both match and context + /// lines, since many terminal emulators recognize constructs like + /// "/path/to/file:7:". However, standard grep output uses "-" for context + /// lines: set this option to "keep" to keep the original separator symbols. + pub grep_separator_symbol: String, + /// Default language used for syntax highlighting when this cannot be /// inferred from a filename. It will typically make sense to set this in /// per-repository git config (.git/config) diff --git a/src/config.rs b/src/config.rs index 71aa8382e..12b829454 100644 --- a/src/config.rs +++ b/src/config.rs @@ -80,6 +80,12 @@ pub struct Config { pub git_config: Option, pub git_minus_style: Style, pub git_plus_style: Style, + pub grep_context_line_style: Style, + pub grep_file_style: Style, + pub grep_line_number_style: Style, + pub grep_match_line_style: Style, + pub grep_match_word_style: Style, + pub grep_separator_symbol: String, pub hunk_header_file_style: Style, pub hunk_header_line_number_style: Style, pub hunk_header_style_include_file_path: bool, @@ -243,6 +249,12 @@ impl From for Config { file_style: styles["file-style"], git_config: opt.git_config, git_config_entries: opt.git_config_entries, + grep_context_line_style: styles["grep-context-line-style"], + grep_file_style: styles["grep-file-style"], + grep_line_number_style: styles["grep-line-number-style"], + grep_match_line_style: styles["grep-match-line-style"], + grep_match_word_style: styles["grep-match-word-style"], + grep_separator_symbol: opt.grep_separator_symbol, hunk_header_file_style: styles["hunk-header-file-style"], hunk_header_line_number_style: styles["hunk-header-line-number-style"], hunk_header_style: styles["hunk-header-style"], diff --git a/src/delta.rs b/src/delta.rs index 
fbf45c53d..bfb7761f3 100644 --- a/src/delta.rs +++ b/src/delta.rs @@ -23,6 +23,7 @@ pub enum State { SubmoduleLog, // In a submodule section, with gitconfig diff.submodule = log SubmoduleShort(String), // In a submodule section, with gitconfig diff.submodule = short Blame(String, Option), // In a line of `git blame` output (commit, repeat_blame_line). + Grep, // In a line of `git grep` output Unknown, // The following elements are created when a line is wrapped to display it: HunkZeroWrapped, // Wrapped unchanged line @@ -121,6 +122,7 @@ impl<'a> StateMachine<'a> { || self.handle_submodule_short_line()? || self.handle_hunk_line()? || self.handle_blame_line()? + || self.handle_grep_line()? || self.should_skip_line() || self.emit_line_unchanged()?; } @@ -133,7 +135,13 @@ impl<'a> StateMachine<'a> { fn ingest_line(&mut self, raw_line_bytes: &[u8]) { // TODO: retain raw_line as Cow self.raw_line = String::from_utf8_lossy(raw_line_bytes).to_string(); - if self.config.max_line_length > 0 && self.raw_line.len() > self.config.max_line_length { + if self.config.max_line_length > 0 + && self.raw_line.len() > self.config.max_line_length + // We must not truncate ripgrep --json output + // TODO: An alternative might be to truncate `line` but retain + // `raw_line` untruncated? + && !self.raw_line.starts_with('{') + { self.raw_line = ansi::truncate_str( &self.raw_line, self.config.max_line_length, diff --git a/src/handlers/file_meta.rs b/src/handlers/file_meta.rs index 8a1ea0d3e..bea05625b 100644 --- a/src/handlers/file_meta.rs +++ b/src/handlers/file_meta.rs @@ -210,7 +210,7 @@ fn get_file_extension_from_file_meta_line_file_path(path: &str) -> Option<&str> } /// Attempt to parse input as a file path and return extension as a &str. 
-fn get_extension(s: &str) -> Option<&str> { +pub fn get_extension(s: &str) -> Option<&str> { let path = Path::new(s); path.extension() .and_then(|e| e.to_str()) diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs new file mode 100644 index 000000000..a7e57d1b6 --- /dev/null +++ b/src/handlers/grep.rs @@ -0,0 +1,829 @@ +use std::borrow::Cow; + +use lazy_static::lazy_static; +use regex::Regex; +use serde::Deserialize; +use unicode_segmentation::UnicodeSegmentation; + +use crate::ansi; +use crate::delta::{State, StateMachine}; +use crate::handlers::{self, ripgrep_json}; +use crate::paint::{self, BgShouldFill, StyleSectionSpecifier}; +use crate::style::Style; +use crate::utils::process; + +#[derive(Debug, PartialEq)] +pub struct GrepLine<'b> { + pub path: Cow<'b, str>, + pub line_number: Option, + pub line_type: LineType, + pub code: Cow<'b, str>, + pub submatches: Option>, +} + +#[derive(Clone, Copy, Debug, PartialEq, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum LineType { + ContextHeader, + Context, + Match, + Ignore, +} + +struct GrepOutputConfig { + add_navigate_marker_to_matches: bool, + render_context_header_as_hunk_header: bool, + pad_line_number: bool, +} + +lazy_static! { + static ref OUTPUT_CONFIG: GrepOutputConfig = make_output_config(); +} + +impl<'a> StateMachine<'a> { + // If this is a line of git grep output then render it accordingly. + pub fn handle_grep_line(&mut self) -> std::io::Result { + self.painter.emit()?; + let mut handled_line = false; + + let try_parse = matches!(&self.state, State::Grep | State::Unknown); + + if try_parse { + if let Some(mut grep_line) = parse_grep_line(&self.line) { + if matches!(grep_line.line_type, LineType::Ignore) { + handled_line = true; + return Ok(handled_line); + } + + // Emit syntax-highlighted code + // TODO: Determine the language less frequently, e.g. only when the file changes. 
+ if let Some(lang) = handlers::file_meta::get_extension(&grep_line.path) + .or_else(|| self.config.default_language.as_deref()) + { + self.painter.set_syntax(Some(lang)); + self.painter.set_highlighter(); + } + self.state = State::Grep; + + match ( + &grep_line.line_type, + OUTPUT_CONFIG.render_context_header_as_hunk_header, + ) { + // Emit context header line + (LineType::ContextHeader, true) => handlers::hunk_header::write_hunk_header( + &grep_line.code, + &[(grep_line.line_number.unwrap_or(0), 0)], + &mut self.painter, + &self.line, + &grep_line.path, + self.config, + )?, + _ => { + if self.config.navigate { + write!( + self.painter.writer, + "{}", + match ( + &grep_line.line_type, + OUTPUT_CONFIG.add_navigate_marker_to_matches + ) { + (LineType::Match, true) => "• ", + (_, true) => " ", + _ => "", + } + )? + } + // Emit file & line-number + let separator = if self.config.grep_separator_symbol == "keep" { + // grep, rg, and git grep use ":" for matching lines + // and "-" for non-matching lines (and `git grep -W` + // uses "=" for a context header line). + match grep_line.line_type { + LineType::Match => ":", + LineType::Context => "-", + LineType::ContextHeader => "=", + LineType::Ignore => "", + } + } else { + // But ":" results in a "file/path:number:" + // construct that terminal emulators are more likely + // to recognize and render as a clickable link. If + // navigate is enabled then there is already a good + // visual indicator of match lines (in addition to + // the grep-match-style highlighting) and so we use + // ":" for matches and non-matches alike. 
+ &self.config.grep_separator_symbol + }; + write!( + self.painter.writer, + "{}", + paint::paint_file_path_with_line_number( + grep_line.line_number, + &grep_line.path, + OUTPUT_CONFIG.pad_line_number, + separator, + true, + Some(self.config.grep_file_style), + Some(self.config.grep_line_number_style), + self.config + ) + )?; + + // Emit code line + let code_style_sections = + match (&grep_line.line_type, &grep_line.submatches) { + (LineType::Match, Some(submatches)) => { + // We expand tabs at this late stage because + // the tabs are escaped in the JSON, so + // expansion must come after JSON parsing. + // (At the time of writing, we are in this + // arm iff we are handling `ripgrep --json` + // output.) + grep_line.code = self + .painter + .expand_tabs(grep_line.code.graphemes(true)) + .into(); + make_style_sections( + &grep_line.code, + submatches, + self.config.grep_match_word_style, + self.config.grep_match_line_style, + ) + } + (LineType::Match, None) => { + // HACK: We need tabs expanded, and we need + // the &str passed to + // `get_code_style_sections` to live long + // enough. But at the point it is guaranteed + // that this handler is going to handle this + // line, so mutating it is acceptable. 
+ self.raw_line = + self.painter.expand_tabs(self.raw_line.graphemes(true)); + get_code_style_sections( + &self.raw_line, + self.config.grep_match_word_style, + self.config.grep_match_line_style, + &grep_line, + ) + .unwrap_or( + StyleSectionSpecifier::Style( + self.config.grep_match_line_style, + ), + ) + } + _ => StyleSectionSpecifier::Style( + self.config.grep_context_line_style, + ), + }; + self.painter.syntax_highlight_and_paint_line( + &format!("{}\n", grep_line.code), + code_style_sections, + self.state.clone(), + BgShouldFill::default(), + ) + } + } + handled_line = true + } + } + Ok(handled_line) + } +} + +fn make_style_sections<'a>( + line: &'a str, + submatches: &[(usize, usize)], + match_style: Style, + non_match_style: Style, +) -> StyleSectionSpecifier<'a> { + let mut sections = Vec::new(); + let mut curr = 0; + for (start_, end_) in submatches { + let (start, end) = (*start_, *end_); + if start > curr { + sections.push((non_match_style, &line[curr..start])) + }; + sections.push((match_style, &line[start..end])); + curr = end; + } + if curr < line.len() { + sections.push((non_match_style, &line[curr..])) + } + StyleSectionSpecifier::StyleSections(sections) +} + +// Return style sections describing colors received from git. 
+fn get_code_style_sections<'b>( + raw_line: &'b str, + match_style: Style, + non_match_style: Style, + grep: &GrepLine, +) -> Option> { + if let Some(raw_code_start) = ansi::ansi_preserving_index( + raw_line, + match grep.line_number { + Some(n) => format!("{}:{}:", grep.path, n).len(), + None => grep.path.len() + 1, + }, + ) { + let match_style_sections = ansi::parse_style_sections(&raw_line[raw_code_start..]) + .iter() + .map(|(ansi_term_style, s)| { + if ansi_term_style.foreground.is_some() { + (match_style, *s) + } else { + (non_match_style, *s) + } + }) + .collect(); + Some(StyleSectionSpecifier::StyleSections(match_style_sections)) + } else { + None + } +} + +fn make_output_config() -> GrepOutputConfig { + match process::calling_process().as_deref() { + Some(process::CallingProcess::GitGrep((longs, shorts))) + if shorts.contains("-W") || longs.contains("--function-context") => + { + // --function-context is in effect: i.e. the entire function is + // being displayed. In that case we don't render the first line as a + // header, since the second line is the true next line, and it will + // be more readable to have these displayed normally. We do add the + // navigate marker, since match lines will be surrounded by (many) + // non-match lines. And, since we are printing (many) successive lines + // of code, we pad line numbers <100 in order to maintain code + // alignment up to line 9999. + GrepOutputConfig { + render_context_header_as_hunk_header: false, + add_navigate_marker_to_matches: true, + pad_line_number: true, + } + } + Some(process::CallingProcess::GitGrep((longs, shorts))) + if shorts.contains("-p") || longs.contains("--show-function") => + { + // --show-function is in effect, i.e. the function header is being + // displayed, along with matches within the function. Therefore we + // render the first line as a header, but we do not add the navigate + // marker, since all non-header lines are matches. 
+ GrepOutputConfig { + render_context_header_as_hunk_header: true, + add_navigate_marker_to_matches: false, + pad_line_number: false, + } + } + _ => GrepOutputConfig { + render_context_header_as_hunk_header: true, + add_navigate_marker_to_matches: false, + pad_line_number: false, + }, + } +} + +enum GrepLineRegex { + FilePathWithFileExtension, + FilePathWithoutSeparatorCharacters, +} + +lazy_static! { + static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION: Regex = + make_grep_line_regex(GrepLineRegex::FilePathWithFileExtension); +} + +lazy_static! { + static ref GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS: Regex = + make_grep_line_regex(GrepLineRegex::FilePathWithoutSeparatorCharacters); +} + +// See tests for example grep lines +fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex { + // Grep tools such as `git grep` and `rg` emit lines like the following, + // where "xxx" represents arbitrary code. Note that there are 3 possible + // "separator characters": ':', '-', '='. + + // The format is ambiguous, but we attempt to parse it. + + // src/co-7-fig.rs:xxx + // src/co-7-fig.rs:7:xxx + // src/co-7-fig.rs-xxx + // src/co-7-fig.rs-7-xxx + // src/co-7-fig.rs=xxx + // src/co-7-fig.rs=7=xxx + + // Makefile:xxx + // Makefile:7:xxx + // Makefile-xxx + // Makefile-7-xxx + + // Make-7-file:xxx + // Make-7-file:7:xxx + // Make-7-file-xxx + // Make-7-file-7-xxx + + let file_path = match regex_variant { + GrepLineRegex::FilePathWithFileExtension => { + r" + ( # 1. file name (colons not allowed) + [^:|\ ] # try to be strict about what a file path can start with + [^:]* # anything + \.[^.\ :=-]{1,6} # extension + ) + " + } + GrepLineRegex::FilePathWithoutSeparatorCharacters => { + r" + ( # 1. file name (colons not allowed) + [^:|\ =-] # try to be strict about what a file path can start with + [^:=-]* # anything except separators + [^:\ ] # a file name cannot end with whitespace + ) + " + } + }; + + Regex::new(&format!( + "(?x) +^ +{file_path} +(?: + ( + : # 2. 
match marker + (?:([0-9]+):)? # 3. optional: line number followed by second match marker + ) + | + ( + - # 4. nomatch marker + (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker + ) + | + ( + = # 6. header marker + (?:([0-9]+)=)? # 7. optional: line number followed by second header marker + ) +) +(.*) # 8. code (i.e. line contents) +$ +", + file_path = file_path + )) + .unwrap() +} + +pub fn parse_grep_line(line: &str) -> Option { + if line.starts_with('{') { + ripgrep_json::parse_line(line) + } else { + match process::calling_process().as_deref() { + Some(process::CallingProcess::GitGrep(_)) + | Some(process::CallingProcess::OtherGrep) => [ + &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION, + &*GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS, + ] + .iter() + .find_map(|regex| _parse_grep_line(*regex, line)), + _ => None, + } + } +} + +pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option> { + let caps = regex.captures(line)?; + let file = caps.get(1).unwrap().as_str().into(); + let (line_type, line_number) = &[ + (2, LineType::Match), + (4, LineType::Context), + (6, LineType::ContextHeader), + ] + .iter() + .find_map(|(i, line_type)| { + if caps.get(*i).is_some() { + let line_number: Option = + caps.get(i + 1).map(|m| m.as_str().parse().ok()).flatten(); + Some((*line_type, line_number)) + } else { + None + } + }) + .unwrap(); // The regex matches so one of the three alternatives must have matched + let code = caps.get(8).unwrap().as_str().into(); + + Some(GrepLine { + path: file, + line_number: *line_number, + line_type: *line_type, + code, + submatches: None, + }) +} + +#[cfg(test)] +mod tests { + use crate::handlers::grep::{parse_grep_line, GrepLine, LineType}; + use crate::utils::process::tests::cfg; + + #[test] + fn test_parse_grep_match() { + let fake_parent_grep_command = "git --doesnt-matter grep --nor-this nor_this -- nor_this"; + { + let _args = cfg::WithArgs::new(&fake_parent_grep_command); + assert_eq!( + 
parse_grep_line("src/co-7-fig.rs:xxx"), + Some(GrepLine { + path: "src/co-7-fig.rs".into(), + line_number: None, + line_type: LineType::Match, + code: "xxx".into(), + submatches: None, + }) + ); + } + { + let _args = cfg::WithArgs::new(&fake_parent_grep_command); + assert_eq!( + parse_grep_line("src/config.rs:use crate::minusplus::MinusPlus;"), + Some(GrepLine { + path: "src/config.rs".into(), + line_number: None, + line_type: LineType::Match, + code: "use crate::minusplus::MinusPlus;".into(), + submatches: None, + }) + ); + } + { + let _args = cfg::WithArgs::new(&fake_parent_grep_command); + assert_eq!( + parse_grep_line( + "src/config.rs: pub line_numbers_style_minusplus: MinusPlus