From 3f8feb4992f49b108002ccbbdfbc885499804163 Mon Sep 17 00:00:00 2001 From: Jonathan LEI Date: Wed, 25 Jan 2023 02:48:17 +0000 Subject: [PATCH] Changed file picker --- Cargo.lock | 69 ++++++ helix-term/src/commands.rs | 41 ++++ helix-term/src/keymap/default.rs | 3 +- helix-term/src/ui/menu.rs | 34 ++- helix-vcs/Cargo.toml | 4 + helix-vcs/src/diff.rs | 28 +++ helix-vcs/src/git.rs | 368 ++++++++++++++++++++++++++++++- helix-vcs/src/lib.rs | 17 +- 8 files changed, 558 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f7812f4d4e08..4e8fb5dab197 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,6 +82,15 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "block-buffer" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +dependencies = [ + "generic-array", +] + [[package]] name = "bstr" version = "0.2.17" @@ -241,6 +250,15 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "cpufeatures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +dependencies = [ + "libc", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -285,6 +303,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "cxx" version = "1.0.82" @@ -342,6 +370,16 @@ dependencies = [ "parking_lot_core 0.9.4", ] +[[package]] +name = "digest" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dirs" version = "4.0.0" @@ -528,6 +566,16 @@ dependencies = [ "thread_local", ] +[[package]] +name = "generic-array" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.8" @@ -1248,11 +1296,15 @@ dependencies = [ name = "helix-vcs" version = "0.6.0" dependencies = [ + "anyhow", + "content_inspector", "git-repository", "helix-core", + "ignore", "imara-diff", "log", "parking_lot 0.12.1", + "sha1", "tempfile", "tokio", ] @@ -1895,6 +1947,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha1_smol" version = "1.0.0" @@ -2234,6 +2297,12 @@ dependencies = [ "regex", ] +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + [[package]] name = "unicase" version = "2.6.0" diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index e70914016d60..11fc5621526c 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -272,6 +272,7 @@ impl MappableCommand { buffer_picker, "Open buffer picker", jumplist_picker, "Open jumplist picker", symbol_picker, "Open symbol picker", + changed_file_picker, "Open changed file picker", select_references_to_symbol_under_cursor, "Select symbol references", workspace_symbol_picker, "Open workspace symbol picker", diagnostics_picker, "Open diagnostic picker", @@ -2454,6 +2455,46 @@ fn jumplist_picker(cx: &mut Context) { cx.push_layer(Box::new(overlayed(picker))); } +fn changed_file_picker(cx: &mut Context) { + let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("./")); + + let entries = match cx.editor.diff_providers.get_changed_files(&cwd) { + Ok(entries) => entries, + Err(err) => { + cx.editor.set_error(format!("{err}")); + return; + } + }; + + let added = cx.editor.theme.get("diff.plus"); + let deleted = cx.editor.theme.get("diff.minus"); + let modified = cx.editor.theme.get("diff.delta"); + + let picker = FilePicker::new( + entries, + ui::menu::FileChangeData { + cwd, + style_untracked: added, + style_modified: modified, + style_deleted: deleted, + style_renamed: modified, + }, + |cx, meta, action| { + let path_to_open = meta.path(); + if let Err(e) = cx.editor.open(path_to_open, action) { + let err = if let Some(err) = e.source() { + format!("{}", err) + } else { + format!("unable to open \"{}\"", path_to_open.display()) + }; + cx.editor.set_error(err); + } + }, + |_editor, meta| Some((meta.path().to_path_buf().into(), None)), + ); + cx.push_layer(Box::new(overlayed(picker))); +} + impl ui::menu::Item for MappableCommand { type Data = ReverseKeymap; diff --git a/helix-term/src/keymap/default.rs b/helix-term/src/keymap/default.rs index ef93dee08a77..0cdc92afc3d0 100644 --- a/helix-term/src/keymap/default.rs +++ b/helix-term/src/keymap/default.rs @@ -216,9 +216,10 @@ pub fn default() -> HashMap { "S" => workspace_symbol_picker, "d" => diagnostics_picker, "D" => workspace_diagnostics_picker, + "g" => changed_file_picker, "a" => code_action, "'" => last_picker, - "g" => { "Debug (experimental)" sticky=true + "G" => { "Debug (experimental)" sticky=true "l" => dap_launch, "b" => dap_toggle_breakpoint, "c" => dap_continue, diff --git a/helix-term/src/ui/menu.rs b/helix-term/src/ui/menu.rs index e92578c5a136..11f7c100faff 100644 --- a/helix-term/src/ui/menu.rs +++ b/helix-term/src/ui/menu.rs @@ -11,7 +11,8 @@ pub use tui::widgets::{Cell, Row}; use fuzzy_matcher::skim::SkimMatcherV2 as Matcher; use fuzzy_matcher::FuzzyMatcher; -use helix_view::{graphics::Rect, Editor}; +use helix_vcs::FileChange; +use helix_view::{graphics::Rect, theme::Style, Editor}; use tui::layout::Constraint; pub trait Item { @@ -43,6 +44,37 @@ impl Item for PathBuf { } } +pub struct FileChangeData { + pub cwd: PathBuf, + pub style_untracked: Style, + pub style_modified: Style, + pub style_deleted: Style, + pub style_renamed: Style, +} + +impl Item for FileChange { + type Data = FileChangeData; + + fn format(&self, data: &Self::Data) -> Row { + let (sign, style) = match self { + Self::Untracked { .. } => ("[+]", data.style_untracked), + Self::Modified { .. } => ("[~]", data.style_modified), + Self::Deleted { .. } => ("[-]", data.style_deleted), + Self::Renamed { .. } => ("[>]", data.style_modified), + }; + let path = self.path(); + + Row::new(vec![ + sign.to_owned(), + path.strip_prefix(&data.cwd) + .unwrap_or(path) + .to_string_lossy() + .to_string(), + ]) + .style(style) + } +} + pub struct Menu { options: Vec, editor_data: T::Data, diff --git a/helix-vcs/Cargo.toml b/helix-vcs/Cargo.toml index 19b660a60f54..cb38042a056c 100644 --- a/helix-vcs/Cargo.toml +++ b/helix-vcs/Cargo.toml @@ -13,11 +13,15 @@ homepage = "https://helix-editor.com" [dependencies] helix-core = { version = "0.6", path = "../helix-core" } +anyhow = "1.0" tokio = { version = "1", features = ["rt", "rt-multi-thread", "time", "sync", "parking_lot", "macros"] } parking_lot = "0.12" +content_inspector = "0.2.4" git-repository = { version = "0.32", default-features = false , optional = true } +ignore = "0.4" imara-diff = "0.1.5" +sha1 = "0.10.0" log = "0.4" diff --git a/helix-vcs/src/diff.rs b/helix-vcs/src/diff.rs index 9c6a362f7db8..6b31621ffbb5 100644 --- a/helix-vcs/src/diff.rs +++ b/helix-vcs/src/diff.rs @@ -1,4 +1,5 @@ use std::ops::Range; +use std::path::{Path, PathBuf}; use std::sync::Arc; use helix_core::Rope; @@ -277,3 +278,30 @@ impl FileHunks<'_> { } } } + +pub enum FileChange { + Untracked { + path: PathBuf, + }, + Modified { + path: PathBuf, + }, + Deleted { + path: PathBuf, + }, + Renamed { + from_path: PathBuf, + to_path: PathBuf, + }, +} + +impl FileChange { + pub fn path(&self) -> &Path { + match self { + Self::Untracked { path } => path, + Self::Modified { path } => path, + Self::Deleted { path } => path, + Self::Renamed { to_path, .. } => to_path, + } + } +} diff --git a/helix-vcs/src/git.rs b/helix-vcs/src/git.rs index 432159b6cdb8..623794c4e690 100644 --- a/helix-vcs/src/git.rs +++ b/helix-vcs/src/git.rs @@ -1,17 +1,65 @@ -use std::path::Path; +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + io::Read, + path::{Path, PathBuf}, +}; +use anyhow::Result; +use git::index::{entry::Mode, State}; use git::objs::tree::EntryMode; -use git::sec::trust::DefaultForLevel; +use git::{prelude::FindExt, sec::trust::DefaultForLevel}; use git::{Commit, ObjectId, Repository, ThreadSafeRepository}; use git_repository as git; +use ignore::WalkBuilder; +use sha1::Digest; -use crate::DiffProvider; +use crate::{DiffProvider, FileChange}; #[cfg(test)] mod test; pub struct Git; +/// A subset of `git_repository::objs::tree::EntryMode` that actually makes sense for tree nodes. +#[derive(Hash, PartialEq, Eq)] +enum FileEntryMode { + Blob, + BlobExecutable, + Link, +} + +#[derive(Default)] +struct RawChanges { + additions: Vec, + deletions: HashMap>, + modifications: Vec, +} + +#[derive(Hash, PartialEq, Eq)] +struct RawAddition { + entry_mode: FileEntryMode, + oid: ObjectId, + path: PathBuf, +} + +#[derive(Hash, PartialEq, Eq)] +struct RawDeletion { + entry_mode: FileEntryMode, + oid: ObjectId, + path: PathBuf, +} + +#[allow(unused)] +struct RawModification { + previous_entry_mode: FileEntryMode, + previous_oid: ObjectId, + + entry_mode: FileEntryMode, + oid: ObjectId, + + path: PathBuf, +} + impl Git { fn open_repo(path: &Path, ceiling_dir: Option<&Path>) -> Option { // custom open options @@ -51,6 +99,111 @@ impl Git { ) .ok() } + + /// Emulates the result of running `git status` from the command line. + fn status(repo: &Repository) -> Result> { + let autocrlf = repo + .config_snapshot() + .boolean("core.autocrlf") + .unwrap_or(false); + + let work_dir = repo + .work_dir() + .ok_or_else(|| anyhow::anyhow!("working tree not found"))?; + + // TODO: allow diffing against another ref + let head_tree = repo.head_commit()?.tree()?; + let head_state = State::from_tree(&head_tree.id, |oid, buf| { + repo.objects.find_tree_iter(oid, buf).ok() + })?; + + let mut head_tree_set = HashSet::new(); + let mut submodule_paths = vec![]; + + let mut raw_changes = RawChanges::default(); + + for item in head_state.entries() { + let full_path = work_dir.join(&PathBuf::from(item.path(&head_state).to_string())); + + if item.mode == Mode::COMMIT { + submodule_paths.push(full_path); + } else { + let old_entry_mode = match item.mode { + Mode::FILE => FileEntryMode::Blob, + Mode::FILE_EXECUTABLE => FileEntryMode::BlobExecutable, + Mode::SYMLINK => FileEntryMode::Link, + _ => anyhow::bail!("unexpected entry mode"), + }; + + match git_meta_from_path(&full_path, autocrlf)? { + Some((new_entry_mode, new_oid)) => { + // On Windows, physical files are _always_ inferred as `Blob`. We simply don't + // compare the entry mode as it's pointless. + let entry_mode_changed = { + #[cfg(unix)] + { + new_entry_mode != old_entry_mode + } + + #[cfg(not(unix))] + { + false + } + }; + + if entry_mode_changed || new_oid != item.id { + raw_changes.add_modification(RawModification { + previous_entry_mode: old_entry_mode, + previous_oid: item.id, + entry_mode: new_entry_mode, + oid: new_oid, + path: full_path.clone(), + }); + } + } + None => { + raw_changes.add_deletion(RawDeletion { + entry_mode: old_entry_mode, + oid: item.id, + path: full_path.clone(), + }); + } + } + + head_tree_set.insert(full_path); + } + } + + // Looks for untracked files by walking the fs and probing the (cached) head tree + // TODO: use build_parallel() to speed this up + for entry in WalkBuilder::new(work_dir) + .hidden(false) + .ignore(false) + .filter_entry(move |entry| { + entry.file_name() != ".git" + && !submodule_paths + .iter() + .any(|submodule| entry.path().starts_with(submodule)) + }) + .build() + { + let entry = entry?; + if !entry.file_type().map_or(false, |ft| ft.is_dir()) { + let full_path = entry.path(); + let meta = git_meta_from_path(full_path, autocrlf)? + .ok_or_else(|| anyhow::anyhow!("file moved between checks"))?; + if !head_tree_set.contains(full_path) { + raw_changes.add_addition(RawAddition { + entry_mode: meta.0, + oid: meta.1, + path: full_path.to_path_buf(), + }) + } + } + } + + Ok(raw_changes.into()) + } } impl DiffProvider for Git { @@ -88,6 +241,90 @@ impl DiffProvider for Git { } Some(data) } + + fn get_changed_files(&self, cwd: &Path) -> Result> { + Self::status( + &Self::open_repo(cwd, None) + .ok_or_else(|| anyhow::anyhow!("no Git repository found"))? + .to_thread_local(), + ) + } +} + +impl RawChanges { + pub fn add_addition(&mut self, addition: RawAddition) { + self.additions.push(addition); + } + + pub fn add_deletion(&mut self, deletion: RawDeletion) { + match self.deletions.entry(deletion.oid) { + Entry::Occupied(entry) => { + entry.into_mut().push(deletion); + } + Entry::Vacant(entry) => { + entry.insert(vec![deletion]); + } + } + } + + pub fn add_modification(&mut self, modification: RawModification) { + self.modifications.push(modification); + } +} + +impl From for Vec { + // Unlike Git, we only look for pure renames at the moment. + // TODO: detect renames with minor changes + fn from(mut raw: RawChanges) -> Self { + let mut status_entries = vec![]; + + let additions_left = if !raw.additions.is_empty() && !raw.deletions.is_empty() { + let mut unmatched_additions = vec![]; + + for add in raw.additions.into_iter() { + let matched_deletions = match raw.deletions.entry(add.oid) { + Entry::Occupied(entry) => entry.into_mut(), + Entry::Vacant(_) => { + unmatched_additions.push(add); + continue; + } + }; + + // Impossible to have an empty vec inside + let chosen_deletion = matched_deletions.pop().expect("unexpected empty vec"); + if matched_deletions.is_empty() { + raw.deletions.remove(&add.oid); + } + + status_entries.push(FileChange::Renamed { + from_path: chosen_deletion.path.to_owned(), + to_path: add.path.to_owned(), + }); + } + + unmatched_additions + } else { + raw.additions + }; + + additions_left + .into_iter() + .for_each(|item| status_entries.push(FileChange::Untracked { path: item.path })); + raw.deletions + .values() + .into_iter() + .flat_map(|val| val.iter()) + .for_each(|item| { + status_entries.push(FileChange::Deleted { + path: item.path.to_owned(), + }) + }); + raw.modifications + .into_iter() + .for_each(|item| status_entries.push(FileChange::Modified { path: item.path })); + + status_entries + } } /// Finds the object that contains the contents of a file at a specific commit. @@ -103,3 +340,128 @@ fn find_file_in_commit(repo: &Repository, commit: &Commit, file: &Path) -> Optio EntryMode::Blob | EntryMode::BlobExecutable => Some(tree_entry.object_id()), } } + +fn git_meta_from_path( + path: &Path, + autocrlf: bool, +) -> Result, std::io::Error> { + // Windows doesn't support symlinks. This block runs fine but is just wasting CPU cycles. + #[cfg(not(windows))] + match path.symlink_metadata() { + Ok(meta) => { + if meta.is_symlink() { + let link_content = std::fs::read_link(path)?; + let link_content = link_content.to_string_lossy(); + let link_content = link_content.as_bytes(); + + let mut hasher = sha1::Sha1::default(); + hasher.update(b"blob "); + hasher.update(format!("{}", link_content.len()).as_bytes()); + hasher.update(b"\0"); + hasher.update(link_content); + + let hash: [u8; 20] = hasher.finalize().into(); + + return Ok(Some((FileEntryMode::Link, ObjectId::from(hash)))); + } + } + Err(_) => return Ok(None), + }; + + // Not a symlink for sure from this point + Ok(match path.metadata() { + Ok(meta) => { + if meta.is_file() { + let entry_mode = { + #[cfg(unix)] + { + use std::os::unix::prelude::PermissionsExt; + if meta.permissions().mode() & 0o111 != 0 { + FileEntryMode::BlobExecutable + } else { + FileEntryMode::Blob + } + } + + #[cfg(not(unix))] + { + FileEntryMode::Blob + } + }; + + let oid = { + let mut file = std::fs::File::open(path)?; + + // `git::features::hash::Sha1` doesn't implement `Write` so we use the + // underlying crate directly for max perf. + let mut hasher = sha1::Sha1::default(); + hasher.update(b"blob "); + + if autocrlf { + // When autocrlf is on, we either have to fit the whole file into memory, + // or we read the file twice. Either way is not optimal. How should we + // handle this? + // + // With the current implementation, there's no way we can handle huge files + // that do not fit into memory. Maybe we can set a size limit? Anything + // over a certain size will simply be read twice: once for getting the + // normalized size, and once for the hasher updates? + const BUFFER_SIZE: usize = 8 * 1024; + let mut buffer = [0u8; BUFFER_SIZE]; + + let mut len = file.read(&mut buffer)?; + if content_inspector::inspect(&buffer[..len]) + == content_inspector::ContentType::BINARY + { + // No CRLF handling! We update the part already read + the remaining + // content in the file. + hasher.update(format!("{}", meta.len()).as_bytes()); + hasher.update(b"\0"); + + hasher.update(&buffer[..len]); + std::io::copy(&mut file, &mut hasher)?; + } else { + // It's a text file. CRLF transformation as planned. + let mut normalized_file = Vec::with_capacity(meta.len() as usize); + let mut was_cr = false; + + loop { + buffer[..len].iter().for_each(|byte| { + if was_cr && *byte == b'\n' { + normalized_file.pop(); + } + normalized_file.push(*byte); + was_cr = *byte == b'\r'; + }); + + if len < BUFFER_SIZE { + break; + } + len = file.read(&mut buffer)?; + } + + hasher.update(format!("{}", normalized_file.len()).as_bytes()); + hasher.update(b"\0"); + + hasher.update(&normalized_file); + } + } else { + hasher.update(format!("{}", meta.len()).as_bytes()); + hasher.update(b"\0"); + + std::io::copy(&mut file, &mut hasher)?; + } + + let hash: [u8; 20] = hasher.finalize().into(); + ObjectId::from(hash) + }; + + Some((entry_mode, oid)) + } else { + // It's a non-symlink folder. Git doesn't track folders. Same as deletion. + None + } + } + Err(_) => None, + }) +} diff --git a/helix-vcs/src/lib.rs b/helix-vcs/src/lib.rs index 97320d32518f..7d6d34a66112 100644 --- a/helix-vcs/src/lib.rs +++ b/helix-vcs/src/lib.rs @@ -1,5 +1,7 @@ use std::path::Path; +use anyhow::Result; + #[cfg(feature = "git")] pub use git::Git; #[cfg(not(feature = "git"))] @@ -10,7 +12,7 @@ mod git; mod diff; -pub use diff::{DiffHandle, Hunk}; +pub use diff::{DiffHandle, FileChange, Hunk}; pub trait DiffProvider { /// Returns the data that a diff should be computed against @@ -18,6 +20,8 @@ pub trait DiffProvider { /// The data is returned as raw byte without any decoding or encoding performed /// to ensure all file encodings are handled correctly. fn get_diff_base(&self, file: &Path) -> Option>; + + fn get_changed_files(&self, cwd: &Path) -> Result>; } #[doc(hidden)] @@ -26,6 +30,10 @@ impl DiffProvider for Dummy { fn get_diff_base(&self, _file: &Path) -> Option> { None } + + fn get_changed_files(&self, _cwd: &Path) -> Result> { + anyhow::bail!("dummy diff provider") + } } pub struct DiffProviderRegistry { @@ -38,6 +46,13 @@ impl DiffProviderRegistry { .iter() .find_map(|provider| provider.get_diff_base(file)) } + + pub fn get_changed_files(&self, cwd: &Path) -> Result> { + self.providers + .iter() + .find_map(|provider| provider.get_changed_files(cwd).ok()) + .ok_or_else(|| anyhow::anyhow!("no diff provider returns success")) + } } impl Default for DiffProviderRegistry {