Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WebAssembly Bindings #42

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/releases.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Release workflow: builds the WebAssembly bindings in `wasm/` and publishes
# them to the wapm.io registry using cargo-wapm.
name: Releases

# Runs whenever a tag matching "v*" is pushed, and can also be started manually
# through workflow_dispatch.
on:
push:
tags:
- "v*"
workflow_dispatch:

env:
# Print a backtrace if any Rust tool run by this workflow panics.
RUST_BACKTRACE: 1
# Have cargo fetch git dependencies with the `git` executable
# (wasm/Cargo.toml pulls wit-bindgen-rust from a git repository).
CARGO_NET_GIT_FETCH_WITH_CLI: true

jobs:
publish-to-wapm:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
# The toolchain version matches `rust-version = "1.41"` in wasm/Cargo.toml.
- uses: actions-rs/toolchain@v1
with:
toolchain: 1.41.0
profile: minimal
override: true
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install WebAssembly targets
run: rustup target add wasm32-unknown-unknown
- name: Setup Wasmer
uses: wasmerio/setup-wasmer@v1
# NOTE(review): `--debug` presumably trades an optimised cargo-wapm binary
# for a faster install step — confirm this is intentional.
- name: Install cargo-wapm
uses: actions-rs/cargo@v1
with:
command: install
args: cargo-wapm --verbose --debug
# Authenticates with the token stored in the WAPM_IO_TOKEN repository secret.
- name: Login to wapm.io
run: |
wapm config set registry.url https://registry.wapm.io
wapm login ${{ secrets.WAPM_IO_TOKEN }}
- name: Publish to wapm.io
run: cargo wapm
working-directory: wasm
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ exclude = [
[package.metadata.docs.rs]
all-features = true

[workspace]
members = ["wasm"]

[features]
default = ["text"]
text = []
Expand Down
31 changes: 31 additions & 0 deletions wasm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Manifest for the WebAssembly bindings of the `similar` crate.
[package]
name = "wasm"
version = "2.2.0"
authors = [
"Armin Ronacher <armin.ronacher@active-4.com>",
"Pierre-Étienne Meunier <pe@pijul.org>",
"Brandon Williams <bwilliams.eng@gmail.com>",
]
rust-version = "1.41"
license = "Apache-2.0"
description = "A diff library for Rust"
repository = "https://github.com/mitsuhiko/similar"
edition = "2018"
# Excluded from `cargo publish`; the release workflow publishes this package
# through `cargo wapm` instead.
publish = false
readme = "../README.md"

[lib]
# NOTE(review): `cdylib` is presumably what yields the loadable .wasm module,
# with `rlib` kept so the crate can still be used as a normal Rust dependency —
# confirm.
crate-type = ["rlib", "cdylib"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ouroboros = "0.15.5"
# The parent `similar` crate is imported under the name `upstream`; in lib.rs
# the name `similar` refers to the module generated from similar.wit.
upstream = { version = "2.2.0", path = "..", package = "similar" }
wit-bindgen-rust = { git = "https://github.com/wasmerio/wit-bindgen", branch = "wasmer", version = "0.1.0" }

# Package metadata for wapm; presumably read by cargo-wapm when packaging —
# see the cargo-wapm documentation for the meaning of each key.
[package.metadata.wapm]
namespace = "Michael-F-Bryan"
package = "similar"
abi = "none"
bindings = { wit-exports = "similar.wit", wit-bindgen = "0.1.0" }
136 changes: 136 additions & 0 deletions wasm/similar.wit
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/// Shortcut for making a line level diff.
///
/// Diffs `old` against `new` with the algorithm `alg` and returns one
/// `(tag, text)` pair per entry of the resulting diff.
diff-lines: func(alg: algorithm, old: string, new: string) -> list<tuple<change-tag, string>>

/// Shortcut for making a word level diff.
///
/// Diffs `old` against `new` with the algorithm `alg` and returns one
/// `(tag, text)` pair per entry of the resulting diff.
diff-words: func(alg: algorithm, old: string, new: string) -> list<tuple<change-tag, string>>

/// Shortcut for making a character level diff.
///
/// Diffs `old` against `new` with the algorithm `alg` and returns one
/// `(tag, text)` pair per entry of the resulting diff.
diff-chars: func(alg: algorithm, old: string, new: string) -> list<tuple<change-tag, string>>

/// Shortcut for diffing two lists of strings.
///
/// Each returned pair carries a tag together with the list of items that the
/// tag applies to.
diff-lists: func(alg: algorithm, old: list<string>, new: list<string>) -> list<tuple<change-tag, list<string>>>

/// A quick way to get a unified diff as a string.
///
/// `context-radius` and `header` are forwarded unchanged to the upstream
/// unified diff helper. NOTE(review): presumably `context-radius` is the number
/// of unchanged lines kept around each change and `header` holds the old/new
/// names printed at the top of the diff — confirm against the upstream docs.
unified-diff: func(
alg: algorithm,
old: string,
new: string,
context-radius: u32,
header: option<tuple<string, string>>,
) -> string

/// A reusable diff configuration.
///
/// Holds the algorithm and newline settings used by the `diff-*` methods, each
/// of which produces a `text-diff`.
resource config {
/// Creates a configuration with the default settings.
static default: func() -> config
/// Changes the algorithm.
///
/// The default algorithm is myers.
algorithm: func(alg: algorithm)
/// Changes the newline termination flag.
///
/// The default is automatic based on input.  This flag controls the
/// behavior of [`TextDiff::iter_changes`] and unified diff generation
/// with regards to newlines.  When the flag is set to `false` (which
/// is the default) then newlines are added.  Otherwise the newlines
/// from the source sequences are reused.
newline-terminated: func(yes: bool)
/// Creates a diff of lines.
///
/// This splits the text `old` and `new` into lines preserving newlines
/// in the input.  Line diffs are very common and because of that enjoy
/// special handling in similar.  When a line diff is created with this
/// method the `newline_terminated` flag is flipped to `true` and will
/// influence the behavior of unified diff generation.
diff-lines: func(old: string, new: string) -> text-diff
/// Creates a diff of words.
///
/// This splits the text into words and whitespace.
///
/// Note on word diffs: because the text differ will tokenize the strings
/// into small segments it can be inconvenient to work with the results
/// depending on the use case.
diff-words: func(old: string, new: string) -> text-diff
/// Creates a diff of characters.
///
/// Note on character diffs: because the text differ will tokenize the strings
/// into small segments it can be inconvenient to work with the results
/// depending on the use case.
diff-chars: func(old: string, new: string) -> text-diff
}

/// Captures diff op codes for textual diffs.
///
/// Instances are created by the `diff-lines`, `diff-words` and `diff-chars`
/// methods of `config`.
resource text-diff {
/// The name of the algorithm that created the diff.
algorithm: func() -> algorithm
/// Returns true if items in the slice are newline terminated.
///
/// This flag is used by the unified diff writer to determine if extra
/// newlines have to be added.
newline-terminated: func() -> bool
/// Return a measure of the sequences’ similarity in the range `0..=1`.
///
/// A ratio of `1.0` means the two sequences are a complete match, a ratio
/// of `0.0` would indicate completely distinct sequences.
ratio: func() -> float32
/// Returns every change of the diff as a list, including the entries
/// tagged `equal`.
changes: func() -> list<change>
}

/// The different algorithms that can be used when diffing.
enum algorithm {
/// Myers' diff algorithm.
///
/// * time: `O((N+M)D)`
/// * space: `O(N+M)`
///
/// See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
/// describing it.
///
/// The implementation of this algorithm is based on the implementation by
/// Brandon Williams.
///
/// # Heuristics
///
/// At present this implementation of Myers' does not implement any more
/// advanced heuristics that would solve some pathological cases.  For
/// instance passing two large and completely distinct sequences to the
/// algorithm will make it spin without making reasonable progress.
/// Currently, the only protection in the library against this is to pass a
/// deadline to the diffing algorithm.
///
/// For potential improvements here see
/// [similar#15](https://github.com/mitsuhiko/similar/issues/15).
myers,
/// Patience diff algorithm.
///
/// * time: `O(N log N + M log M + (N+M)D)`
/// * space: `O(N+M)`
///
/// Tends to give more human-readable outputs. See [Bram Cohen's blog
/// post](https://bramcohen.livejournal.com/73318.html) describing it.
///
/// This is based on the patience implementation of
/// [pijul](https://pijul.org/) by Pierre-Étienne Meunier.
patience,
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
///
/// * time: `O((NM)D log (M)D)`
/// * space: `O(MN)`
lcs,
}

/// The tag of a change.
///
/// Classifies an entry of a diff as unchanged, deleted, or inserted.
enum change-tag {
/// The change indicates equality (not a change).
equal,
/// The change indicates deleted text.
delete,
/// The change indicates inserted text.
insert,
}

/// A single entry of a diff, as returned by the `changes` method of
/// `text-diff`.
record change {
/// Whether the value is unchanged, deleted, or inserted.
tag: change-tag,
/// Index into the old sequence as reported by the upstream diff, if any.
/// NOTE(review): presumably absent for inserted values — confirm upstream.
old-index: option<u32>,
/// Index into the new sequence as reported by the upstream diff, if any.
/// NOTE(review): presumably absent for deleted values — confirm upstream.
new-index: option<u32>,
/// The text covered by this change.
value: string,
/// Forwarded from the upstream change's `missing_newline()` flag.
/// NOTE(review): presumably true when the value lacks a trailing newline —
/// confirm against the upstream documentation.
missing-newline: bool,
}
163 changes: 163 additions & 0 deletions wasm/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
use std::{cell::RefCell, convert::TryInto};

use upstream::TextDiffConfig;
use wit_bindgen_rust::Handle;

use crate::similar::{Algorithm, Change};

wit_bindgen_rust::export!("similar.wit");

/// Zero-sized type that implements the top-level functions declared in
/// `similar.wit`.
pub struct Similar;

/// Turns the borrowed `(tag, text)` pairs produced by the upstream helpers into
/// owned pairs that use the binding-side tag type.
fn own_text_pairs<'a, I>(pairs: I) -> Vec<(similar::ChangeTag, String)>
where
    I: IntoIterator<Item = (upstream::ChangeTag, &'a str)>,
{
    pairs
        .into_iter()
        .map(|(tag, text)| (similar::ChangeTag::from(tag), String::from(text)))
        .collect()
}

impl crate::similar::Similar for Similar {
    /// Line level diff of `old` against `new`, returned as owned pairs.
    fn diff_lines(alg: Algorithm, old: String, new: String) -> Vec<(similar::ChangeTag, String)> {
        let algorithm = upstream::Algorithm::from(alg);
        own_text_pairs(upstream::utils::diff_lines(algorithm, &old, &new))
    }

    /// Word level diff of `old` against `new`, returned as owned pairs.
    fn diff_words(alg: Algorithm, old: String, new: String) -> Vec<(similar::ChangeTag, String)> {
        let algorithm = upstream::Algorithm::from(alg);
        own_text_pairs(upstream::utils::diff_words(algorithm, &old, &new))
    }

    /// Character level diff of `old` against `new`, returned as owned pairs.
    fn diff_chars(alg: Algorithm, old: String, new: String) -> Vec<(similar::ChangeTag, String)> {
        let algorithm = upstream::Algorithm::from(alg);
        own_text_pairs(upstream::utils::diff_chars(algorithm, &old, &new))
    }

    /// Diffs two lists of strings; every returned pair owns a copy of the
    /// items its tag applies to.
    fn diff_lists(
        alg: Algorithm,
        old: Vec<String>,
        new: Vec<String>,
    ) -> Vec<(similar::ChangeTag, Vec<String>)> {
        let algorithm = upstream::Algorithm::from(alg);
        upstream::utils::diff_slices(algorithm, &old, &new)
            .into_iter()
            .map(|(tag, run)| (similar::ChangeTag::from(tag), run.to_vec()))
            .collect()
    }

    /// Renders a unified diff of `old` against `new` as a string.
    ///
    /// `context_radius` and `header` are handed to the upstream helper as-is,
    /// the header being borrowed as a pair of string slices.
    fn unified_diff(
        alg: Algorithm,
        old: String,
        new: String,
        context_radius: u32,
        header: Option<(String, String)>,
    ) -> String {
        let borrowed_header = match &header {
            Some((left, right)) => Some((left.as_str(), right.as_str())),
            None => None,
        };
        let radius = context_radius as usize;

        upstream::udiff::unified_diff(alg.into(), &old, &new, radius, borrowed_header)
    }
}

/// Binding-side wrapper around the upstream [`TextDiffConfig`].
///
/// The trait methods below all take `&self`, so the configuration is kept in a
/// [`RefCell`] to let the setters mutate it.
pub struct Config(RefCell<TextDiffConfig>);

impl crate::similar::Config for Config {
    /// Creates a handle to a configuration with the upstream defaults.
    fn default() -> Handle<Config> {
        let settings = TextDiffConfig::default();
        Handle::new(Config(RefCell::new(settings)))
    }

    /// Selects the diffing algorithm used by later `diff_*` calls.
    fn algorithm(&self, alg: Algorithm) {
        let mut settings = self.0.borrow_mut();
        settings.algorithm(alg.into());
    }

    /// Sets the newline termination flag on the wrapped configuration.
    fn newline_terminated(&self, yes: bool) {
        let mut settings = self.0.borrow_mut();
        settings.newline_terminated(yes);
    }

    /// Creates a line diff; the returned handle owns `old` and `new`.
    fn diff_lines(&self, old: String, new: String) -> Handle<TextDiff> {
        let settings = self.0.borrow();
        let diff = TextDiff::new(old, new, |old_text, new_text| {
            settings.diff_lines(old_text, new_text)
        });
        Handle::new(diff)
    }

    /// Creates a word diff; the returned handle owns `old` and `new`.
    fn diff_words(&self, old: String, new: String) -> Handle<TextDiff> {
        let settings = self.0.borrow();
        let diff = TextDiff::new(old, new, |old_text, new_text| {
            settings.diff_words(old_text, new_text)
        });
        Handle::new(diff)
    }

    /// Creates a character diff; the returned handle owns `old` and `new`.
    fn diff_chars(&self, old: String, new: String) -> Handle<TextDiff> {
        let settings = self.0.borrow();
        let diff = TextDiff::new(old, new, |old_text, new_text| {
            settings.diff_chars(old_text, new_text)
        });
        Handle::new(diff)
    }
}

/// A computed text diff bundled with the two input strings it borrows from.
///
/// The upstream `TextDiff` holds references into `old` and `new`, so this
/// struct is declared self-referencing through `ouroboros`; the diff is read
/// through the `with_diff` accessor.
#[ouroboros::self_referencing]
pub struct TextDiff {
// Owned copy of the old text; `diff` borrows from it.
old: String,
// Owned copy of the new text; `diff` borrows from it.
new: String,
#[borrows(old, new)]
#[not_covariant]
// The upstream diff; `'this` is the lifetime of the fields borrowed above.
diff: upstream::TextDiff<'this, 'this, 'this, str>,
}

impl crate::similar::TextDiff for TextDiff {
fn algorithm(&self) -> Algorithm {
self.with_diff(|d| d.algorithm()).into()
}

fn newline_terminated(&self) -> bool {
self.with_diff(|d| d.newline_terminated())
}

fn ratio(&self) -> f32 {
self.with_diff(|d| d.ratio())
}

fn changes(&self) -> Vec<Change> {
self.with_diff(|d| d.iter_all_changes().map(|c| c.into()).collect())
}
}

impl From<upstream::Algorithm> for similar::Algorithm {
fn from(value: upstream::Algorithm) -> Self {
match value {
upstream::Algorithm::Myers => similar::Algorithm::Myers,
upstream::Algorithm::Patience => similar::Algorithm::Patience,
upstream::Algorithm::Lcs => similar::Algorithm::Lcs,
}
}
}

impl From<similar::Algorithm> for upstream::Algorithm {
fn from(value: similar::Algorithm) -> Self {
match value {
similar::Algorithm::Myers => upstream::Algorithm::Myers,
similar::Algorithm::Patience => upstream::Algorithm::Patience,
similar::Algorithm::Lcs => upstream::Algorithm::Lcs,
}
}
}

impl From<upstream::Change<&'_ str>> for similar::Change {
    /// Copies an upstream change into the owned record exposed through the
    /// bindings.
    ///
    /// The tag is converted to the binding-side enum, the value is copied into
    /// an owned `String`, and the optional indices are narrowed from `usize`
    /// to the `u32` declared in `similar.wit`.
    ///
    /// # Panics
    ///
    /// Panics if an index does not fit into a `u32`. This cannot happen on
    /// targets where `usize` is 32 bits wide (such as `wasm32`), but it is
    /// possible when the crate is built for a 64-bit host.
    fn from(value: upstream::Change<&'_ str>) -> Self {
        // One conversion shared by both indices, so the invariant is stated once.
        let to_u32 = |ix: usize| -> u32 {
            ix.try_into()
                .expect("diff index does not fit into the u32 used by the WIT `change` record")
        };

        similar::Change {
            tag: value.tag().into(),
            old_index: value.old_index().map(to_u32),
            new_index: value.new_index().map(to_u32),
            value: value.value().to_string(),
            missing_newline: value.missing_newline(),
        }
    }
}

impl From<upstream::ChangeTag> for similar::ChangeTag {
fn from(value: upstream::ChangeTag) -> Self {
match value {
upstream::ChangeTag::Equal => similar::ChangeTag::Equal,
upstream::ChangeTag::Delete => similar::ChangeTag::Delete,
upstream::ChangeTag::Insert => similar::ChangeTag::Insert,
}
}
}