Skip to content

Commit

Permalink
Make slice_str similar to truncate_str
Browse files Browse the repository at this point in the history
  • Loading branch information
remi-dupre committed Feb 7, 2024
1 parent cd1a6b4 commit ce77cc5
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 149 deletions.
90 changes: 0 additions & 90 deletions src/ansi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ use std::{
str::CharIndices,
};

use crate::utils::char_width;

#[derive(Debug, Clone, Copy)]
enum State {
Start,
Expand Down Expand Up @@ -269,63 +267,8 @@ impl<'a> Iterator for AnsiCodeIterator<'a> {

// Marker impl: declares `AnsiCodeIterator` as a fused iterator.
// NOTE(review): the `next` implementation is outside this view; confirm it
// keeps yielding `None` once exhausted, as `FusedIterator` requires.
impl FusedIterator for AnsiCodeIterator<'_> {}

/// Returns the sub-slice of `s` that covers the text columns from `start`
/// (inclusive) up to `end` (exclusive).
///
/// Widths are measured with `char_width`; ANSI escape sequences count as zero
/// columns. Escape sequences located before `start` are dropped, while those
/// met once the `start` column has been reached are kept in the result.
///
/// A character that would extend past `end` is not included and stops the
/// scan, so the result can be narrower than `end - start` columns. An empty
/// string is returned when `end <= start`.
pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str {
    if start >= end {
        return "";
    }

    // `column` is the accumulated display width of the characters seen so far;
    // `first_byte..last_byte` is the byte range of `s` selected so far.
    let mut column = 0;
    let mut first_byte = 0;
    let mut last_byte = 0;

    'scan: for (fragment, is_ansi) in AnsiCodeIterator::new(s) {
        // ANSI sequences are zero columns wide, so one sitting exactly at
        // `end` still belongs to the result: stop only once strictly past it.
        if column > end {
            break;
        }

        if is_ansi {
            if column < start {
                // Before the window: push the (still empty) selection forward.
                first_byte += fragment.len();
                last_byte = first_byte;
            } else {
                // `column <= end` is guaranteed by the guard above.
                last_byte += fragment.len();
            }
            continue;
        }

        for c in fragment.chars() {
            let width = char_width(c);
            let bytes = c.len_utf8();

            if column < start {
                // Before the window: push the (still empty) selection forward.
                first_byte += bytes;
                last_byte = first_byte;
            } else if column + width > end {
                // The character would overflow the window: nothing after it
                // can be part of the result.
                break 'scan;
            } else {
                last_byte += bytes;
            }

            column += width;
        }
    }

    &s[first_byte..last_byte]
}

#[cfg(test)]
mod tests {
use crate::measure_text_width;

use super::*;

use lazy_static::lazy_static;
Expand Down Expand Up @@ -492,37 +435,4 @@ mod tests {
assert_eq!(iter.rest_slice(), "");
assert_eq!(iter.next(), None);
}

#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide (when `unicode-width` is enabled).
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
    let sliced = |start: usize, end: usize| slice_ansi_str(test_str, start, end);

    // These hold regardless of how character widths are measured.
    assert_eq!(sliced(5, 5), "");
    assert_eq!(sliced(0, test_str.len()), test_str);

    if !cfg!(feature = "unicode-width") {
        // Expectations when the `unicode-width` feature is disabled.
        assert_eq!(sliced(0, 5), "Hello\x1b[31m");
        assert_eq!(sliced(0, 6), "Hello\x1b[31m🐶\u{1b}[1m");
        assert_eq!(sliced(4, 9), "o\x1b[31m🐶\x1b[1m🐶\x1b[0m w");
        return;
    }

    // Expectations when the `unicode-width` feature is enabled.
    assert_eq!(measure_text_width(test_str), 16);

    // A wide character that does not fully fit before `end` is left out.
    assert_eq!(sliced(0, 5), "Hello\x1b[31m");
    assert_eq!(sliced(0, 6), "Hello\x1b[31m");
    assert_eq!(sliced(0, 7), "Hello\x1b[31m🐶\x1b[1m");

    // Escape sequences located before `start` are dropped.
    assert_eq!(sliced(7, 21), "\x1b[1m🐶\x1b[0m world!");
    assert_eq!(sliced(8, 21), "\x1b[0m world!");
    assert_eq!(sliced(9, 21), "\x1b[0m world!");

    assert_eq!(sliced(4, 9), "o\x1b[31m🐶\x1b[1m🐶\x1b[0m");
}
}
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ pub use crate::term::{
};
pub use crate::utils::{
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
Color, Emoji, Style, StyledObject,
set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
Attribute, Color, Emoji, Style, StyledObject,
};

#[cfg(feature = "ansi-parsing")]
pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator};
pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator};

mod common_term;
mod kb;
Expand Down
173 changes: 117 additions & 56 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::borrow::Cow;
use std::collections::BTreeSet;
use std::env;
use std::fmt;
use std::ops::Range;
use std::sync::atomic::{AtomicBool, Ordering};

use lazy_static::lazy_static;
Expand Down Expand Up @@ -724,7 +725,7 @@ fn str_width(s: &str) -> usize {
}

#[cfg(feature = "ansi-parsing")]
pub(crate) fn char_width(c: char) -> usize {
fn char_width(c: char) -> usize {
#[cfg(feature = "unicode-width")]
{
use unicode_width::UnicodeWidthChar;
Expand All @@ -737,80 +738,98 @@ pub(crate) fn char_width(c: char) -> usize {
}
}

/// Truncates a string to a certain number of characters.
/// Slice a `&str` in terms of text width. Only the text columns covered by
/// `bounds` (from `bounds.start` up to, but excluding, `bounds.end`) are kept.
///
/// This ensures that escape codes are not screwed up in the process.
/// If the maximum length is hit the string will be truncated but
/// escapes code will still be honored. If truncation takes place
/// the tail string will be appended.
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
/// If a multi-column character overlaps with the end of the interval it will
/// not be included. In such a case, the result will be less than
/// `bounds.end - bounds.start` columns wide.
///
/// This ensures that escape codes are not screwed up in the process. If a
/// non-empty `head` or `tail` is given, it is inserted between the slice and
/// the ANSI symbols collected from outside the bounds.
pub fn slice_str<'a>(s: &'a str, head: &str, bounds: Range<usize>, tail: &str) -> Cow<'a, str> {
#[cfg(feature = "ansi-parsing")]
{
use std::cmp::Ordering;
let mut iter = AnsiCodeIterator::new(s);
let mut length = 0;
let mut rv = None;

while let Some(item) = iter.next() {
match item {
(s, false) => {
if rv.is_none() {
if str_width(s) + length > width - str_width(tail) {
let ts = iter.current_slice();

let mut s_byte = 0;
let mut s_width = 0;
let rest_width = width - str_width(tail) - length;
for c in s.chars() {
s_byte += c.len_utf8();
s_width += char_width(c);
match s_width.cmp(&rest_width) {
Ordering::Equal => break,
Ordering::Greater => {
s_byte -= c.len_utf8();
break;
}
Ordering::Less => continue,
}
}

let idx = ts.len() - s.len() + s_byte;
let mut buf = ts[..idx].to_string();
buf.push_str(tail);
rv = Some(buf);
}
length += str_width(s);
}
let mut pos = 0;
let mut slice = 0..0;

// ANSI symbols outside of the slice
let mut front_ansi = String::new();
let mut back_ansi = String::new();

// Iterate through each ANSI symbol or unicode character while keeping
// track of:
// - pos: cumulated width of characters iterated so far
// - slice: char indices of the part of the string for which `pos`
// was inside bounds
for (sub, is_ansi) in AnsiCodeIterator::new(s) {
if is_ansi {
if pos < bounds.start {
// An ANSI symbol before the interval: keep for later
front_ansi.push_str(sub);
slice.start += sub.len();
slice.end = slice.start;
} else if pos <= bounds.end {
// An ANSI symbol inside of the interval: extend the slice
slice.end += sub.len();
} else {
// An ANSI symbol after the interval: keep for later
back_ansi.push_str(sub);
}
(s, true) => {
if let Some(ref mut rv) = rv {
rv.push_str(s);
} else {
for c in sub.chars() {
let c_width = char_width(c);

if pos < bounds.start {
// The char is before the interval: move the slice back
slice.start += c.len_utf8();
slice.end = slice.start;
} else if pos + c_width <= bounds.end {
// The char fits into the interval: extend the slice
slice.end += c.len_utf8();
}

pos += c_width;
}
}
}

if let Some(buf) = rv {
Cow::Owned(buf)
let slice = &s[slice];

if front_ansi.is_empty() && back_ansi.is_empty() && head.is_empty() && tail.is_empty() {
Cow::Borrowed(slice)
} else {
Cow::Borrowed(s)
Cow::Owned(front_ansi + head + slice + tail + &back_ansi)
}
}

#[cfg(not(feature = "ansi-parsing"))]
{
if s.len() <= width - tail.len() {
Cow::Borrowed(s)
let slice = s.get(bounds).unwrap_or("");

if head.is_empty() && tail.is_empty() {
Cow::Borrowed(slice)
} else {
Cow::Owned(format!(
"{}{}",
s.get(..width - tail.len()).unwrap_or_default(),
tail
))
Cow::Owned(format!("{head}{slice}{tail}"))
}
}
}

/// Truncates a string to a given width, in text columns.
///
/// When `s` (as measured by `measure_text_width`) already fits in `width`
/// columns it is returned unchanged and borrowed. Otherwise only the leading
/// `width` minus the width of `tail` columns are kept, via `slice_str`, and
/// `tail` is appended to mark the truncation.
///
/// Escape codes are not screwed up in the process: the cut is delegated to
/// `slice_str`, which handles ANSI symbols separately from the text.
///
/// If `tail` is at least as wide as `width`, no text from `s` is kept and the
/// tail is still appended.
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
    // Nothing to cut: hand the input back as is.
    if measure_text_width(s) <= width {
        return Cow::Borrowed(s);
    }

    // Reserve room for the tail; saturate so an oversized tail cannot underflow.
    let kept_columns = width.saturating_sub(measure_text_width(tail));
    slice_str(s, "", 0..kept_columns, tail)
}

/// Pads a string to fill a certain number of characters.
///
/// This will honor ansi codes correctly and allows you to align a string
Expand Down Expand Up @@ -919,8 +938,50 @@ fn test_truncate_str() {
);
}

#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";
    let sliced = |start: usize, end: usize| slice_str(test_str, "", start..end, "");

    // Bounds wide enough to cover the whole text give the input back.
    assert_eq!(sliced(0, test_str.len()), test_str);

    // The remaining expectations rely on both width measurement and ANSI
    // parsing being compiled in.
    if !(cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing")) {
        return;
    }

    assert_eq!(measure_text_width(test_str), 16);

    // An empty interval keeps no text, but the ANSI symbols remain.
    assert_eq!(sliced(5, 5), "\u{1b}[31m\u{1b}[1m\u{1b}[0m");

    // A wide character that does not fully fit before the end is left out.
    assert_eq!(sliced(0, 5), "Hello\x1b[31m\x1b[1m\x1b[0m");
    assert_eq!(sliced(0, 6), "Hello\x1b[31m\x1b[1m\x1b[0m");
    assert_eq!(sliced(0, 7), "Hello\x1b[31m🐶\x1b[1m\x1b[0m");

    assert_eq!(sliced(4, 9), "o\x1b[31m🐶\x1b[1m🐶\x1b[0m");

    // ANSI symbols located before the interval are placed in front of the slice.
    assert_eq!(sliced(7, 21), "\x1b[31m\x1b[1m🐶\x1b[0m world!");
}

#[test]
fn test_truncate_str_no_ansi() {
assert_eq!(&truncate_str("foo bar", 7, "!"), "foo bar");
assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");
assert_eq!(&truncate_str("foo bar", 5, "!"), "foo !");
assert_eq!(&truncate_str("foo bar baz", 10, "..."), "foo bar...");
Expand Down

0 comments on commit ce77cc5

Please sign in to comment.