Skip to content

Commit

Permalink
Make slice_str similar to truncate_str
Browse files Browse the repository at this point in the history
  • Loading branch information
remi-dupre committed Feb 7, 2024
1 parent cd1a6b4 commit 3324138
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 140 deletions.
90 changes: 0 additions & 90 deletions src/ansi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ use std::{
str::CharIndices,
};

use crate::utils::char_width;

#[derive(Debug, Clone, Copy)]
enum State {
Start,
Expand Down Expand Up @@ -269,63 +267,8 @@ impl<'a> Iterator for AnsiCodeIterator<'a> {

// Marker impl: advertises `AnsiCodeIterator` as a fused iterator, i.e. one
// that promises to keep yielding `None` once `next` has returned `None`.
// Upholding that promise is the responsibility of the `Iterator` impl above.
impl<'a> FusedIterator for AnsiCodeIterator<'a> {}

/// Slice a `&str` in terms of text width. Only the text columns in the
/// half-open range `start..end` are kept.
///
/// ANSI escape sequences have no width: those met before column `start` are
/// dropped, and those met inside the kept range (including right at column
/// `end`, before the first excluded character) are part of the result.
///
/// A character that begins before `start` is skipped entirely. If a
/// multi-column character overlaps with the end of the interval it will not
/// be included; in such a case, the result will be less than `end - start`
/// columns wide.
///
/// Returns an empty string when `end <= start`.
pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str {
    if end <= start {
        return "";
    }

    // Current text column, and the byte range of `s` selected so far.
    let mut pos = 0;
    let mut res_start = 0;
    let mut res_end = 0;

    'outer: for (sub, is_ansi) in AnsiCodeIterator::new(s) {
        // ANSI sequences have a width of 0, so the scan may only be
        // interrupted early once the current column is strictly past `end`.
        if pos > end {
            break;
        }

        if is_ansi {
            if pos < start {
                // Still before the slice: move both bounds past the escape.
                res_start += sub.len();
                res_end = res_start;
            } else {
                // `pos <= end` is guaranteed by the guard above.
                res_end += sub.len();
            }
            continue;
        }

        for c in sub.chars() {
            let c_width = char_width(c);

            if pos < start {
                res_start += c.len_utf8();
                res_end = res_start;
            } else if pos + c_width <= end {
                res_end += c.len_utf8();
            } else {
                // This character would spill past `end`: the slice is done.
                break 'outer;
            }

            pos += c_width;
        }
    }

    &s[res_start..res_end]
}

#[cfg(test)]
mod tests {
use crate::measure_text_width;

use super::*;

use lazy_static::lazy_static;
Expand Down Expand Up @@ -492,37 +435,4 @@ mod tests {
assert_eq!(iter.rest_slice(), "");
assert_eq!(iter.next(), None);
}

#[test]
fn test_slice_ansi_str() {
    // Note that 🐶 is two columns wide
    let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";

    // Empty and whole-string ranges behave the same with or without
    // width-aware measurement.
    assert_eq!(slice_ansi_str(test_str, 5, 5), "");
    assert_eq!(slice_ansi_str(test_str, 0, test_str.len()), test_str);

    if cfg!(feature = "unicode-width") {
        // Precondition for the column arithmetic below.
        assert_eq!(measure_text_width(test_str), 16);

        assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
        // Only half of the first 🐶 fits, so it is left out.
        assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m");
        assert_eq!(slice_ansi_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m");
        assert_eq!(slice_ansi_str(test_str, 7, 21), "\x1b[1m🐶\x1b[0m world!");
        // Starting in the middle of the second 🐶 skips it entirely.
        assert_eq!(slice_ansi_str(test_str, 8, 21), "\x1b[0m world!");
        assert_eq!(slice_ansi_str(test_str, 9, 21), "\x1b[0m world!");

        assert_eq!(
            slice_ansi_str(test_str, 4, 9),
            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"
        );
    } else {
        // Without `unicode-width` every character counts as one column.
        assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m");
        assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m🐶\u{1b}[1m");

        assert_eq!(
            slice_ansi_str(test_str, 4, 9),
            "o\x1b[31m🐶\x1b[1m🐶\x1b[0m w"
        );
    }
}
}
6 changes: 3 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ pub use crate::term::{
};
pub use crate::utils::{
colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with,
set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute,
Color, Emoji, Style, StyledObject,
set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment,
Attribute, Color, Emoji, Style, StyledObject,
};

#[cfg(feature = "ansi-parsing")]
pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator};
pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator};

mod common_term;
mod kb;
Expand Down
139 changes: 92 additions & 47 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ fn str_width(s: &str) -> usize {
}

#[cfg(feature = "ansi-parsing")]
pub(crate) fn char_width(c: char) -> usize {
fn char_width(c: char) -> usize {
#[cfg(feature = "unicode-width")]
{
use unicode_width::UnicodeWidthChar;
Expand All @@ -737,66 +737,90 @@ pub(crate) fn char_width(c: char) -> usize {
}
}

/// Truncates a string to a certain number of characters.
/// Slice a `&str` in terms of text width. This means that only the text
/// columns strictly between `start` and `end` will be kept.
///
/// If a multi-columns character overlaps with the end of the interval it will
/// not be included. In such a case, the result will be less than `end - start`
/// columns wide.
///
/// This ensures that escape codes are not screwed up in the process.
/// If the maximum length is hit the string will be truncated but
/// escape codes will still be honored. If truncation takes place
/// the tail string will be appended.
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
pub fn slice_str(s: &str, start: usize, end: usize) -> Cow<'_, str> {
#[cfg(feature = "ansi-parsing")]
{
use std::cmp::Ordering;
let mut iter = AnsiCodeIterator::new(s);
let mut length = 0;
let mut rv = None;

while let Some(item) = iter.next() {
match item {
(s, false) => {
if rv.is_none() {
if str_width(s) + length > width - str_width(tail) {
let ts = iter.current_slice();

let mut s_byte = 0;
let mut s_width = 0;
let rest_width = width - str_width(tail) - length;
for c in s.chars() {
s_byte += c.len_utf8();
s_width += char_width(c);
match s_width.cmp(&rest_width) {
Ordering::Equal => break,
Ordering::Greater => {
s_byte -= c.len_utf8();
break;
}
Ordering::Less => continue,
}
}

let idx = ts.len() - s.len() + s_byte;
let mut buf = ts[..idx].to_string();
buf.push_str(tail);
rv = Some(buf);
}
length += str_width(s);
}
let mut pos = 0;
let mut slice_start = 0;
let mut slice_end = 0;

// ANSI symbols outside of the slice
let mut front_ansi = String::new();
let mut back_ansi = String::new();

for (sub, is_ansi) in AnsiCodeIterator::new(s) {
if is_ansi {
if pos < start {
front_ansi.push_str(sub);
slice_start += sub.len();
slice_end = slice_start;
} else if pos <= end {
slice_end += sub.len();
} else {
back_ansi.push_str(sub);
}
(s, true) => {
if let Some(ref mut rv) = rv {
rv.push_str(s);
} else {
for c in sub.chars() {
let c_width = char_width(c);

if pos < start {
slice_start += c.len_utf8();
slice_end = slice_start;
} else if pos + c_width <= end {
slice_end += c.len_utf8();
}

pos += char_width(c);
}
}
}

if let Some(buf) = rv {
Cow::Owned(buf)
let slice = &s[slice_start..slice_end];

if front_ansi.is_empty() && back_ansi.is_empty() {
Cow::Borrowed(slice)
} else {
Cow::Borrowed(s)
Cow::Owned(front_ansi + slice + &back_ansi)
}
}
#[cfg(not(feature = "ansi-parsing"))]
{
Cow::Borrowed(s.get(start..end).unwrap_or_default())
}
}

/// Truncates a string to a certain number of characters.
///
/// This ensures that escape codes are not screwed up in the process.
/// If the maximum length is hit the string will be truncated but
/// escape codes will still be honored. If truncation takes place
/// the tail string will be appended.
pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> {
#[cfg(feature = "ansi-parsing")]
{
let s_width = measure_text_width(s);

if s_width <= width {
return Cow::Borrowed(s);
}

let tail_width = measure_text_width(tail);
let mut sliced = slice_str(s, 0, width.saturating_sub(tail_width));

if !tail.is_empty() {
sliced.to_mut().push_str(tail);
}

sliced
}
#[cfg(not(feature = "ansi-parsing"))]
{
if s.len() <= width - tail.len() {
Expand Down Expand Up @@ -919,6 +943,27 @@ fn test_truncate_str() {
);
}

#[test]
fn test_slice_ansi_str() {
    // 🐶 occupies two terminal columns
    let input = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!";

    // A range covering the whole string must give it back unchanged,
    // whatever features are enabled.
    assert_eq!(slice_str(input, 0, input.len()), input);

    // The remaining expectations rely on both width-aware measurement and
    // ANSI parsing being compiled in.
    let width_and_ansi = cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing");
    if !width_and_ansi {
        return;
    }

    assert_eq!(slice_str(input, 5, 5), "\u{1b}[31m\u{1b}[1m\u{1b}[0m");
    assert_eq!(measure_text_width(input), 16);

    // (start, end, expected slice)
    let cases = [
        (0, 5, "Hello\x1b[31m\x1b[1m\x1b[0m"),
        (0, 6, "Hello\x1b[31m\x1b[1m\x1b[0m"),
        (0, 7, "Hello\x1b[31m🐶\x1b[1m\x1b[0m"),
        (4, 9, "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"),
        (7, 21, "\x1b[31m\x1b[1m🐶\x1b[0m world!"),
    ];

    for &(start, end, expected) in cases.iter() {
        assert_eq!(slice_str(input, start, end), expected);
    }
}

#[test]
fn test_truncate_str_no_ansi() {
assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");
Expand Down

0 comments on commit 3324138

Please sign in to comment.