Skip to content

Commit

Permalink
syntax: add 'std' feature
Browse files Browse the repository at this point in the history
In effect, this adds support for no_std by depending only on 'core' and
'alloc'. There is still currently some benefit to enabling std support,
namely, getting the 'std::error::Error' trait impls for the various
error types. (Although, it seems like the 'Error' trait is finally going
to be moved to 'core'.) Otherwise, the only 'std' things we use are in
tests for tweaking stack sizes.

This is the first step in an effort to make 'regex' itself work without
depending on 'std'. That will be more precarious, since 'regex' uses
things like HashMap and Mutex that we'll need to find a way around.
Getting around HashMap is easy (just use BTreeMap), but figuring out how
to synchronize the threadpool will be interesting.

Ref #476, Ref #477
  • Loading branch information
BurntSushi committed Mar 5, 2023
1 parent 476d494 commit 317ba15
Show file tree
Hide file tree
Showing 18 changed files with 246 additions and 179 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Expand Up @@ -14,7 +14,8 @@ finite automata and guarantees linear time matching on all inputs.
categories = ["text-processing"]
autotests = false
exclude = ["/scripts/*", "/.github/*"]
edition = "2018"
edition = "2021"
resolver = "2"

[workspace]
members = [
Expand Down
3 changes: 2 additions & 1 deletion regex-syntax/Cargo.toml
Expand Up @@ -13,7 +13,8 @@ edition = "2021"
# Features are documented in the "Crate features" section of the crate docs:
# https://docs.rs/regex-syntax/*/#crate-features
[features]
default = ["unicode"]
default = ["std", "unicode"]
std = []

unicode = [
"unicode-age",
Expand Down
32 changes: 17 additions & 15 deletions regex-syntax/src/ast/mod.rs
Expand Up @@ -2,8 +2,9 @@
Defines an abstract syntax for regular expressions.
*/

use std::cmp::Ordering;
use std::fmt;
use core::cmp::Ordering;

use alloc::{boxed::Box, string::String, vec, vec::Vec};

pub use crate::ast::visitor::{visit, Visitor};

Expand Down Expand Up @@ -174,23 +175,24 @@ pub enum ErrorKind {
UnsupportedLookAround,
}

#[cfg(feature = "std")]
impl std::error::Error for Error {}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl core::fmt::Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
crate::error::Formatter::from(self).fmt(f)
}
}

impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl core::fmt::Display for ErrorKind {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
use self::ErrorKind::*;
match *self {
CaptureLimitExceeded => write!(
f,
"exceeded the maximum number of \
capturing groups ({})",
::std::u32::MAX
u32::MAX
),
ClassEscapeInvalid => {
write!(f, "invalid escape sequence found in character class")
Expand Down Expand Up @@ -283,8 +285,8 @@ pub struct Span {
pub end: Position,
}

impl fmt::Debug for Span {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl core::fmt::Debug for Span {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(f, "Span({:?}, {:?})", self.start, self.end)
}
}
Expand Down Expand Up @@ -316,8 +318,8 @@ pub struct Position {
pub column: usize,
}

impl fmt::Debug for Position {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl core::fmt::Debug for Position {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
write!(
f,
"Position(o: {:?}, l: {:?}, c: {:?})",
Expand Down Expand Up @@ -497,8 +499,8 @@ impl Ast {
///
/// This implementation uses constant stack space and heap space proportional
/// to the size of the `Ast`.
impl fmt::Display for Ast {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl core::fmt::Display for Ast {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
use crate::ast::print::Printer;
Printer::new().print(self, f)
}
Expand Down Expand Up @@ -1315,7 +1317,7 @@ pub enum Flag {
/// space but heap space proportional to the depth of the `Ast`.
impl Drop for Ast {
fn drop(&mut self) {
use std::mem;
use core::mem;

match *self {
Ast::Empty(_)
Expand Down Expand Up @@ -1365,7 +1367,7 @@ impl Drop for Ast {
/// stack space but heap space proportional to the depth of the `ClassSet`.
impl Drop for ClassSet {
fn drop(&mut self) {
use std::mem;
use core::mem;

match *self {
ClassSet::Item(ref item) => match *item {
Expand Down
57 changes: 30 additions & 27 deletions regex-syntax/src/ast/parse.rs
Expand Up @@ -2,17 +2,26 @@
This module provides a regular expression parser.
*/

use std::borrow::Borrow;
use std::cell::{Cell, RefCell};
use std::mem;
use std::result;

use crate::ast::{self, Ast, Position, Span};
use crate::either::Either;

use crate::is_meta_character;

type Result<T> = result::Result<T, ast::Error>;
use core::{
borrow::Borrow,
cell::{Cell, RefCell},
mem,
};

use alloc::{
boxed::Box,
string::{String, ToString},
vec,
vec::Vec,
};

use crate::{
ast::{self, Ast, Position, Span},
either::Either,
is_meta_character,
};

type Result<T> = core::result::Result<T, ast::Error>;

/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
Expand Down Expand Up @@ -1533,9 +1542,6 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
/// Assuming the preconditions are met, this routine can never fail.
#[inline(never)]
fn parse_octal(&self) -> ast::Literal {
use std::char;
use std::u32;

assert!(self.parser().octal);
assert!('0' <= self.char() && self.char() <= '7');
let start = self.pos();
Expand Down Expand Up @@ -1600,9 +1606,6 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
&self,
kind: ast::HexLiteralKind,
) -> Result<ast::Literal> {
use std::char;
use std::u32;

let mut scratch = self.parser().scratch.borrow_mut();
scratch.clear();

Expand Down Expand Up @@ -1646,9 +1649,6 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
&self,
kind: ast::HexLiteralKind,
) -> Result<ast::Literal> {
use std::char;
use std::u32;

let mut scratch = self.parser().scratch.borrow_mut();
scratch.clear();

Expand Down Expand Up @@ -2146,7 +2146,7 @@ impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
let new = self.depth.checked_add(1).ok_or_else(|| {
self.p.error(
span.clone(),
ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
ast::ErrorKind::NestLimitExceeded(u32::MAX),
)
})?;
let limit = self.p.parser().nest_limit;
Expand Down Expand Up @@ -2297,11 +2297,14 @@ fn specialize_err<T>(

#[cfg(test)]
mod tests {
use std::ops::Range;
use core::ops::Range;

use alloc::format;

use super::{Parser, ParserBuilder, ParserI, Primitive};
use crate::ast::{self, Ast, Position, Span};

use super::*;

// Our own assert_eq, which has slightly better formatting (but honestly
// still kind of crappy).
macro_rules! assert_eq {
Expand Down Expand Up @@ -4272,7 +4275,7 @@ bar
Ok(Primitive::Literal(ast::Literal {
span: span(0..pat.len()),
kind: ast::LiteralKind::Octal,
c: ::std::char::from_u32(i).unwrap(),
c: char::from_u32(i).unwrap(),
}))
);
}
Expand Down Expand Up @@ -4347,7 +4350,7 @@ bar
Ok(Primitive::Literal(ast::Literal {
span: span(0..pat.len()),
kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
c: ::std::char::from_u32(i).unwrap(),
c: char::from_u32(i).unwrap(),
}))
);
}
Expand Down Expand Up @@ -4378,7 +4381,7 @@ bar
#[test]
fn parse_hex_four() {
for i in 0..65536 {
let c = match ::std::char::from_u32(i) {
let c = match char::from_u32(i) {
None => continue,
Some(c) => c,
};
Expand Down Expand Up @@ -4442,7 +4445,7 @@ bar
#[test]
fn parse_hex_eight() {
for i in 0..65536 {
let c = match ::std::char::from_u32(i) {
let c = match char::from_u32(i) {
None => continue,
Some(c) => c,
};
Expand Down
14 changes: 10 additions & 4 deletions regex-syntax/src/ast/print.rs
Expand Up @@ -2,10 +2,13 @@
This module provides a regular expression printer for `Ast`.
*/

use std::fmt;
use core::fmt;

use crate::ast::visitor::{self, Visitor};
use crate::ast::{self, Ast};
use crate::ast::{
self,
visitor::{self, Visitor},
Ast,
};

/// A builder for constructing a printer.
///
Expand Down Expand Up @@ -395,9 +398,12 @@ impl<W: fmt::Write> Writer<W> {

#[cfg(test)]
mod tests {
use super::Printer;
use alloc::string::String;

use crate::ast::parse::ParserBuilder;

use super::*;

fn roundtrip(given: &str) {
roundtrip_with(|b| b, given);
}
Expand Down
10 changes: 5 additions & 5 deletions regex-syntax/src/ast/visitor.rs
@@ -1,4 +1,4 @@
use std::fmt;
use alloc::{vec, vec::Vec};

use crate::ast::{self, Ast};

Expand Down Expand Up @@ -475,8 +475,8 @@ impl<'a> ClassInduct<'a> {
}
}

impl<'a> fmt::Debug for ClassFrame<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl<'a> core::fmt::Debug for ClassFrame<'a> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let x = match *self {
ClassFrame::Union { .. } => "Union",
ClassFrame::Binary { .. } => "Binary",
Expand All @@ -487,8 +487,8 @@ impl<'a> fmt::Debug for ClassFrame<'a> {
}
}

impl<'a> fmt::Debug for ClassInduct<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl<'a> core::fmt::Debug for ClassInduct<'a> {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
let x = match *self {
ClassInduct::Item(it) => match *it {
ast::ClassSetItem::Empty(_) => "Item(Empty)",
Expand Down
19 changes: 13 additions & 6 deletions regex-syntax/src/error.rs
@@ -1,9 +1,13 @@
use std::cmp;
use std::fmt;
use std::result;
use core::{cmp, fmt, result};

use crate::ast;
use crate::hir;
use alloc::{
format,
string::{String, ToString},
vec,
vec::Vec,
};

use crate::{ast, hir};

/// A type alias for dealing with errors returned by this crate.
pub type Result<T> = result::Result<T, Error>;
Expand Down Expand Up @@ -35,6 +39,7 @@ impl From<hir::Error> for Error {
}
}

#[cfg(feature = "std")]
impl std::error::Error for Error {}

impl fmt::Display for Error {
Expand Down Expand Up @@ -266,11 +271,13 @@ impl<'p> Spans<'p> {
}

fn repeat_char(c: char, count: usize) -> String {
::std::iter::repeat(c).take(count).collect()
core::iter::repeat(c).take(count).collect()
}

#[cfg(test)]
mod tests {
use alloc::string::ToString;

use crate::ast::parse::Parser;

fn assert_panic_message(pattern: &str, expected_msg: &str) {
Expand Down
8 changes: 3 additions & 5 deletions regex-syntax/src/hir/interval.rs
@@ -1,8 +1,6 @@
use std::char;
use std::cmp;
use std::fmt::Debug;
use std::slice;
use std::u8;
use core::{char, cmp, fmt::Debug, slice};

use alloc::vec::Vec;

use crate::unicode;

Expand Down

0 comments on commit 317ba15

Please sign in to comment.