Skip to content

Commit

Permalink
datamodel: implement PSL string literal grammar proposal
Browse files Browse the repository at this point in the history
Simplify pest grammar of string literals

We want to tokenize invalid escape sequences and report them as such
later in validation.

This results in better error messages across the board.

closes prisma/prisma#4167
  • Loading branch information
tomhoule committed Jun 22, 2022
1 parent f202450 commit af4cc1c
Show file tree
Hide file tree
Showing 16 changed files with 391 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -353,3 +353,41 @@ async fn introspecting_json_defaults_on_cockroach(api: &TestApi) -> TestResult {

Ok(())
}

/// Introspection test (CockroachDB): a `TEXT` column whose default is a
/// dollar-quoted, multi-line string containing backslashes and double quotes
/// must be rendered in the datamodel as a single escaped string literal inside
/// `@default(...)`.
///
/// The setup SQL is passed to `api.raw_cmd`, and the resulting datamodel is
/// compared against the inline `expect!` snapshot.
///
/// NOTE(review): the snapshot contains the literal text `x16` between `->` and
/// `<-`; presumably this is how the control character in the default comes back
/// from CockroachDB — confirm this is the intended rendering.
#[test_connector(tags(CockroachDb))]
async fn string_defaults_that_need_escaping(api: &TestApi) -> TestResult {
// Table definition; the default value is delimited by `$$ ... $$`.
let setup = r#"
CREATE TABLE "stringstest" (
id INTEGER PRIMARY KEY,
needs_escaping TEXT NOT NULL DEFAULT $$
abc def
backspaces: \abcd\
(tab character)
and "quotes" and a vertical tabulation here -><-
$$
);
"#;

api.raw_cmd(setup).await;

// Expected datamodel: newlines, backslashes and quotes appear as escape
// sequences inside the `@default("...")` string literal.
let expected = expect![[r#"
generator client {
provider = "prisma-client-js"
}
datasource db {
provider = "cockroachdb"
url = "env(TEST_DATABASE_URL)"
}
model stringstest {
id Int @id
needs_escaping String @default("\nabc def\nbackspaces: \\abcd\\\n\t(tab character)\nand \"quotes\" and a vertical tabulation here ->x16<-\n\n")
}
"#]];

api.expect_datamodel(&expected).await;

Ok(())
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,55 @@ use barrel::types;
use indoc::indoc;
use introspection_engine_tests::test_api::*;

/// Introspection test (Postgres): a `TEXT` column whose default is a
/// dollar-quoted, multi-line string containing backslashes and double quotes
/// must be rendered in the datamodel as a single escaped string literal inside
/// `@default(...)`.
///
/// The setup SQL is passed to `api.raw_cmd`, and the resulting datamodel is
/// compared against the inline `expect!` snapshot. In the snapshot, the
/// character between `->` and `<-` is expected as the escape `\u0016`.
#[test_connector(tags(Postgres))]
async fn string_defaults_that_need_escaping(api: &TestApi) -> TestResult {
// Table definition; the default value is delimited by `$$ ... $$`.
let setup = r#"
CREATE TABLE "stringstest" (
id INTEGER PRIMARY KEY,
needs_escaping TEXT NOT NULL DEFAULT $$
abc def
backspaces: \abcd\
(tab character)
and "quotes" and a vertical tabulation here -><-
$$
);
"#;

api.raw_cmd(setup).await;

// Expected datamodel: newlines, backslashes, quotes and the control character
// appear as escape sequences inside the `@default("...")` string literal.
let expected = expect![[r#"
generator client {
provider = "prisma-client-js"
}
datasource db {
provider = "postgresql"
url = "env(TEST_DATABASE_URL)"
}
model stringstest {
id Int @id
needs_escaping String @default("\nabc def\nbackspaces: \\abcd\\\n\t(tab character)\nand \"quotes\" and a vertical tabulation here ->\u0016<-\n\n")
}
"#]];

api.expect_datamodel(&expected).await;

Ok(())
}

#[test_connector(tags(Postgres))]
async fn a_table_with_descending_unique(api: &TestApi) -> TestResult {
let setup = indoc! {r#"
let setup = r#"
CREATE TABLE "A" (
id INTEGER NOT NULL,
a INTEGER NOT NULL,
CONSTRAINT A_pkey PRIMARY KEY (id)
);
CREATE UNIQUE INDEX "A_a_key" ON "A" (a DESC);
"#};
"#;

api.raw_cmd(setup).await;

Expand Down
14 changes: 5 additions & 9 deletions libs/datamodel/core/tests/config/datasources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,21 +127,17 @@ fn unescaped_windows_paths_give_a_good_error() {
"#};

let expect = expect![[r#"
[1;91merror[0m: [1mError validating: This line is not a valid definition within a datasource. If the value is a windows-style path, `\` must be escaped as `\\`.[0m
[1;91merror[0m: [1mUnknown escape sequence. If the value is a windows-style path, `\` must be escaped as `\\`.[0m
--> schema.prisma:3
 | 
 2 |  provider = "sqlite"
 3 |  url = "file:c:\Windows32\data.db"
 4 | }
 | 
error: Argument "url" is missing in data source block "ds".
--> schema.prisma:1
 3 |  url = "file:c:\Windows32\data.db"
 | 
error: Unknown escape sequence. If the value is a windows-style path, `\` must be escaped as `\\`.
--> schema.prisma:3
 | 
 1 | datasource ds {
 2 |  provider = "sqlite"
 3 |  url = "file:c:\Windows32\data.db"
 4 | }
 3 |  url = "file:c:\Windows32\data.db"
 | 
"#]];

Expand Down
5 changes: 3 additions & 2 deletions libs/datamodel/core/tests/config/generators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,12 @@ fn hidden_preview_features_setting_must_work() {

expected.assert_eq(&rendered);
}

#[test]
fn back_slashes_in_providers_must_work() {
let schema = indoc! {r#"
generator mygen {
provider = "../folder\ with\\ space/my\ generator.js"
provider = "../folder\twith\ttabs/my\tgenerator.js"
}
"#};

Expand All @@ -159,7 +160,7 @@ fn back_slashes_in_providers_must_work() {
"name": "mygen",
"provider": {
"fromEnvVar": null,
"value": "../folder with\\ space/my generator.js"
"value": "../folder\twith\ttabs/my\tgenerator.js"
},
"output": null,
"config": {},
Expand Down
79 changes: 77 additions & 2 deletions libs/datamodel/core/tests/parsing/literals.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::common::parse;
use crate::common::*;
use indoc::indoc;
use pretty_assertions::assert_eq;
use std::path::Path;
Expand Down Expand Up @@ -34,7 +34,7 @@ fn strings_with_quotes_are_unescaped() {
}

#[test]
fn strings_with_newlines_are_unescpaed() {
fn strings_with_newlines_are_unescaped() {
let input = indoc!(
r#"
model Category {
Expand All @@ -59,6 +59,81 @@ fn strings_with_newlines_are_unescpaed() {
);
}

/// `\uXXXX` escapes inside string literals must be decoded when the schema is
/// parsed: both lower- and upper-case hex digits are accepted, and a UTF-16
/// surrogate pair written as two consecutive escapes yields one character.
#[test]
fn strings_with_escaped_unicode_codepoints_are_unescaped() {
    let schema = indoc!(
        r#"
model Category {
id String @id
name String @default("mfw \u56e7 - \u56E7 ^^")
// Escaped UTF-16 with surrogate pair (rolling eyes emoji).
nameUtf16 String @default("oh my \ud83d\ude44...")
}"#
    );

    let mut datamodel = parse(schema);
    let category = datamodel
        .models_mut()
        .find(|model| model.name == "Category")
        .unwrap();

    // (field name, expected decoded default), checked in declaration order.
    let cases = [("name", "mfw 囧 - 囧 ^^"), ("nameUtf16", "oh my 🙄...")];

    for (field_name, expected_default) in cases {
        let field = category
            .scalar_fields()
            .find(|candidate| candidate.name == field_name)
            .unwrap();

        let decoded = field
            .default_value
            .as_ref()
            .unwrap()
            .as_single()
            .unwrap()
            .as_string()
            .unwrap();

        assert_eq!(decoded, expected_default);
    }
}

/// A string literal containing malformed `\u` escapes must be rejected with one
/// "Invalid unicode escape sequence." error per bad escape.
///
/// The default value below contains three of them: `\uD802` (a surrogate code
/// unit with no partner), `\ut` (no hex digits) and `\u12` (too few hex
/// digits). The snapshot expects three errors, all reported on line 3 of the
/// schema.
#[test]
fn string_literals_with_invalid_unicode_escapes() {
let input = indoc!(
r#"
model Category {
id String @id
name String @default("Something \uD802 \ut \u12")
}"#
);

// Rendered diagnostics expected for `input`.
let expectation = expect![[r#"
error: Invalid unicode escape sequence.
--> schema.prisma:3
 | 
 2 |  id String @id
 3 |  name String @default("Something \uD802 \ut \u12")
 | 
error: Invalid unicode escape sequence.
--> schema.prisma:3
 | 
 2 |  id String @id
 3 |  name String @default("Something \uD802 \ut \u12")
 | 
error: Invalid unicode escape sequence.
--> schema.prisma:3
 | 
 2 |  id String @id
 3 |  name String @default("Something \uD802 \ut \u12")
 | 
"#]];

expect_error(input, &expectation);
}

#[test]
fn relative_sqlite_paths_can_be_modified() {
let schema = indoc!(
Expand Down
15 changes: 8 additions & 7 deletions libs/datamodel/schema-ast/src/parser/datamodel.pest
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,13 @@ number = @{ ASCII_DIGIT+ }

numeric_literal = @{ ("-")? ~ ASCII_DIGIT+ ~("." ~ ASCII_DIGIT+)? }

// String literals with support for escaped characters.
string_escaped_predefined = { "n" | "r" | "t" | "\\" | "0" | "\"" | "'" | SPACE_SEPARATOR }
string_escape = { "\\" ~ string_escaped_predefined }
// This is only used to escape in the parser. The string above is still treated as atomic.
string_raw = { (!("\\" | "\"" | NEWLINE ) ~ ANY)+ }
string_content = ${ (string_raw | string_escape )* }
string_literal = { "\"" ~ string_content ~ "\"" }
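// String literals. We follow the grammar of JSON strings, with one deliberate
// difference: at the grammar level a backslash may be followed by any
// character. Invalid escape sequences are therefore tokenized here and
// reported later, during validation.
// References:
// - https://www.ietf.org/rfc/rfc4627.txt
// - https://www.json.org/json-en.html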
UNICODE_CONTROL_CHARACTER = _{ '\u{0000}'..'\u{001F}' }
string_escape = _{ "\\" ~ ANY }
string_content = @{ (string_escape | !("\"" | UNICODE_CONTROL_CHARACTER) ~ ANY)* }
string_literal = ${ "\"" ~ string_content ~ "\"" }

constant_literal = @{ path_identifier }
16 changes: 11 additions & 5 deletions libs/datamodel/schema-ast/src/parser/parse_arguments.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
use diagnostics::Diagnostics;

use super::{
helpers::{parsing_catch_all, ToIdentifier, Token, TokenExtensions},
parse_expression::parse_expression,
Rule,
};
use crate::ast;

pub(crate) fn parse_arguments_list(token: &Token<'_>, arguments: &mut ast::ArgumentsList) {
pub(crate) fn parse_arguments_list(
token: &Token<'_>,
arguments: &mut ast::ArgumentsList,
diagnostics: &mut Diagnostics,
) {
debug_assert_eq!(token.as_rule(), Rule::arguments_list);
for current in token.relevant_children() {
match current.as_rule() {
// This is a named arg.
Rule::named_argument => arguments.arguments.push(parse_named_arg(&current)),
Rule::named_argument => arguments.arguments.push(parse_named_arg(&current, diagnostics)),
// This is an unnamed arg.
Rule::expression => arguments.arguments.push(ast::Argument {
name: None,
value: parse_expression(&current),
value: parse_expression(&current, diagnostics),
span: ast::Span::from(current.as_span()),
}),
// This is an argument without a value.
Expand All @@ -36,15 +42,15 @@ pub(crate) fn parse_arguments_list(token: &Token<'_>, arguments: &mut ast::Argum
}
}

fn parse_named_arg(token: &Token<'_>) -> ast::Argument {
fn parse_named_arg(token: &Token<'_>, diagnostics: &mut Diagnostics) -> ast::Argument {
debug_assert_eq!(token.as_rule(), Rule::named_argument);
let mut name: Option<ast::Identifier> = None;
let mut argument: Option<ast::Expression> = None;

for current in token.relevant_children() {
match current.as_rule() {
Rule::argument_name => name = Some(current.to_id()),
Rule::expression => argument = Some(parse_expression(&current)),
Rule::expression => argument = Some(parse_expression(&current, diagnostics)),
_ => parsing_catch_all(&current, "attribute argument"),
}
}
Expand Down
6 changes: 3 additions & 3 deletions libs/datamodel/schema-ast/src/parser/parse_attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ use super::{
};
use crate::{ast::*, parser::parse_arguments::parse_arguments_list};

pub fn parse_attribute(token: &Token<'_>) -> Attribute {
pub fn parse_attribute(token: &Token<'_>, diagnostics: &mut diagnostics::Diagnostics) -> Attribute {
let mut name: Option<Identifier> = None;
let mut arguments: ArgumentsList = ArgumentsList::default();

for current in token.relevant_children() {
match current.as_rule() {
Rule::attribute => return parse_attribute(&current),
Rule::attribute => return parse_attribute(&current, diagnostics),
Rule::attribute_name => name = Some(current.to_id()),
Rule::arguments_list => parse_arguments_list(&current, &mut arguments),
Rule::arguments_list => parse_arguments_list(&current, &mut arguments, diagnostics),
_ => parsing_catch_all(&current, "attribute"),
}
}
Expand Down
4 changes: 2 additions & 2 deletions libs/datamodel/schema-ast/src/parser/parse_composite_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub(crate) fn parse_composite_type(token: &Token<'_>, diagnostics: &mut Diagnost
Rule::TYPE_KEYWORD => (),
Rule::non_empty_identifier => name = Some(current.to_id()),
Rule::block_level_attribute => {
let attr = parse_attribute(&current);
let attr = parse_attribute(&current, diagnostics);

let err = match attr.name.name.as_str() {
"map" => {
Expand Down Expand Up @@ -67,7 +67,7 @@ pub(crate) fn parse_composite_type(token: &Token<'_>, diagnostics: &mut Diagnost

diagnostics.push_error(err);
}
Rule::field_declaration => match parse_field(&name.as_ref().unwrap().name, &current) {
Rule::field_declaration => match parse_field(&name.as_ref().unwrap().name, &current, diagnostics) {
Ok(field) => {
for attr in field.attributes.iter() {
let error = match attr.name.name.as_str() {
Expand Down
13 changes: 9 additions & 4 deletions libs/datamodel/schema-ast/src/parser/parse_enum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ pub fn parse_enum(token: &Token<'_>, diagnostics: &mut Diagnostics) -> Enum {
for current in token.relevant_children() {
match current.as_rule() {
Rule::non_empty_identifier => name = Some(current.to_id()),
Rule::block_level_attribute => attributes.push(parse_attribute(&current)),
Rule::enum_value_declaration => match parse_enum_value(&name.as_ref().unwrap().name, &current) {
Rule::block_level_attribute => attributes.push(parse_attribute(&current, diagnostics)),
Rule::enum_value_declaration => match parse_enum_value(&name.as_ref().unwrap().name, &current, diagnostics)
{
Ok(enum_value) => values.push(enum_value),
Err(err) => diagnostics.push_error(err),
},
Expand Down Expand Up @@ -45,7 +46,11 @@ pub fn parse_enum(token: &Token<'_>, diagnostics: &mut Diagnostics) -> Enum {
}
}

fn parse_enum_value(enum_name: &str, token: &Token<'_>) -> Result<EnumValue, DatamodelError> {
fn parse_enum_value(
enum_name: &str,
token: &Token<'_>,
diagnostics: &mut Diagnostics,
) -> Result<EnumValue, DatamodelError> {
let mut name: Option<Identifier> = None;
let mut attributes: Vec<Attribute> = vec![];
let mut comments: Vec<String> = vec![];
Expand All @@ -55,7 +60,7 @@ fn parse_enum_value(enum_name: &str, token: &Token<'_>) -> Result<EnumValue, Dat
match current.as_rule() {
Rule::non_empty_identifier => name = Some(current.to_id()),
Rule::maybe_empty_identifier => name = Some(current.to_id()),
Rule::attribute => attributes.push(parse_attribute(&current)),
Rule::attribute => attributes.push(parse_attribute(&current, diagnostics)),
Rule::number => {
return Err(DatamodelError::new_enum_validation_error(
format!(
Expand Down

0 comments on commit af4cc1c

Please sign in to comment.