From a531e40eeeeafca16b790db5e39c36ecd5f23362 Mon Sep 17 00:00:00 2001 From: Martin Winandy Date: Sat, 3 Sep 2022 08:43:41 +0200 Subject: [PATCH] Add lexer for properties files (#670) --- lexers/embedded/properties.xml | 27 ++++++++++ lexers/testdata/properties.actual | 22 ++++++++ lexers/testdata/properties.expected | 80 +++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 lexers/embedded/properties.xml create mode 100644 lexers/testdata/properties.actual create mode 100644 lexers/testdata/properties.expected diff --git a/lexers/embedded/properties.xml b/lexers/embedded/properties.xml new file mode 100644 index 000000000..0bb4c4605 --- /dev/null +++ b/lexers/embedded/properties.xml @@ -0,0 +1,27 @@ + + + properties + java-properties + *.properties + text/x-java-properties + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lexers/testdata/properties.actual b/lexers/testdata/properties.actual new file mode 100644 index 000000000..823a6bc8d --- /dev/null +++ b/lexers/testdata/properties.actual @@ -0,0 +1,22 @@ +# You are reading a comment in ".properties" file. +! The exclamation mark can also be used for comments. +# Lines with "properties" contain a key and a value separated by a delimiting character. +# There are 3 delimiting characters: '=' (equal), ':' (colon) and whitespace (space, \t and \f). +website = https://en.wikipedia.org/ +language : English +# White space that appears between the key, the value and the delimiter is ignored. +# This means that the following are equivalent (other than for readability). +hello=hello +hello = hello +# Keys with the same name will be overwritten by the key that is the furthest in a file. +# For example the final value for "duplicateKey" will be "second". +duplicateKey = first +duplicateKey = second +# If you need to add newlines and carriage returns, they need to be escaped using \n and \r respectively. +# You can also optionally escape tabs with \t for readability purposes. +valueWithEscapes = This is a newline\n and a carriage return\r and a tab\t. +# You can also use Unicode escape characters (maximum of four hexadecimal digits). +# In the following example, the value for "encodedHelloInJapanese" is "こんにちは". +encodedHelloInJapanese = \u3053\u3093\u306b\u3061\u306f +# But with more modern file encodings like UTF-8, you can directly use supported characters. +helloInJapanese = こんにちは diff --git a/lexers/testdata/properties.expected b/lexers/testdata/properties.expected new file mode 100644 index 000000000..c5081c110 --- /dev/null +++ b/lexers/testdata/properties.expected @@ -0,0 +1,80 @@ +[ + {"type":"CommentSingle","value":"# You are reading a comment in \".properties\" file."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"! The exclamation mark can also be used for comments."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# Lines with \"properties\" contain a key and a value separated by a delimiting character."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# There are 3 delimiting characters: '=' (equal), ':' (colon) and whitespace (space, \\t and \\f)."}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"website"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"https://en.wikipedia.org/"}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"language"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":":"}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"English"}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# White space that appears between the key, the value and the delimiter is ignored."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# This means that the following are equivalent (other than for readability)."}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"hello"}, + {"type":"Operator","value":"="}, + {"type":"LiteralString","value":"hello"}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"hello"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"hello"}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# Keys with the same name will be overwritten by the key that is the furthest in a file."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# For example the final value for \"duplicateKey\" will be \"second\"."}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"duplicateKey"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"first"}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"duplicateKey"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"second"}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# If you need to add newlines and carriage returns, they need to be escaped using \\n and \\r respectively."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# You can also optionally escape tabs with \\t for readability purposes."}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"valueWithEscapes"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"This is a newline\\n and a carriage return\\r and a tab\\t."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# You can also use Unicode escape characters (maximum of four hexadecimal digits)."}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# In the following example, the value for \"encodedHelloInJapanese\" is \"こんにちは\"."}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"encodedHelloInJapanese"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"\\u3053\\u3093\\u306b\\u3061\\u306f"}, + {"type":"Text","value":"\n"}, + {"type":"CommentSingle","value":"# But with more modern file encodings like UTF-8, you can directly use supported characters."}, + {"type":"Text","value":"\n"}, + {"type":"NameAttribute","value":"helloInJapanese"}, + {"type":"Text","value":" "}, + {"type":"Operator","value":"="}, + {"type":"Text","value":" "}, + {"type":"LiteralString","value":"こんにちは"}, + {"type":"Text","value":"\n"} +]