From a531e40eeeeafca16b790db5e39c36ecd5f23362 Mon Sep 17 00:00:00 2001
From: Martin Winandy <martin.winandy@pmwmedia.de>
Date: Sat, 3 Sep 2022 08:43:41 +0200
Subject: [PATCH] Add lexer for properties files (#670)

---
 lexers/embedded/properties.xml      | 27 ++++++++++
 lexers/testdata/properties.actual   | 22 ++++++++
 lexers/testdata/properties.expected | 80 +++++++++++++++++++++++++++++
 3 files changed, 129 insertions(+)
 create mode 100644 lexers/embedded/properties.xml
 create mode 100644 lexers/testdata/properties.actual
 create mode 100644 lexers/testdata/properties.expected
diff --git a/lexers/embedded/properties.xml b/lexers/embedded/properties.xml
new file mode 100644
index 000000000..0bb4c4605
--- /dev/null
+++ b/lexers/embedded/properties.xml
@@ -0,0 +1,27 @@
+<lexer><!-- Lexer definition for ".properties" files. -->
+  <config><!-- Metadata: lexer name, one alias, a filename glob and a MIME type. -->
+    <name>properties</name>
+    <alias>java-properties</alias>
+    <filename>*.properties</filename>
+    <mime_type>text/x-java-properties</mime_type>
+  </config>
+  <rules>
+    <state name="root"><!-- The only state this lexer defines. -->
+      <rule pattern="\s+"><!-- Runs of whitespace become Text; the sample output shows the newline after each line landing here. -->
+        <token type="Text"/>
+      </rule>
+      <rule pattern="^[;#!].*"><!-- Comment: ';', '#' or '!' at the start of a line, plus the rest of that line. NOTE(review): the sample input only exercises '#' and '!'; confirm ';' is wanted. -->
+        <token type="CommentSingle"/>
+      </rule>
+      <rule pattern="^(.+?)([ \t]*)([=:])([ \t]*)(.*)"><!-- Entry: the five groups (key, padding, '=' or ':', padding, value) map in order onto the five tokens below. -->
+        <bygroups>
+          <token type="NameAttribute"/>
+          <token type="Text"/>
+          <token type="Operator"/>
+          <token type="Text"/>
+          <token type="LiteralString"/>
+        </bygroups>
+      </rule><!-- NOTE(review): the entry rule requires '=' or ':'; no rule here matches a key separated from its value by whitespace alone, although the sample input lists whitespace as a delimiter. Confirm that is intended. -->
+    </state>
+  </rules>
+</lexer>
\ No newline at end of file
diff --git a/lexers/testdata/properties.actual b/lexers/testdata/properties.actual
new file mode 100644
index 000000000..823a6bc8d
--- /dev/null
+++ b/lexers/testdata/properties.actual
@@ -0,0 +1,22 @@
+# You are reading a comment in ".properties" file.
+! The exclamation mark can also be used for comments.
+# Lines with "properties" contain a key and a value separated by a delimiting character.
+# There are 3 delimiting characters: '=' (equal), ':' (colon) and whitespace (space, \t and \f).
+website = https://en.wikipedia.org/
+language : English
+# White space that appears between the key, the value and the delimiter is ignored.
+# This means that the following are equivalent (other than for readability).
+hello=hello
+hello = hello
+# Keys with the same name will be overwritten by the key that is the furthest in a file.
+# For example the final value for "duplicateKey" will be "second".
+duplicateKey = first
+duplicateKey = second
+# If you need to add newlines and carriage returns, they need to be escaped using \n and \r respectively.
+# You can also optionally escape tabs with \t for readability purposes.
+valueWithEscapes = This is a newline\n and a carriage return\r and a tab\t.
+# You can also use Unicode escape characters (maximum of four hexadecimal digits).
+# In the following example, the value for "encodedHelloInJapanese" is "こんにちは".
+encodedHelloInJapanese = \u3053\u3093\u306b\u3061\u306f
+# But with more modern file encodings like UTF-8, you can directly use supported characters.
+helloInJapanese = こんにちは
diff --git a/lexers/testdata/properties.expected b/lexers/testdata/properties.expected
new file mode 100644
index 000000000..c5081c110
--- /dev/null
+++ b/lexers/testdata/properties.expected
@@ -0,0 +1,80 @@
+[
+  {"type":"CommentSingle","value":"# You are reading a comment in \".properties\" file."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"! The exclamation mark can also be used for comments."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# Lines with \"properties\" contain a key and a value separated by a delimiting character."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# There are 3 delimiting characters: '=' (equal), ':' (colon) and whitespace (space, \\t and \\f)."},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"website"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":"="},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"https://en.wikipedia.org/"},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"language"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":":"},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"English"},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# White space that appears between the key, the value and the delimiter is ignored."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# This means that the following are equivalent (other than for readability)."},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"hello"},
+  {"type":"Operator","value":"="},
+  {"type":"LiteralString","value":"hello"},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"hello"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":"="},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"hello"},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# Keys with the same name will be overwritten by the key that is the furthest in a file."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# For example the final value for \"duplicateKey\" will be \"second\"."},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"duplicateKey"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":"="},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"first"},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"duplicateKey"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":"="},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"second"},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# If you need to add newlines and carriage returns, they need to be escaped using \\n and \\r respectively."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# You can also optionally escape tabs with \\t for readability purposes."},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"valueWithEscapes"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":"="},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"This is a newline\\n and a carriage return\\r and a tab\\t."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# You can also use Unicode escape characters (maximum of four hexadecimal digits)."},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# In the following example, the value for \"encodedHelloInJapanese\" is \"こんにちは\"."},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"encodedHelloInJapanese"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":"="},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"\\u3053\\u3093\\u306b\\u3061\\u306f"},
+  {"type":"Text","value":"\n"},
+  {"type":"CommentSingle","value":"# But with more modern file encodings like UTF-8, you can directly use supported characters."},
+  {"type":"Text","value":"\n"},
+  {"type":"NameAttribute","value":"helloInJapanese"},
+  {"type":"Text","value":" "},
+  {"type":"Operator","value":"="},
+  {"type":"Text","value":" "},
+  {"type":"LiteralString","value":"こんにちは"},
+  {"type":"Text","value":"\n"}
+]