Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Add lexer for properties files (#670)
  • Loading branch information
pmwmedia committed Sep 3, 2022
1 parent dbb09a5 commit a531e40
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 0 deletions.
27 changes: 27 additions & 0 deletions lexers/embedded/properties.xml
@@ -0,0 +1,27 @@
<lexer>
  <!--
    Lexer for Java-style ".properties" files.

    Each logical line is either a comment or a key/value entry. Every
    non-whitespace rule below consumes through the end of its line, so the
    rules are only ever attempted at the start of a line or right after the
    leading indentation eaten by the whitespace rule. That is why they do not
    need (and must not have) a "^" anchor: the whitespace rule has already
    moved the position past the line start when a line is indented.
  -->
  <config>
    <name>properties</name>
    <alias>java-properties</alias>
    <filename>*.properties</filename>
    <mime_type>text/x-java-properties</mime_type>
  </config>
  <rules>
    <state name="root">
      <!-- Line breaks, blank lines and leading indentation. -->
      <rule pattern="\s+">
        <token type="Text"/>
      </rule>
      <!-- Comment line: starts with ';', '#' or '!' and runs to end of line. -->
      <rule pattern="[;#!].*">
        <token type="CommentSingle"/>
      </rule>
      <!--
        key, optional blanks, '=' or ':' delimiter, optional blanks, value.
        The value also swallows following lines while the preceding line ends
        in a backslash (line continuation). Limitation: a value ending in an
        escaped backslash is also treated as continued.
      -->
      <rule pattern="(.+?)([ \t\f]*)([=:])([ \t\f]*)(.*(?:(?&lt;=\\)\n.*)*)">
        <bygroups>
          <token type="NameAttribute"/>
          <token type="Text"/>
          <token type="Operator"/>
          <token type="Text"/>
          <token type="LiteralString"/>
        </bygroups>
      </rule>
      <!--
        Fallback for lines without '=' or ':': either a whitespace-delimited
        "key value" entry or a bare key with an empty value. Only reached when
        the rule above fails, so delimiter-based entries are unaffected.
      -->
      <rule pattern="(\S+)([ \t\f]*)(.*(?:(?&lt;=\\)\n.*)*)">
        <bygroups>
          <token type="NameAttribute"/>
          <token type="Text"/>
          <token type="LiteralString"/>
        </bygroups>
      </rule>
    </state>
  </rules>
</lexer>
22 changes: 22 additions & 0 deletions lexers/testdata/properties.actual
@@ -0,0 +1,22 @@
# You are reading a comment in ".properties" file.
! The exclamation mark can also be used for comments.
# Lines with "properties" contain a key and a value separated by a delimiting character.
# There are 3 delimiting characters: '=' (equal), ':' (colon) and whitespace (space, \t and \f).
website = https://en.wikipedia.org/
language : English
# White space that appears between the key, the value and the delimiter is ignored.
# This means that the following are equivalent (other than for readability).
hello=hello
hello = hello
# Keys with the same name will be overwritten by the key that is the furthest in a file.
# For example the final value for "duplicateKey" will be "second".
duplicateKey = first
duplicateKey = second
# If you need to add newlines and carriage returns, they need to be escaped using \n and \r respectively.
# You can also optionally escape tabs with \t for readability purposes.
valueWithEscapes = This is a newline\n and a carriage return\r and a tab\t.
# You can also use Unicode escape characters (maximum of four hexadecimal digits).
# In the following example, the value for "encodedHelloInJapanese" is "こんにちは".
encodedHelloInJapanese = \u3053\u3093\u306b\u3061\u306f
# But with more modern file encodings like UTF-8, you can directly use supported characters.
helloInJapanese = こんにちは
80 changes: 80 additions & 0 deletions lexers/testdata/properties.expected
@@ -0,0 +1,80 @@
[
{"type":"CommentSingle","value":"# You are reading a comment in \".properties\" file."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"! The exclamation mark can also be used for comments."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# Lines with \"properties\" contain a key and a value separated by a delimiting character."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# There are 3 delimiting characters: '=' (equal), ':' (colon) and whitespace (space, \\t and \\f)."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"website"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"https://en.wikipedia.org/"},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"language"},
{"type":"Text","value":" "},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"English"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# White space that appears between the key, the value and the delimiter is ignored."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# This means that the following are equivalent (other than for readability)."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"hello"},
{"type":"Operator","value":"="},
{"type":"LiteralString","value":"hello"},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"hello"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"hello"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# Keys with the same name will be overwritten by the key that is the furthest in a file."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# For example the final value for \"duplicateKey\" will be \"second\"."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"duplicateKey"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"first"},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"duplicateKey"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"second"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# If you need to add newlines and carriage returns, they need to be escaped using \\n and \\r respectively."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# You can also optionally escape tabs with \\t for readability purposes."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"valueWithEscapes"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"This is a newline\\n and a carriage return\\r and a tab\\t."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# You can also use Unicode escape characters (maximum of four hexadecimal digits)."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# In the following example, the value for \"encodedHelloInJapanese\" is \"こんにちは\"."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"encodedHelloInJapanese"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"\\u3053\\u3093\\u306b\\u3061\\u306f"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# But with more modern file encodings like UTF-8, you can directly use supported characters."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"helloInJapanese"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"こんにちは"},
{"type":"Text","value":"\n"}
]

0 comments on commit a531e40

Please sign in to comment.