Skip to content

Commit

Permalink
Improve properties file lexer (#671)
Browse files (browse the repository at this point in the history)
  • Loading branch information
pmwmedia committed Sep 7, 2022
1 parent a531e40 commit a238967
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 7 deletions.
32 changes: 25 additions & 7 deletions lexers/embedded/properties.xml
Expand Up @@ -7,21 +7,39 @@
</config>
<rules>
<state name="root">
<rule pattern="\s+">
<token type="Text"/>
</rule>
<rule pattern="^[;#!].*">
<token type="CommentSingle"/>
<rule pattern="^([ \t\f]*)([#!].*)">
<bygroups>
<token type="Text"/>
<token type="CommentSingle"/>
</bygroups>
</rule>
<rule pattern="^(.+?)([ \t]*)([=:])([ \t]*)(.*)">
<rule pattern="^([ \t\f]*)(\S+?)([ \t\f]*)([=:])([ \t\f]*)(.*(?:(?&lt;=\\)\n.*)*)">
<bygroups>
<token type="Text"/>
<token type="NameAttribute"/>
<token type="Text"/>
<token type="Operator"/>
<token type="Text"/>
<token type="LiteralString"/>
</bygroups>
</rule>
<rule pattern="^([ \t\f]*)(\S+)([ \t\f]+)(.*(?:(?&lt;=\\)\n.*)*)">
<bygroups>
<token type="Text"/>
<token type="NameAttribute"/>
<token type="Text"/>
<token type="LiteralString"/>
</bygroups>
</rule>
<rule pattern="^([ \t\f]*)(\w+)$">
<bygroups>
<token type="Text"/>
<token type="NameAttribute"/>
</bygroups>
</rule>
<rule pattern="\n">
<token type="Text"/>
</rule>
</state>
</rules>
</lexer>
</lexer>
13 changes: 13 additions & 0 deletions lexers/testdata/properties.actual
Expand Up @@ -8,10 +8,19 @@ language : English
# This means that the following are equivalent (other than for readability).
hello=hello
hello = hello
topic .properties files
# A word on a line will just create a key with no value.
empty
# Keys with the same name will be overwritten by the key that is the furthest in a file.
# For example the final value for "duplicateKey" will be "second".
duplicateKey = first
duplicateKey = second
# Adding a \ at the end of a line means that the value continues to the next line.
towLines = This line \
continues
threeLines: This value \
has even \
three lines
# If you need to add newlines and carriage returns, they need to be escaped using \n and \r respectively.
# You can also optionally escape tabs with \t for readability purposes.
valueWithEscapes = This is a newline\n and a carriage return\r and a tab\t.
Expand All @@ -20,3 +29,7 @@ valueWithEscapes = This is a newline\n and a carriage return\r and a tab\t.
encodedHelloInJapanese = \u3053\u3093\u306b\u3061\u306f
# But with more modern file encodings like UTF-8, you can directly use supported characters.
helloInJapanese = こんにちは
# Comments and keys can have leading whitespace
foo = I have leading whitespace
# Comments and keys can have trailing whitespace as part of the comment
bar = I have trailing whitespace as part of the value
37 changes: 37 additions & 0 deletions lexers/testdata/properties.expected
Expand Up @@ -33,6 +33,14 @@
{"type":"Text","value":" "},
{"type":"LiteralString","value":"hello"},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"topic"},
{"type":"Text","value":" "},
{"type":"LiteralString","value":".properties files"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# A word on a line will just create a key with no value."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"empty"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# Keys with the same name will be overwritten by the key that is the furthest in a file."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# For example the final value for \"duplicateKey\" will be \"second\"."},
Expand All @@ -49,6 +57,19 @@
{"type":"Text","value":" "},
{"type":"LiteralString","value":"second"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# Adding a \\ at the end of a line means that the value continues to the next line."},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"towLines"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"This line \\\n continues"},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"threeLines"},
{"type":"Operator","value":":"},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"This value \\\n has even \\\n three lines"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# If you need to add newlines and carriage returns, they need to be escaped using \\n and \\r respectively."},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# You can also optionally escape tabs with \\t for readability purposes."},
Expand Down Expand Up @@ -76,5 +97,21 @@
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"こんにちは"},
{"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"# Comments and keys can have leading whitespace"},
{"type":"Text","value":"\n "},
{"type":"NameAttribute","value":"foo"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"I have leading whitespace"},
{"type":"Text","value":"\n"},
{"type":"CommentSingle","value":"# Comments and keys can have trailing whitespace as part of the comment "},
{"type":"Text","value":"\n"},
{"type":"NameAttribute","value":"bar"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"LiteralString","value":"I have trailing whitespace as part of the value "},
{"type":"Text","value":"\n"}
]

0 comments on commit a238967

Please sign in to comment.