cucumber · temyers · Nov 8, 2022 · Nov 25, 2022 · Nov 25, 2022 · Nov 28, 2022
diff --git a/javascript/test/GherkinInMarkdownTokenMatcherTest.ts b/javascript/test/GherkinInMarkdownTokenMatcherTest.ts
@@ -34,35 +34,58 @@ describe('GherkinInMarkdownTokenMatcher', function () {
     assert.strictEqual(token.matchedText, 'hello')
   })
 
+  it('matches FeatureLine without the Feature: keyword', () => {
+    const line = new GherkinLine('# hello', location.line)
+    const token = new Token(line, location)
+    assert(tm.match_FeatureLine(token))
+    assert.strictEqual(token.matchedType, TokenType.FeatureLine)
+    assert.strictEqual(token.matchedKeyword, undefined)
+    assert.strictEqual(token.matchedText, '# hello')
+  })
+
   it('matches bullet Step', () => {
     const line = new GherkinLine('  *  Given I have 3 cukes', location.line)
     const token = new Token(line, location)
     assert(tm.match_StepLine(token))
     assert.strictEqual(token.matchedType, TokenType.StepLine)
     assert.strictEqual(token.matchedKeyword, 'Given ')
+    assert.strictEqual(token.matchedKeywordType, 'Context')
     assert.strictEqual(token.matchedText, 'I have 3 cukes')
     assert.strictEqual(token.location.column, 6)
   })
-
+  
   it('matches plus Step', () => {
     const line = new GherkinLine('  +  Given I have 3 cukes', location.line)
     const token = new Token(line, location)
     assert(tm.match_StepLine(token))
     assert.strictEqual(token.matchedType, TokenType.StepLine)
     assert.strictEqual(token.matchedKeyword, 'Given ')
+    assert.strictEqual(token.matchedKeywordType, 'Context')
     assert.strictEqual(token.matchedText, 'I have 3 cukes')
     assert.strictEqual(token.location.column, 6)
   })
-
+  
   it('matches hyphen Step', () => {
     const line = new GherkinLine('  -  Given I have 3 cukes', location.line)
     const token = new Token(line, location)
     assert(tm.match_StepLine(token))
     assert.strictEqual(token.matchedType, TokenType.StepLine)
     assert.strictEqual(token.matchedKeyword, 'Given ')
+    assert.strictEqual(token.matchedKeywordType, 'Context')
     assert.strictEqual(token.matchedText, 'I have 3 cukes')
     assert.strictEqual(token.location.column, 6)
   })
+
+  it('matches a when Step', () => {
+    const line = new GherkinLine('  -  When I do something', location.line)
+    const token = new Token(line, location)
+    assert(tm.match_StepLine(token))
+    assert.strictEqual(token.matchedType, TokenType.StepLine)
+    assert.strictEqual(token.matchedKeyword, 'When ')
+    assert.strictEqual(token.matchedKeywordType, 'Action')
+    assert.strictEqual(token.matchedText, 'I do something')
+    assert.strictEqual(token.location.column, 6)
+  })
 
   it('matches arbitrary text as Other', () => {
     const line = new GherkinLine('Whatever', location.line)
@@ -186,4 +209,21 @@ describe('GherkinInMarkdownTokenMatcher', function () {
     ]
     assert.deepStrictEqual(t.matchedItems, expectedItems)
   })
+
+  it('matches arbitrary text as Empty after the FeatureLine has already been matched', () => {
+    // White Box testing - implementation detail...
+    // Given the FeatureLine has already been matched
+    const tFeatureLine = new Token(new GherkinLine('# something arbitrary', location.line), location);
+    assert(tm.match_FeatureLine(tFeatureLine))
+
+
+    const t = new Token(new GherkinLine('arbitrary text', location.line), location);
+    // (tm as any).matchedFeatureLine = true
+    assert(tm.match_Empty(t))
+    assert.strictEqual(t.matchedType, TokenType.Empty)
+    const expectedItems: Item[] =undefined
+    assert.deepStrictEqual(t.matchedItems, expectedItems)
+    assert.strictEqual(t.matchedKeyword, undefined)
+    assert.strictEqual(t.matchedText, undefined)
+  } )
 })
diff --git a/python/Makefile b/python/Makefile
@@ -8,8 +8,8 @@ SOURCE_FILES = $(shell find . -name "*.py" | grep -v $(GHERKIN_PARSER))
 GHERKIN = bin/gherkin
 GHERKIN_GENERATE_TOKENS = bin/gherkin-generate-tokens
 
-GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature")
-BAD_FEATURE_FILES  = $(shell find ../testdata/bad -name "*.feature")
+GOOD_FEATURE_FILES = $(shell find ../testdata/good -name "*.feature" -o -name "*.feature.md")
+BAD_FEATURE_FILES  = $(shell find ../testdata/bad -name "*.feature" -o -name "*.feature.md")
 
 TOKENS       = $(patsubst ../testdata/%,acceptance/testdata/%.tokens,$(GOOD_FEATURE_FILES))
 ASTS         = $(patsubst ../testdata/%,acceptance/testdata/%.ast.ndjson,$(GOOD_FEATURE_FILES))

diff --git a/python/bin/gherkin_generate_tokens.py b/python/bin/gherkin_generate_tokens.py
@@ -7,12 +7,18 @@
 from gherkin.token_scanner import TokenScanner
 from gherkin.token_formatter_builder import TokenFormatterBuilder
 from gherkin.parser import Parser
+from gherkin.token_matcher_markdown import GherkinInMarkdownTokenMatcher
 
 files = sys.argv[1:]
 if sys.version_info < (3, 0) and os.name != 'nt':  # for Python2 unless on Windows native
     UTF8Writer = codecs.getwriter('utf8')
     sys.stdout = UTF8Writer(sys.stdout)
+
 parser = Parser(TokenFormatterBuilder())
 for file in files:
     scanner = TokenScanner(file)
-    print(parser.parse(scanner))
+
+    if(file.endswith('.md')):
+        print(parser.parse(scanner, GherkinInMarkdownTokenMatcher()) )
+    else:
+        print(parser.parse(scanner))
diff --git a/python/gherkin/stream/gherkin_events.py b/python/gherkin/stream/gherkin_events.py
@@ -3,6 +3,7 @@
 from gherkin.pickles.compiler import Compiler
 from gherkin.errors import ParserError, CompositeParserException
 from gherkin.stream.id_generator import IdGenerator
+from gherkin.token_matcher_markdown import GherkinInMarkdownTokenMatcher
 
 def create_errors(errors, uri):
     for error in errors:
@@ -28,7 +29,10 @@ def enum(self, source_event):
         source = source_event['source']['data']
 
         try:
-            gherkin_document = self.parser.parse(source)
+            matcher=None
+            if(uri.endswith('.md')):
+                matcher=GherkinInMarkdownTokenMatcher()
+            gherkin_document = self.parser.parse(source, matcher)
             gherkin_document['uri'] = uri
 
             if (self.options.print_source):

diff --git a/python/gherkin/stream/source_events.py b/python/gherkin/stream/source_events.py
@@ -5,11 +5,18 @@ def source_event(path):
         'source': {
             'uri': path,
             'data': io.open(path, 'r', encoding='utf8', newline='').read(),
-            'mediaType': 'text/x.cucumber.gherkin+plain'
+            'mediaType': _media_type(path)
         }
     }
     return event
 
+
+def _media_type(path):
+    if(path.endswith(".feature")):
+        return 'text/x.cucumber.gherkin+plain'
+    if(path.endswith(".feature.md")):
+        return 'text/x.cucumber.gherkin+markdown'
+
 class SourceEvents:
     def __init__(self, paths):
         self.paths = paths

diff --git a/python/gherkin/token_matcher_markdown.py b/python/gherkin/token_matcher_markdown.py
@@ -25,6 +25,7 @@ def match_FeatureLine(self, token):
 
         if(self.matched_feature_line):
             self._set_token_matched(token,None)
+            return False
 
         # We first try to match "# Feature: blah"
         result = self._match_title_line(KEYWORD_PREFIX_HEADER, self.dialect.feature_keywords, ':', token, 'FeatureLine')
@@ -34,6 +35,7 @@ def match_FeatureLine(self, token):
 
         if not result:
             self._set_token_matched(token,'FeatureLine',token.line.get_line_text())
+            result=True
         self.matched_feature_line=result
         return result
 
@@ -82,8 +84,9 @@ def match_Comment(self, token):
         if(token.line.startswith('|')):
             table_cells = token.line.table_cells
             if(self._is_gfm_table_separator(table_cells)):
+                self._set_token_matched(token,"Comment")
                 return True
-        return self._set_token_matched(token,None,False)
+        return self._set_token_matched(token,None)
 
     def match_Empty(self, token):
 
@@ -180,11 +183,16 @@ def _match_title_line(self, prefix, keywords, keywordSuffix, token, token_type):
         match = re.search(u'{}({}){}(.*)'.format(prefix, keywords_or_list, keywordSuffix), token.line.get_line_text())
         indent = token.line.indent
         result = False
-
+        matchedKeywordType=None
         if(match):
             matchedKeyword = match.group(2)
             indent += len(match.group(1))
-            self._set_token_matched(token, token_type, match.group(3).strip(), matchedKeyword, indent=indent)
+
+            # only set the keyword type if this is a step keyword
+            if( matchedKeyword in self.keyword_types ):
+                matchedKeywordType = self.keyword_types[matchedKeyword][0]    
+
+            self._set_token_matched(token, token_type, match.group(3).strip(), matchedKeyword, keyword_type=matchedKeywordType, indent=indent)
             return True
         return False
 

diff --git a/python/test/gherkin_in_markdown_token_matcher_test.py b/python/test/gherkin_in_markdown_token_matcher_test.py
@@ -6,7 +6,16 @@
 from gherkin.gherkin_line import GherkinLine
 location = { 'line': 1, 'column': 1 }
 
-def test_it_matches_FeatureLine():
+def test_it_matches_FeatureLineH1():
+    tm = GherkinInMarkdownTokenMatcher('en')
+    line = GherkinLine('''# Feature: hello''',location['line'])
+    token = Token(gherkin_line=line, location=location)
+    assert tm.match_FeatureLine(token)
+    assert token.matched_type == 'FeatureLine'
+    assert token.matched_keyword == 'Feature'
+    assert token.matched_text == 'hello'
+
+def test_it_matches_FeatureLineH2():
     tm = GherkinInMarkdownTokenMatcher('en')
     line = GherkinLine('''## Feature: hello''',location['line'])
     token = Token(gherkin_line=line, location=location)
@@ -24,6 +33,15 @@ def test_it_matches_FeatureLine_in_French():
     assert token.matched_keyword == u'Fonctionnalité'
     assert token.matched_text == 'hello'
 
+def test_it_matches_FeatureLine_without_the_Feature_keyword():
+    tm = GherkinInMarkdownTokenMatcher('en')
+    line = GherkinLine('''# hello''',location['line'])
+    token = Token(gherkin_line=line, location=location)
+    assert tm.match_FeatureLine(token)
+    assert token.matched_type == 'FeatureLine'
+    assert token.matched_keyword == None
+    assert token.matched_text == '# hello'
+
 def test_it_matches_bullet_Step():
     tm = GherkinInMarkdownTokenMatcher('en')
     line = GherkinLine('''  *  Given I have 3 cukes''',location['line'])
@@ -41,6 +59,7 @@ def test_it_matches_plus_Step():
     assert tm.match_StepLine(token)
     assert token.matched_type == 'StepLine'
     assert token.matched_keyword == 'Given '
+    assert token.matched_keyword_type == 'Context'
     assert token.matched_text == 'I have 3 cukes'
     assert token.location['column'] == 6
 
@@ -51,9 +70,22 @@ def test_it_matches_hyphen_Step():
     assert tm.match_StepLine(token)
     assert token.matched_type == 'StepLine'
     assert token.matched_keyword == 'Given '
+    assert token.matched_keyword_type == 'Context'
     assert token.matched_text == 'I have 3 cukes'
     assert token.location['column'] == 6
 
+def test_it_matches_a_when_Step():
+    tm = GherkinInMarkdownTokenMatcher('en')
+    line = GherkinLine('''  -  When I do something''',location['line'])
+    token = Token(gherkin_line=line, location=location)
+    assert tm.match_StepLine(token)
+    assert token.matched_type == 'StepLine'
+    assert token.matched_keyword == 'When '
+    assert token.matched_keyword_type == 'Action'
+    assert token.matched_text == 'I do something'
+    assert token.location['column'] == 6
+
+
 def test_it_matches_arbitrary_text_as_Other():
     tm = GherkinInMarkdownTokenMatcher('en')
     line = GherkinLine('''Whatever''',location['line'])
@@ -151,11 +183,13 @@ def test_it_matches_table_separator_row_as_comment():
     l1 = GherkinLine('  | h1 | h2 |',location['line'])
     t1 = Token(l1,location)
     assert tm.match_TableRow(t1)
+    assert t1.location['column'] == 3
 
     l2 = GherkinLine('  | --- | --- |',location['line'])
     t2 = Token(l2,location)
     assert not tm.match_TableRow(t2)
     assert tm.match_Comment(t2)
+    assert t2.location['column'] == 3
 
 def test_it_matches_indented_tags():
     tm = GherkinInMarkdownTokenMatcher('en')
@@ -229,4 +263,32 @@ def test_it_matches_ExamplesLine():
     assert tm.match_ExamplesLine(token)
     assert token.matched_type == 'ExamplesLine'
     assert token.matched_keyword == 'Examples'
-    assert token.matched_text == ''
+    assert token.matched_text == ''
+
+def test_it_matches_Empty():
+    tm = GherkinInMarkdownTokenMatcher('en')
+    line = GherkinLine('''''',location['line'])
+    token = Token(gherkin_line=line, location=location)
+    assert tm.match_Empty(token)
+    assert token.matched_type == 'Empty'
+    assert token.matched_keyword == None
+    assert token.matched_text == None
+
+def test_it_matches_arbitrary_text_as_Empty_after_the_FeatureLine_has_already_been_matched():
+    # White Box testing - implementation detail...
+    # Given the FeatureLine has already been matched
+    tm = GherkinInMarkdownTokenMatcher('en')
+
+    line = GherkinLine('''# something arbitrary''',location['line'])
+    token = Token(gherkin_line=line, location=location)
+    assert(tm.match_FeatureLine(token))
+
+    line = GherkinLine('''arbitrary text''',location['line'])
+    token=Token(gherkin_line=line, location=location)
+
+    assert(tm.match_Empty(token))
+    assert token.matched_type == 'Empty'
+    assert token.matched_items == []
+    assert token.matched_keyword == None
+    assert token.matched_text == None
+    pass