Skip to content

Commit

Permalink
Parse new lines for plain scalar style (#66)
Browse files Browse the repository at this point in the history
* Parse new lines for plain scalar style
  • Loading branch information
lwronski committed Sep 7, 2021
1 parent b01e6bc commit 146b5e7
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 53 deletions.
Expand Up @@ -6,7 +6,6 @@ import org.virtuslab.yaml.internal.load.reader.StringReader

import scala.annotation.tailrec
import scala.collection.mutable

import token.Token
case class ReaderCtx(
stateStack: mutable.Stack[ReaderState],
Expand Down Expand Up @@ -38,10 +37,13 @@ case class ReaderCtx(
closeOpenedCollectionMapping(indent)
case _ => ()

/** Indentation recorded for the state currently on top of `stateStack`.
  *
  * @return `Some(indent)` of the top-most state, or `None` when the stack is empty
  */
def getIndentOfLatestCollection(): Option[Int] =
  for latest <- stateStack.headOption
  yield latest.indent

/** Pushes `state` onto `stateStack`, making it the state returned by `stateStack.headOption`.
  *
  * @param state reader state to put on top of the stack
  */
def appendState(state: ReaderState): Unit =
  stateStack.push(state)

def closeOpenedFlowMapping(): List[Token] = stateStack.headOption match
case Some(ReaderState.FlowMapping) =>
case Some(ReaderState.FlowMapping(_)) =>
stateStack.pop()
List(Token.FlowMappingEnd(reader.pos()))
case _ =>
Expand All @@ -52,7 +54,7 @@ case class ReaderCtx(
case Some(ReaderState.Sequence(_)) =>
stateStack.pop()
List(Token.SequenceEnd(reader.pos()))
case Some(ReaderState.FlowSequence) =>
case Some(ReaderState.FlowSequence(_)) =>
stateStack.pop()
List(Token.FlowSequenceEnd(reader.pos()))
case _ =>
Expand All @@ -70,15 +72,16 @@ case class ReaderCtx(

def isAllowedSpecialCharacter(char: Char): Boolean =
stateStack.headOption match
case Some(ReaderState.FlowMapping) if char == '}' => false
case Some(ReaderState.FlowMapping) | Some(ReaderState.FlowSequence) if char == ',' => false
case Some(ReaderState.FlowSequence) if char == ']' => false
case _ => true
case Some(ReaderState.FlowMapping(_)) if char == '}' => false
case Some(ReaderState.FlowMapping(_)) | Some(ReaderState.FlowSequence(_)) if char == ',' =>
false
case Some(ReaderState.FlowSequence(_)) if char == ']' => false
case _ => true

def isFlowMapping(): Boolean =
stateStack.headOption match
case Some(ReaderState.FlowMapping) => true
case _ => false
case Some(ReaderState.FlowMapping(_)) => true
case _ => false

def closeOpenedScopes(): List[Token] =
@tailrec
Expand All @@ -94,9 +97,9 @@ case class ReaderCtx(

loop(Nil)

def parseDocumentStart(): List[Token] =
def parseDocumentStart(indent: Int): List[Token] =
val closedScopes = closeOpenedScopes()
stateStack.push(ReaderState.Document)
stateStack.push(ReaderState.Document(indent))
closedScopes :+ Token.DocumentStart(reader.pos())

def parseDocumentEnd(): List[Token] =
Expand Down
@@ -1,10 +1,11 @@
package org.virtuslab.yaml.internal.load.reader

sealed trait ReaderState
sealed trait ReaderState:
def indent: Int

case object ReaderState:
case object Stream extends ReaderState
case object Document extends ReaderState
final case class Mapping(indent: Int) extends ReaderState
final case class Sequence(indent: Int) extends ReaderState
case object FlowMapping extends ReaderState
case object FlowSequence extends ReaderState
final case class Document(indent: Int) extends ReaderState
final case class Mapping(indent: Int) extends ReaderState
final case class Sequence(indent: Int) extends ReaderState
final case class FlowMapping(indent: Int) extends ReaderState
final case class FlowSequence(indent: Int) extends ReaderState
Expand Up @@ -13,9 +13,8 @@ trait Tokenizer:

private[yaml] class Scanner(str: String) extends Tokenizer {

private val ctx = ReaderCtx.init(str)
private val in = ctx.reader
private var indent = 0
private val ctx = ReaderCtx.init(str)
private val in = ctx.reader

override def peekToken(): Token = ctx.tokens.headOption match
case Some(token) => token
Expand Down Expand Up @@ -47,7 +46,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer {

private def parseDocumentStart(): List[Token] =
in.skipN(4)
ctx.parseDocumentStart()
ctx.parseDocumentStart(in.column)

/** Checks whether the reader is positioned at a document end marker: the three characters `...`
  * followed by a whitespace character.
  *
  * NOTE(review): if `...` is the very last thing in the input, `in.peek(3)` presumably yields
  * `None` and the marker is not recognised; confirm whether that is intended.
  */
private def isDocumentEnd =
  // The character after the marker is only inspected once the marker itself has matched.
  if in.peekN(3) == "..." then in.peek(3).exists(ch => ch.isWhitespace)
  else false
Expand All @@ -58,7 +57,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer {

private def parseFlowSequenceStart() =
in.skipCharacter()
ctx.appendState(ReaderState.FlowSequence)
ctx.appendState(ReaderState.FlowSequence(in.column))
List(FlowSequenceStart(in.pos()))

private def parseFlowSequenceEnd() =
Expand All @@ -67,21 +66,20 @@ private[yaml] class Scanner(str: String) extends Tokenizer {

private def parseFlowMappingStart() =
in.skipCharacter()
ctx.appendState(ReaderState.FlowMapping)
ctx.appendState(ReaderState.FlowMapping(in.column))
List(FlowMappingStart(in.pos()))

/** Consumes the current character and delegates to `ctx.closeOpenedFlowMapping()`.
  *
  * @return the tokens returned by `ctx.closeOpenedFlowMapping()`
  */
private def parseFlowMappingEnd() =
  in.skipCharacter()
  val closingTokens = ctx.closeOpenedFlowMapping()
  closingTokens

private def parseBlockSequence() =
ctx.closeOpenedCollectionSequences(indent)
if (ctx.shouldParseSequenceEntry(indent)) then
ctx.closeOpenedCollectionSequences(in.column)
if (ctx.shouldParseSequenceEntry(in.column)) then
in.skipCharacter()
indent += 1
getNextTokens()
else
ctx.appendState(ReaderState.Sequence(indent))
ctx.appendState(ReaderState.Sequence(in.column))
List(SequenceStart(in.pos()))

private def parseDoubleQuoteValue(): Token =
Expand Down Expand Up @@ -111,13 +109,11 @@ private[yaml] class Scanner(str: String) extends Tokenizer {
*/
private def parseBlockHeader(): Unit =
while (in.peek() == Some(' ')) {
indent += 1
in.skipCharacter()
}

if in.isNewline then
in.skipCharacter()
indent = 0
parseBlockHeader()

/**
Expand All @@ -142,7 +138,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer {

parseBlockHeader()

val foldedIndent = indent
val foldedIndent = in.column
skipUntilNextIndent(foldedIndent)

@tailrec
Expand All @@ -151,7 +147,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer {
case Some('\n') =>
sb.append(in.read())
skipUntilNextIndent(foldedIndent)
if (!in.isWhitespace && indent != foldedIndent) then sb.result()
if (!in.isWhitespace && in.column != foldedIndent) then sb.result()
else readLiteral()
case Some(char) =>
sb.append(in.read())
Expand All @@ -170,7 +166,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer {
val chompingIndicator = parseChompingIndicator()

parseBlockHeader()
val foldedIndent = indent
val foldedIndent = in.column
skipUntilNextIndent(foldedIndent)

def chompedEmptyLines() =
Expand All @@ -192,7 +188,7 @@ private[yaml] class Scanner(str: String) extends Tokenizer {
} else {
in.skipCharacter()
skipUntilNextIndent(foldedIndent)
if (!in.isWhitespace && indent != foldedIndent) then sb.result()
if (!in.isWhitespace && in.column != foldedIndent) then sb.result()
else
sb.append(" ")
readFolded()
Expand Down Expand Up @@ -235,22 +231,27 @@ private[yaml] class Scanner(str: String) extends Tokenizer {
}

private def parseScalarValue(): Token = {
val sb = new StringBuilder
val sb = new StringBuilder
val scalarIndent = in.column

def readScalar(): String =
in.peek() match
case Some(':')
if in.peekNext() == Some(' ') || in.peekNext() == Some('\n') || in
.peekNext() == Some('\r') =>
sb.result()
case Some(':') if in.isNextWhitespace => sb.result()
case Some(char) if !ctx.isAllowedSpecialCharacter(char) => sb.result()
case Some(' ') if in.peekNext() == Some('#') => sb.result()
case Some('\n') | Some('\r') | None => sb.result()
case _ if in.isNewline =>
skipUntilNextChar()
sb.append(' ')
if (ctx.getIndentOfLatestCollection().exists(in.column > _)) readScalar()
else sb.result()
case Some(char) =>
sb.append(in.read())
readScalar()
case None => sb.result()

val pos = in.pos()
Scalar(readScalar().trim, ScalarStyle.Plain, pos)
val pos = in.pos()
val scalar = readScalar()
Scalar(scalar.trim, ScalarStyle.Plain, pos)
}

private def fetchValue(): List[Token] =
Expand All @@ -264,35 +265,29 @@ private[yaml] class Scanner(str: String) extends Tokenizer {

in.peek() match
case Some(':') =>
ctx.closeOpenedCollectionMapping(indent)
ctx.closeOpenedCollectionMapping(scalar.pos.column)
in.skipCharacter()

if (ctx.shouldParseMappingEntry(indent)) then
if (ctx.shouldParseMappingEntry(scalar.pos.column)) then
List(Token.Key(scalar.pos), scalar, Token.Value(scalar.pos))
else if (!ctx.isFlowMapping()) then
ctx.appendState(ReaderState.Mapping(indent))
ctx.appendState(ReaderState.Mapping(scalar.pos.column))
List(MappingStart(scalar.pos), Token.Key(scalar.pos), scalar, Token.Value(scalar.pos))
else List(scalar)
case _ => List(scalar)

def skipUntilNextToken(): Unit =
while (in.peek() == Some(' ')) do
indent += 1
in.skipCharacter()
while (in.peek() == Some(' ')) do in.skipCharacter()

if in.peek() == Some('#') then skipComment()

if (in.isNewline) then {
in.skipCharacter()
indent = 0
skipUntilNextToken()
}

def skipUntilNextIndent(indentBlock: Int): Unit =
indent = 0
while (in.peek() == Some(' ') && indent < indentBlock) do
indent += 1
in.skipCharacter()
while (in.peek() == Some(' ') && in.column < indentBlock) do in.skipCharacter()

/** Advances the reader for as long as `in.isWhitespace` holds, leaving it on the first position
  * where it no longer does.
  */
def skipUntilNextChar() =
  @tailrec
  def consumeWhitespace(): Unit =
    if in.isWhitespace then
      in.skipCharacter()
      consumeWhitespace()

  consumeWhitespace()
Expand Down
Expand Up @@ -48,6 +48,61 @@ class ScalarSpec extends BaseParseSuite:
assertEventsEquals(events, expectedEvents)
}

test("should parse plain scalar with new lines") {
  // The second line is indented deeper than the `description` key, so it continues the plain
  // scalar and is joined to the first line with a single space.
  val yaml =
    s"""|description: new lines
        | rest.
        |properties: object
        |""".stripMargin

  val reader = Scanner(yaml)
  val events = ParserImpl.getEvents(reader)

  val expectedEvents = List(
    StreamStart,
    DocumentStart(),
    MappingStart(),
    Scalar("description", ScalarStyle.Plain),
    Scalar(
      "new lines rest.",
      ScalarStyle.Plain
    ),
    Scalar("properties", ScalarStyle.Plain),
    Scalar("object", ScalarStyle.Plain),
    MappingEnd(),
    DocumentEnd(),
    StreamEnd
  )

  assertEventsEquals(events, expectedEvents)
}

test("should parse multiline plain scalar value") {
  // Both continuation lines are indented deeper than the `description` key, so all three lines
  // form one plain scalar joined by single spaces; `type` starts a new mapping entry.
  val yaml =
    s"""|description: multiline
        | plain
        | scalar
        |type: string
        |""".stripMargin

  val reader = Scanner(yaml)
  val events = ParserImpl.getEvents(reader)

  val expectedEvents = List(
    StreamStart,
    DocumentStart(),
    MappingStart(),
    Scalar("description", ScalarStyle.Plain),
    Scalar("multiline plain scalar", ScalarStyle.Plain),
    Scalar("type", ScalarStyle.Plain),
    Scalar("string", ScalarStyle.Plain),
    MappingEnd(),
    DocumentEnd(),
    StreamEnd
  )
  assertEventsEquals(events, expectedEvents)
}

test("should parse single quote scalar value with multiline") {
val yaml =
s"""description: 'multiline
Expand Down

0 comments on commit 146b5e7

Please sign in to comment.