/
CharArrayReader.scala
161 lines (138 loc) · 4.35 KB
/
CharArrayReader.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package scala.meta.internal.mtags
import scala.meta.Dialect
import scala.meta.inputs._
import scala.meta.internal.tokenizers.Reporter
/**
 * A cursor over a character buffer that decodes `\\uXXXX` escapes and surrogate pairs while
 * keeping track of the offsets of the current character and of line starts.
 *
 * The reader is mutable: every `next*` call updates `ch` and, unless the end of the buffer has
 * been reached, the offset fields as well.
 */
private[meta] case class CharArrayReader private (
    buf: Array[Char],
    dialect: Dialect,
    reporter: Reporter,
    /** The most recently read character; `Chars.SU` initially and once the buffer is exhausted */
    var ch: Int = Chars.SU,
    /** Offset in `buf` at which the most recently read character starts */
    var begCharOffset: Int = -1, // included
    /** Offset in `buf` one past the most recently read character */
    var endCharOffset: Int = 0, // excluded
    /** The start offset of the current line */
    var lineStartOffset: Int = 0,
    /** The start offset of the line before the current one */
    private var lastLineStartOffset: Int = 0
) {

  /** Creates a reader positioned before the first character of `input`. */
  def this(input: Input, dialect: Dialect, reporter: Reporter) =
    this(buf = input.chars, dialect = dialect, reporter = reporter)

  import reporter._

  /**
   * Advance one character. A CR followed by an escaped LF is collapsed into a single LF, line
   * bookkeeping is updated, and constructs that the dialect forbids when
   * `allowMultilinePrograms` is off (line breaks, double quotes) are reported via `readerError`.
   */
  final def nextChar(): Unit = {
    nextRawChar()
    // Only characters below space can be a CR or a line terminator.
    if (ch < ' ') {
      skipCR()
      potentialLineEnd()
    }
    val quoteInSingleLine = ch == '"' && !dialect.allowMultilinePrograms
    if (quoteInSingleLine)
      readerError(
        "double quotes are not allowed in single-line quasiquotes",
        at = begCharOffset
      )
  }

  /**
   * Advance by exactly one buffer element, without decoding unicode escapes or surrogate pairs,
   * and update the line-start bookkeeping. At the end of the buffer only `ch` is set to
   * `Chars.SU`; the offsets are left untouched.
   */
  final def nextCommentChar(): Unit = {
    if (endCharOffset < buf.length) {
      val offset = endCharOffset
      ch = buf(offset)
      begCharOffset = offset
      endCharOffset = offset + 1
      checkLineEnd()
    } else {
      ch = Chars.SU
    }
  }

  /**
   * Advance one character, leaving CR;LF pairs intact. This is for use in multi-line strings, so
   * there are no "potential line ends" here.
   *
   * A high surrogate followed by a low surrogate is combined into one code point. A high
   * surrogate followed by anything else is returned on its own, while a high surrogate that is
   * the very last character of the buffer is reported via `readerError`.
   */
  final def nextRawChar(): Unit = {
    if (endCharOffset >= buf.length) ch = Chars.SU
    else {
      val start = endCharOffset
      begCharOffset = start
      val (first, afterFirst) = readUnicodeChar(start)
      if (Character.isHighSurrogate(first)) {
        if (afterFirst >= buf.length)
          readerError("invalid unicode surrogate pair", at = begCharOffset)
        else {
          val (second, afterSecond) = readUnicodeChar(afterFirst)
          if (Character.isLowSurrogate(second)) {
            ch = Character.toCodePoint(first, second)
            endCharOffset = afterSecond
          } else {
            // Unpaired high surrogate: hand it out as is, the next read starts right after it.
            ch = first
            endCharOffset = afterFirst
          }
        }
      } else {
        ch = first
        endCharOffset = afterFirst
      }
    }
  }

  /** Skip spaces and tabs starting at the current character; returns the character stopped on. */
  def nextNonWhitespace: Int = {
    def onBlank: Boolean = ch == ' ' || ch == '\t'
    while (onBlank) nextRawChar()
    ch
  }

  /**
   * Read the character at `offset`, interpreting \\uxxxx escapes; doesn't mutate internal state.
   *
   * @return the decoded character together with the offset just past it; when no escape is
   *         recognised this is the raw buffer element and `offset + 1`
   */
  private def readUnicodeChar(offset: Int): (Char, Int) = {
    val raw = buf(offset)
    val next = offset + 1 // offset after a single, unescaped character

    // True when the run of backslashes that ends at `offset` has odd length.
    def evenSlashPrefix: Boolean = {
      var i = offset - 1
      while (i >= 0 && buf(i) == '\\') i -= 1
      (next - i) % 2 == 0
    }

    val startsEscape =
      raw == '\\' && next < buf.length && buf(next) == 'u' && evenSlashPrefix

    if (!startsEscape) (raw, next)
    else {
      // Step over the first 'u' and any further 'u's that follow it.
      var cursor = next + 1
      while (cursor < buf.length && buf(cursor) == 'u') cursor += 1

      // Four hex digits must be available, otherwise fall back to the raw backslash.
      if (cursor + 3 >= buf.length) (raw, next)
      else {
        def hexDigit(): Int = {
          val digit: Int = Chars.digit2int(buf(cursor), 16)
          cursor += 1
          digit
        }
        val d3 = hexDigit()
        val d2 = hexDigit()
        val d1 = hexDigit()
        val d0 = hexDigit()
        val code = d3 << 12 | d2 << 8 | d1 << 4 | d0
        (code.toChar, cursor)
      }
    }
  }

  /**
   * Replace a CR that is followed by a unicode-escaped LF with a single LF. Only a backslash
   * right after the CR triggers the lookup; any other following character leaves the CR as is.
   */
  private def skipCR(): Unit = {
    val crThenBackslash =
      ch == Chars.CR && endCharOffset < buf.length && buf(endCharOffset) == '\\'
    if (crThenBackslash) {
      val (decoded, afterDecoded) = readUnicodeChar(endCharOffset)
      if (decoded == Chars.LF) {
        ch = Chars.LF
        endCharOffset = afterDecoded
      }
    }
  }

  /** Record a line end if there is one and report it when the dialect forbids line breaks. */
  private def potentialLineEnd(): Unit = {
    val lineEnded = checkLineEnd()
    if (lineEnded && !dialect.allowMultilinePrograms)
      readerError(
        "line breaks are not allowed in single-line quasiquotes",
        at = begCharOffset
      )
  }

  /**
   * If the current character is LF or FF, shift the line-start offsets so that the current line
   * begins right after it.
   *
   * @return whether the current character ended a line
   */
  private def checkLineEnd(): Boolean = {
    val atLineEnd = ch == Chars.LF || ch == Chars.FF
    if (atLineEnd) {
      lastLineStartOffset = lineStartOffset
      lineStartOffset = endCharOffset
    }
    atLineEnd
  }

  /**
   * A new reader that takes off at the current character position. It starts with the same
   * field values (and the same `buf` array) and then advances independently.
   */
  def lookaheadReader: CharArrayReader = copy()

  /** Convenience over `nextChar()`: advance and return the character that was read. */
  def getc(): Int = {
    nextChar()
    ch
  }

  /** Whether the current character spans more than one element of `buf`. */
  final def wasMultiChar: Boolean = endCharOffset - 1 > begCharOffset
}