/
trie.go
168 lines (146 loc) · 4.84 KB
/
trie.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
package useragent
import (
"strings"
)
type Result struct {
Match string
// 0: Unknown, 1: Browser, 2: OS, 3: Type
Type uint8
// Precedence value for each result type to determine which result
// should be overwritten.
Precedence uint8
}
// RuneTrie is a trie of runes with string keys and interface{} values.
type RuneTrie struct {
children map[rune]*RuneTrie
result []Result
}
// NewRuneTrie allocates and returns a new *RuneTrie.
func NewRuneTrie() *RuneTrie {
return new(RuneTrie)
}
// Get returns the value stored at the given key. Returns nil for internal
// nodes or for nodes with a value of nil.
func (trie *RuneTrie) Get(key string) UserAgent {
node := trie
ua := UserAgent{
precedence: Precedence{},
}
// Flag to indicate if we are currently iterating over a version number.
var isVersion bool
// A buffer to store the version number.
var versionBuffer strings.Builder
// Number of runes to skip when iterating over the trie. This is used
// to skip over version numbers or language codes.
var skipCount uint8
// Skip until we encounter whitespace.
var skipUntilWhitespace bool
for i, r := range key {
if skipUntilWhitespace {
if r == ' ' {
skipUntilWhitespace = false
} else {
continue
}
}
if skipCount > 0 {
skipCount--
continue
}
if isVersion {
// If we encounter any unknown characters, we can assume the version number is over.
if !IsDigit(r) && r != '.' {
isVersion = false
} else {
// Add to rune buffer
versionBuffer.WriteRune(r)
continue
}
}
// We want to strip any other version numbers from other products to get more hits
// to the trie.
//
// We also do not use a switch here as Go does not generate a jump table for
// switch statements with no integral constants. Benchmarking shows that ops
// go down if we try to migrate statements like this to a switch.
if IsDigit(r) || (r == '.' && len(key) > i+1 && IsDigit(rune(key[i+1]))) {
continue
}
// Identify and skip language codes e.g. en-US, zh-cn, en_US, ZH_cn
if len(key) > i+6 && r == ' ' && IsLetter(rune(key[i+1])) && IsLetter(rune(key[i+2])) && (key[i+3] == '-' || key[i+3] == '_') && IsLetter(rune(key[i+4])) && IsLetter(rune(key[i+5])) && (key[i+6] == ' ' || key[i+6] == ')' || key[i+6] == ';') {
// Add the number of runes to skip to the skip count.
skipCount += 6
continue
}
switch r {
case ' ', ';', ')', '(', ',', '_', '-', '/':
continue
}
// If result exists, we can append it to the value.
for _, result := range node.result {
matched := ua.addMatch(result)
// If we matched a browser of the highest precedence, we can mark the
// next set of runes as the version number we want to store.
//
// We also reject any version numbers related to Safari since it has a
// separate key for its version number.
if (matched && result.Type == BrowserMatch && result.Match != Safari) || (result.Type == VersionMatch && ua.Version == "") {
// Clear version buffer if it has old values.
versionBuffer.Reset()
skipCount = 1 // We want to omit the slash after the browser name.
isVersion = true
}
// If we matched a mobile token, we want to strip everything after it
// until we reach whitespace to get around random device IDs.
if matched && result.Match == Mobile {
// We need to clear the result so we can match the next token.
node.result = nil
skipUntilWhitespace = true
}
}
// We need to catch the flag change after the loop since it isn't possible
// for a continue to affect an outer loop.
if skipUntilWhitespace {
continue
}
// Set the next node to the child of the current node.
next := node.children[r]
if next == nil {
continue // No match found, but we can try to match the next rune.
}
node = next
}
// Store version buffer into the user agent struct.
ua.Version = versionBuffer.String()
return ua
}
// Put inserts the value into the trie at the given key, replacing any
// existing items. At the end of key tokens, a result is stored marking
// a potential match for a browser, device, or OS using the indexes provided
// by MatchTokenIndexes.
func (trie *RuneTrie) Put(key string) {
node := trie
matchResults := MatchTokenIndexes(key)
for keyIndex, r := range key {
// Reset the result slice for each new rune.
node.result = []Result{}
// If we encounter a match, we can store it in the trie.
for _, result := range matchResults {
if keyIndex == result.EndIndex-1 {
result := Result{Match: result.Match, Type: result.MatchType, Precedence: result.Precedence}
node.result = append(node.result, result)
}
}
child := node.children[r]
if child == nil {
// If no map is found, create a new one.
if node.children == nil {
node.children = map[rune]*RuneTrie{}
}
// Store new runes in the trie.
child = new(RuneTrie)
node.children[r] = child
}
node = child
}
}