-
Notifications
You must be signed in to change notification settings - Fork 0
/
u6-lzw.js
310 lines (278 loc) · 9.57 KB
/
u6-lzw.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
var BASE_CW = 0x102;
function concatByteArrays(arrays) {
var totalLength = arrays.reduce(function(len, arr) { return len + arr.length; }, 0);
var mergedArray = new Uint8Array(totalLength);
var offset = 0;
try {
arrays.forEach(function (arr) {
mergedArray.set(arr, offset);
offset += arr.length;
});
} catch (ex) {
console.log(arrays, totalLength, offset);
throw ex;
}
return mergedArray;
}
function readCodewords(rom, startAddr) {
var currentAddr = startAddr;
var cwSize = 9;
var cwNext = BASE_CW;
var buf = 0; // Buf holds a variable number of bits from the input stream so non-byte-aligned reads can be made
var bufLen = 0;
var codewords = [];
for(var i = 0; i < 0x4000; i++) {
// Read enough bits into `buf` that at least one codeword can be read
while (bufLen < cwSize) {
buf = buf | rom[currentAddr] << bufLen;
bufLen += 8;
currentAddr++;
}
// Take one codeword out of buf
cw = buf & (Math.pow(2, cwSize) - 1);
buf = buf >> cwSize;
bufLen -= cwSize;
if (cw === 0x100) {
// 0x100 - reinit dictionary
codewords.push(cw);
cwSize = 9;
// This is one less than 0x102 because the next cw is never a dictionary item and doesn't increment the counter
cwNext = BASE_CW - 1;
} else if (cw === 0x101) {
// 0x101 - end of stream
codewords.push(cw);
return { codewords: codewords, bytesRead: currentAddr - startAddr };
} else if (cw > cwNext) {
codewords.push(cw);
throw new Error(`Codeword not in dictionary: ${cw} should be <= ${cwNext}`);
} else {
codewords.push(cw);
cwNext += 1;
if (cwNext >= (1 << cwSize)) {
if (cwSize + 1 <= 12) {
cwSize += 1;
} else {
// Codeword can't grow beyond 12 bits. Next command should be a reinit or else U6's dictionary
// will overflow and cause memory corruption.
}
}
}
}
throw new Error('Input was longer than expected (>16kB)');
}
function decompressLZW(codewords) {
function getDictEntry(cw) {
var chars = [];
while (cw > 0xFF) {
var entry = dictionary[cw - BASE_CW];
chars.unshift(entry[0]);
cw = entry[1];
}
chars.unshift([cw]);
return concatByteArrays(chars)
}
var dictionary = []; // Array of [decompressed string, next codeword]
var prevCw = 0;
var output = [];
codewords.forEach(function(cw) {
if (cw === 0x100) {
// 0x100 - reinit dictionary
dictionary = []
} else if (cw === 0x101) {
// 0x101 - end stream
} else if (prevCw === 0x100) {
// Last codeword was a reset - this codeword skips the dictionary completely
output.push([cw])
} else {
var string;
if (cw < BASE_CW + dictionary.length) {
// Normal case - cw is either a dictionary entry (>0x102) or a literal value
string = getDictEntry(cw);
output.push(string);
} else {
// Special case - use last cw's string, then repeat its first byte.
// I'm not sure what the rationale is for this transformation, but it saves bytes.
// This codepath isn't used in most files, but it's used by the title screen data.
string = getDictEntry(prevCw);
output.push(string);
output.push(string.subarray(0, 1));
}
dictionary.push([string.subarray(0, 1), prevCw]);
}
prevCw = cw;
});
return concatByteArrays(output);
}
function decompressRLE(inputBytes) {
var rleOn = false;
var lastDatum = 0;
var result = [];
inputBytes.forEach(function(datum) {
if (rleOn) {
rleOn = false;
if (datum === 0) {
result.push(0x81);
} else {
for (var i = 0; i < datum - 1; i++) {
result.push(lastDatum);
}
}
} else if (datum === 0x81) {
rleOn = true;
} else {
result.push(datum);
lastDatum = datum;
}
});
return new Uint8Array(result);
}
function decompress(rom, startAddress) {
return decompressRLE(decompressLZW(readCodewords(rom, startAddress).codewords));
}
function compressRLE(data) {
function outputChar(char) {
if (char === 0x81) {
// Escape a literal 0x81 byte, as it would otherwise start an RLE command
output.push([0x81, 0x00])
} else if (char != null) {
output.push([char])
}
}
function outputCharSequence(char, count) {
if (count > 2 && char !== 0x81) {
outputChar(char);
output.push([0x81, count])
} else {
for (var i = 0; i < count; i++) {
outputChar(char);
}
}
}
var lastChar = null;
var count = 0;
var output = [];
data.forEach(function (char) {
if (char !== lastChar || count === 255) {
// Character has changed - write to output
outputCharSequence(lastChar, count);
lastChar = char;
count = 1
} else {
// Counting how many times this character occurs before emitting an RLE command
count++;
}
});
// Finish up encoding the last char sequence
outputCharSequence(lastChar, count);
return concatByteArrays(output);
}
function compressLZW(data) {
function dataStartsWith(prefix) {
if (data.length < prefix.length) {
return false;
}
for (var i = 0; i < prefix.length; i++) {
if (prefix[i] !== data[i]) {
return false;
}
}
return true;
}
var dictionary = []; // Array of Uint8Arrays
var prev = data.subarray(0, 1);
var prevCw = 0;
var output = [];
// Initialize stream with a reinit command followed by a dictionary-oblivious char
output.push(0x100, data[0]);
data = data.subarray(1); // Advance by 1 byte
while (data.length > 0) {
var longestIdx = 0;
var longestLen = 0;
// Brute-force search for the longest dictionary entry that prefixes data
dictionary.forEach(function (entry, i) {
if (entry.length > longestLen && dataStartsWith(entry)) {
longestIdx = i;
longestLen = entry.length;
}
});
// The decompressor supports a special case when cw == cw_next it repeats the
// last cw's string, then repeats the first byte of that string.
var special = null;
if (prevCw >= BASE_CW) {
var prevString = dictionary[prevCw - BASE_CW];
special = concatByteArrays([prevString, prevString.subarray(0,1)]);
} else {
special = null;
}
var string;
var cw;
if (special != null && special.length >= longestLen && dataStartsWith(special)) {
// The special case was better than a dictionary match, use it
string = special;
cw = BASE_CW + dictionary.length;
} else if (longestLen > 0) {
// Found a good dictionary match
string = dictionary[longestIdx];
cw = BASE_CW + longestIdx;
} else {
// No dictionary match - emit the raw value
string = data.subarray(0, 1);
cw = string[0];
}
output.push(cw);
dictionary.push(concatByteArrays([prev, string.subarray(0, 1)]));
prev = string;
prevCw = cw;
data = data.subarray(string.length);
if (dictionary.length + BASE_CW >= 0x1000 && data.length > 0) {
// Exceeded maximum dictionary size. Reinit the stream.
prev = data.subarray(0, 1);
prevCw = 0;
output.push(0x100, data[0]);
dictionary = [];
data = data.subarray(1);
}
}
output.push(0x101); // End stream
return output;
}
function packCodewords(codewords) {
var outputBytes = [];
var residual = 0; // Residual is the buffer of unwritten bits that aren't enough to make a full byte
var residualBits = 0;
var cwNext = BASE_CW - 1;
codewords.forEach(function (cw) {
var cwBits;
if (cwNext < 0x200) { cwBits = 9; }
else if (cwNext < 0x400) { cwBits = 10; }
else if (cwNext < 0x800) { cwBits = 11; }
else { cwBits = 12; }
residual |= cw << residualBits;
residualBits += cwBits;
// Output as many full bytes as possible, leaving the rest in residual
while (residualBits >= 8) {
outputBytes.push(residual & 0xFF);
residual >>= 8;
residualBits -= 8;
}
// Update codeword size in case stream has been reset
if (cw === 0x100) {
cwNext = BASE_CW - 1;
} else {
cwNext++;
}
});
while (residualBits > 0) {
outputBytes.push(residual & 0xFF);
residual >>= 8;
residualBits -= 8;
}
return new Uint8Array(outputBytes);
}
function compress(data) {
return packCodewords(compressLZW(compressRLE(data)))
}
module.exports = {
readCodewords, decompressLZW, decompressRLE, decompress,
compressRLE, compressLZW, packCodewords, compress,
};