Skip to content

Commit

Permalink
Improve parsing TIFF files (#482)
Browse files Browse the repository at this point in the history
  • Loading branch information
Borewit committed Aug 23, 2021
1 parent c037ba7 commit 82c9ccb
Show file tree
Hide file tree
Showing 11 changed files with 72 additions and 47 deletions.
110 changes: 70 additions & 40 deletions core.js
Expand Up @@ -68,7 +68,7 @@ export async function fileTypeFromTokenizer(tokenizer) {
}

async function _fromTokenizer(tokenizer) {
let buffer = Buffer.alloc(minimumBytes);
const buffer = Buffer.alloc(minimumBytes);
const bytesRead = 12;
const check = (header, options) => _check(buffer, header, options);
const checkString = (header, options) => check(stringToBytes(header), options);
Expand Down Expand Up @@ -617,58 +617,87 @@ async function _fromTokenizer(tokenizer) {
};
}

// TIFF, little-endian type
if (check([0x49, 0x49, 0x2A, 0x0])) {
if (checkString('CR', {offset: 8})) {
return {
ext: 'cr2',
mime: 'image/x-canon-cr2',
};
/**
 * Read one tag entry at the tokenizer's current position and map it to a
 * known TIFF-based raw format.
 *
 * Consumes a 16-bit tag id, then skips the next 10 bytes (presumably the
 * remainder of a 12-byte TIFF IFD entry — confirm against the TIFF spec).
 *
 * @param {boolean} bigEndian - Decode the tag id as big-endian when true, little-endian otherwise.
 * @returns {Promise<{ext: string, mime: string} | undefined>} The ARW type for tag 50341,
 *   the DNG type for tag 50706, or `undefined` for any other tag.
 */
async function readTiffTag(bigEndian) {
	const tagId = await tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE);

	// The skip must complete before the caller reads the next tag; leaving the
	// promise floating lets an asynchronous tokenizer read from the wrong position
	// and hides any rejection.
	await tokenizer.ignore(10);

	switch (tagId) {
		case 50_341:
			return {
				ext: 'arw',
				mime: 'image/x-sony-arw',
			};
		case 50_706:
			return {
				ext: 'dng',
				mime: 'image/x-adobe-dng',
			};
		default:
			// Unrecognized tag: let the caller continue with the next entry.
			return undefined;
	}
}

if (check([0x1C, 0x00, 0xFE, 0x00], {offset: 8}) || check([0x1F, 0x00, 0x0B, 0x00], {offset: 8})) {
return {
ext: 'nef',
mime: 'image/x-nikon-nef',
};
/**
 * Scan the tag entries of an image file directory, starting at the
 * tokenizer's current position.
 *
 * Reads a 16-bit entry count, then calls `readTiffTag` once per entry and
 * stops at the first entry that yields a file type.
 *
 * @param {boolean} bigEndian - Decode the entry count as big-endian when true; also forwarded to `readTiffTag`.
 * @returns {Promise<object | undefined>} The first truthy result of `readTiffTag`, or `undefined` if no entry matched.
 */
async function readTiffIFD(bigEndian) {
	const countToken = bigEndian ? Token.UINT16_BE : Token.UINT16_LE;
	const tagCount = await tokenizer.readToken(countToken);

	for (let tagIndex = 0; tagIndex < tagCount; tagIndex += 1) {
		const detected = await readTiffTag(bigEndian);
		if (!detected) {
			continue;
		}

		return detected;
	}

	return undefined;
}

if (
check([0x08, 0x00, 0x00, 0x00], {offset: 4})
&& (check([0x2D, 0x00, 0xFE, 0x00], {offset: 8})
|| check([0x27, 0x00, 0xFE, 0x00], {offset: 8}))
) {
return {
ext: 'dng',
mime: 'image/x-adobe-dng',
/**
 * Classify a TIFF-style header that has already been loaded into `buffer`.
 *
 * Decodes a 16-bit version at offset 2 and a 32-bit IFD offset at offset 4
 * using the requested byte order, then:
 *  - version 42: checks the CR2 and NEF signatures at offset 8, otherwise
 *    skips `ifdOffset` bytes on the tokenizer and scans the IFD for a more
 *    specific type, falling back to plain TIFF;
 *  - version 43 (BigTIFF): reports plain TIFF;
 *  - anything else: reports nothing.
 *
 * @param {boolean} bigEndian - True when the header's byte-order mark was big-endian ("MM").
 * @returns {Promise<{ext: string, mime: string} | undefined>} The detected type, or `undefined`
 *   when the version field is neither 42 nor 43.
 */
async function readTiffHeader(bigEndian) {
	const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(buffer, 2);
	const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(buffer, 4);

	if (version === 42) {
		// TIFF file header
		if (ifdOffset >= 6) {
			if (checkString('CR', {offset: 8})) {
				return {
					ext: 'cr2',
					mime: 'image/x-canon-cr2',
				};
			}

			// NOTE(review): these byte patterns are written in little-endian order — confirm
			// whether big-endian NEF files need a separate check.
			if (ifdOffset >= 8 && (check([0x1C, 0x00, 0xFE, 0x00], {offset: 8}) || check([0x1F, 0x00, 0x0B, 0x00], {offset: 8}))) {
				return {
					ext: 'nef',
					mime: 'image/x-nikon-nef',
				};
			}
		}

		await tokenizer.ignore(ifdOffset);

		// The IFD uses the same byte order as the header; decoding it with a fixed
		// little-endian order would misread the tag count and tag ids of "MM" files.
		const fileType = await readTiffIFD(bigEndian);
		return fileType ?? {
			ext: 'tif',
			mime: 'image/tiff',
		};
	}

	if (version === 43) {
		// Big TIFF file header
		return {
			ext: 'tif',
			mime: 'image/tiff',
		};
	}

	return undefined;
}

return {
ext: 'tif',
mime: 'image/tiff',
};
// TIFF, little-endian type
if (check([0x49, 0x49])) {
const fileType = await readTiffHeader(false);
if (fileType) {
return fileType;
}
}

// TIFF, big-endian type
if (check([0x4D, 0x4D, 0x0, 0x2A])) {
return {
ext: 'tif',
mime: 'image/tiff',
};
if (check([0x4D, 0x4D])) {
const fileType = await readTiffHeader(true);
if (fileType) {
return fileType;
}
}

if (checkString('MAC ')) {
Expand All @@ -683,7 +712,8 @@ async function _fromTokenizer(tokenizer) {
async function readField() {
const msb = await tokenizer.peekNumber(Token.UINT8);
let mask = 0x80;
let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
let ic = 0; // 0 = A, 1 = B, 2 = C, 3
// = D

while ((msb & mask) === 0) {
++ic;
Expand Down
Binary file added fixture/fixture-Leica-M10.dng
Binary file not shown.
Binary file added fixture/fixture-sony-zv-e10.arw
Binary file not shown.
Binary file removed fixture/fixture.arw
Binary file not shown.
Binary file removed fixture/fixture.dng
Binary file not shown.
Binary file removed fixture/fixture2.arw
Binary file not shown.
Binary file removed fixture/fixture2.dng
Binary file not shown.
Binary file removed fixture/fixture3.arw
Binary file not shown.
Binary file removed fixture/fixture4.arw
Binary file not shown.
Binary file removed fixture/fixture5.arw
Binary file not shown.
9 changes: 2 additions & 7 deletions test.js
Expand Up @@ -35,18 +35,13 @@ const names = {
'fixture2',
],
arw: [
'fixture',
'fixture2',
'fixture3',
'fixture4',
'fixture5',
'fixture-sony-zv-e10',
],
cr3: [
'fixture',
],
dng: [
'fixture',
'fixture2',
'fixture-Leica-M10',
],
nef: [
'fixture',
Expand Down

0 comments on commit 82c9ccb

Please sign in to comment.