Skip to content

Commit

Permalink
Support java class files
Browse files Browse the repository at this point in the history
Java class files and MachO FAT binaries have the same magic number
'0xCAFEBABE'. It's important to inspect the 4 bytes that follow the magic
(i.e. the first 8 bytes in total) to disambiguate the two.
  • Loading branch information
mikewiacek committed Mar 12, 2024
1 parent 6469358 commit 30e5597
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
Binary file added fixtures/sample.class
Binary file not shown.
2 changes: 2 additions & 0 deletions match_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func TestMatchFile(t *testing.T) {
{"zst"},
{"exr"},
{"avif"},
{"class"},
}

for _, test := range cases {
Expand All @@ -77,6 +78,7 @@ func TestMatchReader(t *testing.T) {
{bytes.NewBuffer([]byte{0xFF, 0xD8, 0xFF}), "jpg"},
{bytes.NewBuffer([]byte{0xFF, 0xD8, 0x00}), "unknown"},
{bytes.NewBuffer([]byte{0x89, 0x50, 0x4E, 0x47}), "png"},
{bytes.NewBuffer([]byte{0xCA, 0xFE, 0xBA, 0xBE, 0x00, 0x00, 0x00, 0xff}), "class"},
}

for _, test := range cases {
Expand Down
21 changes: 20 additions & 1 deletion matchers/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ var (
TypeDcm = newType("dcm", "application/dicom")
TypeIso = newType("iso", "application/x-iso9660-image")
TypeMachO = newType("macho", "application/x-mach-binary") // Mach-O binaries have no common extension.
TypeClass = newType("class", "application/java-vm") // Java class files
)

var Archive = Map{
Expand Down Expand Up @@ -67,6 +68,7 @@ var Archive = Map{
TypeDcm: Dcm,
TypeIso: Iso,
TypeMachO: MachO,
TypeClass: Class,
}

var (
Expand Down Expand Up @@ -184,7 +186,24 @@ func MachO(buf []byte) bool {
// Big endian versions below here...
(buf[0] == 0xCF && buf[1] == 0xFA && buf[2] == 0xED && buf[3] == 0xFE) ||
(buf[0] == 0xCE && buf[1] == 0xFA && buf[2] == 0xED && buf[3] == 0xFE) ||
(buf[0] == 0xCA && buf[1] == 0xFE && buf[2] == 0xBA && buf[3] == 0xBE))
// Java class files have a 0xCAFEBABE magic number. As the Class func can disambiguate,
// if buf has 0xCAFEBABE as a magic number but Class returns false, assume it's a MachO.
(buf[0] == 0xCA && buf[1] == 0xFE && buf[2] == 0xBA && buf[3] == 0xBE && !Class(buf)))
}

// Class reports whether buf starts with a Java class file header.
//
// The 0xCAFEBABE magic number is shared with some MachO FAT binaries, so the
// magic alone is not conclusive: the four bytes that follow it are read as a
// big-endian 32-bit value and used to tell the two formats apart.
// Reference: https://opensource.apple.com/source/file/file-80.40.2/file/magic/Magdir/cafebabe.auto.html
func Class(buf []byte) bool {
	// Both the 4-byte magic and the 4-byte discriminator must be present.
	if len(buf) < 8 {
		return false
	}

	if binary.BigEndian.Uint32(buf[:4]) != 0xCAFEBABE {
		return false
	}

	// Values of 44 and above are treated as a class file version; anything
	// smaller is left for the MachO matcher to claim.
	// NOTE(review): the JVM specification lists 45 as the first class file
	// major version — confirm whether 44 is the intended threshold.
	version := binary.BigEndian.Uint32(buf[4:8])
	return version >= 44
}

// Zstandard compressed data is made of one or more frames.
Expand Down

0 comments on commit 30e5597

Please sign in to comment.