Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AV1 color space parsing in MP4 atom parser #692

Merged
merged 9 commits into from
Nov 2, 2023
4 changes: 4 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
video track are available. The default value is `false` which means
selecting a video track is prioritized.
* Extractors:
* Add additional AV1C parsing to MP4 extractor to retrieve
`ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and
`ColorInfo.colorRange` values
([#692](https://github.com/androidx/media/pull/692)).
* Audio:
* Video:
* Add workaround for a device issue on Galaxy Tab S7 FE, Chromecast with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import androidx.media3.common.util.CodecSpecificDataUtil;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.NullableType;
import androidx.media3.common.util.ParsableBitArray;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.Util;
import androidx.media3.container.Mp4LocationData;
Expand Down Expand Up @@ -1221,22 +1222,15 @@ private static void parseVideoSampleEntry(
colorTransfer =
ColorInfo.isoTransferCharacteristicsToColorTransfer(transferCharacteristics);
} else if (childAtomType == Atom.TYPE_av1C) {
ExtractorUtil.checkContainerInput(mimeType == null, /* message= */ null);
mimeType = MimeTypes.VIDEO_AV1;
parent.setPosition(childStartPosition + Atom.HEADER_SIZE);
parent.skipBytes(1);
int byte2 = parent.readUnsignedByte();
int seqProfile = byte2 >> 5;
int byte3 = parent.readUnsignedByte();
boolean highBitdepth = ((byte3 >> 6) & 0b1) != 0;
// From https://aomediacodec.github.io/av1-spec/av1-spec.pdf#page=44
if (seqProfile == 2 && highBitdepth) {
boolean twelveBit = ((byte3 >> 5) & 0b1) != 0;
bitdepthLuma = twelveBit ? 12 : 10;
} else if (seqProfile <= 2) {
bitdepthLuma = highBitdepth ? 10 : 8;
}
bitdepthChroma = bitdepthLuma;
ColorInfo colorInfo = parseAv1c(parent);

bitdepthLuma = colorInfo.lumaBitdepth;
bitdepthChroma = colorInfo.chromaBitdepth;
colorSpace = colorInfo.colorSpace;
colorRange = colorInfo.colorRange;
colorTransfer = colorInfo.colorTransfer;
} else if (childAtomType == Atom.TYPE_clli) {
if (hdrStaticInfo == null) {
hdrStaticInfo = allocateHdrStaticInfo();
Expand Down Expand Up @@ -1317,8 +1311,8 @@ private static void parseVideoSampleEntry(
// established by the bitstream. The absence of color descriptors ('colorSpace' and
// 'colorTransfer') does not necessarily mean that 'colorRange' has default values, hence it
// is not being verified here.
// If 'Atom.TYPE_avcC', 'Atom.TYPE_hvcC' or 'Atom.TYPE_vpcC' is available, they will take
// precedence and overwrite any existing values.
// If 'Atom.TYPE_avcC', 'Atom.TYPE_hvcC', 'Atom.TYPE_vpcC' or 'Atom.TYPE_av1C' is available,
// they will take precedence and overwrite any existing values.
if (colorSpace == Format.NO_VALUE && colorTransfer == Format.NO_VALUE) {
int colorType = parent.readInt();
if (colorType == TYPE_nclx || colorType == TYPE_nclc) {
Expand Down Expand Up @@ -1385,6 +1379,138 @@ private static void parseVideoSampleEntry(
out.format = formatBuilder.build();
}

/**
 * Builds a {@link ColorInfo} from an av1C box by reading the AV1 codec configuration record and
 * then the OBU that immediately follows it, which must be a sequence header.
 *
 * <p>Luma and chroma bit depths come from the configuration record. Color space, color range and
 * color transfer come from the color config of the sequence header, and are only set when the
 * sequence header signals a color description. If the OBU uses a feature this parser does not
 * handle, an info message is logged and the values collected so far are returned.
 *
 * <p>The configuration record layout is defined in the <a
 * href="https://aomediacodec.github.io/av1-isobmff/#av1codecconfigurationbox-syntax">AV1 ISOBMFF
 * binding</a>. The OBU layout is defined in the <a
 * href="https://aomediacodec.github.io/av1-spec/av1-spec.pdf">AV1 bitstream spec</a>. Section
 * numbers in the comments below refer to the latter.
 *
 * @param data The av1C atom data, positioned at the first byte of the configuration record.
 * @return The {@link ColorInfo} assembled from whatever could be parsed.
 */
private static ColorInfo parseAv1c(ParsableByteArray data) {
  ColorInfo.Builder builder = new ColorInfo.Builder();
  // Bit-level reads go through a separate reader over the same backing array.
  ParsableBitArray bits = new ParsableBitArray(data.getData());
  bits.setPosition(data.getPosition() * 8); // Byte position -> bit position.

  // --- av1C configuration record (bit depth only) ---
  bits.skipBytes(1); // marker, version
  int configSeqProfile = bits.readBits(3); // seq_profile
  bits.skipBits(6); // seq_level_idx_0, seq_tier_0
  boolean configHighBitdepth = bits.readBit(); // high_bitdepth
  boolean configTwelveBit = bits.readBit(); // twelve_bit
  if (configSeqProfile <= 2) {
    // Only profile 2 can signal 12 bit; every other high bitdepth stream is 10 bit.
    int bitdepth;
    if (!configHighBitdepth) {
      bitdepth = 8;
    } else if (configSeqProfile == 2 && configTwelveBit) {
      bitdepth = 12;
    } else {
      bitdepth = 10;
    }
    builder.setLumaBitdepth(bitdepth);
    builder.setChromaBitdepth(bitdepth);
  }
  // monochrome, chroma_subsampling_x, chroma_subsampling_y, chroma_sample_position, reserved and
  // initial_presentation_delay are not needed.
  bits.skipBits(13);

  // --- 5.3.1. General OBU syntax ---
  bits.skipBit(); // obu_forbidden_bit
  int obuType = bits.readBits(4); // obu_type
  if (obuType != 1) { // Anything other than OBU_SEQUENCE_HEADER.
    Log.i(TAG, "Unsupported obu_type: " + obuType);
    return builder.build();
  }
  boolean hasObuExtension = bits.readBit(); // obu_extension_flag
  if (hasObuExtension) {
    Log.i(TAG, "Unsupported obu_extension_flag");
    return builder.build();
  }
  boolean hasObuSizeField = bits.readBit(); // obu_has_size_field
  bits.skipBit(); // obu_reserved_1bit
  // obu_size is an unsigned leb128. Only the single byte form (value <= 127) is handled, which
  // can be consumed as a plain 8 bit read.
  if (hasObuSizeField && bits.readBits(8) > 127) { // obu_size
    Log.i(TAG, "Excessive obu_size");
    return builder.build();
  }

  // --- 5.5.1. General OBU sequence header syntax ---
  int headerSeqProfile = bits.readBits(3); // seq_profile
  bits.skipBit(); // still_picture
  boolean reducedStillPictureHeader = bits.readBit(); // reduced_still_picture_header
  if (reducedStillPictureHeader) {
    Log.i(TAG, "Unsupported reduced_still_picture_header");
    return builder.build();
  }
  boolean timingInfoPresent = bits.readBit(); // timing_info_present_flag
  if (timingInfoPresent) {
    Log.i(TAG, "Unsupported timing_info_present_flag");
    return builder.build();
  }
  boolean initialDisplayDelayPresent = bits.readBit(); // initial_display_delay_present_flag
  if (initialDisplayDelayPresent) {
    Log.i(TAG, "Unsupported initial_display_delay_present_flag");
    return builder.build();
  }
  int operatingPointsCount = bits.readBits(5) + 1; // operating_points_cnt_minus_1 + 1
  for (int point = 0; point < operatingPointsCount; point++) {
    bits.skipBits(12); // operating_point_idc[i]
    boolean hasSeqTier = bits.readBits(5) > 7; // seq_level_idx[i]
    if (hasSeqTier) {
      bits.skipBit(); // seq_tier[i]
    }
  }
  int widthFieldBits = bits.readBits(4) + 1; // frame_width_bits_minus_1 + 1
  int heightFieldBits = bits.readBits(4) + 1; // frame_height_bits_minus_1 + 1
  bits.skipBits(widthFieldBits); // max_frame_width_minus_1
  bits.skipBits(heightFieldBits); // max_frame_height_minus_1
  boolean frameIdNumbersPresent = bits.readBit(); // frame_id_numbers_present_flag
  if (frameIdNumbersPresent) {
    bits.skipBits(7); // delta_frame_id_length_minus_2, additional_frame_id_length_minus_1
  }
  bits.skipBits(7); // Seven flags: use_128x128_superblock ... enable_dual_filter.
  boolean orderHintEnabled = bits.readBit(); // enable_order_hint
  if (orderHintEnabled) {
    bits.skipBits(2); // enable_jnt_comp, enable_ref_frame_mvs
  }
  // seq_force_screen_content_tools is non-zero either when seq_choose_screen_content_tools is
  // set (SELECT_SCREEN_CONTENT_TOOLS) or when the explicitly coded bit that follows is set. The
  // second bit is only present in the stream when the first one is clear.
  boolean screenContentToolsNonZero =
      bits.readBit() // seq_choose_screen_content_tools
          || bits.readBit(); // seq_force_screen_content_tools
  if (screenContentToolsNonZero) {
    boolean chooseIntegerMv = bits.readBit(); // seq_choose_integer_mv
    if (!chooseIntegerMv) {
      bits.skipBit(); // seq_force_integer_mv
    }
  }
  if (orderHintEnabled) {
    bits.skipBits(3); // order_hint_bits_minus_1
  }
  bits.skipBits(3); // enable_superres, enable_cdef, enable_restoration

  // --- 5.5.2. OBU Color config syntax ---
  boolean headerHighBitdepth = bits.readBit(); // high_bitdepth
  if (headerHighBitdepth && headerSeqProfile == 2) {
    bits.skipBit(); // twelve_bit
  }
  // mono_chrome is not coded for profile 1 and is implicitly false there.
  boolean monochrome = false;
  if (headerSeqProfile != 1) {
    monochrome = bits.readBit(); // mono_chrome
  }

  boolean colorDescriptionPresent = bits.readBit(); // color_description_present_flag
  if (!colorDescriptionPresent) {
    return builder.build();
  }
  int colorPrimaries = bits.readBits(8); // color_primaries
  int transferCharacteristics = bits.readBits(8); // transfer_characteristics
  int matrixCoefficients = bits.readBits(8); // matrix_coefficients
  boolean impliedFullRange =
      !monochrome
          && colorPrimaries == 1 // CP_BT_709
          && transferCharacteristics == 13 // TC_SRGB
          && matrixCoefficients == 0; // MC_IDENTITY
  boolean fullRange;
  if (impliedFullRange) {
    // color_range is not coded in this case and is treated as 1.
    fullRange = true;
  } else {
    fullRange = bits.readBit(); // color_range
  }
  builder
      .setColorSpace(ColorInfo.isoColorPrimariesToColorSpace(colorPrimaries))
      .setColorRange(fullRange ? C.COLOR_RANGE_FULL : C.COLOR_RANGE_LIMITED)
      .setColorTransfer(
          ColorInfo.isoTransferCharacteristicsToColorTransfer(transferCharacteristics));
  return builder.build();
}

private static ByteBuffer allocateHdrStaticInfo() {
// For HDR static info, Android decoders expect a 25-byte array. The first byte is zero to
// represent Static Metadata Type 1, as per CTA-861-G:2017, Table 44. The following 24 bytes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,10 @@ public void mp4SampleWithOriginalQuicktimeSpecification() throws Exception {
"media/mp4/sample_with_original_quicktime_specification.mov",
simulationConfig);
}

@Test
public void mp4SampleWithAv1c() throws Exception {
  // Runs the shared extractor behavior assertions for Mp4Extractor against the av1C sample file.
  String sampleFile = "media/mp4/sample_with_av1c.mp4";
  ExtractorAsserts.assertBehavior(Mp4Extractor::new, sampleFile, simulationConfig);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
seekMap:
isSeekable = true
duration = 1000000
getPosition(0) = [[timeUs=0, position=48]]
getPosition(1) = [[timeUs=0, position=48]]
getPosition(500000) = [[timeUs=0, position=48]]
getPosition(1000000) = [[timeUs=0, position=48]]
numberOfTracks = 1
track 0:
total output bytes = 942
sample count = 30
format 0:
id = 1
sampleMimeType = video/av01
maxInputSize = 188
width = 720
height = 1280
frameRate = 30.0
colorInfo:
colorSpace = 6
colorRange = 2
colorTransfer = 7
lumaBitdepth = 10
chromaBitdepth = 10
metadata = entries=[TSSE: description=null: values=[Lavf60.3.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
sample 0:
time = 0
flags = 1
data = length 84, hash 9C46A819
sample 1:
time = 33333
flags = 0
data = length 158, hash 43A1B544
sample 2:
time = 66666
flags = 0
data = length 3, hash D600
sample 3:
time = 100000
flags = 0
data = length 28, hash 27890E81
sample 4:
time = 133333
flags = 0
data = length 3, hash D5F0
sample 5:
time = 166666
flags = 0
data = length 55, hash 9FC5012E
sample 6:
time = 200000
flags = 0
data = length 3, hash D600
sample 7:
time = 233333
flags = 0
data = length 27, hash 70CFAC05
sample 8:
time = 266666
flags = 0
data = length 3, hash D5D0
sample 9:
time = 300000
flags = 0
data = length 82, hash 944218D6
sample 10:
time = 333333
flags = 0
data = length 3, hash D600
sample 11:
time = 366666
flags = 0
data = length 27, hash BA4D4A06
sample 12:
time = 400000
flags = 0
data = length 3, hash D5F0
sample 13:
time = 433333
flags = 0
data = length 54, hash A98584CA
sample 14:
time = 466666
flags = 0
data = length 3, hash D600
sample 15:
time = 500000
flags = 0
data = length 27, hash 45D733B8
sample 16:
time = 533333
flags = 0
data = length 3, hash D5A0
sample 17:
time = 566666
flags = 0
data = length 112, hash B80B26FD
sample 18:
time = 600000
flags = 0
data = length 3, hash D5F0
sample 19:
time = 633333
flags = 0
data = length 27, hash 37DD29D9
sample 20:
time = 666666
flags = 0
data = length 3, hash D5E0
sample 21:
time = 700000
flags = 0
data = length 54, hash 1C15581C
sample 22:
time = 733333
flags = 0
data = length 3, hash D5F0
sample 23:
time = 766666
flags = 0
data = length 27, hash 49EC3531
sample 24:
time = 800000
flags = 0
data = length 3, hash D5B0
sample 25:
time = 833333
flags = 0
data = length 84, hash 2025C9F5
sample 26:
time = 866666
flags = 0
data = length 3, hash D5D0
sample 27:
time = 900000
flags = 0
data = length 27, hash B927669C
sample 28:
time = 933333
flags = 0
data = length 3, hash D5C0
sample 29:
time = 966666
flags = 536870912
data = length 27, hash 706C58AD
tracksEnded = true