From 086d897edc81915a2626521160a2322576373a10 Mon Sep 17 00:00:00 2001 From: chyyran Date: Sat, 6 Aug 2022 03:27:42 -0400 Subject: [PATCH] Reduce unnecessary allocations and indirections * Changed literal_probs array from a Vec<Vec<u16>> to a Vec2D backed by a contiguous allocation * BitTrees in LenDecoder and DecoderState are now stored inline. The actual BitTree data still lives in a Vec but one level of indirection is reduced. * Don't bother with filling stack-allocated DecoderState arrays on reset, and just recreate the arrays dropping the existing ones. --- src/decode/lzma.rs | 77 ++++++++++--------- src/decode/rangecoder.rs | 42 +++++++++- src/lib.rs | 1 + src/util/mod.rs | 1 + src/util/vec2d.rs | 161 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 242 insertions(+), 40 deletions(-) create mode 100644 src/util/mod.rs create mode 100644 src/util/vec2d.rs diff --git a/src/decode/lzma.rs b/src/decode/lzma.rs index 036d3f2..7d1d5b3 100644 --- a/src/decode/lzma.rs +++ b/src/decode/lzma.rs @@ -1,8 +1,8 @@ use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer}; -use crate::decode::rangecoder; -use crate::decode::rangecoder::RangeDecoder; +use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder}; use crate::decompress::{Options, UnpackedSize}; use crate::error; +use crate::util::vec2d::Vec2D; use byteorder::{LittleEndian, ReadBytesExt}; use std::io; @@ -166,9 +166,9 @@ pub(crate) struct DecoderState { partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>, pub(crate) lzma_props: LzmaProperties, unpacked_size: Option, - literal_probs: Vec>, - pos_slot_decoder: Vec, - align_decoder: rangecoder::BitTree, + literal_probs: Vec2D, + pos_slot_decoder: [BitTree; 4], + align_decoder: BitTree, pos_decoders: [u16; 115], is_match: [u16; 192], // true = LZ, false = literal is_rep: [u16; 12], @@ -178,8 +178,8 @@ pub(crate) struct DecoderState { is_rep_0long: [u16; 192], state: usize, rep: [usize; 4], - len_decoder: rangecoder::LenDecoder, - rep_len_decoder:
rangecoder::LenDecoder, + len_decoder: LenDecoder, + rep_len_decoder: LenDecoder, } impl DecoderState { @@ -189,9 +189,14 @@ impl DecoderState { partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]), lzma_props, unpacked_size, - literal_probs: vec![vec![0x400; 0x300]; 1 << (lzma_props.lc + lzma_props.lp)], - pos_slot_decoder: vec![rangecoder::BitTree::new(6); 4], - align_decoder: rangecoder::BitTree::new(4), + literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)), + pos_slot_decoder: [ + BitTree::new(6), + BitTree::new(6), + BitTree::new(6), + BitTree::new(6), + ], + align_decoder: BitTree::new(4), pos_decoders: [0x400; 115], is_match: [0x400; 192], is_rep: [0x400; 12], @@ -201,8 +206,8 @@ impl DecoderState { is_rep_0long: [0x400; 192], state: 0, rep: [0; 4], - len_decoder: rangecoder::LenDecoder::new(), - rep_len_decoder: rangecoder::LenDecoder::new(), + len_decoder: LenDecoder::new(), + rep_len_decoder: LenDecoder::new(), } } @@ -210,24 +215,27 @@ impl DecoderState { new_props.validate(); if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp { // We can reset here by filling the existing buffer with 0x400. - self.literal_probs.iter_mut().for_each(|v| v.fill(0x400)) + self.literal_probs.fill(0x400); } else { // We need to reallocate because of the new size of `lc+lp`. 
- self.literal_probs = vec![vec![0x400; 0x300]; 1 << (new_props.lc + new_props.lp)]; + self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300)); } self.lzma_props = new_props; self.pos_slot_decoder.iter_mut().for_each(|t| t.reset()); self.align_decoder.reset(); - self.pos_decoders.fill(0x400); - self.is_match.fill(0x400); - self.is_rep.fill(0x400); - self.is_rep_g0.fill(0x400); - self.is_rep_g1.fill(0x400); - self.is_rep_g2.fill(0x400); - self.is_rep_0long.fill(0x400); + // For stack-allocated arrays, it was found to be faster to re-create new arrays + // dropping the existing one, rather than using `fill` to reset the contents to zero. + // Heap-based arrays use fill to keep their allocation rather than reallocate. + self.pos_decoders = [0x400; 115]; + self.is_match = [0x400; 192]; + self.is_rep = [0x400; 12]; + self.is_rep_g0 = [0x400; 12]; + self.is_rep_g1 = [0x400; 12]; + self.is_rep_g2 = [0x400; 12]; + self.is_rep_0long = [0x400; 192]; self.state = 0; - self.rep.fill(0); + self.rep = [0; 4]; self.len_decoder.reset(); self.rep_len_decoder.reset(); } @@ -239,7 +247,7 @@ impl DecoderState { pub fn process<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result<()> { self.process_mode(output, rangecoder, ProcessingMode::Finish) } @@ -248,7 +256,7 @@ impl DecoderState { pub fn process_stream<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result<()> { self.process_mode(output, rangecoder, ProcessingMode::Partial) } @@ -262,7 +270,7 @@ impl DecoderState { fn process_next_inner<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, update: bool, ) -> 
error::Result { let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1); @@ -379,7 +387,7 @@ impl DecoderState { fn process_next<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result { self.process_next_inner(output, rangecoder, true) } @@ -397,7 +405,7 @@ impl DecoderState { code: u32, ) -> error::Result<()> { let mut temp = std::io::Cursor::new(buf); - let mut rangecoder = rangecoder::RangeDecoder::from_parts(&mut temp, range, code); + let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code); let _ = self.process_next_inner(output, &mut rangecoder, false)?; Ok(()) } @@ -405,7 +413,7 @@ impl DecoderState { /// Utility function to read data into the partial input buffer. fn read_partial_input_buf<'a, R: io::BufRead>( &mut self, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result<()> { // Fill as much of the tmp buffer as possible let start = self.partial_input_buf.position() as usize; @@ -419,7 +427,7 @@ impl DecoderState { fn process_mode<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, mode: ProcessingMode, ) -> error::Result<()> { loop { @@ -460,11 +468,8 @@ impl DecoderState { // Run the decompressor on the tmp buffer let mut tmp_reader = io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]); - let mut tmp_rangecoder = rangecoder::RangeDecoder::from_parts( - &mut tmp_reader, - rangecoder.range, - rangecoder.code, - ); + let mut tmp_rangecoder = + RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code); let res = self.process_next(output, &mut tmp_rangecoder)?; // Update the actual rangecoder @@ -513,7 +518,7 @@ impl DecoderState { fn decode_literal<'a, W: io::Write, LZB: LzBuffer, R: 
io::BufRead>( &mut self, output: &mut LZB, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, update: bool, ) -> error::Result { let def_prev_byte = 0u8; @@ -549,7 +554,7 @@ impl DecoderState { fn decode_distance<'a, R: io::BufRead>( &mut self, - rangecoder: &mut rangecoder::RangeDecoder<'a, R>, + rangecoder: &mut RangeDecoder<'a, R>, length: usize, update: bool, ) -> error::Result { diff --git a/src/decode/rangecoder.rs b/src/decode/rangecoder.rs index 4cee3ad..52271f9 100644 --- a/src/decode/rangecoder.rs +++ b/src/decode/rangecoder.rs @@ -190,8 +190,8 @@ impl BitTree { pub struct LenDecoder { choice: u16, choice2: u16, - low_coder: Vec, - mid_coder: Vec, + low_coder: [BitTree; 16], + mid_coder: [BitTree; 16], high_coder: BitTree, } @@ -200,8 +200,42 @@ impl LenDecoder { LenDecoder { choice: 0x400, choice2: 0x400, - low_coder: vec![BitTree::new(3); 16], - mid_coder: vec![BitTree::new(3); 16], + low_coder: [ + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + ], + mid_coder: [ + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + BitTree::new(3), + ], high_coder: BitTree::new(8), } } diff --git a/src/lib.rs b/src/lib.rs index 7bd7730..f8bac25 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,7 @@ mod encode; pub mod error; +mod util; mod xz; use std::io; diff --git a/src/util/mod.rs b/src/util/mod.rs new file mode 100644 index 0000000..ee2474e --- /dev/null +++ b/src/util/mod.rs @@ -0,0 +1 @@ +pub mod vec2d; diff --git a/src/util/vec2d.rs 
// src/util/vec2d.rs (new file introduced by this patch)
use std::ops::{Index, IndexMut, Range};

/// A 2 dimensional matrix in row-major order backed by a contiguous slice.
///
/// Indexing with a row number (`grid[row]`) yields that row as a slice of
/// `cols` elements, so `grid[row][col]` addresses a single cell.
#[derive(Debug)]
pub struct Vec2D<T> {
    /// All cells, stored one row after another.
    data: Box<[T]>,
    /// Number of cells in each row.
    cols: usize,
}

impl<T> Vec2D<T> {
    /// Initialize a grid of size (`rows`, `cols`) with the given data element.
    ///
    /// # Panics
    ///
    /// Panics if `rows * cols` does not fit in a `usize`.
    pub fn init(data: T, size: (usize, usize)) -> Vec2D<T>
    where
        T: Clone,
    {
        let (rows, cols) = size;
        // `unwrap_or_else` only formats the message when the multiplication
        // actually overflows; `expect(&format!(..))` would allocate the
        // string on every successful call as well.
        let len = rows
            .checked_mul(cols)
            .unwrap_or_else(|| panic!("{} rows by {} cols exceeds usize::MAX", rows, cols));
        Vec2D {
            data: vec![data; len].into_boxed_slice(),
            cols,
        }
    }

    /// Fills the grid with elements by cloning `value`.
    pub fn fill(&mut self, value: T)
    where
        T: Clone,
    {
        self.data.fill(value)
    }

    /// Computes the range of `data` covered by `row`.
    ///
    /// The arithmetic is checked so that an absurdly large `row` panics here
    /// instead of wrapping around to the offset of some other, valid row when
    /// overflow checks are disabled. Whether the range lies inside `data` is
    /// left to the slice indexing done by the caller.
    #[inline]
    fn row_range(&self, row: usize) -> Range<usize> {
        row.checked_mul(self.cols)
            .and_then(|start| start.checked_add(self.cols).map(|end| start..end))
            .unwrap_or_else(|| {
                panic!(
                    "row {} with {} cols per row exceeds usize::MAX",
                    row, self.cols
                )
            })
    }
}

impl<T> Index<usize> for Vec2D<T> {
    type Output = [T];

    /// Returns row `row` as a slice of `cols` elements.
    ///
    /// # Panics
    ///
    /// Panics if the row lies past the end of the grid or if its offset does
    /// not fit in a `usize`. A grid with zero columns has only empty rows, so
    /// every row index yields an empty slice.
    #[inline]
    fn index(&self, row: usize) -> &Self::Output {
        let cells = self.row_range(row);
        &self.data[cells]
    }
}

impl<T> IndexMut<usize> for Vec2D<T> {
    /// Returns row `row` as a mutable slice of `cols` elements.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as the shared-reference indexing.
    #[inline]
    fn index_mut(&mut self, row: usize) -> &mut Self::Output {
        let cells = self.row_range(row);
        &mut self.data[cells]
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn init() {
        let vec2d = Vec2D::init(1, (2, 3));
        assert_eq!(vec2d[0], [1, 1, 1]);
        assert_eq!(vec2d[1], [1, 1, 1]);
    }

    #[test]
    #[should_panic]
    fn init_overflow() {
        Vec2D::init(1, (usize::MAX, usize::MAX));
    }

    #[test]
    fn fill() {
        let mut vec2d = Vec2D::init(0, (2, 3));
        vec2d.fill(7);
        assert_eq!(vec2d[0], [7, 7, 7]);
        assert_eq!(vec2d[1], [7, 7, 7]);
    }

    #[test]
    fn index() {
        let vec2d = Vec2D {
            data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(),
            cols: 2,
        };
        assert_eq!(vec2d[0], [0, 1]);
        assert_eq!(vec2d[1], [2, 3]);
        assert_eq!(vec2d[2], [4, 5]);
        assert_eq!(vec2d[3], [6, 7]);
    }

    #[test]
    fn index_mut() {
        let mut vec2d = Vec2D {
            data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(),
            cols: 2,
        };

        vec2d[1][1] = 9;
        assert_eq!(vec2d[0], [0, 1]);
        // 1,1 should be 9.
        assert_eq!(vec2d[1], [2, 9]);
        assert_eq!(vec2d[2], [4, 5]);
        assert_eq!(vec2d[3], [6, 7]);
    }

    #[test]
    #[should_panic]
    fn index_out_of_bounds() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[2][3];
    }

    #[test]
    #[should_panic]
    fn index_out_of_bounds_vec_edge() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[1][3];
    }

    #[test]
    #[should_panic]
    fn column_out_of_bounds() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[0][3];
    }

    #[test]
    #[should_panic]
    fn row_out_of_bounds() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[2][0];
    }

    #[test]
    #[should_panic]
    fn index_multiply_overflow() {
        // (usize::MAX / 3 + 1) * 3 wraps to a small in-range offset when the
        // multiplication is unchecked; it must panic instead.
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[usize::MAX / 3 + 1][0];
    }

    #[test]
    #[should_panic]
    fn indexmut_out_of_bounds_vec_edge() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[1][3] = 0;
    }

    #[test]
    #[should_panic]
    fn mut_column_out_of_bounds() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[0][3] = 0;
    }

    #[test]
    #[should_panic]
    fn indexmut_out_of_bounds() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[2][3] = 0;
    }

    #[test]
    #[should_panic]
    fn mut_row_out_of_bounds() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[2][0] = 0;
    }

    #[test]
    #[should_panic]
    fn indexmut_multiply_overflow() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[usize::MAX / 3 + 1][0] = 0;
    }
}