Skip to content

Commit

Permalink
rustfmt & clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
guywaldman committed Jul 10, 2019
1 parent 8d66d58 commit 0384e4a
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 24 deletions.
56 changes: 36 additions & 20 deletions src/avro.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use avro_rs::types::Value;
use avro_rs::{Codec, Reader};
use std::fs;
use glob::glob;
use std::fs;
use std::path::PathBuf;

pub(crate) const NULL: &'static str = "null";
Expand All @@ -11,20 +11,21 @@ pub(crate) const CODEC_DEFLATE: &'static str = "deflate";
#[derive(Debug)]
pub(crate) struct AvroFile {
data: Vec<u8>,
path: PathBuf
path: PathBuf,
}

#[derive(Debug)]
pub(crate) struct Avro {
files: Vec<AvroFile>
pub(crate) struct AvroCli {
files: Vec<AvroFile>,
}

impl Avro {
impl AvroCli {
/// Creates an `Avro` as a union of all avros in the received paths
///
///
/// # Arguments
///
/// * `path` - A glob to match against Avro files to load
/// * `codec` - A codec for decompression
pub fn from(path: String, codec: Option<String>) -> Self {
let mut paths: Vec<PathBuf> = Vec::new();
for entry in glob(&path).expect("Failed to read glob pattern") {
Expand All @@ -38,50 +39,65 @@ impl Avro {
panic!("No files found")
}

let mut codec_for_decompressing: Codec = Codec::Null;
// TODO: Add `Codec::Snappy`
let mut codec_for_decompressing: Codec = Codec::Null;
if let Some(c) = codec {
if c == CODEC_DEFLATE {
codec_for_decompressing = Codec::Deflate;
}
}

let mut files: Vec<AvroFile> = Vec::new();
for path in paths {
let mut data = fs::read(&path).expect(&format!(
"Could not read from path {0}", path.display())
);
let mut data =
fs::read(&path).expect(&format!("Could not read from path {0}", path.display()));
codec_for_decompressing.decompress(&mut data).expect("Could not successfully decompress Avro file. Make sure that the codec you specified is correct");
files.push(AvroFile { data, path });
}

Avro { files }
AvroCli { files }
}

/// Get all the names of the columns.
/// Relies on the first record
pub fn get_all_field_names(&self) -> Vec<String> {
let first_file = &self.files[0];
let mut reader = Reader::new(&first_file.data[..]).expect(&format!("Could not read Avro file {}", first_file.path.display()));
if let Ok(Value::Record(fields)) = reader.next().expect("Avro must have at least one record row to infer schema") {
fields.iter().map(|(f, _)| f.to_owned()).collect::<Vec<String>>()
let mut reader = Reader::new(&first_file.data[..]).expect(&format!(
"Could not read Avro file {}",
first_file.path.display()
));
if let Ok(Value::Record(fields)) = reader
.next()
.expect("Avro must have at least one record row to infer schema")
{
fields
.iter()
.map(|(f, _)| f.to_owned())
.collect::<Vec<String>>()
} else {
Vec::new()
}
}

/// Get all columns and values
///
/// # Arguments
/// * `fields_to_get` - Names of the columns to retrieve
pub fn get_fields(&self, fields_to_get: Vec<String>) -> Vec<Vec<String>> {
let mut extracted_fields: Vec<Vec<String>> = Vec::new();
for file in &self.files {
let reader = Reader::new(&file.data[..]).expect(&format!("Could not read Avro file {}", file.path.display()));
let reader = Reader::new(&file.data[..])
.expect(&format!("Could not read Avro file {}", file.path.display()));

for (i, row) in reader.enumerate() {
let row = row.expect(&format!("Could not parse row {} from the Avro", i));
if let Value::Record(fields) = row {
let mut extracted_fields_for_row: Vec<String> = Vec::new();
for field_name in &fields_to_get {
let field_value_to_insert = match fields.iter().find(|(n, _)| n == field_name) {
Some((_, val)) => format_avro_value(&val),
None => NA.to_owned()
};
let field_value_to_insert =
match fields.iter().find(|(n, _)| n == field_name) {
Some((_, val)) => format_avro_value(&val),
None => NA.to_owned(),
};
extracted_fields_for_row.push(field_value_to_insert);
}
extracted_fields.push(extracted_fields_for_row);
Expand Down
9 changes: 5 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
///! A CLI for manipulating [AVRO](https://avro.apache.org/) files.
///!
///! This crate currently expects each line to be a [Record](https://avro.apache.org/docs/1.8.1/spec.html#schema_record).
use avro::Avro;
use avro::AvroCli;
use failure::Error;
use prettytable::{color, Attr, Cell, Row, Table};
use regex::Regex;
Expand Down Expand Up @@ -42,9 +42,9 @@ fn main() -> Result<(), Error> {
fields_to_get,
path,
search,
codec
codec,
} => {
let avro = Avro::from(path, codec);
let avro = AvroCli::from(path, codec);
let fields_to_get = if fields_to_get.is_empty() {
avro.get_all_field_names()
} else {
Expand Down Expand Up @@ -86,7 +86,8 @@ fn main() -> Result<(), Error> {
.filter_map(|v| {
let mut cell = Cell::new(v);
if let Some(search) = &search {
let search = Regex::new(&search).expect("Regular expression is invalid");
let search =
Regex::new(&search).expect("Regular expression is invalid");
if search.is_match(v) {
cell.style(Attr::Bold);
cell.style(Attr::ForegroundColor(color::GREEN));
Expand Down

0 comments on commit 0384e4a

Please sign in to comment.