Skip to content

Commit

Permalink
Make checksum comparison try matching with lf and crlf line endings (#…
Browse files Browse the repository at this point in the history
…2101)

* Encapsulate checksum handling into a module in MigrationConnector

* Make checksum comparison try matching with lf and crlf line endings

See code for documentation on the how and why.

Relevant issues:

- prisma/prisma#7398
- prisma/prisma#7101
  • Loading branch information
tomhoule committed Aug 3, 2021
1 parent 8b139c1 commit 10a1bec
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 109 deletions.
132 changes: 132 additions & 0 deletions migration-engine/connectors/migration-connector/src/checksum.rs
@@ -0,0 +1,132 @@
//! Checksums of migration scripts are used in various parts of the migration
//! engine to ensure integrity. This module contains common logic that should be
//! used everywhere for consistency.

/// Hash a new migration script and return the digest rendered as a
/// human-readable string.
///
/// The script is hashed exactly as given: no line-ending normalization is
/// applied here. The rendering is the one produced by
/// `FormatChecksum::format_checksum`.
pub(crate) fn render_checksum(script: &str) -> String {
    let digest = compute_checksum(script);
    digest.format_checksum()
}

/// Returns whether a migration script matches an existing checksum.
///
/// `checksum` is the hexadecimal checksum string to compare against. The
/// script is considered a match if any of the following renderings of it
/// hashes to `checksum`, tried lazily in this order:
///
/// 1. the script exactly as given,
/// 2. the script with every `\r\n` replaced by `\n`,
/// 3. the LF rendering from step 2 with every `\n` replaced by `\r\n`.
///
/// A `\r` that is not followed by `\n` is left untouched by both conversions.
///
/// Trying several line endings makes checksums match between Unix-like
/// systems and Windows. This is necessary because git messes with line
/// endings. For background information, read
/// https://web.archive.org/web/20150912185006/http://adaptivepatchwork.com:80/2012/03/01/mind-the-end-of-your-line/
pub(crate) fn script_matches_checksum(script: &str, checksum: &str) -> bool {
    // Due to an omission in a previous version of the migration engine,
    // some migrations tables will have old migrations with checksum strings
    // that have not been zero-padded.
    //
    // Corresponding issue:
    // https://github.com/prisma/prisma-engines/issues/1887
    //
    // The format to compare against only depends on `checksum`, so decide it
    // once instead of once per candidate.
    let legacy_format = !checksum.is_empty() && checksum.len() != CHECKSUM_STR_LEN;

    let candidate_matches = |candidate: &str| -> bool {
        let digest = compute_checksum(candidate);
        let rendered = if legacy_format {
            digest.format_checksum_old()
        } else {
            digest.format_checksum()
        };

        rendered == checksum
    };

    // Fast path: the script is byte-for-byte what was checksummed.
    if candidate_matches(script) {
        return true;
    }

    let lf_script = script.replace("\r\n", "\n");

    if candidate_matches(&lf_script) {
        return true;
    }

    // Build the CRLF rendering from the LF-normalized text rather than from
    // the raw script: replacing `\n` directly in a script that already
    // contains `\r\n` would yield `\r\r\n`, so a script with CRLF or mixed
    // line endings could never match its all-CRLF form.
    let crlf_script = lf_script.replace("\n", "\r\n");

    candidate_matches(&crlf_script)
}

/// Checksumming implementation. This should be the single place where we do this.
///
/// Returns the raw 32-byte SHA-256 digest of the script's bytes.
fn compute_checksum(script: &str) -> [u8; 32] {
    use sha2::{Digest, Sha256};

    Sha256::digest(script.as_bytes()).into()
}

/// Number of ASCII characters (and therefore bytes) in a zero-padded checksum
/// string: two hexadecimal digits for each of the 32 digest bytes.
const CHECKSUM_STR_LEN: usize = 64;

/// Rendering of a raw checksum as a lowercase hexadecimal string. This is used
/// to checksum migration scripts with Sha256.
trait FormatChecksum {
    /// Render the checksum with every byte zero-padded to two hex digits.
    fn format_checksum(&self) -> String;
    /// Obsolete rendering without zero-padding; should only be used for
    /// compatibility.
    fn format_checksum_old(&self) -> String;
}

impl FormatChecksum for [u8; 32] {
    fn format_checksum(&self) -> String {
        use std::fmt::Write as _;

        let hex = self
            .iter()
            .fold(String::with_capacity(CHECKSUM_STR_LEN), |mut acc, byte| {
                write!(acc, "{:02x}", byte).unwrap();
                acc
            });

        // Two digits per byte, 32 bytes: anything else is a bug.
        assert_eq!(hex.len(), CHECKSUM_STR_LEN);

        hex
    }

    // Due to an omission in a previous version of the migration engine,
    // some migrations tables will have old migrations with checksum strings
    // that have not been zero-padded.
    //
    // Corresponding issue:
    // https://github.com/prisma/prisma-engines/issues/1887
    fn format_checksum_old(&self) -> String {
        use std::fmt::Write as _;

        // Bytes below 0x10 render as a single digit here, so the result may
        // be shorter than CHECKSUM_STR_LEN.
        self.iter()
            .fold(String::with_capacity(CHECKSUM_STR_LEN), |mut acc, byte| {
                write!(acc, "{:x}", byte).unwrap();
                acc
            })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The rendered checksum is the full-length, zero-padded hex string.
    #[test]
    fn format_checksum_does_not_strip_zeros() {
        let rendered = render_checksum("hello");

        assert_eq!(
            rendered,
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
        );
        assert_eq!(render_checksum("abcd").len(), CHECKSUM_STR_LEN);
    }

    /// Scripts that differ only in LF vs CRLF line endings must match each
    /// other's checksums, in both directions.
    #[test]
    fn script_matches_checksum_is_line_ending_agnostic() {
        // Each inner array holds the LF and CRLF spelling of one script. The
        // lone `\r` is not a line ending and is the same in both spellings.
        let equivalent_groups = [
            ["ab\ncd\nef\ngh\rab", "ab\r\ncd\r\nef\r\ngh\rab"],
            ["ab\ncd\nef\ngh\rab\n", "ab\r\ncd\r\nef\r\ngh\rab\r\n"],
        ];

        for group in equivalent_groups.iter() {
            // Every ordered pair within a group, including a script with itself.
            let pairs = group
                .iter()
                .flat_map(|on_disk| group.iter().map(move |recorded| (*on_disk, *recorded)));

            for (on_disk, recorded) in pairs {
                assert!(script_matches_checksum(on_disk, &render_checksum(recorded)));
            }
        }
    }

    /// Differences other than line-ending style must not match.
    #[test]
    fn script_matches_checksum_negative() {
        let mismatches = [("abc", "abcd"), ("abc\n", "abc")];

        for (script, other) in mismatches.iter() {
            assert!(!script_matches_checksum(script, &render_checksum(other)));
        }
    }
}
4 changes: 4 additions & 0 deletions migration-engine/connectors/migration-connector/src/error.rs
Expand Up @@ -15,6 +15,10 @@ use user_facing_errors::{
#[derive(Clone)]
pub struct ConnectorError(Box<ConnectorErrorImpl>);

/// Shorthand for a [Result](https://doc.rust-lang.org/std/result/enum.Result.html) where the error
/// variant is a [ConnectorError](/error/enum.ConnectorError.html).
pub type ConnectorResult<T> = Result<T, ConnectorError>;

#[derive(Debug, Clone)]
struct ConnectorErrorImpl {
/// An optional error already rendered for users in case the migration core does not handle it.
Expand Down
77 changes: 2 additions & 75 deletions migration-engine/connectors/migration-connector/src/lib.rs
Expand Up @@ -2,6 +2,7 @@

//! This crate defines the API exposed by the connectors to the migration engine core. The entry point for this API is the [MigrationConnector](trait.MigrationConnector.html) trait.

mod checksum;
mod database_migration_step_applier;
mod destructive_change_checker;
mod diff;
Expand All @@ -15,11 +16,10 @@ pub use destructive_change_checker::{
DestructiveChangeChecker, DestructiveChangeDiagnostics, MigrationWarning, UnexecutableMigration,
};
pub use diff::DiffTarget;
pub use error::ConnectorError;
pub use error::{ConnectorError, ConnectorResult};
pub use migration_persistence::{MigrationPersistence, MigrationRecord, PersistenceNotInitializedError, Timestamp};

use migrations_directory::MigrationDirectory;
use sha2::{Digest, Sha256};

/// A boxed migration, opaque to the migration engine core. The connectors are
/// sole responsible for producing and understanding migrations — the core just
Expand Down Expand Up @@ -95,76 +95,3 @@ pub trait MigrationConnector: Send + Sync + 'static {
/// If possible, check that the passed in migrations apply cleanly.
async fn validate_migrations(&self, _migrations: &[MigrationDirectory]) -> ConnectorResult<()>;
}

/// Shorthand for a [Result](https://doc.rust-lang.org/std/result/enum.Result.html) where the error
/// variant is a [ConnectorError](/error/enum.ConnectorError.html).
pub type ConnectorResult<T> = Result<T, ConnectorError>;

/// Compute the checksum for a migration script, and return it formatted to be human-readable.
fn checksum(script: &str) -> String {
let mut hasher = Sha256::new();
hasher.update(script.as_bytes());
let checksum: [u8; 32] = hasher.finalize().into();
checksum.format_checksum()
}

/// The length (in bytes, or equivalently ascii characters) of the checksum
/// strings.
const CHECKSUM_STR_LEN: usize = 64;

/// Format a checksum to a hexadecimal string. This is used to checksum
/// migration scripts with Sha256.
trait FormatChecksum {
/// Format a checksum to a hexadecimal string.
fn format_checksum(&self) -> String;
/// Obsolete checksum method, should only be used for compatibility.
fn format_checksum_old(&self) -> String;
}

impl FormatChecksum for [u8; 32] {
fn format_checksum(&self) -> String {
use std::fmt::Write as _;

let mut checksum_string = String::with_capacity(32 * 2);

for byte in self {
write!(checksum_string, "{:02x}", byte).unwrap();
}

assert_eq!(checksum_string.len(), CHECKSUM_STR_LEN);

checksum_string
}

// Due to an omission in a previous version of the migration engine,
// some migrations tables will have old migrations with checksum strings
// that have not been zero-padded.
//
// Corresponding issue:
// https://github.com/prisma/prisma-engines/issues/1887
fn format_checksum_old(&self) -> String {
use std::fmt::Write as _;

let mut checksum_string = String::with_capacity(32 * 2);

for byte in self {
write!(checksum_string, "{:x}", byte).unwrap();
}

checksum_string
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn format_checksum_does_not_strip_zeros() {
assert_eq!(
checksum("hello"),
"2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824"
);
assert_eq!(checksum("abcd").len(), CHECKSUM_STR_LEN);
}
}
Expand Up @@ -24,7 +24,7 @@ pub trait MigrationPersistence: Send + Sync {
/// Connectors should implement mark_migration_applied_impl to avoid doing
/// the checksuming themselves.
async fn mark_migration_applied(&self, migration_name: &str, script: &str) -> ConnectorResult<String> {
self.mark_migration_applied_impl(migration_name, &checksum(script))
self.mark_migration_applied_impl(migration_name, &checksum::render_checksum(script))
.await
}

Expand All @@ -43,7 +43,7 @@ pub trait MigrationPersistence: Send + Sync {
/// This is a default method that computes the checksum. Implementors should
/// implement record_migration_started_impl.
async fn record_migration_started(&self, migration_name: &str, script: &str) -> ConnectorResult<String> {
self.record_migration_started_impl(migration_name, &checksum(script))
self.record_migration_started_impl(migration_name, &checksum::render_checksum(script))
.await
}

Expand Down
Expand Up @@ -5,8 +5,7 @@
//! It also contains multiple subfolders, named after the migration id, and each containing:
//! - A migration script

use crate::{ConnectorError, ConnectorResult, FormatChecksum, CHECKSUM_STR_LEN};
use sha2::{Digest, Sha256, Sha512};
use crate::{checksum, ConnectorError, ConnectorResult};
use std::{
error::Error,
fmt::Display,
Expand Down Expand Up @@ -201,40 +200,11 @@ impl MigrationDirectory {
.expect("Migration directory name is not valid UTF-8.")
}

/// Write the checksum of the migration script file to `buf`.
pub fn checksum(&mut self, buf: &mut Vec<u8>) -> Result<(), ReadMigrationScriptError> {
let script = self.read_migration_script()?;
let mut hasher = Sha512::new();
hasher.update(&script);
let bytes = hasher.finalize();

buf.clear();
buf.extend_from_slice(bytes.as_ref());

Ok(())
}

/// Check whether the checksum of the migration script matches the provided one.
#[tracing::instrument]
pub fn matches_checksum(&self, checksum_str: &str) -> Result<bool, ReadMigrationScriptError> {
let filesystem_script = self.read_migration_script()?;
let mut hasher = Sha256::new();
hasher.update(&filesystem_script);
let filesystem_script_checksum: [u8; 32] = hasher.finalize().into();

// Due to an omission in a previous version of the migration engine,
// some migrations tables will have old migrations with checksum strings
// that have not been zero-padded.
//
// Corresponding issue:
// https://github.com/prisma/prisma-engines/issues/1887
let filesystem_script_checksum_str = if !checksum_str.is_empty() && checksum_str.len() != CHECKSUM_STR_LEN {
filesystem_script_checksum.format_checksum_old()
} else {
filesystem_script_checksum.format_checksum()
};

Ok(checksum_str == filesystem_script_checksum_str)
Ok(checksum::script_matches_checksum(&filesystem_script, checksum_str))
}

/// Write the migration script to the directory.
Expand Down

0 comments on commit 10a1bec

Please sign in to comment.