From a5ba2002f8b5f2db5c06968fcaa23a69496bdc4f Mon Sep 17 00:00:00 2001 From: Jake Bailey Date: Tue, 26 Jul 2022 16:49:56 -0700 Subject: [PATCH] Port signed_source.ml to Rust Summary: Port [signed_source.ml](https://www.internalfb.com/code/fbsource/%5B9cc4202e36a9%5D/fbcode/hphp/hack/src/utils/signed_source.ml) (which itself is a port of [signed_source.py](https://www.internalfb.com/code/fbsource/[424a7ea148d69a0a6f1ba32ca5f950ce657155f6]/tools/signedsource_lib/signedsource.py)) to Rust. Replace the ad-hoc signing implementation in hh_codegen. Reviewed By: edwinsmith Differential Revision: D38163503 fbshipit-source-id: e4dc4f96b43e6c34e37c331bfbd2f04f50b6c128 --- hphp/hack/Cargo.lock | 15 +- hphp/hack/src/hh_codegen/Cargo.toml | 3 +- hphp/hack/src/hh_codegen/common/mod.rs | 6 +- hphp/hack/src/hh_codegen/hh_codegen.rs | 27 +--- hphp/hack/src/utils/rust/signed_source.rs | 182 ++++++++++++++++++++++ hphp/hack/src/utils/rust/signed_source/Cargo.toml | 16 ++ 6 files changed, 220 insertions(+), 29 deletions(-) create mode 100644 hphp/hack/src/utils/rust/signed_source.rs create mode 100644 hphp/hack/src/utils/rust/signed_source/Cargo.toml diff --git a/hphp/hack/Cargo.lock b/hphp/hack/Cargo.lock index 5bc846e9aba..bf8201c8e7a 100644 --- a/hphp/hack/Cargo.lock +++ b/hphp/hack/Cargo.lock @@ -1591,10 +1591,9 @@ name = "hh_codegen" version = "0.0.0" dependencies = [ "anyhow", - "hex", - "md-5", "proc-macro2", "quote", + "signed_source", "structopt", "syn", "synstructure", @@ -3088,6 +3087,18 @@ dependencies = [ ] [[package]] +name = "signed_source" +version = "0.0.0" +dependencies = [ + "bstr", + "hex", + "md-5", + "once_cell", + "regex", + "thiserror", +] + +[[package]] name = "smallvec" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/hphp/hack/src/hh_codegen/Cargo.toml b/hphp/hack/src/hh_codegen/Cargo.toml index f9a5ae3720c..98d58a3fcdc 100644 --- a/hphp/hack/src/hh_codegen/Cargo.toml +++ b/hphp/hack/src/hh_codegen/Cargo.toml @@ 
-10,10 +10,9 @@ path = "hh_codegen.rs" [dependencies] anyhow = "1.0.56" -hex = "0.4.3" -md-5 = "0.10" proc-macro2 = "1.0" quote = "1.0" +signed_source = { path = "../utils/rust/signed_source" } structopt = "0.3.23" syn = { version = "1.0.96", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } synstructure = "0.12" diff --git a/hphp/hack/src/hh_codegen/common/mod.rs b/hphp/hack/src/hh_codegen/common/mod.rs index c1d24c5580a..b6a1fd6f2dd 100644 --- a/hphp/hack/src/hh_codegen/common/mod.rs +++ b/hphp/hack/src/hh_codegen/common/mod.rs @@ -54,14 +54,16 @@ pub fn insert_header(s: &str, command: &str) -> Result<String> { // This source code is licensed under the MIT license found in the // LICENSE file in the \"hack\" directory of this source tree. // -// @{} <<SignedSource::*O*zOeWoEQle#+L!plEphiEmie@IsG>> +// {} // // To regenerate this file, run: // {} {} ", - "generated", command, s + signed_source::SIGNING_TOKEN, + command, + s )?; Ok(content) } diff --git a/hphp/hack/src/hh_codegen/hh_codegen.rs b/hphp/hack/src/hh_codegen/hh_codegen.rs index 30ddddf7814..ee2daa27044 100644 --- a/hphp/hack/src/hh_codegen/hh_codegen.rs +++ b/hphp/hack/src/hh_codegen/hh_codegen.rs @@ -10,12 +10,9 @@ mod gen_enum_helper; mod gen_visitor; mod quote_helper; -use anyhow::anyhow; +use anyhow::Context; use anyhow::Result; use common::*; -use md5::Digest; -use md5::Md5; -use std::fs; use std::fs::File; use std::io::prelude::*; use std::path::Path; @@ -108,24 +105,8 @@ fn format(formatter: Option<&str>, file: &Path) -> Result<()> { } fn sign(file: &Path) -> Result<()> { - // avoid putting the obvious literal in this source file, as that makes the - // file as generated - let token_tag = format!("@{}", "generated"); - let token = "<<SignedSource::*O*zOeWoEQle#+L!plEphiEmie@IsG>>"; - let expected = format!("{} {}", token_tag, token); - - let contents = fs::read_to_string(file).expect("signing failed: could not read file"); - if !contents.contains(&expected) { - return Err(anyhow!("signing failed: input does not contain marker")); - } - - let mut digest = Md5::new(); - 
digest.update(&contents); - let md5 = hex::encode(digest.finalize()); - - let new_contents = - contents.replace(&expected, &format!("{} SignedSource<<{}>>", token_tag, md5)); - fs::write(file, new_contents)?; - + let contents = std::fs::read(file).context("Failed to read file for signing")?; + let new_contents = signed_source::sign_file(&contents)?; + std::fs::write(file, new_contents).context("Failed to write signed file")?; Ok(()) } diff --git a/hphp/hack/src/utils/rust/signed_source.rs b/hphp/hack/src/utils/rust/signed_source.rs new file mode 100644 index 00000000000..a902fe3b92b --- /dev/null +++ b/hphp/hack/src/utils/rust/signed_source.rs @@ -0,0 +1,182 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the "hack" directory of this source tree. + +use bstr::ByteSlice; +use once_cell::sync::Lazy; +use regex::bytes::Regex; + +/// This crate is a port of hphp/hack/src/utils/signed_source.ml, which was +/// based on a historical version of fbsource/tools/signedsource.py. + +/// The signing token, which you must embed in the file you wish to sign. +/// Generally, you should put this in a header comment. +pub static SIGNING_TOKEN: &str = concat!( + "@", + "generated", + " ", + "<<SignedSource::*O*zOeWoEQle#+L!plEphiEmie@IsG>>" +); + +/// Sign a source file into which you have previously embedded a signing token. +/// Signing modifies only the signing token, so the semantics of the file will +/// not change if the token is put in a comment. +/// +/// Returns `TokenNotFoundError` if no signing token is present. 
+pub fn sign_file(data: &[u8]) -> Result<Vec<u8>, TokenNotFoundError> { + let data = SIGN_OR_OLD_TOKEN.replace_all(data, TOKEN.as_bytes()); + if !data.contains_str(TOKEN) { + return Err(TokenNotFoundError); + } + let signature = format!("SignedSource<<{}>>", hash(&data)); + Ok(TOKEN_REGEX + .replace_all(&data, signature.as_bytes()) + .into_owned()) +} + +/// Sign a UTF-8 source file into which you have previously embedded a signing +/// token. Signing modifies only the signing token, so the semantics of the file +/// will not change if the token is put in a comment. +/// +/// Returns `TokenNotFoundError` if no signing token is present. +pub fn sign_utf8_file(data: &str) -> Result<String, TokenNotFoundError> { + let data = sign_file(data.as_bytes())?; + // SAFETY: `data` was a valid `&str` before signing, and signing only + // replaces ASCII characters with other ASCII characters. + unsafe { Ok(String::from_utf8_unchecked(data)) } +} + +/// Determine whether a file is signed. This does NOT verify the signature. +pub fn is_signed(data: &[u8]) -> bool { + SIGNING_REGEX.is_match(data) +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum SignCheckResponse { + Ok, + Unsigned, + Invalid, +} + +/// Verify a file's signature. +pub fn verify_signature(data: &[u8]) -> SignCheckResponse { + let expected_md5 = match SIGNING_REGEX.captures(data) { + None => return SignCheckResponse::Unsigned, + Some(caps) => match caps.get(1) { + None => return SignCheckResponse::Unsigned, + Some(cap) => cap.as_bytes(), + }, + }; + for tok in [TOKEN, OLD_TOKEN] { + let replacement = make_signing_token(tok); + let unsigned_data = SIGNING_REGEX.replace_all(data, replacement.as_bytes()); + let actual_md5 = hash(&unsigned_data); + if expected_md5 == actual_md5.as_bytes() { + return SignCheckResponse::Ok; + } + } + SignCheckResponse::Invalid +} + +static TOKEN: &str = "<<SignedSource::*O*zOeWoEQle#+L!plEphiEmie@IsG>>"; + +/// This old token was historically used as the signing token. 
It was replaced +/// because it is 2 characters shorter than the final signature, and as a result, +/// signing data with the old token forced the entire string to be rewritten +/// (everything after the token needs to be shifted forwards 2 bytes). +/// In this implementation, we rewrite the entire string anyway. +static OLD_TOKEN: &str = "<<SignedSource::*O*zOeWoEQle#+L!plEphiEmie@I>>"; + +fn make_signing_token(token: &str) -> String { + format!("@{} {}", "generated", token) +} + +static SIGNATURE_RE: &str = r"SignedSource<<([a-f0-9]+)>>"; + +static SIGN_OR_OLD_TOKEN: Lazy<Regex> = + Lazy::new(|| Regex::new(&format!("{}|{}", SIGNATURE_RE, regex::escape(OLD_TOKEN))).unwrap()); + +static SIGNING_REGEX: Lazy<Regex> = + Lazy::new(|| Regex::new(&make_signing_token(SIGNATURE_RE)).unwrap()); + +static TOKEN_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(&regex::escape(TOKEN)).unwrap()); + +fn hash(data: &[u8]) -> String { + use md5::Digest; + let mut digest = md5::Md5::new(); + digest.update(&data); + hex::encode(digest.finalize()) +} + +#[derive(Debug, thiserror::Error, PartialEq, Eq)] +#[error("Failed to sign file: input does not contain signing token")] +pub struct TokenNotFoundError; + +#[cfg(test)] +mod test { + use super::is_signed; + use super::make_signing_token; + use super::sign_utf8_file; + use super::verify_signature; + use super::SignCheckResponse; + use super::TokenNotFoundError; + use super::SIGNING_TOKEN; + use super::TOKEN; + + static NO_TOKEN: &str = concat!("// @", "generated\nfn foo() {}"); + static INVALID: &str = concat!( + "// @", + "generated SignedSource<<48ab1081d9394843f184debf0b251a18>>\nfn foo() {}" + ); + static UNSIGNED: &str = concat!( + "// @", + "generated <<SignedSource::*O*zOeWoEQle#+L!plEphiEmie@IsG>>\nfn foo() {}" + ); + // Below signature was manually verified to be equal to the OCaml + // Signed_source output for `UNSIGNED`. 
+ static SIGNED: &str = concat!( + "// @", + "generated SignedSource<<38ab1081d9394843f184debf0b251a18>>\nfn foo() {}" + ); + + #[test] + fn test_signing_token() { + // We use `concat!` so that `SIGNING_TOKEN` can be a `&str` rather than + // a `Lazy`, since `make_signing_token` can't be a `const fn` yet. + // Verify that we're producing the same result. + assert_eq!(SIGNING_TOKEN, make_signing_token(TOKEN)) + } + + #[test] + fn test_sign_utf8_file() { + assert_eq!(sign_utf8_file(UNSIGNED), Ok(SIGNED.to_owned())); + assert_eq!(sign_utf8_file(SIGNED), Ok(SIGNED.to_owned())); + assert_eq!(sign_utf8_file(NO_TOKEN), Err(TokenNotFoundError)); + } + + #[test] + fn test_is_signed() { + assert!(is_signed(SIGNED.as_bytes())); + assert!(is_signed(INVALID.as_bytes())); // `is_signed` doesn't validate + assert!(!is_signed(NO_TOKEN.as_bytes())); + assert!(!is_signed(UNSIGNED.as_bytes())); + } + + #[test] + fn test_verify_signature() { + assert_eq!(verify_signature(SIGNED.as_bytes()), SignCheckResponse::Ok); + assert_eq!( + verify_signature(INVALID.as_bytes()), + SignCheckResponse::Invalid + ); + assert_eq!( + verify_signature(NO_TOKEN.as_bytes()), + SignCheckResponse::Unsigned + ); + assert_eq!( + verify_signature(UNSIGNED.as_bytes()), + SignCheckResponse::Unsigned + ); + } +} diff --git a/hphp/hack/src/utils/rust/signed_source/Cargo.toml b/hphp/hack/src/utils/rust/signed_source/Cargo.toml new file mode 100644 index 00000000000..6929c7a54ca --- /dev/null +++ b/hphp/hack/src/utils/rust/signed_source/Cargo.toml @@ -0,0 +1,16 @@ +# @generated by autocargo from //hphp/hack/src/utils/rust:signed_source +[package] +name = "signed_source" +version = "0.0.0" +edition = "2021" + +[lib] +path = "../signed_source.rs" + +[dependencies] +bstr = { version = "0.2", features = ["serde1"] } +hex = "0.4.3" +md-5 = "0.10" +once_cell = "1.12" +regex = "1.5.4" +thiserror = "1.0.30" -- 2.11.4.GIT