From 9d93381bfcfed9117ebc1c0c55089248614e3490 Mon Sep 17 00:00:00 2001 From: Aaron Orenstein Date: Fri, 14 Oct 2022 15:28:05 -0700 Subject: [PATCH] assemble_opcode macro Summary: This macro generates the `assemble_opcode` function which will do most of the assembler opcode handling. The macro is actually used in a following diff. Reviewed By: edwinsmith Differential Revision: D40270777 fbshipit-source-id: 2bb657dc75e9c86cd64edcdfe866ce4591f4e894 --- .../src/hackc/assemble/assemble_opcode_macro.rs | 168 +++++++++++++++++++++ hphp/hack/src/hackc/assemble/lexer.rs | 9 +- .../hackc/cargo/assemble_opcode_macro/Cargo.toml | 18 +++ 3 files changed, 189 insertions(+), 6 deletions(-) create mode 100644 hphp/hack/src/hackc/assemble/assemble_opcode_macro.rs create mode 100644 hphp/hack/src/hackc/cargo/assemble_opcode_macro/Cargo.toml diff --git a/hphp/hack/src/hackc/assemble/assemble_opcode_macro.rs b/hphp/hack/src/hackc/assemble/assemble_opcode_macro.rs new file mode 100644 index 00000000000..ade7889f9ac --- /dev/null +++ b/hphp/hack/src/hackc/assemble/assemble_opcode_macro.rs @@ -0,0 +1,168 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the "hack" directory of this source tree. 
+ +use hhbc_gen::OpcodeData; +use itertools::Itertools; +use proc_macro2::Ident; +use proc_macro2::Span; +use proc_macro2::TokenStream; +use quote::quote; +use syn::LitByteStr; +use syn::Result; + +#[proc_macro_attribute] +pub fn assemble_opcode( + _attrs: proc_macro::TokenStream, + input: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + match assemble_opcode_impl(input.into(), hhbc_gen::opcode_data()) { + Ok(res) => res.into(), + Err(err) => err.into_compile_error().into(), + } +} + +fn assemble_opcode_impl(_input: TokenStream, opcodes: &[OpcodeData]) -> Result { + let name = quote!(assemble_opcode); + let mut body = Vec::new(); + for opcode in opcodes { + let variant_name = Ident::new(opcode.name, Span::call_site()); + let name = opcode.name.to_string(); + let name_bstr = LitByteStr::new(name.as_bytes(), Span::call_site()); + + // SSwitch and MemoGetEager both have unusual semantics and have to be + // parsed specially in the outer code block, so the arm emitted here is unreachable. + if name == "SSwitch" || name == "MemoGetEager" { + body.push(quote!(#name_bstr => { unreachable!(); })); + continue; + } + + let imms = if opcode.immediates.is_empty() { + quote!() + } else { + let imms = opcode + .immediates + .iter() + .map(|_| quote!(token_iter.assemble_imm(alloc, decl_map)?)) + .collect_vec(); + + quote!((#(#imms),*)) + }; + + body.push(quote!( + #name_bstr => { + token_iter.expect_is_str(Token::into_identifier, #name)?; + Ok(hhbc::Instruct::Opcode(hhbc::Opcode::#variant_name #imms)) + } + )); + } + + Ok(quote!( + fn #name<'arena>( + alloc: &'arena Bump, + tok: &'_ [u8], + token_iter: &mut Lexer<'_>, + decl_map: &HashMap, u32>, + ) -> Result>{ + match tok { + #(#body)* + t => bail!("unknown opcode: {:?}", t), + } + + } + )) +} + +/// Used like: +/// +/// assemble_imm_for_enum!(E, [E::A, E::B, E::C]) +/// +/// turns into a handler for A, B, and C that looks something like: +/// +/// impl AssembleImm<'_, $ret_ty> for Lexer<'_> { +/// fn assemble_imm(&mut self, _alloc: &'_ Bump, _decl_map: 
&DeclMap<'_>) -> Result<$ret_ty> { +/// use $ret_ty; +/// match self.expect(Token::into_identifier)? { +/// b"A" => E::A, +/// b"B" => E::B, +/// b"C" => E::C, +/// _ => bail!(...) +/// } +/// } +/// } +/// +/// This needs to be a proc-macro so it can manipulate the names (turning 'E::A' +/// into 'b"A"'). +#[proc_macro] +pub fn assemble_imm_for_enum(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream { + use quote::ToTokens; + + use syn::Path; + use syn::Token; + use syn::Type; + + #[derive(Debug)] + struct Input { + ret_ty: Type, + variants: Vec, + } + + impl syn::parse::Parse for Input { + fn parse(input: syn::parse::ParseStream<'_>) -> Result { + let ret_ty = input.parse()?; + input.parse::()?; + let variants_stream; + syn::bracketed!(variants_stream in input); + input.parse::()?; + let variants = variants_stream + .parse_terminated::(Path::parse)? + .into_iter() + .collect_vec(); + + Ok(Input { ret_ty, variants }) + } + } + + let Input { ret_ty, variants } = syn::parse_macro_input!(tokens as Input); + + let mut expected = variants.first().unwrap().clone(); + if expected.segments.len() > 1 { + expected.segments.pop(); + // Pop and re-push the new last segment so it carries no trailing punctuation. + let t = expected.segments.pop().unwrap().into_value(); + expected.segments.push(t); + } + + let body = variants + .into_iter() + .map(|variant| { + let ident = &variant.segments.last().unwrap().ident; + let ident_str = LitByteStr::new(ident.to_string().as_bytes(), Span::call_site()); + quote!(#ident_str => #variant) + }) + .collect_vec(); + + let msg = format!( + "Expected a '{}', got {{:?}} on line {{}}", + expected.into_token_stream() + ); + + // Unfortunately since most of these 'enums' aren't real Rust enums we can't + // do anything to ensure that this is exhaustive. + + let output = quote! 
{ + impl AssembleImm<'_, #ret_ty> for Lexer<'_> { + fn assemble_imm(&mut self, _: &'_ Bump, _: &DeclMap<'_>) -> Result<#ret_ty> { + use #ret_ty; + let id = self.expect(Token::into_identifier)?; + Ok(match id { + #(#body),*, + f => anyhow::bail!(#msg, f, self.cur_line()), + }) + } + } + }; + + output.into() +} diff --git a/hphp/hack/src/hackc/assemble/lexer.rs b/hphp/hack/src/hackc/assemble/lexer.rs index 26d453d744c..764f2975b18 100644 --- a/hphp/hack/src/hackc/assemble/lexer.rs +++ b/hphp/hack/src/hackc/assemble/lexer.rs @@ -513,8 +513,7 @@ mod test { "class_meth() expects a literal class name or ::class constant, followed by a constant string that refers to a static method on that class"; "#; let s = s.as_bytes(); - let l: Lexer<'_> = Lexer::from_slice(s, 1); - let mut l = l.into_iter(); + let mut l: Lexer<'_> = Lexer::from_slice(s, 1); // Expecting 3 string tokens let _st1 = l.next().unwrap(); let _by1 = str::as_bytes(r#""\"0\"""#); @@ -532,9 +531,8 @@ mod test { #[test] fn odd_unicode_test() { let s: &[u8] = b".\xA9\xEF\xB8\x8E $0\xC5\xA3\xB1\xC3 \xE2\x98\xBA\xE2\x98\xBA\xE2\x98\xBA @\xE2\x99\xA1\xE2\x99\xA4$"; - let l: Lexer<'_> = Lexer::from_slice(s, 1); + let mut l: Lexer<'_> = Lexer::from_slice(s, 1); // We are expecting an decl, a var, an identifier a global, and an error on the last empty variable - let mut l = l.into_iter(); let decl = l.next().unwrap(); assert!(matches!(decl, Token::Decl(..))); let var = l.next().unwrap(); @@ -555,8 +553,7 @@ mod test { // Expect glob var tsl decl strlit semicolon dash open_curly open_brack open_paren close_paren close_bracket // close_curly equal number number number number , < > : identifier identifier ERROR on the last . 
let s = s.as_bytes(); - let l: Lexer<'_> = Lexer::from_slice(s, 1); - let mut l = l.into_iter(); + let mut l: Lexer<'_> = Lexer::from_slice(s, 1); let glob = l.next().unwrap(); assert!( matches!(glob, Token::Global(..)), diff --git a/hphp/hack/src/hackc/cargo/assemble_opcode_macro/Cargo.toml b/hphp/hack/src/hackc/cargo/assemble_opcode_macro/Cargo.toml new file mode 100644 index 00000000000..6d4d40ae48a --- /dev/null +++ b/hphp/hack/src/hackc/cargo/assemble_opcode_macro/Cargo.toml @@ -0,0 +1,18 @@ +# @generated by autocargo from //hphp/hack/src/hackc/assemble:assemble_opcode_macro +[package] +name = "assemble_opcode_macro" +version = "0.0.0" +edition = "2021" + +[lib] +path = "../../assemble/assemble_opcode_macro.rs" +test = false +doctest = false +proc-macro = true + +[dependencies] +hhbc-gen = { path = "../../../../../tools/hhbc-gen" } +itertools = "0.10.3" +proc-macro2 = "1.0.46" +quote = "1.0" +syn = { version = "1.0.96", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } -- 2.11.4.GIT