diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs index a74ceb6..7749098 100644 --- a/crates/synth-backend/src/arm_backend.rs +++ b/crates/synth-backend/src/arm_backend.rs @@ -160,57 +160,72 @@ fn compile_wasm_to_arm( // unmodified default, so every function that compiles today is selected by // exactly the code that compiled it yesterday (bit-identity is structural, // not behavioural). - let select_direct_attempt = - |spill_on_exhaustion: bool| -> Result, synth_core::Error> { - let db = RuleDatabase::with_standard_rules(); - let mut selector = - InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config); - selector.set_target(config.target.fpu, &config.target.triple); - if config.num_imports > 0 { - selector.set_num_imports(config.num_imports); - } - // #195: plumb the callee argument-count tables so the direct selector can - // marshal call arguments into R0–R3 per AAPCS. - selector.set_func_arg_counts( - config.func_arg_counts.clone(), - config.type_arg_counts.clone(), - ); - // #197: in relocatable host-link mode, emit direct `func_N` BLs for - // imports (rewritten to the wasm field name by build_relocatable_elf) - // instead of `__meld_dispatch_import`. - selector.set_relocatable(config.relocatable); - // #237: native-pointer ABI — wasm statics become __synth_wasm_data-relative. - selector.set_native_pointer_abi(config.native_pointer_abi, config.linear_memory_bytes); - // #311: i64 call results are register PAIRS — tag them. - selector.set_result_types(config.func_ret_i64.clone(), config.type_ret_i64.clone()); - // Stack-pointer promotion is meaningful only under the native-pointer ABI; - // gating here keeps every non-native compile (all frozen fixtures) on the - // legacy R9 globals-table path, bit-identical. - if config.native_pointer_abi - && let Some((sp_idx, sp_init)) = config.stack_pointer_global - { - selector.set_native_pointer_stack(sp_idx, sp_init); - } - selector.set_spill_on_exhaustion(spill_on_exhaustion); - selector.select_with_stack(wasm_ops, num_params) - }; + let select_direct_attempt = |spill_on_exhaustion: bool, + param_backing_on_exhaustion: bool| + -> Result, synth_core::Error> { + let db = RuleDatabase::with_standard_rules(); + let mut selector = + InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config); + selector.set_target(config.target.fpu, &config.target.triple); + if config.num_imports > 0 { + selector.set_num_imports(config.num_imports); + } + // #195: plumb the callee argument-count tables so the direct selector can + // marshal call arguments into R0–R3 per AAPCS. + selector.set_func_arg_counts( + config.func_arg_counts.clone(), + config.type_arg_counts.clone(), + ); + // #197: in relocatable host-link mode, emit direct `func_N` BLs for + // imports (rewritten to the wasm field name by build_relocatable_elf) + // instead of `__meld_dispatch_import`. + selector.set_relocatable(config.relocatable); + // #237: native-pointer ABI — wasm statics become __synth_wasm_data-relative. + selector.set_native_pointer_abi(config.native_pointer_abi, config.linear_memory_bytes); + // #311: i64 call results are register PAIRS — tag them. + selector.set_result_types(config.func_ret_i64.clone(), config.type_ret_i64.clone()); + // Stack-pointer promotion is meaningful only under the native-pointer ABI; + // gating here keeps every non-native compile (all frozen fixtures) on the + // legacy R9 globals-table path, bit-identical. + if config.native_pointer_abi + && let Some((sp_idx, sp_init)) = config.stack_pointer_global + { + selector.set_native_pointer_stack(sp_idx, sp_init); + } + selector.set_spill_on_exhaustion(spill_on_exhaustion); + selector.set_param_backing_on_exhaustion(param_backing_on_exhaustion); + selector.select_with_stack(wasm_ops, num_params) + }; let select_direct = || -> Result, String> { - match select_direct_attempt(false) { - Ok(instrs) => Ok(instrs), - // VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion - // hard-fail is recoverable — retry once with spill-on-exhaustion, - // which reserves the spill area and spills the deepest stack value - // when the pool is full. Only functions that FAILED the first pass - // ever reach this, so existing output is untouched by construction. - Err(e) - if e.to_string() - .contains("all allocatable registers are live on the stack") => - { - select_direct_attempt(true) - .map_err(|e| format!("instruction selection failed: {}", e)) - } - Err(e) => Err(format!("instruction selection failed: {}", e)), + // The two recoverable exhaustion classes. NOT retried: the i64 + // spill-slot-pool Err ("spill-slot pool exhausted") — the honest + // remaining bound of the 3b-lite allocator. + const SINGLE_EXHAUSTION: &str = "all allocatable registers are live on the stack"; + const PAIR_EXHAUSTION: &str = "no consecutive pair of free registers for i64"; + let mut attempt = select_direct_attempt(false, false); + // VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion + // hard-fail is recoverable — retry with spill-on-exhaustion, which + // reserves the spill area and spills the deepest stack value when the + // pool is full. Only functions that FAILED the first pass ever reach + // this, so existing output is untouched by construction. + if let Err(e) = &attempt + && e.to_string().contains(SINGLE_EXHAUSTION) + { + attempt = select_direct_attempt(true, false); + } + // VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR + // exhaustion is recoverable too — but not by stack spilling (the pair + // allocator already spills stack values, #171): the blockers are the + // pinned param home registers. The final retry frame-backs the params + // (#204 machinery) so they stop pinning R0-R3, with spill-on-exhaustion + // kept on for the single-register pressure the reloads add. Reached + // only by functions that failed every earlier pass. + if let Err(e) = &attempt + && e.to_string().contains(PAIR_EXHAUSTION) + { + attempt = select_direct_attempt(true, true); } + attempt.map_err(|e| format!("instruction selection failed: {}", e)) }; // Instruction selection: optimized or direct. diff --git a/crates/synth-synthesis/src/instruction_selector.rs b/crates/synth-synthesis/src/instruction_selector.rs index 89eb235..35491d9 100644 --- a/crates/synth-synthesis/src/instruction_selector.rs +++ b/crates/synth-synthesis/src/instruction_selector.rs @@ -816,6 +816,7 @@ fn compute_local_layout( func_ret_i64: &[bool], type_ret_i64: &[bool], force_spill_area: bool, + force_param_backing: bool, ) -> LocalLayout { use std::collections::{BTreeSet, HashMap}; let i64_set = infer_i64_locals(wasm_ops, func_ret_i64, type_ret_i64); @@ -899,7 +900,14 @@ fn compute_local_layout( // register-backed — no behaviour change, and isolated-lowering tests (and // the common case) are untouched. Call-free temp-allocation clobbers are the // distinct, non-blocking #193 fuzz class (allocator reservation, deferred). - if has_call { + // VCR-RA-001 acceptance increment (#242): `force_param_backing` extends the + // same frame-backing to call-FREE functions, set ONLY on the backend's + // retry after the i64 consecutive-pair exhaustion `Err` — the pinned param + // home registers (#193 reservation) are the one blocker the pair + // allocator's stack-spill loop (#171) cannot free, so the retry spills + // them to the frame at entry instead. Functions that compile without it + // keep byte-identical frames by construction. + if has_call || force_param_backing { let mut used_params: BTreeSet = BTreeSet::new(); for op in wasm_ops { let pidx = match op { @@ -1378,6 +1386,21 @@ pub struct InstructionSelector { /// allocation under exhaustion spills the deepest stack value instead of /// hard-failing ([`alloc_temp_or_spill`]). spill_on_exhaustion: bool, + /// VCR-RA-001 acceptance increment (#242): param-backing-on-exhaustion + /// retry mode. Default OFF — the backend sets it only for a retry after + /// `select_with_stack` failed with the i64 consecutive-PAIR exhaustion + /// `Err`. The pair allocator already spills register-resident STACK values + /// (#171, pair-aware); the only remaining blockers at the pair hard-fail + /// are the *pinned param home registers* (#193 `reserved`) plus the popped + /// operand pairs (`extra_avoid`) — and params are not stack values, so no + /// amount of stack spilling can free them. When ON, every read param is + /// frame-backed via the proven #204 `param_slots` machinery (spilled to a + /// frame slot at entry, reloaded on read), so `reserved` is empty and a + /// free consecutive pair always exists after stack spilling (at most two + /// adjacent operand pairs can be blocked among the 9-register pool). Like + /// `spill_on_exhaustion`, bit-identity is structural: only functions that + /// failed BOTH earlier passes ever compile with this set. + param_backing_on_exhaustion: bool, } impl InstructionSelector { @@ -1405,6 +1428,7 @@ impl InstructionSelector { has_helium: false, next_qreg: 0, spill_on_exhaustion: false, + param_backing_on_exhaustion: false, } } @@ -1432,6 +1456,7 @@ impl InstructionSelector { has_helium: false, next_qreg: 0, spill_on_exhaustion: false, + param_backing_on_exhaustion: false, } } @@ -1449,6 +1474,18 @@ impl InstructionSelector { self.spill_on_exhaustion = enabled; } + /// VCR-RA-001 acceptance increment (#242): enable param frame-backing on + /// i64 consecutive-pair exhaustion. Intended ONLY as the backend's final + /// retry after `select_with_stack` failed with the pair-exhaustion `Err` + /// (which stack spilling alone cannot recover — the blockers are pinned + /// param home registers, not stack values). Forces the #204 `param_slots` + /// frame-backing for every read param, so it changes the frame layout and + /// every param read; calling it for a function that compiles without it + /// would change its bytes. + pub fn set_param_backing_on_exhaustion(&mut self, enabled: bool) { + self.param_backing_on_exhaustion = enabled; + } + /// Enable relocatable host-link mode (#197): import calls emit a direct /// `BL func_N` (rewritten to the wasm field name by the relocatable-ELF /// builder) instead of dispatching through `__meld_dispatch_import`. @@ -4989,6 +5026,7 @@ impl InstructionSelector { &self.func_ret_i64, &self.type_ret_i64, self.spill_on_exhaustion, + self.param_backing_on_exhaustion, ); // Allocate stack space for non-param locals so they don't alias the // callee-saved-register spill area (which immediately follows SP @@ -14969,7 +15007,7 @@ mod tests { fn test_compute_local_layout_no_locals() { // Function with only params and no LocalGet/Set produces zero frame. let ops = vec![WasmOp::LocalGet(0), WasmOp::LocalGet(1), WasmOp::I32Add]; - let layout = compute_local_layout(&ops, 2, &[], &[], false); + let layout = compute_local_layout(&ops, 2, &[], &[], false, false); assert_eq!(layout.frame_size, 0); assert!(layout.locals.is_empty()); } @@ -14982,7 +15020,7 @@ mod tests { WasmOp::LocalSet(1), WasmOp::LocalGet(1), ]; - let layout = compute_local_layout(&ops, 1, &[], &[], false); + let layout = compute_local_layout(&ops, 1, &[], &[], false, false); assert!(layout.locals.contains_key(&1)); let (off, is_i64) = layout.locals[&1]; assert_eq!(off, 0); @@ -14999,7 +15037,7 @@ mod tests { WasmOp::LocalSet(0), WasmOp::LocalGet(0), ]; - let layout = compute_local_layout(&ops, 0, &[], &[], false); + let layout = compute_local_layout(&ops, 0, &[], &[], false, false); let (off, is_i64) = layout.locals[&0]; assert_eq!(off, 0); assert!(is_i64); @@ -15019,7 +15057,7 @@ mod tests { WasmOp::I64Const(2), WasmOp::LocalSet(1), ]; - let layout = compute_local_layout(&ops, 0, &[], &[], false); + let layout = compute_local_layout(&ops, 0, &[], &[], false, false); let (off0, is_i64_0) = layout.locals[&0]; let (off1, is_i64_1) = layout.locals[&1]; assert_eq!(off0, 0); @@ -15041,7 +15079,7 @@ mod tests { WasmOp::I32Add, WasmOp::LocalSet(2), ]; - let layout = compute_local_layout(&ops, 2, &[], &[], false); + let layout = compute_local_layout(&ops, 2, &[], &[], false, false); // Only idx 2 should be in the layout. assert!(!layout.locals.contains_key(&0)); assert!(!layout.locals.contains_key(&1)); @@ -15352,6 +15390,234 @@ mod tests { assert_eq!(spills, reloads, "every spill must reload exactly once"); } + // ── VCR-RA-001 acceptance increment (#242): i64 PAIR exhaustion ── + + /// The pair allocator's #171 stack-spill loop is pair-aware: when every + /// register is pinned and the deepest stack entry is an i64, BOTH halves + /// spill into one 8-byte slot (two STRs, lo then hi) and the freed pair is + /// returned. + #[test] + fn alloc_consecutive_pair_spills_pair_victim_242() { + let mut next_temp: u8 = 0; + // 4 i64 pairs (R0/R1, R2/R3, R4/R5, R6/R7) + one i32 in R8 = all nine + // allocatable registers pinned. + let mut stack = vec![ + StackVal::i64(Reg::R0), + StackVal::i64(Reg::R2), + StackVal::i64(Reg::R4), + StackVal::i64(Reg::R6), + StackVal::i32(Reg::R8), + ]; + let mut instructions: Vec = Vec::new(); + let mut spill = SpillState::new(0); + let (lo, hi) = alloc_consecutive_pair( + &mut next_temp, + &mut stack, + &mut instructions, + &mut spill, + &[], + &[], + 3, + ) + .unwrap(); + // The deepest entry (i64 in R0/R1) spilled — its pair is the result. + assert_eq!((lo, hi), (Reg::R0, Reg::R1)); + assert_eq!( + stack[0], + StackVal::Spilled { + lo_slot: 0, + is_i64: true + }, + "deepest i64 entry rewritten to Spilled" + ); + // Exactly two STRs: lo -> [SP,#0], hi -> [SP,#4] (one 8-byte slot). + assert_eq!(instructions.len(), 2); + match (&instructions[0].op, &instructions[1].op) { + (ArmOp::Str { rd: r0, addr: a0 }, ArmOp::Str { rd: r1, addr: a1 }) => { + assert_eq!((*r0, a0.clone()), (Reg::R0, MemAddr::imm(Reg::SP, 0))); + assert_eq!((*r1, a1.clone()), (Reg::R1, MemAddr::imm(Reg::SP, 4))); + } + other => panic!("expected two STR spills, got {other:?}"), + } + } + + /// Pair exhaustion with i32 victims: freeing a consecutive pair takes TWO + /// single-register spills (each into its own slot) before (R0,R1) frees. + #[test] + fn alloc_consecutive_pair_spills_single_victims_242() { + let mut next_temp: u8 = 0; + let mut stack = full_i32_stack(); // nine i32s pin R0-R8 + let mut instructions: Vec = Vec::new(); + let mut spill = SpillState::new(0); + let (lo, hi) = alloc_consecutive_pair( + &mut next_temp, + &mut stack, + &mut instructions, + &mut spill, + &[], + &[], + 5, + ) + .unwrap(); + assert_eq!((lo, hi), (Reg::R0, Reg::R1)); + assert_eq!( + stack[0], + StackVal::Spilled { + lo_slot: 0, + is_i64: false + } + ); + assert_eq!( + stack[1], + StackVal::Spilled { + lo_slot: 8, + is_i64: false + } + ); + // Two single STRs into two distinct slots. + assert_eq!(instructions.len(), 2); + for (i, (reg, off)) in [(Reg::R0, 0), (Reg::R1, 8)].iter().enumerate() { + match &instructions[i].op { + ArmOp::Str { rd, addr } => { + assert_eq!((*rd, addr.clone()), (*reg, MemAddr::imm(Reg::SP, *off))); + } + other => panic!("expected STR spill, got {other:?}"), + } + } + } + + /// When the blockers are NOT stack values (pinned param registers in + /// `reserved` + popped operand pairs in `extra_avoid`), stack spilling + /// cannot help and the exact original pair-exhaustion `Err` is preserved + /// with nothing emitted — the case only the backend's param-backing retry + /// recovers. + #[test] + fn alloc_consecutive_pair_err_when_only_pinned_blockers_242() { + let mut next_temp: u8 = 4; + let mut stack: Vec = Vec::new(); // nothing register-resident + let mut instructions: Vec = Vec::new(); + let mut spill = SpillState::new(0); + let err = alloc_consecutive_pair( + &mut next_temp, + &mut stack, + &mut instructions, + &mut spill, + &[Reg::R4, Reg::R5, Reg::R6, Reg::R7], // popped operand pairs + &[Reg::R0, Reg::R1, Reg::R2, Reg::R3], // pinned params (#193) + 0, + ) + .unwrap_err(); + assert!( + err.to_string() + .contains("no consecutive pair of free registers for i64"), + "must preserve the original pair-exhaustion Err, got: {err}" + ); + assert!(instructions.is_empty(), "must not emit on the Err path"); + } + + /// A spilled i64 PAIR reloads on pop through the #171 machinery: a fresh + /// consecutive pair and two LDRs (lo from slot, hi from slot+4), freeing + /// the slot. + #[test] + fn spilled_i64_pair_reloads_on_pop_242() { + let mut next_temp: u8 = 0; + let mut spill = SpillState::new(0); + let slot = spill.alloc().unwrap(); // occupy slot 0 as the spill did + let mut stack = vec![StackVal::Spilled { + lo_slot: slot, + is_i64: true, + }]; + let mut instructions: Vec = Vec::new(); + let lo = pop_operand( + &mut stack, + &mut next_temp, + &mut instructions, + &mut spill, + &[], + 9, + ) + .unwrap(); + let hi = i64_pair_hi(lo).unwrap(); + assert_eq!(instructions.len(), 2, "lo + hi reload LDRs"); + match (&instructions[0].op, &instructions[1].op) { + (ArmOp::Ldr { rd: r0, addr: a0 }, ArmOp::Ldr { rd: r1, addr: a1 }) => { + assert_eq!((*r0, a0.clone()), (lo, MemAddr::imm(Reg::SP, slot))); + assert_eq!((*r1, a1.clone()), (hi, MemAddr::imm(Reg::SP, slot + 4))); + } + other => panic!("expected two LDR reloads, got {other:?}"), + } + assert_eq!(spill.alloc(), Some(slot), "slot freed for reuse on reload"); + } + + /// End-to-end fail-then-retry contract for the PAIR site (the backend's + /// three-pass ladder, `scripts/repro/high_pressure_i64.wat` sequence): + /// pass 1 (default) -> the pair-exhaustion hard-fail, verbatim; + /// pass 2 (spill-only #320) -> STILL the pair hard-fail (stack spilling + /// cannot free pinned param registers); + /// pass 3 (+param backing) -> compiles, params frame-backed at entry. + #[test] + fn select_with_stack_pair_exhaustion_recoverable_via_param_backing_242() { + use WasmOp::*; + let ops = vec![ + I64Const(0x1111111111111111), + I64Const(0x2222222222222222), + I64Const(0x3333333333333333), + I64Const(0x4444444444444444), + I64Add, + I64Sub, + I64Xor, + LocalGet(0), + I64ExtendI32U, + I64Add, + LocalGet(1), + I64ExtendI32U, + I64Sub, + LocalGet(2), + I64ExtendI32U, + I64Xor, + LocalGet(3), + I64ExtendI32U, + I64Add, + ]; + + // Pass 1 (default world): the pair hard-fail, verbatim. + let err = fresh_selector().select_with_stack(&ops, 4).unwrap_err(); + assert!( + err.to_string() + .contains("no consecutive pair of free registers for i64"), + "expected the pair-exhaustion hard-fail, got: {err}" + ); + + // Pass 2 (#320 spill-only): insufficient by construction — the + // blockers are param home registers, not stack values. + let mut spill_only = fresh_selector(); + spill_only.set_spill_on_exhaustion(true); + let err = spill_only.select_with_stack(&ops, 4).unwrap_err(); + assert!( + err.to_string() + .contains("no consecutive pair of free registers for i64"), + "spill-only retry must still hit the pair hard-fail, got: {err}" + ); + + // Pass 3 (param-backing retry): compiles; the entry spills each read + // param to its frame slot before any operand-stack traffic. + let mut retry = fresh_selector(); + retry.set_spill_on_exhaustion(true); + retry.set_param_backing_on_exhaustion(true); + let instrs = retry + .select_with_stack(&ops, 4) + .expect("param-backing retry must compile"); + let entry_param_spills = instrs + .iter() + .take_while(|i| i.source_line.is_none()) + .filter(|i| matches!(&i.op, ArmOp::Str { addr, .. } if addr.base == Reg::SP)) + .count(); + assert_eq!( + entry_param_spills, 4, + "all four read params frame-backed at entry" + ); + } + #[test] fn test_select_with_stack_emits_frame_alloc_for_i64_local() { // When a non-param i64 local exists, select_with_stack must: diff --git a/scripts/repro/high_pressure_i64.wat b/scripts/repro/high_pressure_i64.wat new file mode 100644 index 0000000..640ccfe --- /dev/null +++ b/scripts/repro/high_pressure_i64.wat @@ -0,0 +1,42 @@ +;; VCR-RA-001 acceptance increment (#242) — i64 consecutive-PAIR exhaustion repro. +;; +;; The i64 sibling of high_pressure_i32.wat. Keeps 4 i64 constants +;; simultaneously live (4 register pairs = 8 regs; the deepest two spill via +;; the #171 pair-aware stack-spill loop) while all four i32 params are +;; reserved in r0-r3 until their last read (#193). At the first i64.add the +;; two popped operand pairs (r4-r7, extra_avoid) plus the four pinned param +;; registers leave only r8 free — no consecutive pair — and the operand stack +;; holds nothing register-resident left to spill, so pre-acceptance the +;; selector hard-failed with +;; "register exhaustion: no consecutive pair of free registers for i64" +;; (stack spilling CANNOT recover this: the blockers are param home registers, +;; not stack values). With the param-backing retry the params are frame-backed +;; at entry (#204 machinery), freeing r0-r3, and the function compiles. The +;; fold mixes non-commutative ops (sub/xor) so an operand-order, reload, or +;; half-swap bug changes the result; the i64 result returns in r0:r1. +;; +;; Differential oracle: +;; synth compile scripts/repro/high_pressure_i64.wat -o /tmp/hp64.elf \ +;; --target cortex-m4 --relocatable +;; /tmp/armv/bin/python scripts/repro/high_pressure_i64_differential.py /tmp/hp64.elf +(module + (func (export "high_pressure64") (param i32 i32 i32 i32) (result i64) + i64.const 0x1111111111111111 + i64.const 0x2222222222222222 + i64.const 0x3333333333333333 + i64.const 0x4444444444444444 + i64.add ;; <- pair exhaustion here pre-acceptance + i64.sub + i64.xor + local.get 0 + i64.extend_i32_u + i64.add + local.get 1 + i64.extend_i32_u + i64.sub + local.get 2 + i64.extend_i32_u + i64.xor + local.get 3 + i64.extend_i32_u + i64.add)) diff --git a/scripts/repro/high_pressure_i64_differential.py b/scripts/repro/high_pressure_i64_differential.py new file mode 100644 index 0000000..b30f7a9 --- /dev/null +++ b/scripts/repro/high_pressure_i64_differential.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +"""VCR-RA-001 acceptance increment (#242) — i64 pair-spill differential oracle. + +`high_pressure_i64.wat` keeps 4 i64 constants simultaneously live (4 register +pairs) while all four i32 params are reserved in r0-r3: at the first i64.add +the popped operand pairs + pinned params leave no free consecutive pair and +nothing register-resident to spill, so pre-acceptance the selector hard-failed +("register exhaustion: no consecutive pair of free registers for i64"). With +the param-backing retry the params frame-back at entry (#204 machinery) and +the function compiles. wasmtime is ground truth; unicorn runs synth's ARM +(`--relocatable` / select_with_stack path). The fold mixes non-commutative ops +(sub/xor) so an operand-order, reload, or i64 half-swap bug changes the +result; the i64 result returns in r0 (lo) : r1 (hi). + +Run: + synth compile scripts/repro/high_pressure_i64.wat -o /tmp/hp64.elf \ + --target cortex-m4 --relocatable + /tmp/armv/bin/python scripts/repro/high_pressure_i64_differential.py /tmp/hp64.elf + +Exits nonzero on any mismatch so it can gate a release. +""" +import sys +from pathlib import Path + +import wasmtime +from elftools.elf.elffile import ELFFile +from unicorn import UC_ARCH_ARM, UC_MODE_THUMB, Uc +from unicorn.arm_const import ( + UC_ARM_REG_LR, + UC_ARM_REG_R0, + UC_ARM_REG_R1, + UC_ARM_REG_R2, + UC_ARM_REG_R3, + UC_ARM_REG_SP, +) + +ELF = sys.argv[1] if len(sys.argv) > 1 else "/tmp/hp64.elf" +WAT = Path(__file__).with_name("high_pressure_i64.wat") + +# Ground truth: wasmtime. +eng = wasmtime.Engine() +mod = wasmtime.Module(eng, WAT.read_bytes()) +st = wasmtime.Store(eng) +inst = wasmtime.Instance(st, mod, []) +gt = inst.exports(st)["high_pressure64"] + +# synth's ARM under unicorn. +e = ELFFile(open(ELF, "rb")) +text = e.get_section_by_name(".text").data() +symtab = [s for s in e.iter_sections() if s["sh_type"] == "SHT_SYMTAB"][0] +syms = {s.name: s["st_value"] for s in symtab.iter_symbols()} + +CODE, STK = 0x10000, 0x90000 +mu = Uc(UC_ARCH_ARM, UC_MODE_THUMB) +mu.mem_map(CODE, 0x10000) +mu.mem_write(CODE, text) +mu.mem_map(STK, 0x10000) +RET = CODE + 0xFF00 +mu.mem_write(RET, b"\x00\xbf\x00\xbf") + + +def signed32(v): + return v - (1 << 32) if v >= 1 << 31 else v + + +ok = True +for a, b, c, d in ( + (0, 0, 0, 0), + (1, 2, 3, 4), + (3000, 50, 7, 11), + (0x7FFFFFFF, 1, 0x7FFFFFFF, 2), + (0xFFFFFFFF, 0x80000000, 0xFFFFFFFF, 0x80000000), + (12345, 0xDEADBEEF, 0xCAFEBABE, 54321), +): + exp = gt(st, signed32(a), signed32(b), signed32(c), signed32(d)) & ( + (1 << 64) - 1 + ) + mu.reg_write(UC_ARM_REG_R0, a) + mu.reg_write(UC_ARM_REG_R1, b) + mu.reg_write(UC_ARM_REG_R2, c) + mu.reg_write(UC_ARM_REG_R3, d) + mu.reg_write(UC_ARM_REG_SP, STK + 0x8000) + mu.reg_write(UC_ARM_REG_LR, RET | 1) + mu.emu_start((CODE + syms["high_pressure64"]) | 1, RET, timeout=5_000_000) + got = mu.reg_read(UC_ARM_REG_R0) | (mu.reg_read(UC_ARM_REG_R1) << 32) + m = "OK " if got == exp else "FAIL" + if got != exp: + ok = False + print( + f"{m} high_pressure64({a:#x},{b:#x},{c:#x},{d:#x}) = " + f"{got:#018x} expect {exp:#018x}" + ) + +print("ORACLE:", "PASS" if ok else "FAIL") +sys.exit(0 if ok else 1)