From 03415f86f56598c2398801ea8e3306b5cff2cd43 Mon Sep 17 00:00:00 2001
From: TechnoPorg
Date: Wed, 1 Apr 2026 18:22:12 +0200
Subject: [PATCH] [LLVM,X64] Add support for SHA intrinsics

---
Review notes (this section is not part of the commit):

* The patch adds handle_intrin cases for x86_sha1msg1, x86_sha1msg2,
  x86_sha1nexte, x86_sha1rnds4, x86_sha256msg1, x86_sha256msg2 and
  x86_sha256rnds2. Each case loads operand 0 and operand 1 into registers,
  emits the matching SHA instruction with operand 0's register as the
  destination, and stores that register as the result of the call.
  x86_sha1rnds4 additionally encodes operand 2 (cast to llvm::ConstantInt)
  as a u8 immediate.

* NOTE(review), x86_sha256rnds2: the case reserves XMM0 through
  scratch_alloc_specific, but no statement in this hunk copies operand 2
  (the round constants) into XMM0 before SHA256RNDS2rrr is emitted. The
  expected output in intrin_x86_sha256.ll agrees: xmm0 is spilled and then
  used as the third operand without ever being written, while %c is the
  third vector argument. This looks like the instruction runs with %a
  instead of %c as round constants -- please confirm and, if so, move
  operand 2 into XMM0 and regenerate the CHECK lines.

* NOTE(review), x86_sha256rnds2: the result is set from dst_ref.part(0),
  whereas every other case uses std::move(dst_part). Confirm the
  difference is intentional.

 tpde-llvm/src/x64/LLVMCompilerX64.cpp         | 73 +++++++++++++++++++
 .../test/filetest/x64/intrin_x86_sha1.ll      | 37 ++++++++++
 .../test/filetest/x64/intrin_x86_sha256.ll    | 35 +++++++++
 3 files changed, 145 insertions(+)
 create mode 100644 tpde-llvm/test/filetest/x64/intrin_x86_sha1.ll
 create mode 100644 tpde-llvm/test/filetest/x64/intrin_x86_sha256.ll

diff --git a/tpde-llvm/src/x64/LLVMCompilerX64.cpp b/tpde-llvm/src/x64/LLVMCompilerX64.cpp
index 160f76bb..e44500cc 100644
--- a/tpde-llvm/src/x64/LLVMCompilerX64.cpp
+++ b/tpde-llvm/src/x64/LLVMCompilerX64.cpp
@@ -619,6 +619,79 @@ bool LLVMCompilerX64::handle_intrin(const llvm::IntrinsicInst *inst) {
     return true;
   }
   case llvm::Intrinsic::x86_sse2_pause: ASM(PAUSE); return true;
+  case llvm::Intrinsic::x86_sha1msg1: {
+    auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0));
+    auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1));
+    ASM(SHA1MSG1rr,
+        dst_part.load_to_reg(),
+        src_part.load_to_reg());
+    this->result_ref(inst).part(0).set_value(std::move(dst_part));
+    return true;
+  }
+  case llvm::Intrinsic::x86_sha1msg2: {
+    auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0));
+    auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1));
+    ASM(SHA1MSG2rr,
+        dst_part.load_to_reg(),
+        src_part.load_to_reg());
+    this->result_ref(inst).part(0).set_value(std::move(dst_part));
+    return true;
+  }
+  case llvm::Intrinsic::x86_sha1nexte: {
+    auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0));
+    auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1));
+    ASM(SHA1NEXTErr,
+        dst_part.load_to_reg(),
+        src_part.load_to_reg());
+    this->result_ref(inst).part(0).set_value(std::move(dst_part));
+    return true;
+  }
+  case llvm::Intrinsic::x86_sha1rnds4: {
+    auto immediate = llvm::cast<llvm::ConstantInt>(inst->getOperand(2));
+    auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0));
+    auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1));
+    ASM(SHA1RNDS4rri,
+        dst_part.load_to_reg(),
+        src_part.load_to_reg(),
+        (u8)immediate->getZExtValue());
+    this->result_ref(inst).part(0).set_value(std::move(dst_part));
+    return true;
+  }
+  case llvm::Intrinsic::x86_sha256msg1: {
+    auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0));
+    auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1));
+    ASM(SHA256MSG1rr,
+        dst_part.load_to_reg(),
+        src_part.load_to_reg());
+    this->result_ref(inst).part(0).set_value(std::move(dst_part));
+    return true;
+  }
+  case llvm::Intrinsic::x86_sha256msg2: {
+    auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0));
+    auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1));
+    ASM(SHA256MSG2rr,
+        dst_part.load_to_reg(),
+        src_part.load_to_reg());
+    this->result_ref(inst).part(0).set_value(std::move(dst_part));
+    return true;
+  }
+  case llvm::Intrinsic::x86_sha256rnds2: {
+    auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0));
+    auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1));
+    auto [round_constants_ref, round_constants_part] = this->val_ref_single(inst->getOperand(2));
+    auto round_constants_gval = GenericValuePart(std::move(round_constants_part));
+    ScratchReg scratch_xmm0{this};
+    FixedRegBackup reg_backup_xmm0 = {.scratch = ScratchReg{this}};
+    scratch_alloc_specific(
+        AsmReg::XMM0, scratch_xmm0, {&round_constants_gval}, reg_backup_xmm0);
+    ASM(SHA256RNDS2rrr,
+        dst_part.load_to_reg(),
+        src_part.load_to_reg(),
+        FE_XMM0);
+    this->result_ref(inst).part(0).set_value(dst_ref.part(0));
+    scratch_check_fixed_backup(scratch_xmm0, reg_backup_xmm0, false);
+    return true;
+  }
   default: return false;
   }
 }
diff --git a/tpde-llvm/test/filetest/x64/intrin_x86_sha1.ll b/tpde-llvm/test/filetest/x64/intrin_x86_sha1.ll
new file mode 100644
index 00000000..a1b71a03
--- /dev/null
+++ b/tpde-llvm/test/filetest/x64/intrin_x86_sha1.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by test/update_tpde_llc_test_checks.py UTC_ARGS: --version 5
+; SPDX-FileCopyrightText: 2026 Contributors to TPDE
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+; RUN: tpde-llc --target=x86_64 %s | %objdump | FileCheck %s -check-prefixes=X64
+
+define <4 x i32> @sha1msg1(<4 x i32> %a, <4 x i32> %b) {
+; X64-LABEL: <sha1msg1>:
+; X64: sha1msg1 xmm0, xmm1
+; X64-NEXT: ret
+  %res = call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @sha1msg2(<4 x i32> %a, <4 x i32> %b) {
+; X64-LABEL: <sha1msg2>:
+; X64: sha1msg2 xmm0, xmm1
+; X64-NEXT: ret
+  %res = call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @sha1nexte(<4 x i32> %a, <4 x i32> %b) {
+; X64-LABEL: <sha1nexte>:
+; X64: sha1nexte xmm0, xmm1
+; X64-NEXT: ret
+  %res = call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @sha1rnds4(<4 x i32> %a, <4 x i32> %b) {
+; X64-LABEL: <sha1rnds4>:
+; X64: sha1rnds4 xmm0, xmm1, 0x3
+; X64-NEXT: ret
+  %res = call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
+  ret <4 x i32> %res
+}
diff --git a/tpde-llvm/test/filetest/x64/intrin_x86_sha256.ll b/tpde-llvm/test/filetest/x64/intrin_x86_sha256.ll
new file mode 100644
index 00000000..42d00455
--- /dev/null
+++ b/tpde-llvm/test/filetest/x64/intrin_x86_sha256.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by test/update_tpde_llc_test_checks.py UTC_ARGS: --version 5
+; SPDX-FileCopyrightText: 2026 Contributors to TPDE
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+; RUN: tpde-llc --target=x86_64 %s | %objdump | FileCheck %s -check-prefixes=X64
+
+define <4 x i32> @sha256msg1(<4 x i32> %a, <4 x i32> %b) {
+; X64-LABEL: <sha256msg1>:
+; X64: sha256msg1 xmm0, xmm1
+; X64-NEXT: ret
+  %res = call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @sha256msg2(<4 x i32> %a, <4 x i32> %b) {
+; X64-LABEL: <sha256msg2>:
+; X64: sha256msg2 xmm0, xmm1
+; X64-NEXT: ret
+  %res = call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; X64-LABEL: <sha256rnds2>:
+; X64: push rbp
+; X64-NEXT: mov rbp, rsp
+; X64-NEXT: movapd xmmword ptr [rbp - 0x40], xmm0
+; X64-NEXT: movapd xmm3, xmmword ptr [rbp - 0x40]
+; X64-NEXT: sha256rnds2 xmm3, xmm1, xmm0
+; X64-NEXT: movapd xmm0, xmm3
+; X64-NEXT: pop rbp
+; X64-NEXT: ret
+  %res = call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %res
+}