-
Notifications
You must be signed in to change notification settings - Fork 35
[LLVM,X64] Add support for SHA intrinsics #37
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -619,6 +619,79 @@ bool LLVMCompilerX64::handle_intrin(const llvm::IntrinsicInst *inst) { | |
| return true; | ||
| } | ||
| case llvm::Intrinsic::x86_sse2_pause: ASM(PAUSE); return true; | ||
| case llvm::Intrinsic::x86_sha1msg1: { | ||
| auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0)); | ||
| auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1)); | ||
| ASM(SHA1MSG1rr, | ||
| dst_part.load_to_reg(), | ||
| src_part.load_to_reg()); | ||
| this->result_ref(inst).part(0).set_value(std::move(dst_part)); | ||
| return true; | ||
| } | ||
| case llvm::Intrinsic::x86_sha1msg2: { | ||
| auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0)); | ||
| auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1)); | ||
| ASM(SHA1MSG2rr, | ||
| dst_part.load_to_reg(), | ||
| src_part.load_to_reg()); | ||
| this->result_ref(inst).part(0).set_value(std::move(dst_part)); | ||
| return true; | ||
| } | ||
| case llvm::Intrinsic::x86_sha1nexte: { | ||
| auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0)); | ||
| auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1)); | ||
| ASM(SHA1NEXTErr, | ||
| dst_part.load_to_reg(), | ||
| src_part.load_to_reg()); | ||
| this->result_ref(inst).part(0).set_value(std::move(dst_part)); | ||
| return true; | ||
| } | ||
| case llvm::Intrinsic::x86_sha1rnds4: { | ||
| auto immediate = llvm::cast<llvm::ConstantInt>(inst->getOperand(2)); | ||
| auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0)); | ||
| auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1)); | ||
| ASM(SHA1RNDS4rri, | ||
| dst_part.load_to_reg(), | ||
| src_part.load_to_reg(), | ||
| (u8)immediate->getZExtValue()); | ||
| this->result_ref(inst).part(0).set_value(std::move(dst_part)); | ||
| return true; | ||
| } | ||
| case llvm::Intrinsic::x86_sha256msg1: { | ||
| auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0)); | ||
| auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1)); | ||
| ASM(SHA256MSG1rr, | ||
| dst_part.load_to_reg(), | ||
| src_part.load_to_reg()); | ||
| this->result_ref(inst).part(0).set_value(std::move(dst_part)); | ||
| return true; | ||
| } | ||
| case llvm::Intrinsic::x86_sha256msg2: { | ||
| auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0)); | ||
| auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1)); | ||
| ASM(SHA256MSG2rr, | ||
| dst_part.load_to_reg(), | ||
| src_part.load_to_reg()); | ||
| this->result_ref(inst).part(0).set_value(std::move(dst_part)); | ||
| return true; | ||
| } | ||
| case llvm::Intrinsic::x86_sha256rnds2: { | ||
| auto [dst_ref, dst_part] = this->val_ref_single(inst->getOperand(0)); | ||
| auto [src_ref, src_part] = this->val_ref_single(inst->getOperand(1)); | ||
| auto [round_constants_ref, round_constants_part] = this->val_ref_single(inst->getOperand(2)); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use `load_to_specific` and do this before loading any other values. `FixedRegBackup` should never be used outside of generated code.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: you will have to call `into_temporary` first, as otherwise it will break for fixed assignments. |
||
| auto round_constants_gval = GenericValuePart(std::move(round_constants_part)); | ||
| ScratchReg scratch_xmm0{this}; | ||
| FixedRegBackup reg_backup_xmm0 = {.scratch = ScratchReg{this}}; | ||
| scratch_alloc_specific( | ||
| AsmReg::XMM0, scratch_xmm0, {&round_constants_gval}, reg_backup_xmm0); | ||
| ASM(SHA256RNDS2rrr, | ||
| dst_part.load_to_reg(), | ||
| src_part.load_to_reg(), | ||
| FE_XMM0); | ||
| this->result_ref(inst).part(0).set_value(dst_ref.part(0)); | ||
| scratch_check_fixed_backup(scratch_xmm0, reg_backup_xmm0, false); | ||
| return true; | ||
| } | ||
| default: return false; | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| ; NOTE: Assertions have been autogenerated by test/update_tpde_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; SPDX-FileCopyrightText: 2026 Contributors to TPDE <https://tpde.org> | ||
| ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| ; RUN: tpde-llc --target=x86_64 %s | %objdump | FileCheck %s -check-prefixes=X64 | ||
|
|
||
| define <4 x i32> @sha1msg1(<4 x i32> %a, <4 x i32> %b) { | ||
| ; X64-LABEL: <sha1msg1>: | ||
| ; X64: sha1msg1 xmm0, xmm1 | ||
| ; X64-NEXT: ret | ||
| %res = call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The tests lack a variant where the destination register cannot be reused. |
||
| ret <4 x i32> %res | ||
| } | ||
|
|
||
| define <4 x i32> @sha1msg2(<4 x i32> %a, <4 x i32> %b) { | ||
| ; X64-LABEL: <sha1msg2>: | ||
| ; X64: sha1msg2 xmm0, xmm1 | ||
| ; X64-NEXT: ret | ||
| %res = call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %res | ||
| } | ||
|
|
||
| define <4 x i32> @sha1nexte(<4 x i32> %a, <4 x i32> %b) { | ||
| ; X64-LABEL: <sha1nexte>: | ||
| ; X64: sha1nexte xmm0, xmm1 | ||
| ; X64-NEXT: ret | ||
| %res = call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %res | ||
| } | ||
|
|
||
| define <4 x i32> @sha1rnds4(<4 x i32> %a, <4 x i32> %b) { | ||
| ; X64-LABEL: <sha1rnds4>: | ||
| ; X64: sha1rnds4 xmm0, xmm1, 0x3 | ||
| ; X64-NEXT: ret | ||
| %res = call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3) | ||
| ret <4 x i32> %res | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| ; NOTE: Assertions have been autogenerated by test/update_tpde_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; SPDX-FileCopyrightText: 2026 Contributors to TPDE <https://tpde.org> | ||
| ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| ; RUN: tpde-llc --target=x86_64 %s | %objdump | FileCheck %s -check-prefixes=X64 | ||
|
|
||
| define <4 x i32> @sha256msg1(<4 x i32> %a, <4 x i32> %b) { | ||
| ; X64-LABEL: <sha256msg1>: | ||
| ; X64: sha256msg1 xmm0, xmm1 | ||
| ; X64-NEXT: ret | ||
| %res = call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %res | ||
| } | ||
|
|
||
| define <4 x i32> @sha256msg2(<4 x i32> %a, <4 x i32> %b) { | ||
| ; X64-LABEL: <sha256msg2>: | ||
| ; X64: sha256msg2 xmm0, xmm1 | ||
| ; X64-NEXT: ret | ||
| %res = call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b) | ||
| ret <4 x i32> %res | ||
| } | ||
|
|
||
| define <4 x i32> @sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { | ||
| ; X64-LABEL: <sha256rnds2>: | ||
| ; X64: push rbp | ||
| ; X64-NEXT: mov rbp, rsp | ||
| ; X64-NEXT: movapd xmmword ptr [rbp - 0x40], xmm0 | ||
| ; X64-NEXT: movapd xmm3, xmmword ptr [rbp - 0x40] | ||
| ; X64-NEXT: sha256rnds2 xmm3, xmm1, xmm0 | ||
| ; X64-NEXT: movapd xmm0, xmm3 | ||
| ; X64-NEXT: pop rbp | ||
| ; X64-NEXT: ret | ||
| %res = call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) | ||
| ret <4 x i32> %res | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This destroys a value that might still be used. Use into_temporary, see crc32 above. Likewise for other cases.