Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
20ac5dd
Add JIT code generator for PPC64
cyrozap Mar 28, 2026
d24b69c
Correct comment for STORE_LE_VR
cyrozap Apr 5, 2026
5cc3a96
Optimize STORE_LE_VR on little-endian POWER
cyrozap Apr 5, 2026
73ffbbd
Make it clear that f0-f31 are aliased by vs0-vs31
cyrozap Apr 5, 2026
26e0d91
Mark r23 as unused
cyrozap Apr 5, 2026
4aa3387
Optimize CBRANCH
cyrozap Apr 5, 2026
8d2a70a
Add a comment on the importance of using dcbt to prefetch the next block
cyrozap Apr 6, 2026
bf0d6f4
Optimize scratchpad address calculation in program suffix
cyrozap Apr 6, 2026
6bf51f3
Optimize emitMovImm64 for rotated 32-bit immediates
cyrozap Apr 6, 2026
15b0f88
Use cpu.hasAes() instead of getauxval on PPC64
cyrozap Apr 6, 2026
b2c9f9f
Set default PPC64 CPU based on whether the system is BE or LE
cyrozap Apr 6, 2026
8cb435b
Enable compatibility with PPC64 ELF ABI V1
cyrozap Apr 7, 2026
57a6451
Fix BE PPC64 cache and dataset endianness
cyrozap Apr 7, 2026
05c1720
Fix BE PPC64 scratchpad and register endianness
cyrozap Apr 8, 2026
ee98798
Fix interpreter v2 tests on big-endian PPC64
cyrozap Apr 8, 2026
963fe7e
Remove unnecessary immediate load on PPC64 BE with v1 ABI
cyrozap Apr 8, 2026
21ba867
Move PPC64 VM prologue generation into prefix generation function
cyrozap Apr 10, 2026
c4127e2
Factor out the common parts of the scratchpad store
cyrozap Apr 10, 2026
98f415c
Flush the cache on PPC64 for real
cyrozap Apr 10, 2026
2dbda49
Optimize CFROUND for POWER9 (ISA v3.0B)
cyrozap Apr 11, 2026
9836c47
Cache reciprocals in PPC64 JIT compiler
cyrozap Apr 11, 2026
36d32c1
Simplify scratchpad loading code
cyrozap Apr 11, 2026
20b4c2b
Move the creation of the zero vector further from where it's used
cyrozap Apr 11, 2026
38302fb
Use AltiVec instructions instead of VSX instructions where possible
cyrozap Apr 13, 2026
0fa4736
Implement software AES for PPC64
cyrozap Apr 13, 2026
161751e
Avoid dependency on Linux kernel headers
cyrozap Apr 6, 2026
af1e831
Use round-robin temporary register allocator in PPC64 JIT compiler
cyrozap Apr 17, 2026
ede3493
Fix PPC64 build for musl libc
cyrozap Apr 17, 2026
dca3008
Rename the PPC64 byte-reverse mask to better reflect its purpose
cyrozap Apr 24, 2026
6fb1a9d
Use .octa for vector byte-reverse mask to avoid confusion
cyrozap Apr 24, 2026
905c5b5
Correct the Group E AND mask
cyrozap Apr 24, 2026
44c4b32
Optimize Group E register conversion on PPC64
cyrozap Apr 24, 2026
9afe5f0
PPC64 JIT: Correct maximum RandomX instruction code size
cyrozap Apr 26, 2026
c805c17
PPC64 JIT: Optimize emitAddImm32 by using `addis` for supported values
cyrozap Apr 27, 2026
39b2d7b
PPC64 JIT: Add some notes on optimizing emitAddImm32
cyrozap Apr 27, 2026
f7af1e6
PPC64 JIT: Make sure groups of four loads use different temporary regs
cyrozap Apr 27, 2026
9852466
PPC64 JIT: Group loads four at a time
cyrozap Apr 27, 2026
88781a2
PPC64 JIT: Reorder ld arguments to match the assembly instruction
cyrozap Apr 28, 2026
a7891d0
PPC64 JIT: Rename scratchpad store prologue/epilogue
cyrozap Apr 28, 2026
5cec426
PPC64 JIT: Move the Group F scratchpad store into the code generator
cyrozap Apr 28, 2026
6e26b1a
PPC64 JIT: Optimize Group F scratchpad store on v3.0 and later
cyrozap Apr 28, 2026
f9e708d
PPC64 JIT: Avoid moving register ma when we don't need to
cyrozap Apr 28, 2026
638319f
PPC64 JIT: Optimize Group F register scratchpad stores on pre-v3.0
cyrozap Apr 29, 2026
0a96ac1
PPC64 JIT: Optimize IXOR_R for 16-bit and shifted unsigned 16-bit values
cyrozap Apr 29, 2026
fa2c8aa
PPC64 JIT: Rearrange the beq/bne instruction formatters
cyrozap Apr 29, 2026
85c5ea8
PPC64 JIT: Add branch hint for CFROUND in V2 mode
cyrozap Apr 29, 2026
956e083
PPC64 JIT: Convert the BO field values to hexadecimal
cyrozap May 6, 2026
80477cb
PPC64 JIT: Add branch hint for CBRANCH
cyrozap May 6, 2026
6050215
PPC64 JIT: Remove STORE_LE_VR macro
cyrozap May 7, 2026
2604dd7
PPC64 JIT: Interleave immediate loads with vector loads and stores
cyrozap May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,22 @@ endif()

# PowerPC
if(ARCH_ID STREQUAL "ppc64" OR ARCH_ID STREQUAL "ppc64le")
if(ARCH STREQUAL "native")
add_flag("-mcpu=native")
list(APPEND randomx_sources
src/jit_compiler_ppc64_static.S
src/jit_compiler_ppc64.cpp)

set_property(SOURCE src/jit_compiler_ppc64_static.S PROPERTY LANGUAGE C)

if(ARCH STREQUAL "default")
if(ARCH_ID STREQUAL "ppc64le")
# Little-endian defaults to POWER8
add_flag("-mcpu=power8")
else()
# Big-endian defaults to POWER7
add_flag("-mcpu=power7")
endif()
else()
add_flag("-mcpu=${ARCH}")
endif()
# PowerPC AES requires ALTIVEC (POWER7+), so it cannot be enabled in the default build
endif()
Expand Down
5 changes: 5 additions & 0 deletions src/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ namespace randomx {
#define RANDOMX_COMPILER_RV64
class JitCompilerRV64;
using JitCompiler = JitCompilerRV64;
#elif defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_PPC64
class JitCompilerPPC64;
using JitCompiler = JitCompilerPPC64;
#else
#define RANDOMX_HAVE_COMPILER 0
class JitCompilerFallback;
Expand Down
16 changes: 15 additions & 1 deletion src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <asm/hwcap.h>
#endif

#if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
#include <sys/auxv.h>
// From asm/cputable.h:
#ifndef PPC_FEATURE2_VEC_CRYPTO
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#endif
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
#endif

#ifdef __riscv
#include <signal.h>
#include <setjmp.h>
Expand Down Expand Up @@ -120,8 +131,11 @@ namespace randomx {

sigaction(SIGILL, &old_action, nullptr);
}
#elif defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
unsigned long hwcaps2 = getauxval(AT_HWCAP2);
aes_ = (hwcaps2 & PPC_FEATURE2_VEC_CRYPTO) != 0;
v3p0_ = (hwcaps2 & PPC_FEATURE2_ARCH_3_00) != 0;
#endif
//TODO POWER8 AES
}

const Cpu cpu;
Expand Down
6 changes: 6 additions & 0 deletions src/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ namespace randomx {
inline bool hasRVV() const { return rvv_; }
inline int getRVV_Length() const { return rvv_length; }
#endif
#if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
inline bool hasV3P0() const { return v3p0_; }
#endif

private:
bool aes_ = false;
Expand All @@ -49,6 +52,9 @@ namespace randomx {
#ifdef __riscv
bool rvv_ = false;
int rvv_length = 0;
#endif
#if defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)
bool v3p0_ = false;
#endif
};

Expand Down
8 changes: 8 additions & 0 deletions src/intrin_portable.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,19 @@ FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
}

// Reinterpret an integer vector as a floating-point vector.
// Little-endian builds (NATIVE_LITTLE_ENDIAN defined) use a plain vector cast.
// All other builds first run the input through vec_perm with the index list
// {4..7, 0..3, 12..15, 8..11}; with byte-sized mask elements this exchanges the
// two 4-byte words inside each 8-byte half before the cast.
FORCE_INLINE rx_vec_f128 rx_cast_vec_i2f(rx_vec_i128 a) {
#if !defined(NATIVE_LITTLE_ENDIAN)
	const __m128i swapWords = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
	__m128i src = (__m128i)a;
	return (rx_vec_f128)vec_perm(src, src, swapWords);
#else
	return (rx_vec_f128)a;
#endif
}

// Reinterpret a floating-point vector as an integer vector.
// Little-endian builds (NATIVE_LITTLE_ENDIAN defined) use a plain vector cast.
// All other builds first run the input through vec_perm with the index list
// {4..7, 0..3, 12..15, 8..11}; with byte-sized mask elements this exchanges the
// two 4-byte words inside each 8-byte half before the cast.
FORCE_INLINE rx_vec_i128 rx_cast_vec_f2i(rx_vec_f128 a) {
#if !defined(NATIVE_LITTLE_ENDIAN)
	const __m128i swapWords = {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
	__m128i src = (__m128i)a;
	return (rx_vec_i128)vec_perm(src, src, swapWords);
#else
	return (rx_vec_i128)a;
#endif
}

FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
Expand Down
2 changes: 2 additions & 0 deletions src/jit_compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ namespace randomx {
#include "jit_compiler_a64.hpp"
#elif defined(RANDOMX_COMPILER_RV64)
#include "jit_compiler_rv64.hpp"
#elif defined(RANDOMX_COMPILER_PPC64)
#include "jit_compiler_ppc64.hpp"
#else
#include "jit_compiler_fallback.hpp"
#endif
Expand Down
1,622 changes: 1,622 additions & 0 deletions src/jit_compiler_ppc64.cpp

Large diffs are not rendered by default.

120 changes: 120 additions & 0 deletions src/jit_compiler_ppc64.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
Copyright (c) 2023 tevador <tevador@gmail.com>
Copyright (c) 2026, Forest Crossman <cyrozap@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

#include <vector>

#include "common.hpp"
#include "jit_compiler.hpp"

#include "jit_compiler_ppc64_static.hpp"

// PPC_BIG_ENDIAN is 1 when __BYTE_ORDER__ is defined and equals
// __ORDER_BIG_ENDIAN__, and 0 in every other case (including when
// __BYTE_ORDER__ is not defined at all).
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define PPC_BIG_ENDIAN 1
#else
#define PPC_BIG_ENDIAN 0
#endif

// PPC_ABI_V2 is 1 when _CALL_ELF is defined as 2, or when _CALL_ELF is not
// defined and the target is not big-endian; otherwise it is 0.
// In this header it selects whether JitCompilerPPC64 hands out the raw entry
// pointers (1) or the addresses of its descriptor arrays (0).
// NOTE(review): _CALL_ELF is presumably the compiler-provided ELF ABI version
// macro (1 or 2) - confirm for every supported toolchain.
#if (defined(_CALL_ELF) && _CALL_ELF == 2) || (!defined(_CALL_ELF) && !PPC_BIG_ENDIAN)
#define PPC_ABI_V2 1
#else
#define PPC_ABI_V2 0
#endif

namespace randomx {

class Program;
struct ProgramConfiguration;
class SuperscalarProgram;
class Instruction;

// JIT compiler interface for 64-bit PowerPC.
//
// Only the declaration lives here; the constructor, destructor, generators,
// memory-protection switches and temporary-register getters are defined in
// another translation unit.
//
// Every data member carries a default member initializer so that no member is
// ever read with an indeterminate value, whatever the constructor assigns.
// NOTE(review): the class declares a destructor but no copy/move operations;
// if the destructor releases state.code, copying an instance would be unsafe -
// confirm against the implementation.
class JitCompilerPPC64 {
public:
	JitCompilerPPC64();
	~JitCompilerPPC64();

	// Code generators for a RandomX program; the third argument of the
	// "light" variant is unnamed in this declaration.
	void generateProgram(Program&, ProgramConfiguration&);
	void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);

	// Code generator for the superscalar hash; the vector argument is unnamed
	// in this declaration.
	void generateSuperscalarHash(SuperscalarProgramList& programs, std::vector<uint64_t> &);

	// Intentionally empty: this backend does nothing in this hook.
	void generateDatasetInitCode() {}

	// Returns the callable program entry.
	// With PPC_ABI_V2 this is entryProgram itself; otherwise it is the address
	// of descriptorProgram.
	// NOTE(review): descriptorProgram is presumably an ELFv1 function
	// descriptor filled in by the implementation - confirm.
	ProgramFunc* getProgramFunc() {
#if PPC_ABI_V2
		return reinterpret_cast<ProgramFunc*>(entryProgram);
#else
		return reinterpret_cast<ProgramFunc*>(descriptorProgram);
#endif
	}

	// Returns the callable dataset-initialization entry, selected the same way
	// as getProgramFunc().
	DatasetInitFunc* getDatasetInitFunc() {
#if PPC_ABI_V2
		return reinterpret_cast<DatasetInitFunc*>(entryDataInit);
#else
		return reinterpret_cast<DatasetInitFunc*>(descriptorDataInit);
#endif
	}

	// Returns the code pointer stored in the compiler state.
	uint8_t* getCode() { return state.code; }
	size_t getCodeSize();

	// Memory-protection switches for the code buffer (defined elsewhere).
	void enableWriting();
	void enableExecution();
	void enableAll();

	// Stores the flags that are later passed to the emit* helpers.
	void setFlags(randomx_flags f) { flags = f; }

	// Temporary register getters (defined elsewhere).
	// NOTE(review): presumably driven by tempGprIndex / tempVrIndex - confirm.
	uint32_t getTempGpr();
	uint32_t getTempVr();

	static uint8_t instMap[256];

private:
	void emitProgramPrefix(CompilerState& state, Program& prog, ProgramConfiguration& pcfg, randomx_flags flags);
	void emitProgramSuffix(CompilerState& state, ProgramConfiguration& pcfg, randomx_flags flags);

	CompilerState state;
	// Value-initialized so that code generation before setFlags() sees a
	// well-defined (zero) flag set instead of an indeterminate value.
	randomx_flags flags{};

	void* entryDataInit = nullptr;
	void* entryProgram = nullptr;
#if !PPC_ABI_V2
	// Zero-filled by default so the getters never expose indeterminate words.
	uint64_t descriptorProgram[3] = {};
	uint64_t descriptorDataInit[3] = {};
#endif

	// Code positions recorded by the implementation; zero until assigned.
	int32_t RandomXCodePos = 0;
	int32_t SshashSingleItemPos = 0;
	int32_t LoopBeginPos = 0;

	uint32_t tempGprIndex = 0;
	uint32_t tempVrIndex = 0;
};

}
Loading
Loading