2024-09-30 13:39:47 +06:00
|
|
|
#ifndef PPC_CONTEXT_H_INCLUDED
|
|
|
|
#define PPC_CONTEXT_H_INCLUDED
|
2024-09-24 17:32:04 +03:00
|
|
|
|
|
|
|
#ifndef PPC_CONFIG_H_INCLUDED
|
|
|
|
#error "ppc_config.h must be included before ppc_context.h"
|
|
|
|
#endif
|
|
|
|
|
2024-09-27 18:47:35 +03:00
|
|
|
#include <cmath>
|
|
|
|
#include <csetjmp>
|
2024-09-08 08:32:31 +06:00
|
|
|
#include <cstdint>
|
2024-09-10 19:26:53 +03:00
|
|
|
#include <cstdlib>
|
2024-09-15 20:30:56 +03:00
|
|
|
#include <cstring>
|
2024-09-10 19:26:53 +03:00
|
|
|
|
2024-09-27 18:47:35 +03:00
|
|
|
#include <intrin.h>
|
2024-09-12 21:50:36 +03:00
|
|
|
#include <x86intrin.h>
|
2024-09-10 19:26:53 +03:00
|
|
|
|
2024-09-30 18:45:07 +06:00
|
|
|
#define PPC_JOIN(x, y) x##y
|
|
|
|
#define PPC_XSTRINGIFY(x) #x
|
|
|
|
#define PPC_STRINGIFY(x) PPC_XSTRINGIFY(x)
|
2024-09-27 18:47:35 +03:00
|
|
|
#define PPC_FUNC(x) void x(PPCContext& __restrict ctx, uint8_t* base)
|
2024-09-30 18:45:07 +06:00
|
|
|
#define PPC_FUNC_IMPL(x) extern "C" PPC_FUNC(x)
|
2024-09-30 20:41:32 +06:00
|
|
|
#define PPC_EXTERN_FUNC(x) extern PPC_FUNC(x)
|
2024-09-27 18:47:35 +03:00
|
|
|
#define PPC_WEAK_FUNC(x) __attribute__((weak,noinline)) PPC_FUNC(x)
|
2024-09-20 18:05:13 +03:00
|
|
|
|
2024-09-27 12:37:12 +03:00
|
|
|
#define PPC_FUNC_PROLOGUE() __builtin_assume(((size_t)base & 0xFFFFFFFF) == 0)
|
2024-09-22 12:50:11 +03:00
|
|
|
|
2024-11-24 16:16:08 +06:00
|
|
|
#ifndef PPC_LOAD_U8
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_LOAD_U8(x) *(volatile uint8_t*)(base + (x))
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_LOAD_U16
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_LOAD_U16(x) __builtin_bswap16(*(volatile uint16_t*)(base + (x)))
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_LOAD_U32
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_LOAD_U32(x) __builtin_bswap32(*(volatile uint32_t*)(base + (x)))
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_LOAD_U64
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_LOAD_U64(x) __builtin_bswap64(*(volatile uint64_t*)(base + (x)))
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
2024-11-17 21:00:36 +03:00
|
|
|
|
2024-11-24 15:36:50 +06:00
|
|
|
// TODO: Implement.
|
|
|
|
// These are currently unused. However, MMIO loads could possibly be handled statically with some profiling and a fallback.
|
|
|
|
// The fallback would be a runtime exception handler which will intercept reads from MMIO regions
|
|
|
|
// and log the PC for compiling to static code later.
|
2024-11-24 16:16:08 +06:00
|
|
|
#ifndef PPC_MM_LOAD_U8
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_LOAD_U8(x) PPC_LOAD_U8 (x)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_MM_LOAD_U16
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_LOAD_U16(x) PPC_LOAD_U16(x)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_MM_LOAD_U32
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_LOAD_U32(x) PPC_LOAD_U32(x)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_MM_LOAD_U64
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_LOAD_U64(x) PPC_LOAD_U64(x)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
2024-11-24 15:36:50 +06:00
|
|
|
|
2024-11-24 16:16:08 +06:00
|
|
|
#ifndef PPC_STORE_U8
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_STORE_U8(x, y) *(volatile uint8_t*)(base + (x)) = (y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_STORE_U16
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_STORE_U16(x, y) *(volatile uint16_t*)(base + (x)) = __builtin_bswap16(y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_STORE_U32
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_STORE_U32(x, y) *(volatile uint32_t*)(base + (x)) = __builtin_bswap32(y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_STORE_U64
|
2024-11-17 21:00:36 +03:00
|
|
|
#define PPC_STORE_U64(x, y) *(volatile uint64_t*)(base + (x)) = __builtin_bswap64(y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
2024-09-10 19:26:53 +03:00
|
|
|
|
2024-11-24 15:36:50 +06:00
|
|
|
// MMIO Store handling is completely reliant on being preeceded by eieio.
|
|
|
|
// TODO: Verify if that's always the case.
|
2024-11-24 16:16:08 +06:00
|
|
|
#ifndef PPC_MM_STORE_U8
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_STORE_U8(x, y) PPC_STORE_U8 (x, y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_MM_STORE_U16
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_STORE_U16(x, y) PPC_STORE_U16(x, y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_MM_STORE_U32
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_STORE_U32(x, y) PPC_STORE_U32(x, y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_MM_STORE_U64
|
2024-11-24 15:36:50 +06:00
|
|
|
#define PPC_MM_STORE_U64(x, y) PPC_STORE_U64(x, y)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
2024-11-24 15:36:50 +06:00
|
|
|
|
2024-11-24 16:16:08 +06:00
|
|
|
#ifndef PPC_CALL_FUNC
|
2024-09-24 17:32:04 +03:00
|
|
|
#define PPC_CALL_FUNC(x) x(ctx, base)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_CALL_INDIRECT_FUNC
|
2024-09-24 17:32:04 +03:00
|
|
|
#define PPC_CALL_INDIRECT_FUNC(x) (*(PPCFunc**)(ctx.fn + uint64_t(x) * 2))(ctx, base)
|
2024-11-24 16:16:08 +06:00
|
|
|
#endif
|
2024-09-24 17:32:04 +03:00
|
|
|
|
2024-09-22 19:58:22 +03:00
|
|
|
typedef void PPCFunc(struct PPCContext& __restrict__ ctx, uint8_t* base);
|
2024-09-13 17:32:54 +03:00
|
|
|
|
2024-09-18 13:31:37 +03:00
|
|
|
struct PPCFuncMapping
|
|
|
|
{
|
|
|
|
size_t guest;
|
|
|
|
PPCFunc* host;
|
|
|
|
};
|
|
|
|
|
2024-09-27 18:47:35 +03:00
|
|
|
extern PPCFuncMapping PPCFuncMappings[];
|
2024-09-18 16:04:52 +03:00
|
|
|
|
2024-09-10 19:26:53 +03:00
|
|
|
struct PPCRegister
|
|
|
|
{
|
|
|
|
union
|
|
|
|
{
|
|
|
|
int8_t s8;
|
|
|
|
uint8_t u8;
|
|
|
|
int16_t s16;
|
|
|
|
uint16_t u16;
|
|
|
|
int32_t s32;
|
|
|
|
uint32_t u32;
|
|
|
|
int64_t s64;
|
|
|
|
uint64_t u64;
|
|
|
|
float f32;
|
|
|
|
double f64;
|
|
|
|
};
|
|
|
|
};
|
2024-09-08 08:32:31 +06:00
|
|
|
|
2024-09-12 17:41:39 +03:00
|
|
|
struct PPCXERRegister
|
|
|
|
{
|
|
|
|
uint8_t so;
|
|
|
|
uint8_t ov;
|
|
|
|
uint8_t ca;
|
|
|
|
};
|
|
|
|
|
2024-09-10 23:16:21 +03:00
|
|
|
struct PPCCRRegister
|
|
|
|
{
|
|
|
|
uint8_t lt;
|
|
|
|
uint8_t gt;
|
|
|
|
uint8_t eq;
|
2024-09-13 15:26:03 +03:00
|
|
|
union
|
|
|
|
{
|
|
|
|
uint8_t so;
|
|
|
|
uint8_t un;
|
|
|
|
};
|
2024-09-12 15:34:22 +03:00
|
|
|
|
|
|
|
template<typename T>
|
2024-09-24 17:32:04 +03:00
|
|
|
inline void compare(T left, T right, const PPCXERRegister& xer) noexcept
|
2024-09-12 15:34:22 +03:00
|
|
|
{
|
|
|
|
lt = left < right;
|
|
|
|
gt = left > right;
|
|
|
|
eq = left == right;
|
2024-09-12 17:41:39 +03:00
|
|
|
so = xer.so;
|
2024-09-12 15:34:22 +03:00
|
|
|
}
|
2024-09-13 15:26:03 +03:00
|
|
|
|
2024-09-24 17:32:04 +03:00
|
|
|
inline void compare(double left, double right) noexcept
|
2024-09-13 15:26:03 +03:00
|
|
|
{
|
2024-09-22 19:58:22 +03:00
|
|
|
un = __builtin_isnan(left) || __builtin_isnan(right);
|
|
|
|
lt = !un && (left < right);
|
|
|
|
gt = !un && (left > right);
|
|
|
|
eq = !un && (left == right);
|
2024-09-13 15:26:03 +03:00
|
|
|
}
|
2024-09-17 21:32:13 +03:00
|
|
|
|
2024-09-24 17:32:04 +03:00
|
|
|
inline void setFromMask(__m128 mask, int imm) noexcept
|
2024-09-17 21:32:13 +03:00
|
|
|
{
|
|
|
|
int m = _mm_movemask_ps(mask);
|
|
|
|
lt = m == imm; // all equal
|
|
|
|
gt = 0;
|
|
|
|
eq = m == 0; // none equal
|
|
|
|
so = 0;
|
|
|
|
}
|
|
|
|
|
2024-09-24 17:32:04 +03:00
|
|
|
inline void setFromMask(__m128i mask, int imm) noexcept
|
2024-09-17 21:32:13 +03:00
|
|
|
{
|
|
|
|
int m = _mm_movemask_epi8(mask);
|
|
|
|
lt = m == imm; // all equal
|
|
|
|
gt = 0;
|
|
|
|
eq = m == 0; // none equal
|
|
|
|
so = 0;
|
|
|
|
}
|
2024-09-10 23:16:21 +03:00
|
|
|
};
|
|
|
|
|
2024-09-14 23:17:51 +03:00
|
|
|
struct alignas(0x10) PPCVRegister
|
|
|
|
{
|
|
|
|
union
|
|
|
|
{
|
|
|
|
int8_t s8[16];
|
|
|
|
uint8_t u8[16];
|
|
|
|
int16_t s16[8];
|
|
|
|
uint16_t u16[8];
|
|
|
|
int32_t s32[4];
|
|
|
|
uint32_t u32[4];
|
|
|
|
int64_t s64[2];
|
|
|
|
uint64_t u64[2];
|
|
|
|
float f32[4];
|
|
|
|
double f64[2];
|
|
|
|
};
|
|
|
|
};
|
2024-09-08 08:32:31 +06:00
|
|
|
|
2024-09-27 12:37:12 +03:00
|
|
|
#define PPC_ROUND_NEAREST 0x00
|
|
|
|
#define PPC_ROUND_TOWARD_ZERO 0x01
|
|
|
|
#define PPC_ROUND_UP 0x02
|
|
|
|
#define PPC_ROUND_DOWN 0x03
|
|
|
|
#define PPC_ROUND_MASK 0x03
|
|
|
|
|
2024-09-22 00:07:15 +03:00
|
|
|
struct PPCFPSCRRegister
|
2024-09-20 18:05:13 +03:00
|
|
|
{
|
2024-09-22 00:07:15 +03:00
|
|
|
uint32_t csr;
|
2024-09-20 18:05:13 +03:00
|
|
|
|
2024-09-27 12:37:12 +03:00
|
|
|
static constexpr size_t GuestToHost[] = { _MM_ROUND_NEAREST, _MM_ROUND_TOWARD_ZERO, _MM_ROUND_UP, _MM_ROUND_DOWN };
|
|
|
|
static constexpr size_t HostToGuest[] = { PPC_ROUND_NEAREST, PPC_ROUND_DOWN, PPC_ROUND_UP, PPC_ROUND_TOWARD_ZERO };
|
|
|
|
|
2024-09-24 17:32:04 +03:00
|
|
|
inline uint32_t loadFromHost() noexcept
|
2024-09-20 18:05:13 +03:00
|
|
|
{
|
2024-09-22 00:07:15 +03:00
|
|
|
csr = _mm_getcsr();
|
2024-09-27 12:37:12 +03:00
|
|
|
return HostToGuest[(csr & _MM_ROUND_MASK) >> 13];
|
2024-09-22 00:07:15 +03:00
|
|
|
}
|
|
|
|
|
2024-09-24 17:32:04 +03:00
|
|
|
inline void storeFromGuest(uint32_t value) noexcept
|
2024-09-22 00:07:15 +03:00
|
|
|
{
|
|
|
|
csr &= ~_MM_ROUND_MASK;
|
2024-09-27 12:37:12 +03:00
|
|
|
csr |= GuestToHost[value & PPC_ROUND_MASK];
|
2024-09-22 00:07:15 +03:00
|
|
|
_mm_setcsr(csr);
|
2024-09-20 18:05:13 +03:00
|
|
|
}
|
|
|
|
|
2024-09-26 17:24:04 +03:00
|
|
|
static constexpr size_t FlushMask = _MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK;
|
|
|
|
|
2024-09-26 18:31:56 +03:00
|
|
|
inline void enableFlushModeUnconditional() noexcept
|
2024-09-20 18:05:13 +03:00
|
|
|
{
|
2024-09-26 17:24:04 +03:00
|
|
|
csr |= FlushMask;
|
|
|
|
_mm_setcsr(csr);
|
|
|
|
}
|
|
|
|
|
2024-09-26 18:31:56 +03:00
|
|
|
inline void disableFlushModeUnconditional() noexcept
|
2024-09-26 17:24:04 +03:00
|
|
|
{
|
|
|
|
csr &= ~FlushMask;
|
|
|
|
_mm_setcsr(csr);
|
|
|
|
}
|
|
|
|
|
2024-09-26 18:31:56 +03:00
|
|
|
inline void enableFlushMode() noexcept
|
2024-09-26 17:24:04 +03:00
|
|
|
{
|
|
|
|
if ((csr & FlushMask) != FlushMask) [[unlikely]]
|
|
|
|
{
|
|
|
|
csr |= FlushMask;
|
|
|
|
_mm_setcsr(csr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-26 18:31:56 +03:00
|
|
|
inline void disableFlushMode() noexcept
|
2024-09-26 17:24:04 +03:00
|
|
|
{
|
|
|
|
if ((csr & FlushMask) != 0) [[unlikely]]
|
2024-09-20 18:05:13 +03:00
|
|
|
{
|
2024-09-26 17:24:04 +03:00
|
|
|
csr &= ~FlushMask;
|
|
|
|
_mm_setcsr(csr);
|
2024-09-20 18:05:13 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2024-09-08 08:32:31 +06:00
|
|
|
struct PPCContext
|
|
|
|
{
|
2024-09-24 17:32:04 +03:00
|
|
|
uint8_t* fn;
|
|
|
|
|
|
|
|
#ifndef PPC_CONFIG_SKIP_LR
|
2024-09-08 08:32:31 +06:00
|
|
|
uint64_t lr;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
#ifndef PPC_CONFIG_CTR_AS_LOCAL
|
2024-09-19 13:54:13 +03:00
|
|
|
PPCRegister ctr;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
#ifndef PPC_CONFIG_XER_AS_LOCAL
|
2024-09-12 17:41:39 +03:00
|
|
|
PPCXERRegister xer;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
#ifndef PPC_CONFIG_RESERVED_AS_LOCAL
|
2024-09-15 15:32:29 +03:00
|
|
|
PPCRegister reserved;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
#ifndef PPC_CONFIG_SKIP_MSR
|
2024-09-22 19:58:22 +03:00
|
|
|
uint32_t msr = 0x200A000;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
2024-09-08 08:32:31 +06:00
|
|
|
|
2024-09-24 17:32:04 +03:00
|
|
|
#ifndef PPC_CONFIG_CR_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCCRRegister cr0;
|
|
|
|
PPCCRRegister cr1;
|
|
|
|
PPCCRRegister cr2;
|
|
|
|
PPCCRRegister cr3;
|
|
|
|
PPCCRRegister cr4;
|
|
|
|
PPCCRRegister cr5;
|
|
|
|
PPCCRRegister cr6;
|
|
|
|
PPCCRRegister cr7;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef PPC_CONFIG_NON_ARGUMENT_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister r0;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister r1;
|
2024-09-24 17:32:04 +03:00
|
|
|
#ifndef PPC_CONFIG_NON_ARGUMENT_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister r2;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister r3;
|
|
|
|
PPCRegister r4;
|
|
|
|
PPCRegister r5;
|
|
|
|
PPCRegister r6;
|
|
|
|
PPCRegister r7;
|
|
|
|
PPCRegister r8;
|
|
|
|
PPCRegister r9;
|
|
|
|
PPCRegister r10;
|
2024-09-24 17:32:04 +03:00
|
|
|
#ifndef PPC_CONFIG_NON_ARGUMENT_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister r11;
|
|
|
|
PPCRegister r12;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister r13;
|
2024-09-24 17:32:04 +03:00
|
|
|
#ifndef PPC_CONFIG_NON_VOLATILE_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister r14;
|
|
|
|
PPCRegister r15;
|
|
|
|
PPCRegister r16;
|
|
|
|
PPCRegister r17;
|
|
|
|
PPCRegister r18;
|
|
|
|
PPCRegister r19;
|
|
|
|
PPCRegister r20;
|
|
|
|
PPCRegister r21;
|
|
|
|
PPCRegister r22;
|
|
|
|
PPCRegister r23;
|
|
|
|
PPCRegister r24;
|
|
|
|
PPCRegister r25;
|
|
|
|
PPCRegister r26;
|
|
|
|
PPCRegister r27;
|
|
|
|
PPCRegister r28;
|
|
|
|
PPCRegister r29;
|
|
|
|
PPCRegister r30;
|
|
|
|
PPCRegister r31;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
2024-09-10 19:26:53 +03:00
|
|
|
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCFPSCRRegister fpscr;
|
2024-09-10 19:26:53 +03:00
|
|
|
|
2024-09-24 17:32:04 +03:00
|
|
|
#ifndef PPC_CONFIG_NON_ARGUMENT_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister f0;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister f1;
|
|
|
|
PPCRegister f2;
|
|
|
|
PPCRegister f3;
|
|
|
|
PPCRegister f4;
|
|
|
|
PPCRegister f5;
|
|
|
|
PPCRegister f6;
|
|
|
|
PPCRegister f7;
|
|
|
|
PPCRegister f8;
|
|
|
|
PPCRegister f9;
|
|
|
|
PPCRegister f10;
|
|
|
|
PPCRegister f11;
|
|
|
|
PPCRegister f12;
|
|
|
|
PPCRegister f13;
|
2024-09-24 17:32:04 +03:00
|
|
|
#ifndef PPC_CONFIG_NON_VOLATILE_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCRegister f14;
|
|
|
|
PPCRegister f15;
|
|
|
|
PPCRegister f16;
|
|
|
|
PPCRegister f17;
|
|
|
|
PPCRegister f18;
|
|
|
|
PPCRegister f19;
|
|
|
|
PPCRegister f20;
|
|
|
|
PPCRegister f21;
|
|
|
|
PPCRegister f22;
|
|
|
|
PPCRegister f23;
|
|
|
|
PPCRegister f24;
|
|
|
|
PPCRegister f25;
|
|
|
|
PPCRegister f26;
|
|
|
|
PPCRegister f27;
|
|
|
|
PPCRegister f28;
|
|
|
|
PPCRegister f29;
|
|
|
|
PPCRegister f30;
|
|
|
|
PPCRegister f31;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCVRegister v0;
|
|
|
|
PPCVRegister v1;
|
|
|
|
PPCVRegister v2;
|
|
|
|
PPCVRegister v3;
|
|
|
|
PPCVRegister v4;
|
|
|
|
PPCVRegister v5;
|
|
|
|
PPCVRegister v6;
|
|
|
|
PPCVRegister v7;
|
|
|
|
PPCVRegister v8;
|
|
|
|
PPCVRegister v9;
|
|
|
|
PPCVRegister v10;
|
|
|
|
PPCVRegister v11;
|
|
|
|
PPCVRegister v12;
|
|
|
|
PPCVRegister v13;
|
2024-09-24 17:32:04 +03:00
|
|
|
#ifndef PPC_CONFIG_NON_VOLATILE_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCVRegister v14;
|
|
|
|
PPCVRegister v15;
|
|
|
|
PPCVRegister v16;
|
|
|
|
PPCVRegister v17;
|
|
|
|
PPCVRegister v18;
|
|
|
|
PPCVRegister v19;
|
|
|
|
PPCVRegister v20;
|
|
|
|
PPCVRegister v21;
|
|
|
|
PPCVRegister v22;
|
|
|
|
PPCVRegister v23;
|
|
|
|
PPCVRegister v24;
|
|
|
|
PPCVRegister v25;
|
|
|
|
PPCVRegister v26;
|
|
|
|
PPCVRegister v27;
|
|
|
|
PPCVRegister v28;
|
|
|
|
PPCVRegister v29;
|
|
|
|
PPCVRegister v30;
|
|
|
|
PPCVRegister v31;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
#ifndef PPC_CONFIG_NON_ARGUMENT_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCVRegister v32;
|
|
|
|
PPCVRegister v33;
|
|
|
|
PPCVRegister v34;
|
|
|
|
PPCVRegister v35;
|
|
|
|
PPCVRegister v36;
|
|
|
|
PPCVRegister v37;
|
|
|
|
PPCVRegister v38;
|
|
|
|
PPCVRegister v39;
|
|
|
|
PPCVRegister v40;
|
|
|
|
PPCVRegister v41;
|
|
|
|
PPCVRegister v42;
|
|
|
|
PPCVRegister v43;
|
|
|
|
PPCVRegister v44;
|
|
|
|
PPCVRegister v45;
|
|
|
|
PPCVRegister v46;
|
|
|
|
PPCVRegister v47;
|
|
|
|
PPCVRegister v48;
|
|
|
|
PPCVRegister v49;
|
|
|
|
PPCVRegister v50;
|
|
|
|
PPCVRegister v51;
|
|
|
|
PPCVRegister v52;
|
|
|
|
PPCVRegister v53;
|
|
|
|
PPCVRegister v54;
|
|
|
|
PPCVRegister v55;
|
|
|
|
PPCVRegister v56;
|
|
|
|
PPCVRegister v57;
|
|
|
|
PPCVRegister v58;
|
|
|
|
PPCVRegister v59;
|
|
|
|
PPCVRegister v60;
|
|
|
|
PPCVRegister v61;
|
|
|
|
PPCVRegister v62;
|
|
|
|
PPCVRegister v63;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
|
|
|
#ifndef PPC_CONFIG_NON_VOLATILE_AS_LOCAL
|
2024-09-22 19:58:22 +03:00
|
|
|
PPCVRegister v64;
|
|
|
|
PPCVRegister v65;
|
|
|
|
PPCVRegister v66;
|
|
|
|
PPCVRegister v67;
|
|
|
|
PPCVRegister v68;
|
|
|
|
PPCVRegister v69;
|
|
|
|
PPCVRegister v70;
|
|
|
|
PPCVRegister v71;
|
|
|
|
PPCVRegister v72;
|
|
|
|
PPCVRegister v73;
|
|
|
|
PPCVRegister v74;
|
|
|
|
PPCVRegister v75;
|
|
|
|
PPCVRegister v76;
|
|
|
|
PPCVRegister v77;
|
|
|
|
PPCVRegister v78;
|
|
|
|
PPCVRegister v79;
|
|
|
|
PPCVRegister v80;
|
|
|
|
PPCVRegister v81;
|
|
|
|
PPCVRegister v82;
|
|
|
|
PPCVRegister v83;
|
|
|
|
PPCVRegister v84;
|
|
|
|
PPCVRegister v85;
|
|
|
|
PPCVRegister v86;
|
|
|
|
PPCVRegister v87;
|
|
|
|
PPCVRegister v88;
|
|
|
|
PPCVRegister v89;
|
|
|
|
PPCVRegister v90;
|
|
|
|
PPCVRegister v91;
|
|
|
|
PPCVRegister v92;
|
|
|
|
PPCVRegister v93;
|
|
|
|
PPCVRegister v94;
|
|
|
|
PPCVRegister v95;
|
|
|
|
PPCVRegister v96;
|
|
|
|
PPCVRegister v97;
|
|
|
|
PPCVRegister v98;
|
|
|
|
PPCVRegister v99;
|
|
|
|
PPCVRegister v100;
|
|
|
|
PPCVRegister v101;
|
|
|
|
PPCVRegister v102;
|
|
|
|
PPCVRegister v103;
|
|
|
|
PPCVRegister v104;
|
|
|
|
PPCVRegister v105;
|
|
|
|
PPCVRegister v106;
|
|
|
|
PPCVRegister v107;
|
|
|
|
PPCVRegister v108;
|
|
|
|
PPCVRegister v109;
|
|
|
|
PPCVRegister v110;
|
|
|
|
PPCVRegister v111;
|
|
|
|
PPCVRegister v112;
|
|
|
|
PPCVRegister v113;
|
|
|
|
PPCVRegister v114;
|
|
|
|
PPCVRegister v115;
|
|
|
|
PPCVRegister v116;
|
|
|
|
PPCVRegister v117;
|
|
|
|
PPCVRegister v118;
|
|
|
|
PPCVRegister v119;
|
|
|
|
PPCVRegister v120;
|
|
|
|
PPCVRegister v121;
|
|
|
|
PPCVRegister v122;
|
|
|
|
PPCVRegister v123;
|
|
|
|
PPCVRegister v124;
|
|
|
|
PPCVRegister v125;
|
|
|
|
PPCVRegister v126;
|
|
|
|
PPCVRegister v127;
|
2024-09-24 17:32:04 +03:00
|
|
|
#endif
|
2024-09-08 08:32:31 +06:00
|
|
|
};
|
2024-09-15 19:48:15 +03:00
|
|
|
|
2024-09-16 20:33:46 +03:00
|
|
|
inline uint8_t VectorMaskL[] =
|
|
|
|
{
|
|
|
|
0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
|
|
|
|
0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
|
|
|
|
0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
|
|
|
|
0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E,
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F,
|
|
|
|
};
|
|
|
|
|
|
|
|
inline uint8_t VectorMaskR[] =
|
|
|
|
{
|
|
|
|
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF,
|
|
|
|
0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF,
|
|
|
|
0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF,
|
|
|
|
};
|
|
|
|
|
|
|
|
inline uint8_t VectorShiftTableL[] =
|
|
|
|
{
|
|
|
|
0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
|
|
|
|
0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
|
|
|
|
0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
|
|
|
|
0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
|
|
|
|
0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
|
|
|
|
0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05,
|
|
|
|
0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06,
|
|
|
|
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07,
|
|
|
|
0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
|
|
|
|
0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09,
|
|
|
|
0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A,
|
|
|
|
0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B,
|
|
|
|
0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C,
|
|
|
|
0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D,
|
|
|
|
0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E,
|
|
|
|
0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F,
|
|
|
|
};
|
|
|
|
|
|
|
|
inline uint8_t VectorShiftTableR[] =
|
|
|
|
{
|
|
|
|
0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
|
|
|
|
0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F,
|
|
|
|
0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E,
|
|
|
|
0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D,
|
|
|
|
0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C,
|
|
|
|
0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B,
|
|
|
|
0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A,
|
|
|
|
0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09,
|
|
|
|
0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
|
|
|
|
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07,
|
|
|
|
0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06,
|
|
|
|
0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05,
|
|
|
|
0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
|
|
|
|
0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
|
|
|
|
0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
|
|
|
|
0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
|
|
|
|
};
|
|
|
|
|
2024-09-15 19:48:15 +03:00
|
|
|
inline __m128i _mm_adds_epu32(__m128i a, __m128i b)
|
|
|
|
{
|
2024-09-22 23:54:17 +03:00
|
|
|
return _mm_add_epi32(a, _mm_min_epu32(_mm_xor_si128(a, _mm_cmpeq_epi32(a, a)), b));
|
2024-09-15 19:48:15 +03:00
|
|
|
}
|
2024-09-16 16:03:23 +03:00
|
|
|
|
|
|
|
inline __m128i _mm_avg_epi8(__m128i a, __m128i b)
|
|
|
|
{
|
|
|
|
__m128i c = _mm_set1_epi8(char(128));
|
2024-09-20 17:01:57 +03:00
|
|
|
return _mm_xor_si128(c, _mm_avg_epu8(_mm_xor_si128(c, a), _mm_xor_si128(c, b)));
|
2024-09-16 16:03:23 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
inline __m128i _mm_avg_epi16(__m128i a, __m128i b)
|
|
|
|
{
|
|
|
|
__m128i c = _mm_set1_epi16(short(32768));
|
2024-09-20 17:01:57 +03:00
|
|
|
return _mm_xor_si128(c, _mm_avg_epu16(_mm_xor_si128(c, a), _mm_xor_si128(c, b)));
|
2024-09-16 16:03:23 +03:00
|
|
|
}
|
|
|
|
|
2024-09-22 23:54:17 +03:00
|
|
|
inline __m128 _mm_cvtepu32_ps_(__m128i src1)
|
2024-09-16 16:03:23 +03:00
|
|
|
{
|
2024-09-22 23:54:17 +03:00
|
|
|
__m128i xmm1 = _mm_add_epi32(src1, _mm_set1_epi32(127));
|
|
|
|
__m128i xmm0 = _mm_slli_epi32(src1, 31 - 8);
|
|
|
|
xmm0 = _mm_srli_epi32(xmm0, 31);
|
|
|
|
xmm0 = _mm_add_epi32(xmm0, xmm1);
|
|
|
|
xmm0 = _mm_srai_epi32(xmm0, 8);
|
|
|
|
xmm0 = _mm_add_epi32(xmm0, _mm_set1_epi32(0x4F800000));
|
|
|
|
__m128 xmm2 = _mm_cvtepi32_ps(src1);
|
|
|
|
return _mm_blendv_ps(xmm2, _mm_castsi128_ps(xmm0), _mm_castsi128_ps(src1));
|
2024-09-16 16:03:23 +03:00
|
|
|
}
|
2024-09-17 19:56:44 +03:00
|
|
|
|
2024-09-18 15:19:20 +03:00
|
|
|
inline __m128i _mm_perm_epi8_(__m128i a, __m128i b, __m128i c)
|
2024-09-17 19:56:44 +03:00
|
|
|
{
|
|
|
|
__m128i d = _mm_set1_epi8(0xF);
|
|
|
|
__m128i e = _mm_sub_epi8(d, _mm_and_si128(c, d));
|
|
|
|
return _mm_blendv_epi8(_mm_shuffle_epi8(a, e), _mm_shuffle_epi8(b, e), _mm_slli_epi32(c, 3));
|
|
|
|
}
|
2024-09-17 21:32:13 +03:00
|
|
|
|
|
|
|
inline __m128i _mm_cmpgt_epu8(__m128i a, __m128i b)
|
|
|
|
{
|
2024-09-20 17:01:57 +03:00
|
|
|
__m128i c = _mm_set1_epi8(char(128));
|
2024-09-17 21:32:13 +03:00
|
|
|
return _mm_cmpgt_epi8(_mm_xor_si128(a, c), _mm_xor_si128(b, c));
|
|
|
|
}
|
|
|
|
|
|
|
|
inline __m128i _mm_cmpgt_epu16(__m128i a, __m128i b)
|
|
|
|
{
|
2024-09-20 17:01:57 +03:00
|
|
|
__m128i c = _mm_set1_epi16(short(32768));
|
2024-09-17 21:32:13 +03:00
|
|
|
return _mm_cmpgt_epi16(_mm_xor_si128(a, c), _mm_xor_si128(b, c));
|
|
|
|
}
|
|
|
|
|
2024-09-22 23:54:17 +03:00
|
|
|
inline __m128i _mm_vctsxs(__m128 src1)
|
2024-09-20 17:01:57 +03:00
|
|
|
{
|
2024-09-22 23:54:17 +03:00
|
|
|
__m128 xmm2 = _mm_cmpunord_ps(src1, src1);
|
|
|
|
__m128i xmm0 = _mm_cvttps_epi32(src1);
|
|
|
|
__m128i xmm1 = _mm_cmpeq_epi32(xmm0, _mm_set1_epi32(INT_MIN));
|
|
|
|
xmm1 = _mm_andnot_si128(_mm_castps_si128(src1), xmm1);
|
|
|
|
__m128 dest = _mm_blendv_ps(_mm_castsi128_ps(xmm0), _mm_castsi128_ps(_mm_set1_epi32(INT_MAX)), _mm_castsi128_ps(xmm1));
|
|
|
|
return _mm_andnot_si128(_mm_castps_si128(xmm2), _mm_castps_si128(dest));
|
2024-09-20 17:01:57 +03:00
|
|
|
}
|
2024-09-21 21:47:34 +03:00
|
|
|
|
2024-11-03 21:27:33 +01:00
|
|
|
inline __m128i _mm_vctuxs(__m128 src1)
|
|
|
|
{
|
|
|
|
__m128 xmm0 = _mm_max_ps(src1, _mm_set1_epi32(0));
|
|
|
|
__m128 xmm1 = _mm_cmpge_ps(xmm0, _mm_set1_ps((float)0x80000000));
|
|
|
|
__m128 xmm2 = _mm_sub_ps(xmm0, _mm_set1_ps((float)0x80000000));
|
|
|
|
xmm0 = _mm_blendv_ps(xmm0, xmm2, xmm1);
|
|
|
|
__m128i dest = _mm_cvttps_epi32(xmm0);
|
|
|
|
xmm0 = _mm_cmpeq_epi32(dest, _mm_set1_epi32(INT_MIN));
|
|
|
|
xmm1 = _mm_and_si128(xmm1, _mm_set1_epi32(INT_MIN));
|
|
|
|
dest = _mm_add_epi32(dest, xmm1);
|
|
|
|
return _mm_or_si128(dest, xmm0);
|
|
|
|
}
|
|
|
|
|
2024-09-21 21:47:34 +03:00
|
|
|
inline __m128i _mm_vsr(__m128i a, __m128i b)
|
|
|
|
{
|
|
|
|
b = _mm_srli_epi64(_mm_slli_epi64(b, 61), 61);
|
|
|
|
return _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(_mm_srl_epi64(a, b)), _mm_castsi128_ps(_mm_srl_epi64(_mm_srli_si128(a, 4), b)), 0x10));
|
|
|
|
}
|
2024-09-30 13:39:47 +06:00
|
|
|
|
|
|
|
#endif
|