/* Source listing metadata: 115 lines, 2.7 KiB, C */
#include "crc32c.h"
|
|
|
|
/*
 * Advance three independent CRC32C accumulators by one 64-bit word each.
 *
 * Expands in a scope that provides `data` (byte pointer to the current
 * position) and the uint32_t accumulators crc0, crc1 and crc2. Lane k reads
 * the word at index 42*k + ITR counted from `data`, so the three lanes walk
 * three consecutive 42-word regions.
 *
 * crc0 is updated last on purpose: the caller seeds crc0 immediately before
 * the first expansion, and touching it last avoids a back-to-back dependency
 * on that result.
 *
 * The expansion is three complete statements (the last one carries its own
 * semicolon), so it is written without a trailing ';' at the use site.
 */
#define CRC32C3X8(ITR) \
	crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\
	crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\
	crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR)));
|
|
|
|
/*
 * Unroll seven consecutive CRC32C3X8 steps as a single statement.
 *
 * Invocation ITR covers word indices ITR*7 + 0 .. ITR*7 + 6 of each lane, so
 * calling it with ITR = 0..5 visits all 42 words of every lane exactly once.
 * Wrapped in do { } while(0) so it behaves as one statement and takes a
 * trailing ';' at the use site.
 */
#define CRC32C7X3X8(ITR) do {\
	CRC32C3X8((ITR)*7+0) \
	CRC32C3X8((ITR)*7+1) \
	CRC32C3X8((ITR)*7+2) \
	CRC32C3X8((ITR)*7+3) \
	CRC32C3X8((ITR)*7+4) \
	CRC32C3X8((ITR)*7+5) \
	CRC32C3X8((ITR)*7+6) \
	} while(0)
|
|
|
|
/*
 * Fallback for build environments whose headers do not provide HWCAP_CRC32:
 * bit 7 of the AT_HWCAP value tested in crc32c_arm64_probe().
 *
 * The replacement text is kept as exactly (1 << 7) so that a definition
 * arriving later from a system header with the same spelling is a harmless
 * identical redefinition.
 * NOTE(review): this guard is evaluated before <sys/auxv.h> is included, so it
 * only detects a definition that is already visible via "crc32c.h" - confirm
 * that ordering is intended.
 */
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif /* HWCAP_CRC32 */
|
|
|
|
/*
 * Non-zero once crc32c_arm64_probe() has found the required CPU support;
 * starts at 0. Defined outside the ARCH_HAVE_ARM64_CRC_CRYPTO guard, so the
 * symbol exists (and stays 0) even when the accelerated code is compiled out.
 */
int crc32c_arm64_available = 0;
|
|
|
|
#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
|
|
|
|
#include <sys/auxv.h>
|
|
#include <arm_acle.h>
|
|
#include <arm_neon.h>
|
|
|
|
/* Non-zero after crc32c_arm64_probe() has run once, so the query is not repeated. */
static int crc32c_probed;
|
|
|
|
/*
 * Function to calculate reflected crc with the CRC32C and PMULL instructions.
 * crc done "by 3" for fixed input block size of 1024 bytes.
 *
 * data:   first byte of the input buffer
 * length: number of bytes to process
 *
 * Returns the running CRC. The accumulator starts at ~0 and no final
 * inversion is applied here; a zero-length input returns that seed unchanged.
 *
 * Layout of one 1024-byte block (128 64-bit words):
 *   word 0          -> folded into crc0 together with the running crc
 *   words 1..42     -> lane 0 (crc0)
 *   words 43..84    -> lane 1 (crc1)
 *   words 85..126   -> lane 2 (crc2)
 *   word 127        -> folded into crc2 while the lanes are merged
 * The three lanes are independent, which lets their CRC instructions overlap;
 * they are recombined with two polynomial multiplies (K1 for crc0, K2 for
 * crc1).
 *
 * Whatever is left after the last full block is consumed 8 bytes at a time,
 * then as at most one 4-, one 2- and one 1-byte step.
 *
 * NOTE(review): all multi-byte loads go through casted pointers into `data`,
 * so this relies on the target tolerating unaligned reads - confirm against
 * the callers' buffers.
 */
uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
{
	signed long len = length;
	uint32_t crc = ~0;
	uint32_t crc0, crc1, crc2;

	/* Load two consts: K1 and K2 */
	const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
	uint64_t t0, t1;

	/* One iteration per full 1024-byte block; len goes negative on exit. */
	while ((len -= 1024) >= 0) {
		/* Do first 8 bytes here for better pipelining */
		crc0 = __crc32cd(crc, *(const uint64_t *)data);
		crc1 = 0;
		crc2 = 0;
		data += sizeof(uint64_t);

		/*
		 * Process block inline.
		 * Process crc0 last to avoid dependency with above.
		 * Six groups of seven steps = 42 words per lane.
		 */
		CRC32C7X3X8(0);
		CRC32C7X3X8(1);
		CRC32C7X3X8(2);
		CRC32C7X3X8(3);
		CRC32C7X3X8(4);
		CRC32C7X3X8(5);

		/* Skip the 3 * 42 words the lanes just consumed. */
		data += 42*3*sizeof(uint64_t);

		/*
		 * Merge crc0 and crc1 into crc2:
		 *   crc1 multiply by K2
		 *   crc0 multiply by K1
		 * The multiplies are issued first so they can overlap with the
		 * CRC of the block's final word below.
		 */
		t1 = (uint64_t)vmull_p64(crc1, k2);
		t0 = (uint64_t)vmull_p64(crc0, k1);
		crc = __crc32cd(crc2, *(const uint64_t *)data);
		crc1 = __crc32cd(0, t1);
		crc ^= crc1;
		crc0 = __crc32cd(0, t0);
		crc ^= crc0;

		data += sizeof(uint64_t);
	}

	/* Undo the last subtraction; nothing left means we are done. */
	if (!(len += 1024))
		return crc;

	/* Remaining whole 64-bit words. */
	while ((len -= sizeof(uint64_t)) >= 0) {
		crc = __crc32cd(crc, *(const uint64_t *)data);
		data += sizeof(uint64_t);
	}

	/*
	 * The following is more efficient than the straight loop.
	 * len is now in [-8, -1]; its low three bits equal the number of
	 * leftover bytes (0..7), so each bit selects one 4/2/1-byte step.
	 */
	if (len & sizeof(uint32_t)) {
		crc = __crc32cw(crc, *(const uint32_t *)data);
		data += sizeof(uint32_t);
	}
	if (len & sizeof(uint16_t)) {
		crc = __crc32ch(crc, *(const uint16_t *)data);
		data += sizeof(uint16_t);
	}
	if (len & sizeof(uint8_t)) {
		crc = __crc32cb(crc, *(const uint8_t *)data);
	}

	return crc;
}
|
|
|
|
void crc32c_arm64_probe(void)
|
|
{
|
|
unsigned long hwcap;
|
|
|
|
if (!crc32c_probed) {
|
|
hwcap = getauxval(AT_HWCAP);
|
|
if (hwcap & HWCAP_CRC32)
|
|
crc32c_arm64_available = 1;
|
|
crc32c_probed = 1;
|
|
}
|
|
}
|
|
|
|
#endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */
|