82 lines
1.7 KiB
C
82 lines
1.7 KiB
C
/*
|
|
* Compute 16-bit sum in ones' complement arithmetic (with end-around carry).
|
|
* This sum is often used as a simple checksum in networking.
|
|
*
|
|
* Copyright (c) 2020, Arm Limited.
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "networking.h"
|
|
#include "chksum_common.h"
|
|
|
|
always_inline
|
|
static inline uint32_t
|
|
slurp_head32(const void **pptr, uint32_t *nbytes)
|
|
{
|
|
uint32_t sum = 0;
|
|
Assert(*nbytes >= 4);
|
|
uint32_t off = (uintptr_t) *pptr % 4;
|
|
if (likely(off != 0))
|
|
{
|
|
/* Get rid of bytes 0..off-1 */
|
|
const unsigned char *ptr32 = align_ptr(*pptr, 4);
|
|
uint32_t mask = ~0U << (CHAR_BIT * off);
|
|
sum = load32(ptr32) & mask;
|
|
*pptr = ptr32 + 4;
|
|
*nbytes -= 4 - off;
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
/* Additional loop unrolling would help when not auto-vectorizing */
|
|
unsigned short
|
|
__chksum(const void *ptr, unsigned int nbytes)
|
|
{
|
|
bool swap = false;
|
|
uint64_t sum = 0;
|
|
|
|
if (nbytes > 300)
|
|
{
|
|
/* 4-byte align pointer */
|
|
swap = (uintptr_t) ptr & 1;
|
|
sum = slurp_head32(&ptr, &nbytes);
|
|
}
|
|
/* Else benefit of aligning not worth the overhead */
|
|
|
|
/* Sum all 16-byte chunks */
|
|
const char *cptr = ptr;
|
|
for (uint32_t nquads = nbytes / 16; nquads != 0; nquads--)
|
|
{
|
|
uint64_t h0 = load32(cptr + 0);
|
|
uint64_t h1 = load32(cptr + 4);
|
|
uint64_t h2 = load32(cptr + 8);
|
|
uint64_t h3 = load32(cptr + 12);
|
|
sum += h0 + h1 + h2 + h3;
|
|
cptr += 16;
|
|
}
|
|
nbytes %= 16;
|
|
Assert(nbytes < 16);
|
|
|
|
/* Handle any trailing 4-byte chunks */
|
|
while (nbytes >= 4)
|
|
{
|
|
sum += load32(cptr);
|
|
cptr += 4;
|
|
nbytes -= 4;
|
|
}
|
|
Assert(nbytes < 4);
|
|
|
|
if (nbytes & 2)
|
|
{
|
|
sum += load16(cptr);
|
|
cptr += 2;
|
|
}
|
|
|
|
if (nbytes & 1)
|
|
{
|
|
sum += *(uint8_t *)cptr;
|
|
}
|
|
|
|
return fold_and_swap(sum, swap);
|
|
}
|