149 lines
5.3 KiB
C++
149 lines
5.3 KiB
C++
// SPDX-License-Identifier: Apache-2.0
|
|
// ----------------------------------------------------------------------------
|
|
// Copyright 2011-2020 Arm Limited
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
// use this file except in compliance with the License. You may obtain a copy
|
|
// of the License at:
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
// License for the specific language governing permissions and limitations
|
|
// under the License.
|
|
// ----------------------------------------------------------------------------
|
|
|
|
/**
|
|
* @brief Soft-float library for IEEE-754.
|
|
*/
|
|
|
|
#include "astc_mathlib.h"
|
|
|
|
/******************************************
|
|
helper functions and their lookup tables
|
|
******************************************/
|
|
/* count leading zeros functions. Only used when the input is nonzero. */
|
|
|
|
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
|
|
#elif defined(__arm__) && defined(__ARMCC_VERSION)
|
|
#elif defined(__arm__) && defined(__GNUC__)
|
|
#else
|
|
/* table used for the slow default versions. */
|
|
static const uint8_t clz_table[256] =
|
|
{
|
|
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
};
|
|
#endif
|
|
|
|
/*
|
|
32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
|
|
uint32_t clz32(uint32_t inp)
|
|
{
|
|
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
|
|
uint32_t bsr;
|
|
__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
|
|
return 31 - bsr;
|
|
#else
|
|
#if defined(__arm__) && defined(__ARMCC_VERSION)
|
|
return __clz(inp); /* armcc builtin */
|
|
#else
|
|
#if defined(__arm__) && defined(__GNUC__)
|
|
uint32_t lz;
|
|
__asm__("clz %0, %1": "=r"(lz):"r"(inp));
|
|
return lz;
|
|
#else
|
|
/* slow default version */
|
|
uint32_t summa = 24;
|
|
if (inp >= UINT32_C(0x10000))
|
|
{
|
|
inp >>= 16;
|
|
summa -= 16;
|
|
}
|
|
if (inp >= UINT32_C(0x100))
|
|
{
|
|
inp >>= 8;
|
|
summa -= 8;
|
|
}
|
|
return summa + clz_table[inp];
|
|
#endif
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
/* convert from FP16 to FP32. */
|
|
sf32 sf16_to_sf32(sf16 inp)
|
|
{
|
|
uint32_t inpx = inp;
|
|
|
|
/*
|
|
This table contains, for every FP16 sign/exponent value combination,
|
|
the difference between the input FP16 value and the value obtained
|
|
by shifting the correct FP32 result right by 13 bits.
|
|
This table allows us to handle every case except denormals and NaN
|
|
with just 1 table lookup, 2 shifts and 1 add.
|
|
*/
|
|
|
|
#define WITH_MB(a) INT32_C((a) | (1 << 31))
|
|
static const int32_t tbl[64] =
|
|
{
|
|
WITH_MB(0x00000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
|
|
INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
|
|
INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
|
|
INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), WITH_MB(0x38000),
|
|
WITH_MB(0x38000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
|
|
INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
|
|
INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
|
|
INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), WITH_MB(0x70000)
|
|
};
|
|
|
|
int32_t res = tbl[inpx >> 10];
|
|
res += inpx;
|
|
|
|
/* the normal cases: the MSB of 'res' is not set. */
|
|
if (res >= 0) /* signed compare */
|
|
return res << 13;
|
|
|
|
/* Infinity and Zero: the bottom 10 bits of 'res' are clear. */
|
|
if ((res & UINT32_C(0x3FF)) == 0)
|
|
return res << 13;
|
|
|
|
/* NaN: the exponent field of 'inp' is not zero; NaNs must be quietened. */
|
|
if ((inpx & 0x7C00) != 0)
|
|
return (res << 13) | UINT32_C(0x400000);
|
|
|
|
/* the remaining cases are Denormals. */
|
|
{
|
|
uint32_t sign = (inpx & UINT32_C(0x8000)) << 16;
|
|
uint32_t mskval = inpx & UINT32_C(0x7FFF);
|
|
uint32_t leadingzeroes = clz32(mskval);
|
|
mskval <<= leadingzeroes;
|
|
return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
|
|
}
|
|
}
|
|
|
|
/* convert from soft-float to native-float */
|
|
float sf16_to_float(sf16 p)
|
|
{
|
|
if32 i;
|
|
i.u = sf16_to_sf32(p);
|
|
return i.f;
|
|
}
|
|
|