427 lines
12 KiB
C++
427 lines
12 KiB
C++
/*-------------------------------------------------------------------------
|
|
* drawElements Quality Program Tester Core
|
|
* ----------------------------------------
|
|
*
|
|
* Copyright 2014 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
*//*!
|
|
* \file
|
|
* \brief Adjustable-precision floating point operations.
|
|
*//*--------------------------------------------------------------------*/
|
|
|
|
#include "tcuFloatFormat.hpp"
|
|
|
|
#include "deMath.h"
|
|
#include "deUniquePtr.hpp"
|
|
|
|
#include <sstream>
|
|
#include <iomanip>
|
|
#include <limits>
|
|
|
|
namespace tcu
|
|
{
|
|
namespace
|
|
{
|
|
|
|
Interval chooseInterval(YesNoMaybe choice, const Interval& no, const Interval& yes)
|
|
{
|
|
switch (choice)
|
|
{
|
|
case NO: return no;
|
|
case YES: return yes;
|
|
case MAYBE: return no | yes;
|
|
default: DE_FATAL("Impossible case");
|
|
}
|
|
|
|
return Interval();
|
|
}
|
|
|
|
double computeMaxValue (int maxExp, int fractionBits)
|
|
{
|
|
return (deLdExp(1.0, maxExp) +
|
|
deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits));
|
|
}
|
|
|
|
} // anonymous
|
|
|
|
FloatFormat::FloatFormat (int minExp,
|
|
int maxExp,
|
|
int fractionBits,
|
|
bool exactPrecision,
|
|
YesNoMaybe hasSubnormal_,
|
|
YesNoMaybe hasInf_,
|
|
YesNoMaybe hasNaN_)
|
|
: m_minExp (minExp)
|
|
, m_maxExp (maxExp)
|
|
, m_fractionBits (fractionBits)
|
|
, m_hasSubnormal (hasSubnormal_)
|
|
, m_hasInf (hasInf_)
|
|
, m_hasNaN (hasNaN_)
|
|
, m_exactPrecision (exactPrecision)
|
|
, m_maxValue (computeMaxValue(maxExp, fractionBits))
|
|
{
|
|
DE_ASSERT(minExp <= maxExp);
|
|
}
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* On the definition of ULP
|
|
*
|
|
* The GLSL spec does not define ULP. However, it refers to IEEE 754, which
|
|
* (reportedly) uses Harrison's definition:
|
|
*
|
|
* ULP(x) is the distance between the closest floating point numbers
|
|
* a and be such that a <= x <= b and a != b
|
|
*
|
|
* Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1), i.e. it is the
|
|
* distance to the next lowest float, not next highest.
|
|
*
|
|
* Furthermore, it is assumed that ULP is calculated relative to the exact
|
|
* value, not the approximation. This is because otherwise a less accurate
|
|
* approximation could be closer in ULPs, because its ULPs are bigger.
|
|
*
|
|
* For details, see "On the definition of ulp(x)" by Jean-Michel Muller
|
|
*
|
|
*-----------------------------------------------------------------------*/
|
|
|
|
double FloatFormat::ulp (double x, double count) const
|
|
{
|
|
int exp = 0;
|
|
const double frac = deFractExp(deAbs(x), &exp);
|
|
|
|
if (deIsNaN(frac))
|
|
return TCU_NAN;
|
|
else if (deIsInf(frac))
|
|
return deLdExp(1.0, m_maxExp - m_fractionBits);
|
|
else if (frac == 1.0)
|
|
{
|
|
// Harrison's ULP: choose distance to closest (i.e. next lower) at binade
|
|
// boundary.
|
|
--exp;
|
|
}
|
|
else if (frac == 0.0)
|
|
exp = m_minExp;
|
|
|
|
// ULP cannot be lower than the smallest quantum.
|
|
exp = de::max(exp, m_minExp);
|
|
|
|
{
|
|
const double oneULP = deLdExp(1.0, exp - m_fractionBits);
|
|
ScopedRoundingMode ctx (DE_ROUNDINGMODE_TO_POSITIVE_INF);
|
|
|
|
return oneULP * count;
|
|
}
|
|
}
|
|
|
|
//! Return the difference between the given nominal exponent and
|
|
//! the exponent of the lowest significand bit of the
|
|
//! representation of a number with this format.
|
|
//! For normal numbers this is the number of significand bits, but
|
|
//! for subnormals it is less and for values of exp where 2^exp is too
|
|
//! small to represent it is <0
|
|
int FloatFormat::exponentShift (int exp) const
|
|
{
|
|
return m_fractionBits - de::max(m_minExp - exp, 0);
|
|
}
|
|
|
|
//! Return the number closest to `d` that is exactly representable with the
|
|
//! significand bits and minimum exponent of the floatformat. Round up if
|
|
//! `upward` is true, otherwise down.
|
|
double FloatFormat::round (double d, bool upward) const
|
|
{
|
|
int exp = 0;
|
|
const double frac = deFractExp(d, &exp);
|
|
const int shift = exponentShift(exp);
|
|
const double shiftFrac = deLdExp(frac, shift);
|
|
const double roundFrac = upward ? deCeil(shiftFrac) : deFloor(shiftFrac);
|
|
|
|
return deLdExp(roundFrac, exp - shift);
|
|
}
|
|
|
|
//! Return the range of numbers that `d` might be converted to in the
|
|
//! floatformat, given its limitations with infinities, subnormals and maximum
|
|
//! exponent.
|
|
Interval FloatFormat::clampValue (double d) const
|
|
{
|
|
const double rSign = deSign(d);
|
|
int rExp = 0;
|
|
|
|
DE_ASSERT(!deIsNaN(d));
|
|
|
|
deFractExp(d, &rExp);
|
|
if (rExp < m_minExp)
|
|
return chooseInterval(m_hasSubnormal, rSign * 0.0, d);
|
|
else if (deIsInf(d) || rExp > m_maxExp)
|
|
return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY);
|
|
|
|
return Interval(d);
|
|
}
|
|
|
|
//! Return the range of numbers that might be used with this format to
|
|
//! represent a number within `x`.
|
|
Interval FloatFormat::convert (const Interval& x) const
|
|
{
|
|
Interval ret;
|
|
Interval tmp = x;
|
|
|
|
if (x.hasNaN())
|
|
{
|
|
// If NaN might be supported, NaN is a legal return value
|
|
if (m_hasNaN != NO)
|
|
ret |= TCU_NAN;
|
|
|
|
// If NaN might not be supported, any (non-NaN) value is legal,
|
|
// _subject_ to clamping. Hence we modify tmp, not ret.
|
|
if (m_hasNaN != YES)
|
|
tmp = Interval::unbounded();
|
|
}
|
|
|
|
// Round both bounds _inwards_ to closest representable values.
|
|
if (!tmp.empty())
|
|
ret |= clampValue(round(tmp.lo(), false)) | clampValue(round(tmp.hi(), true));
|
|
|
|
// If this format's precision is not exact, the (possibly out-of-bounds)
|
|
// original value is also a possible result.
|
|
if (!m_exactPrecision)
|
|
ret |= x;
|
|
|
|
return ret;
|
|
}
|
|
|
|
double FloatFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const
|
|
{
|
|
int exp = 0;
|
|
deFractExp(d, &exp);
|
|
|
|
if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0)))
|
|
return deSign(d) * getMaxValue();
|
|
else
|
|
return round(d, upward);
|
|
}
|
|
|
|
//! Round output of an operation.
|
|
//! \param roundUnderOverflow Can +/-inf rounded to min/max representable;
|
|
//! should be false if any of operands was inf, true otherwise.
|
|
Interval FloatFormat::roundOut (const Interval& x, bool roundUnderOverflow) const
|
|
{
|
|
Interval ret = x.nan();
|
|
|
|
if (!x.empty())
|
|
ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow),
|
|
roundOut(x.hi(), true, roundUnderOverflow));
|
|
|
|
return ret;
|
|
}
|
|
|
|
std::string FloatFormat::floatToHex (double x) const
|
|
{
|
|
if (deIsNaN(x))
|
|
return "NaN";
|
|
else if (deIsInf(x))
|
|
return (x < 0.0 ? "-" : "+") + std::string("inf");
|
|
else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero
|
|
return "0.0";
|
|
|
|
int exp = 0;
|
|
const double frac = deFractExp(deAbs(x), &exp);
|
|
const int shift = exponentShift(exp);
|
|
const deUint64 bits = deUint64(deLdExp(frac, shift));
|
|
const deUint64 whole = bits >> m_fractionBits;
|
|
const deUint64 fraction = bits & ((deUint64(1) << m_fractionBits) - 1);
|
|
const int exponent = exp + m_fractionBits - shift;
|
|
const int numDigits = (m_fractionBits + 3) / 4;
|
|
const deUint64 aligned = fraction << (numDigits * 4 - m_fractionBits);
|
|
std::ostringstream oss;
|
|
|
|
oss << (x < 0 ? "-" : "")
|
|
<< "0x" << whole << "."
|
|
<< std::hex << std::setw(numDigits) << std::setfill('0') << aligned
|
|
<< "p" << std::dec << std::setw(0) << exponent;
|
|
|
|
return oss.str();
|
|
}
|
|
|
|
std::string FloatFormat::intervalToHex (const Interval& interval) const
|
|
{
|
|
if (interval.empty())
|
|
return interval.hasNaN() ? "{ NaN }" : "{}";
|
|
|
|
else if (interval.lo() == interval.hi())
|
|
return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") +
|
|
floatToHex(interval.lo()) + " }");
|
|
else if (interval == Interval::unbounded(true))
|
|
return "<any>";
|
|
|
|
return (std::string(interval.hasNaN() ? "{ NaN } | " : "") +
|
|
"[" + floatToHex(interval.lo()) + ", " + floatToHex(interval.hi()) + "]");
|
|
}
|
|
|
|
template <typename T>
|
|
static FloatFormat nativeFormat (void)
|
|
{
|
|
typedef std::numeric_limits<T> Limits;
|
|
|
|
DE_ASSERT(Limits::radix == 2);
|
|
|
|
return FloatFormat(Limits::min_exponent - 1, // These have a built-in offset of one
|
|
Limits::max_exponent - 1,
|
|
Limits::digits - 1, // don't count the hidden bit
|
|
Limits::has_denorm != std::denorm_absent,
|
|
Limits::has_infinity ? YES : NO,
|
|
Limits::has_quiet_NaN ? YES : NO,
|
|
((Limits::has_denorm == std::denorm_present) ? YES :
|
|
(Limits::has_denorm == std::denorm_absent) ? NO :
|
|
MAYBE));
|
|
}
|
|
|
|
FloatFormat FloatFormat::nativeFloat (void)
|
|
{
|
|
return nativeFormat<float>();
|
|
}
|
|
|
|
FloatFormat FloatFormat::nativeDouble (void)
|
|
{
|
|
return nativeFormat<double>();
|
|
}
|
|
|
|
NormalizedFormat::NormalizedFormat (int fractionBits)
|
|
: FloatFormat(0, 0, fractionBits, true, tcu::YES)
|
|
{
|
|
|
|
}
|
|
|
|
double NormalizedFormat::round(double d, bool upward) const
|
|
{
|
|
const int fractionBits = getFractionBits();
|
|
|
|
if (fractionBits <= 0)
|
|
return d;
|
|
|
|
const int maxIntValue = (1 << fractionBits) - 1;
|
|
const double value = d * maxIntValue;
|
|
const double normValue = upward ? deCeil(value) : deFloor(value);
|
|
return normValue / maxIntValue;
|
|
}
|
|
|
|
double NormalizedFormat::ulp(double x, double count) const
|
|
{
|
|
(void) x;
|
|
|
|
const int maxIntValue = (1 << getFractionBits()) - 1;
|
|
const double precision = 1.0 / maxIntValue;
|
|
return precision * count;
|
|
}
|
|
|
|
double NormalizedFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const
|
|
{
|
|
if (roundUnderOverflow && deAbs(d) > 1.0 && (upward == (d < 0.0)))
|
|
return deSign(d);
|
|
else
|
|
return round(d, upward);
|
|
}
|
|
|
|
namespace
|
|
{
|
|
|
|
using std::string;
|
|
using std::ostringstream;
|
|
using de::MovePtr;
|
|
using de::UniquePtr;
|
|
|
|
class Test
|
|
{
|
|
protected:
|
|
|
|
Test (MovePtr<FloatFormat> fmt) : m_fmt(fmt) {}
|
|
double p (int e) const { return deLdExp(1.0, e); }
|
|
void check (const string& expr,
|
|
double result,
|
|
double reference) const;
|
|
void testULP (double arg, double ref) const;
|
|
void testRound (double arg, double refDown, double refUp) const;
|
|
|
|
UniquePtr<FloatFormat> m_fmt;
|
|
};
|
|
|
|
void Test::check (const string& expr, double result, double reference) const
|
|
{
|
|
if (result != reference)
|
|
{
|
|
ostringstream oss;
|
|
oss << expr << " returned " << result << ", expected " << reference;
|
|
TCU_FAIL(oss.str().c_str());
|
|
}
|
|
}
|
|
|
|
void Test::testULP (double arg, double ref) const
|
|
{
|
|
ostringstream oss;
|
|
|
|
oss << "ulp(" << arg << ")";
|
|
check(oss.str(), m_fmt->ulp(arg), ref);
|
|
}
|
|
|
|
void Test::testRound (double arg, double refDown, double refUp) const
|
|
{
|
|
{
|
|
ostringstream oss;
|
|
oss << "round(" << arg << ", false)";
|
|
check(oss.str(), m_fmt->round(arg, false), refDown);
|
|
}
|
|
{
|
|
ostringstream oss;
|
|
oss << "round(" << arg << ", true)";
|
|
check(oss.str(), m_fmt->round(arg, true), refUp);
|
|
}
|
|
}
|
|
|
|
class TestBinary32 : public Test
|
|
{
|
|
public:
|
|
TestBinary32 (void)
|
|
: Test (MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true))) {}
|
|
|
|
void runTest (void) const;
|
|
};
|
|
|
|
void TestBinary32::runTest (void) const
|
|
{
|
|
testULP(p(0), p(-24));
|
|
testULP(p(0) + p(-23), p(-23));
|
|
testULP(p(-124), p(-148));
|
|
testULP(p(-125), p(-149));
|
|
testULP(p(-125) + p(-140), p(-148));
|
|
testULP(p(-126), p(-149));
|
|
testULP(p(-130), p(-149));
|
|
|
|
testRound(p(0) + p(-20) + p(-40), p(0) + p(-20), p(0) + p(-20) + p(-23));
|
|
testRound(p(-126) - p(-150), p(-126) - p(-149), p(-126));
|
|
|
|
TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0");
|
|
TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8");
|
|
TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
|
|
TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
|
|
TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125");
|
|
}
|
|
|
|
} // anonymous
|
|
|
|
void FloatFormat_selfTest (void)
|
|
{
|
|
TestBinary32 test32;
|
|
test32.runTest();
|
|
}
|
|
|
|
} // tcu
|