32 #ifndef HERMES_GEOMETRY_CUDA_NUMERIC_H
33 #define HERMES_GEOMETRY_CUDA_NUMERIC_H
53 static constexpr
real_t pi = 3.14159265358979323846;
54 static constexpr
real_t two_pi = 6.28318530718;
55 static constexpr
real_t inv_pi = 0.31830988618379067154;
56 static constexpr
real_t inv_two_pi = 0.15915494309189533577;
57 static constexpr
real_t inv_four_pi = 0.07957747154594766788;
58 static constexpr
real_t pi_over_four = 0.78539816339;
59 static constexpr
real_t pi_over_two = 1.57079632679;
60 static constexpr
real_t machine_epsilon = std::numeric_limits<real_t>::epsilon() * .5;
61 static constexpr
real_t real_infinity = std::numeric_limits<real_t>::max();
62 static constexpr
f64 f64_one_minus_epsilon = 0x1.fffffffffffffp-1;
63 static constexpr
f32 f32_one_minus_epsilon = 0x1.fffffep-1;
64 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
65 static constexpr
real_t one_minus_epsilon = f64_one_minus_epsilon;
67 static constexpr
real_t one_minus_epsilon = f32_one_minus_epsilon;
81 return -0x1.fffffffffffffp+1023;
87 return -0x1.fffffep+127;
98 return 0x1.fffffep+127;
103 return 0x1.fffffffffffffp+1023;
177 return fmaxf(l, fminf(n, u));
203 return a >= 0 ? 1 : -1;
210 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
235 template<
typename Ta,
typename Tb,
typename Tc,
typename Td>
238 auto difference_of_products =
FMA(a, b, -cd);
240 return difference_of_products +
error;
248 template<
typename T,
typename C>
261 template<
typename T,
typename C,
typename... Args>
270 template<
typename Predicate>
272 using ssize_t = std::make_signed_t<size_t>;
273 ssize_t size = (ssize_t) sz - 2, first = 1;
275 size_t half = (size_t) size >> 1, middle = first + half;
276 bool predResult = pred(middle);
277 first = predResult ? middle + 1 : first;
278 size = predResult ? size - (half + 1) : half;
280 return (
size_t) clamp<ssize_t>((ssize_t) first - 1, 0, sz - 2);
290 n = (n ^ (n << 8)) & 0x00ff00ff;
291 n = (n ^ (n << 4)) & 0x0f0f0f0f;
292 n = (n ^ (n << 2)) & 0x33333333;
293 n = (n ^ (n << 1)) & 0x55555555;
300 n = (n ^ (n << 16)) & 0xff0000ff;
301 n = (n ^ (n << 8)) & 0x0300f00f;
302 n = (n ^ (n << 4)) & 0x030c30c3;
303 n = (n ^ (n << 2)) & 0x09249249;
345 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
346 return __float_as_uint(f);
349 std::memcpy(&ui, &f,
sizeof(
f32));
357 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
358 return __uint_as_float(ui);
361 std::memcpy(&f, &ui,
sizeof(uint32_t));
370 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
371 memcpy(&ui, &d,
sizeof(
f64));
373 std::memcpy(&ui, &d,
sizeof(
f64));
382 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
383 memcpy(&d, &ui,
sizeof(uint64_t));
385 std::memcpy(&d, &ui,
sizeof(uint64_t));
393 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
394 if (isinf(v) && v > 0.)
396 if (std::isinf(v) && v > 0.f)
412 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
413 if (isinf(v) && v > 0.)
415 if (std::isinf(v) && v > 0.f)
431 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
432 if (isinf(v) && v > 0.)
434 if (std::isinf(v) && v > 0.)
450 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
451 if (isinf(v) && v > 0.)
453 if (std::isinf(v) && v > 0.)
535 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
536 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
537 return __dmul_rd(a, b);
539 return __fmul_rd(a, b);
550 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
551 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
552 return __dmul_ru(a, b);
554 return __fmul_ru(a, b);
565 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
566 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
567 return __ddiv_rd(a, b);
569 return __fdiv_rd(a, b);
580 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
581 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
582 return __ddiv_ru(a, b);
584 return __fdiv_ru(a, b);
595 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
596 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
597 return __dadd_rd(a, b);
599 return __fadd_rd(a, b);
610 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
611 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
612 return __dadd_ru(a, b);
614 return __fadd_ru(a, b);
625 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
626 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
627 return __dsub_rd(a, b);
629 return __fsub_rd(a, b);
640 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
641 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
642 return __dsub_ru(a, b);
644 return __fsub_ru(a, b);
654 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
655 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
656 return __dsqrt_rd(a);
658 return __fsqrt_rd(a);
668 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
669 #ifdef HERMES_USE_DOUBLE_AS_DEFAULT
670 return __dsqrt_ru(a);
672 return __fsqrt_ru(a);
683 #ifndef HERMES_DEVICE_ENABLED
684 static f32 invLog2 = 1.f / logf(2.f);
686 f32 invLog2 = 1.f / logf(2.f);
688 return logf(x) * invLog2;
695 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
696 return sqrtf(fmaxf(0.f, x));
698 return std::sqrt(std::max(0.f, x));
708 return 1 /
pow<-n>(b);
709 float n2 =
pow<n / 2>(b);
710 return n2 * n2 * pow<n & 1>(b);
716 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
720 float xp = x * 1.442695041f;
723 float fxp = std::floor(xp), f = xp - fxp;
734 return Constants::real_infinity;
736 bits &= 0b10000000011111111111111111111111u;
737 bits |= (exponent + 127) << 23;
748 return (n * Constants::machine_epsilon) / (1 - n * Constants::machine_epsilon);
799 return a * 180.f / Constants::pi;
805 return a * Constants::pi / 180.f;
811 return std::acos(Numbers::clamp<f32>(x, -1, 1));
817 return std::asin(Numbers::clamp<f32>(x, -1, 1));
835 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
836 return fabs(a) < 1e-8;
838 return std::fabs(a) < 1e-8;
848 return fabs(a - b) < 1e-8;
858 return fabs(a - b) < e;
866 template<
typename T>
static constexpr
bool is_between(T x, T a, T b) {
867 return x > a && x < b;
876 return x >= a && x <= b;
885 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ > 0
888 return std::isnan(v);
905 T det = A[0][0] * A[1][1] - A[0][1] * A[1][0];
906 if (
abs(det) < 1e-10f)
908 *x0 = (A[1][1] * B[0] - A[0][1] * B[1]) / det;
909 *x1 = (A[0][0] * B[1] - A[1][0] * B[0]) / det;
910 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ > 0))
911 if(isnan(*x0) || isnan(*x1))
914 if (std::isnan(*x0) || std::isnan(*x1))
Debug, logging and assertion macros.
uint64_t u64
64 bit size unsigned integer type
Definition: defs.h:89
#define HERMES_UNUSED_VARIABLE(x)
Specifies that variable is not used in this scope.
Definition: debug.h:62
float real_t
default floating point type
Definition: defs.h:75
#define HERMES_DEVICE_CALLABLE
Specifies that the function can be called from both host and device sides.
Definition: defs.h:45
uint32_t u32
32 bit size unsigned integer type
Definition: defs.h:88
uint8_t u8
8 bit size unsigned integer type
Definition: defs.h:86
double f64
64 bit size floating point type
Definition: defs.h:79
float f32
32 bit size floating point type
Definition: defs.h:78
#define HERMES_CHECK_EXP(expr)
Warns if expression is false.
Definition: debug.h:95
int32_t i32
32 bit size integer type
Definition: defs.h:83
@ error
logs into error stream
HERMES_DEVICE_CALLABLE Normal3< T > abs(const Normal3< T > &normal)
Computes absolute normal components.
Definition: normal.h:212
HERMES_DEVICE_CALLABLE bool soveLinearSystem(const T A[2][2], const T B[2], T *x0, T *x1)
Solves a linear system of 2 equations.
Definition: numeric.h:904
Number checks.
Definition: numeric.h:825
static constexpr HERMES_DEVICE_CALLABLE bool is_equal(T a, T b, T e)
Checks if two numbers are at most to a threshold apart.
Definition: numeric.h:857
static constexpr bool is_between(T x, T a, T b)
Checks if a number is in a open interval.
Definition: numeric.h:866
static constexpr bool is_between_closed(T x, T a, T b)
Checks if a number is in a closed interval.
Definition: numeric.h:875
static constexpr HERMES_DEVICE_CALLABLE bool is_zero(T a)
Checks if number is 0.
Definition: numeric.h:834
static HERMES_DEVICE_CALLABLE std::enable_if_t< std::is_floating_point< T >::value, bool > is_nan(T v)
Checks if number representation is nan
Definition: numeric.h:884
static constexpr HERMES_DEVICE_CALLABLE bool is_equal(T a, T b)
Checks if two numbers are at most 1e-8 apart.
Definition: numeric.h:847
Numeric constants.
Definition: numeric.h:49
Number functions.
Definition: numeric.h:75
static HERMES_DEVICE_CALLABLE f32 nextFloatUp(f32 v)
Computes the next greater representable floating-point value.
Definition: numeric.h:392
static HERMES_DEVICE_CALLABLE real_t addRoundDown(real_t a, real_t b)
Adds and rounds down to the next smaller float value.
Definition: numeric.h:594
static constexpr HERMES_DEVICE_CALLABLE T min(const T &a, const T &b)
Computes minimum between two numbers.
Definition: numeric.h:117
static HERMES_DEVICE_CALLABLE f64 nextDoubleUp(f64 v)
Computes the next greater representable floating-point value.
Definition: numeric.h:430
static HERMES_DEVICE_CALLABLE real_t subRoundUp(real_t a, real_t b)
Subtracts and rounds up to the next float value.
Definition: numeric.h:639
static HERMES_DEVICE_CALLABLE void swap(T &a, T &b)
Swaps values.
Definition: numeric.h:184
static constexpr HERMES_DEVICE_CALLABLE T evaluatePolynomial(T t, C c)
Solves polynomial.
Definition: numeric.h:249
static HERMES_DEVICE_CALLABLE real_t divRoundUp(real_t a, real_t b)
Divides and rounds up to the next float value.
Definition: numeric.h:579
static HERMES_DEVICE_CALLABLE int floatSignificand(f32 v)
Extracts significand bits.
Definition: numeric.h:332
static constexpr HERMES_DEVICE_CALLABLE f64 greatest_f32()
Gets greatest representable 32 bit floating point.
Definition: numeric.h:97
static constexpr HERMES_DEVICE_CALLABLE real_t pow(real_t b)
Computes b to the power of n.
Definition: numeric.h:706
static constexpr HERMES_DEVICE_CALLABLE T sqrt(T a)
Computes square root.
Definition: numeric.h:209
static HERMES_DEVICE_CALLABLE real_t divRoundDown(real_t a, real_t b)
Divides and rounds down to the next smaller float value.
Definition: numeric.h:564
static constexpr HERMES_DEVICE_CALLABLE real_t gamma(i32 n)
Computes conservative bounds in error.
Definition: numeric.h:747
static HERMES_DEVICE_CALLABLE auto differenceOfProducts(Ta a, Tb b, Tc c, Td d)
Computes difference of products.
Definition: numeric.h:236
static constexpr HERMES_DEVICE_CALLABLE T greatest()
Gets greatest representable floating point.
Definition: numeric.h:108
static HERMES_DEVICE_CALLABLE real_t fract(real_t x)
Extract decimal fraction from x.
Definition: numeric.h:524
static HERMES_DEVICE_CALLABLE uint64_t floatToBits(f64 d)
Interprets a f64-point value into a integer type.
Definition: numeric.h:368
static HERMES_DEVICE_CALLABLE f32 bitsToFloat(uint32_t ui)
Fills a f32 variable data.
Definition: numeric.h:356
static constexpr HERMES_DEVICE_CALLABLE T cube(T a)
Computes square.
Definition: numeric.h:197
static HERMES_DEVICE_CALLABLE T FMA(T a, T b, T c)
Computes a * b + c.
Definition: numeric.h:222
static HERMES_DEVICE_CALLABLE T clamp(const T &n, const T &l, const T &u)
Clamps value to closed interval.
Definition: numeric.h:176
static HERMES_DEVICE_CALLABLE int sign(T a)
Computes sign.
Definition: numeric.h:202
static HERMES_DEVICE_CALLABLE u8 countHexDigits(T n)
Counts hexadecimal digits.
Definition: numeric.h:161
static constexpr HERMES_DEVICE_CALLABLE T min(std::initializer_list< T > l)
Computes minimum value from input.
Definition: numeric.h:137
static HERMES_DEVICE_CALLABLE real_t fastExp(real_t x)
Computes fast exponential.
Definition: numeric.h:715
static HERMES_DEVICE_CALLABLE f64 bitsToDouble(uint64_t ui)
Fills a f64 variable data.
Definition: numeric.h:380
static HERMES_DEVICE_CALLABLE real_t mulRoundUp(real_t a, real_t b)
Multiplies and rounds up to the next float value.
Definition: numeric.h:549
static HERMES_DEVICE_CALLABLE int floatExponent(f32 v)
Extracts exponent from floating-point number.
Definition: numeric.h:326
static HERMES_DEVICE_CALLABLE u32 interleaveBits(u32 x, u32 y)
Interleaves bits of two integers.
Definition: numeric.h:319
static HERMES_DEVICE_CALLABLE f64 nextDoubleDown(f64 v)
Computes the next smaller representable floating-point value.
Definition: numeric.h:449
static constexpr HERMES_DEVICE_CALLABLE int lowest_int()
Gets minimum representable 32 bit signed integer.
Definition: numeric.h:471
static HERMES_DEVICE_CALLABLE u32 separateBitsBy2(u32 n)
Separate bits by 2 bit-spaces.
Definition: numeric.h:299
static constexpr HERMES_DEVICE_CALLABLE f32 lowest_f32()
Gets lowest representable 64 bit floating point.
Definition: numeric.h:86
static HERMES_DEVICE_CALLABLE real_t addRoundUp(real_t a, real_t b)
Adds and rounds up to the next float value.
Definition: numeric.h:609
static constexpr HERMES_DEVICE_CALLABLE f64 lowest_f64()
Gets lowest representable 64 bit floating point.
Definition: numeric.h:80
static constexpr HERMES_DEVICE_CALLABLE T max(std::initializer_list< T > l)
Computes maximum value from input.
Definition: numeric.h:149
static HERMES_DEVICE_CALLABLE u32 separateBitsBy1(u32 n)
Separate bits by 1 bit-space.
Definition: numeric.h:289
static constexpr HERMES_DEVICE_CALLABLE T sqr(T a)
Definition: numeric.h:192
static HERMES_DEVICE_CALLABLE int round2Int(float f)
rounds to closest integer
Definition: numeric.h:502
static HERMES_DEVICE_CALLABLE size_t findInterval(size_t sz, const Predicate &pred)
Bisect range based on predicate.
Definition: numeric.h:271
static HERMES_DEVICE_CALLABLE f32 log2(f32 x)
Computes base 2 log.
Definition: numeric.h:682
static constexpr HERMES_DEVICE_CALLABLE int greatest_int()
Gets maximum representable 32 bit signed integer.
Definition: numeric.h:474
static HERMES_DEVICE_CALLABLE int floor2Int(float f)
rounds down
Definition: numeric.h:498
static HERMES_DEVICE_CALLABLE uint32_t floatToBits(f32 f)
Interprets a floating-point value into a integer type.
Definition: numeric.h:344
static HERMES_DEVICE_CALLABLE u8 countDigits(u64 t, u8 base=10)
Computes number of digits.
Definition: numeric.h:508
static HERMES_DEVICE_CALLABLE int ceil2Int(float f)
rounds up
Definition: numeric.h:494
static constexpr HERMES_DEVICE_CALLABLE T lowest()
Gets lowest representable floating point.
Definition: numeric.h:92
static constexpr HERMES_DEVICE_CALLABLE f64 greatest_f64()
Gets greatest representable 64 bit floating point.
Definition: numeric.h:102
static HERMES_DEVICE_CALLABLE uint32_t floatSignBit(f32 v)
Extracts sign bit.
Definition: numeric.h:338
static constexpr HERMES_DEVICE_CALLABLE T max(const T &a, const T &b)
Computes maximum between two numbers.
Definition: numeric.h:127
static HERMES_DEVICE_CALLABLE f32 nextFloatDown(f32 v)
Computes the next smaller representable floating-point value.
Definition: numeric.h:411
static HERMES_DEVICE_CALLABLE real_t sqrtRoundDown(real_t a)
Computes square root rounded down to the next smaller float value.
Definition: numeric.h:653
static constexpr HERMES_DEVICE_CALLABLE T evaluatePolynomial(T t, C c, Args... cs)
Solves polynomial.
Definition: numeric.h:262
static HERMES_DEVICE_CALLABLE f32 safe_sqrt(f32 x)
Computes square root with clamped input.
Definition: numeric.h:693
static HERMES_DEVICE_CALLABLE int mod(int a, int b)
Computes modulus.
Definition: numeric.h:483
static HERMES_DEVICE_CALLABLE real_t mulRoundDown(real_t a, real_t b)
Multiplies and rounds down to the next smaller float value.
Definition: numeric.h:534
static HERMES_DEVICE_CALLABLE real_t sqrtRoundUp(real_t a)
Computes square root rounded up to the next float value.
Definition: numeric.h:667
static HERMES_DEVICE_CALLABLE u32 interleaveBits(u32 x, u32 y, u32 z)
Interleaves bits of three integers.
Definition: numeric.h:311
static HERMES_DEVICE_CALLABLE real_t subRoundDown(real_t a, real_t b)
Subtracts and rounds down to the next smaller float value.
Definition: numeric.h:624
static constexpr HERMES_DEVICE_CALLABLE bool isPowerOf2(int v)
Checks if integer is power of 2.
Definition: numeric.h:478
Trigonometric functions.
Definition: numeric.h:791
static HERMES_DEVICE_CALLABLE f32 safe_asin(f32 x)
Computes asin with clamped input.
Definition: numeric.h:816
static constexpr HERMES_DEVICE_CALLABLE real_t radians2degrees(real_t a)
Converts radians to degrees.
Definition: numeric.h:798
static constexpr HERMES_DEVICE_CALLABLE real_t degrees2radians(real_t a)
Converts degrees to radians.
Definition: numeric.h:804
static HERMES_DEVICE_CALLABLE f32 safe_acos(f32 x)
Computes acos with clamped input.
Definition: numeric.h:810