|
| 1 | +/* |
| 2 | + This file is part of Leela Chess Zero. |
| 3 | + Copyright (C) 2018 The LCZero Authors |
| 4 | +
|
| 5 | + Leela Chess is free software: you can redistribute it and/or modify |
| 6 | + it under the terms of the GNU General Public License as published by |
| 7 | + the Free Software Foundation, either version 3 of the License, or |
| 8 | + (at your option) any later version. |
| 9 | +
|
| 10 | + Leela Chess is distributed in the hope that it will be useful, |
| 11 | + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | + GNU General Public License for more details. |
| 14 | +
|
| 15 | + You should have received a copy of the GNU General Public License |
| 16 | + along with Leela Chess. If not, see <http://www.gnu.org/licenses/>. |
| 17 | +
|
| 18 | + Additional permission under GNU GPL version 3 section 7 |
| 19 | +
|
| 20 | + If you modify this Program, or any covered work, by linking or |
| 21 | + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA |
| 22 | + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a |
| 23 | + modified version of those libraries), containing parts covered by the |
| 24 | + terms of the respective license agreement, the licensors of this |
| 25 | + Program grant you additional permission to convey the resulting work. |
| 26 | +*/ |
| 27 | + |
| 28 | +#pragma once |
| 29 | + |
#include <cmath>
#include <cstdint>
#include <cstring>
| 31 | + |
| 32 | +namespace lczero { |
| 33 | +// These stunts are performed by trained professionals, do not try this at home. |
| 34 | + |
// Cheap approximation of the base-2 logarithm of `a`.
// The float is split into its exponent N and mantissa m = 1+f (f in [0,1)),
// and log2(2^N*(1+f)) is approximated by N + f*(1.342671 - 0.342671*f).
// The polynomial below is written in terms of m rather than f, so its
// constants already fold in the shift by one and the exponent bias of 127.
// No range checking is done: zero, negative, subnormal, infinite and NaN
// inputs are not treated specially.
inline float FastLog2(const float a) {
  constexpr int32_t kMantissaMask = 0x7fffff;
  constexpr int32_t kExponentOfOne = 0x7f << 23;
  constexpr float kLinear = 2.028011f;
  constexpr float kQuadratic = 0.342671f;
  constexpr float kOffset = 128.68534f;

  // Reinterpret the float as raw bits without violating aliasing rules.
  int32_t bits;
  std::memcpy(&bits, &a, sizeof(float));

  // Biased exponent (the sign bit is shifted in along with it).
  const int biased_exponent = (bits >> 23);

  // Keep the mantissa bits and force the exponent to that of 1.0, which
  // yields a float m in [1, 2).
  const int32_t mantissa_bits = (bits & kMantissaMask) | kExponentOfOne;
  float mantissa;
  std::memcpy(&mantissa, &mantissa_bits, sizeof(float));

  return mantissa * (kLinear - kQuadratic * mantissa) - kOffset +
         biased_exponent;
}
| 47 | + |
// Fast approximate 2^x.
// The approximation used here is 2^(N+f) ~ 2^N*(1+f*(0.656366+0.343634*f))
// where N is the integer and f the fractional part, f>=0.
// Range checking is limited: arguments below -126 return 0, but there is no
// upper-bound or NaN check, so arguments of 128 or more are not handled.
inline float FastPow2(const float a) {
  static_assert(sizeof(float) == sizeof(std::uint32_t),
                "FastPow2 requires a 32-bit float");
  if (a < -126) return 0.0f;
  // N: integer part, rounded towards negative infinity so that f >= 0.
  const int exp = static_cast<int>(std::floor(a));
  // f: fractional part in [0, 1].
  float out = a - exp;
  // Quadratic approximation of 2^f, giving a value in [1, 2].
  out = 1.0f + out * (0.656366f + 0.343634f * out);
  // Multiply by 2^N by adding N directly to the exponent field of the float.
  // The arithmetic is done on unsigned integers: N is negative for every
  // argument in [-126, 0), and left-shifting a negative signed integer is
  // undefined behaviour before C++20. Unsigned wrap-around produces the same
  // bit pattern as the intended two's-complement addition.
  std::uint32_t bits;
  std::memcpy(&bits, &out, sizeof(float));
  bits += static_cast<std::uint32_t>(exp) << 23;
  std::memcpy(&out, &bits, sizeof(float));
  return out;
}
| 62 | + |
| 63 | +// Fast approximate ln(x). Does no range checking. |
| 64 | +inline float FastLog(const float a) { |
| 65 | + return 0.6931471805599453f * FastLog2(a); |
| 66 | +} |
| 67 | + |
| 68 | +} // namespace lczero |
0 commit comments