From c11d016bc7991923834157654b0e73c2e42921d1 Mon Sep 17 00:00:00 2001 From: Petr Ledvina Date: Sat, 3 Mar 2018 13:26:33 +0100 Subject: [PATCH] optimize math (#5287) * optimize math Results in considerable flash saving * log_approx, exp_approx, pow_approx Taken from https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h * Fix pow in rangefinder * Use approximate function in baro calculation Maximum error is < 20cm * fixup! Fix pow in rangefinder --- make/source.mk | 1 + src/main/common/explog_approx.c | 99 +++++++++++++++++++++++++++++++++++++++++ src/main/common/filter.c | 9 ++-- src/main/common/maths.h | 6 +++ src/main/io/osd.c | 2 +- src/main/sensors/barometer.c | 4 +- src/main/sensors/rangefinder.c | 3 +- 7 files changed, 117 insertions(+), 7 deletions(-) create mode 100644 src/main/common/explog_approx.c diff --git a/make/source.mk b/make/source.mk index f7d0fc304..5fed8a988 100644 --- a/make/source.mk +++ b/make/source.mk @@ -11,6 +11,7 @@ COMMON_SRC = \ common/huffman.c \ common/huffman_table.c \ common/maths.c \ + common/explog_approx.c \ common/printf.c \ common/streambuf.c \ common/string_light.c \ diff --git a/src/main/common/explog_approx.c b/src/main/common/explog_approx.c new file mode 100644 index 000000000..6dea0186a --- /dev/null +++ b/src/main/common/explog_approx.c @@ -0,0 +1,99 @@ +/* + +The MIT License (MIT) + +Copyright (c) 2015 Jacques-Henri Jourdan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +Taken from https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h +Stripped down for BF use + +*/ + +#include +#include + +/* Workaround a lack of optimization in gcc */ +float exp_cst1 = 2139095040.f; +float exp_cst2 = 0.f; + +/* Relative error bounded by 1e-5 for normalized outputs + Returns invalid outputs for nan inputs + Continuous error */ +float exp_approx(float val) { + union { int32_t i; float f; } xu, xu2; + float val2, val3, val4, b; + int32_t val4i; + val2 = 12102203.1615614f*val+1065353216.f; + val3 = val2 < exp_cst1 ? val2 : exp_cst1; + val4 = val3 > exp_cst2 ? val3 : exp_cst2; + val4i = (int32_t) val4; + xu.i = val4i & 0x7F800000; // mask exponent / round down to neareset 2^n (implicit mantisa bit) + xu2.i = (val4i & 0x7FFFFF) | 0x3F800000; // force exponent to 0 + b = xu2.f; + + /* Generated in Sollya with: + > f=remez(1-x*exp(-(x-1)*log(2)), + [|(x-1)*(x-2), (x-1)*(x-2)*x, (x-1)*(x-2)*x*x|], + [1.000001,1.999999], exp(-(x-1)*log(2))); + > plot(exp((x-1)*log(2))/(f+x)-1, [1,2]); + > f+x; + */ + return + xu.f * (0.509871020343597804469416f + b * + (0.312146713032169896138863f + b * + (0.166617139319965966118107f + b * + (-2.19061993049215080032874e-3f + b * + 1.3555747234758484073940937e-2f)))); +} + +/* Absolute error bounded by 1e-6 for normalized inputs + Returns a finite number for +inf input + Returns -inf for nan and <= 0 inputs. + Continuous error. */ +float log_approx(float val) { + union { float f; int32_t i; } valu; + float exp, addcst, x; + valu.f = val; + exp = valu.i >> 23; + /* 89.970756366f = 127 * log(2) - constant term of polynomial */ + addcst = val > 0 ? -89.970756366f : -(float)INFINITY; + valu.i = (valu.i & 0x7FFFFF) | 0x3F800000; + x = valu.f; + + + /* Generated in Sollya using: + > f = remez(log(x)-(x-1)*log(2), + [|1,(x-1)*(x-2), (x-1)*(x-2)*x, (x-1)*(x-2)*x*x, + (x-1)*(x-2)*x*x*x|], [1,2], 1, 1e-8); + > plot(f+(x-1)*log(2)-log(x), [1,2]); + > f+(x-1)*log(2) + */ + return + x * (3.529304993f + x * (-2.461222105f + + x * (1.130626167f + x * (-0.288739945f + + x * 3.110401639e-2f)))) + + (addcst + 0.69314718055995f*exp); +} + +float pow_approx(float a, float b) +{ + return exp_approx(b * log_approx(a)); +} diff --git a/src/main/common/filter.c b/src/main/common/filter.c index 3424729cd..470092896 100644 --- a/src/main/common/filter.c +++ b/src/main/common/filter.c @@ -79,10 +79,13 @@ FAST_CODE float slewFilterApply(slewFilter_t *filter, float input) return filter->state; } - +// get notch filter Q given center frequency (f0) and lower cutoff frequency (f1) +// Q = f0 / (f2 - f1) ; f2 = f0^2 / f1 float filterGetNotchQ(uint16_t centerFreq, uint16_t cutoff) { - float octaves = log2f((float) centerFreq / (float) cutoff) * 2; - return sqrtf(powf(2, octaves)) / (powf(2, octaves) - 1); + const float f0sq = (float)centerFreq * centerFreq; + const float f1 = cutoff; + + return (f1 * f0sq) / (f0sq - f1 * f1); } /* sets up a biquad Filter */ diff --git a/src/main/common/maths.h b/src/main/common/maths.h index fea305032..ee9203f00 100644 --- a/src/main/common/maths.h +++ b/src/main/common/maths.h @@ -117,12 +117,18 @@ float cos_approx(float x); float atan2_approx(float y, float x); float acos_approx(float x); #define tan_approx(x) (sin_approx(x) / cos_approx(x)) +float exp_approx(float val); +float log_approx(float val); +float pow_approx(float a, float b); #else #define sin_approx(x) sinf(x) #define cos_approx(x) cosf(x) #define atan2_approx(y,x) atan2f(y,x) #define acos_approx(x) acosf(x) #define tan_approx(x) tanf(x) +#define exp_approx(x) expf(x) +#define log_approx(x) logf(x) +#define pow_approx(a, b) powf(b, a) #endif void arraySubInt32(int32_t *dest, int32_t *array1, int32_t *array2, int count); diff --git a/src/main/io/osd.c b/src/main/io/osd.c index 8db8d57f6..5c1ecc8e6 100644 --- a/src/main/io/osd.c +++ b/src/main/io/osd.c @@ -728,7 +728,7 @@ static bool osdDrawSingleElement(uint8_t item) const float value = constrain(batteryConfig()->batteryCapacity - getMAhDrawn(), 0, batteryConfig()->batteryCapacity); // Calculate mAh used progress - const uint8_t mAhUsedProgress = ceil((value / (batteryConfig()->batteryCapacity / MAIN_BATT_USAGE_STEPS))); + const uint8_t mAhUsedProgress = ceilf((value / (batteryConfig()->batteryCapacity / MAIN_BATT_USAGE_STEPS))); // Create empty battery indicator bar buff[0] = SYM_PB_START; diff --git a/src/main/sensors/barometer.c b/src/main/sensors/barometer.c index d8590f0fe..ba0707000 100644 --- a/src/main/sensors/barometer.c +++ b/src/main/sensors/barometer.c @@ -332,7 +332,7 @@ int32_t baroCalculateAltitude(void) // calculates height from ground via baro readings // see: https://github.com/diydrones/ardupilot/blob/master/libraries/AP_Baro/AP_Baro.cpp#L140 if (isBaroCalibrationComplete()) { - BaroAlt_tmp = lrintf((1.0f - powf((float)(baroPressureSum / PRESSURE_SAMPLE_COUNT) / 101325.0f, 0.190295f)) * 4433000.0f); // in cm + BaroAlt_tmp = lrintf((1.0f - pow_approx((float)(baroPressureSum / PRESSURE_SAMPLE_COUNT) / 101325.0f, 0.190295f)) * 4433000.0f); // in cm BaroAlt_tmp -= baroGroundAltitude; baro.BaroAlt = lrintf((float)baro.BaroAlt * CONVERT_PARAMETER_TO_FLOAT(barometerConfig()->baro_noise_lpf) + (float)BaroAlt_tmp * (1.0f - CONVERT_PARAMETER_TO_FLOAT(barometerConfig()->baro_noise_lpf))); // additional LPF to reduce baro noise } @@ -348,7 +348,7 @@ void performBaroCalibrationCycle(void) baroGroundPressure -= baroGroundPressure / 8; baroGroundPressure += baroPressureSum / PRESSURE_SAMPLE_COUNT; - baroGroundAltitude = (1.0f - powf((baroGroundPressure / 8) / 101325.0f, 0.190295f)) * 4433000.0f; + baroGroundAltitude = (1.0f - pow_approx((baroGroundPressure / 8) / 101325.0f, 0.190295f)) * 4433000.0f; if (baroGroundPressure == savedGroundPressure) calibratingB = 0; diff --git a/src/main/sensors/rangefinder.c b/src/main/sensors/rangefinder.c index 990a08dae..dcf9e1874 100644 --- a/src/main/sensors/rangefinder.c +++ b/src/main/sensors/rangefinder.c @@ -233,7 +233,8 @@ static int16_t computePseudoSnr(int32_t newReading) { static bool snrReady = false; int16_t pseudoSnr = 0; - snrSamples[snrSampleIndex] = constrain((int)(pow(newReading - previousReading, 2) / 10), 0, 6400); + const int delta = newReading - previousReading; + snrSamples[snrSampleIndex] = constrain(delta * delta / 10, 0, 6400); ++snrSampleIndex; if (snrSampleIndex == SNR_SAMPLES) { snrSampleIndex = 0; -- 2.11.4.GIT