Add built-in VGA font
[jpcrr.git] / streamtools / lanczos.hh
blob41cb3ea596a288c30779387293548e8897f04422
1 #include <math.h>
2 #include <alloca.h>
4 #include <valarray>
6 # define likely(x) __builtin_expect(!!(x), 1)
7 # define unlikely(x) __builtin_expect(!!(x), 0)
9 #if 1
10 typedef float LanczosFloat;
11 #define LanczosSin sinf
13 //LanczosFloat LanczosSin(LanczosFloat x) { return 1.0f; }
14 #else
15 typedef double LanczosFloat;
16 #define LanczosSin sin
17 #endif
19 template<int Radius, bool check>
20 inline LanczosFloat Lanczos_pi(LanczosFloat x_pi)
22 if(unlikely(x_pi == (LanczosFloat)0.0)) return (LanczosFloat)1.0;
23 if(check)
25 if (x_pi <= (LanczosFloat)(-Radius*M_PI)
26 || x_pi >= (LanczosFloat)( Radius*M_PI)) return (LanczosFloat)0.0;
29 LanczosFloat x_pi_div_Radius = x_pi / Radius;
31 //LanczosFloat a = sin(x_pi) / x_pi;
32 //LanczosFloat b = sin(x_pi_div_Radius) / x_pi_div_Radius;
33 //return a * b;
35 return LanczosSin(x_pi) * LanczosSin(x_pi_div_Radius) / (x_pi * x_pi_div_Radius);
37 template<int Radius>
38 inline LanczosFloat Lanczos(LanczosFloat x)
40 return Lanczos_pi<Radius,true>(x * (LanczosFloat)M_PI);
43 #if 1
44 const int DefaultTripletCount = 4; // Better for -ftree-vectorize
45 #else
46 const int DefaultTripletCount = 3; // Better when no ftree-vectorize
47 #endif
49 /* Triplet is a 3-element tuple. We use them to maximize
50 * the compiler's chances for vectorizing optimizations.
52 * It's kind of valarray, but with build-time specified size.
54 template<typename T, int count = DefaultTripletCount>
55 struct Triplet
57 T values[count];
59 template<typename V>
60 Triplet(V a,V b,V c)
61 { values[0]=a; values[1]=b; values[2]=c; }
63 template<typename V>
64 Triplet(const Triplet<V>& b)
65 { for(size_t n=0; n<count; ++n) values[n] = b.values[n]; }
67 template<typename V>
68 Triplet(V a)
69 { T tmp=a; for(size_t n=0; n<count; ++n) values[n] = tmp; }
71 Triplet() { }
73 T& operator[] (size_t ind) { return values[ind]; }
74 const T& operator[] (size_t ind) const { return values[ind]; }
76 #define bop(op) \
77 template<typename B> \
78 Triplet<T>& operator op (const Triplet<B>& b) \
79 { for(size_t n=0; n<count; ++n) values[n] op b.values[n]; \
80 return *this; } \
82 template<typename B> \
83 Triplet<T>& operator op (const B & b) \
84 { for(size_t n=0; n<count; ++n) values[n] op b; \
85 return *this; }
87 bop(*=)
88 bop(+=)
89 bop(/=)
90 bop(>>=)
91 bop(&=)
93 #undef bop
95 #define uop(op) \
96 template<typename B> \
97 Triplet<T> operator op (const B& b) const \
98 { Triplet<T> res = *this; res op##= b; return res; }
100 uop(*)
101 uop(/)
102 uop(>>)
103 uop(&)
105 #undef uop
108 template<typename A, typename B>
109 Triplet<A> operator>> (A a, const Triplet<B>& b)
110 { Triplet<A> res = a; res >>= b; return res; }
112 template<typename A, typename B>
113 Triplet<A> operator* (A a, const Triplet<B>& b)
114 { Triplet<A> res = a; res *= b; return res; }
116 template<typename T>
117 struct LanczosFloatValue { typedef LanczosFloat result; };
118 template<typename T>
119 struct LanczosFloatValue<Triplet<T> > { typedef Triplet<LanczosFloat> result; };
121 /* The actual basis for scaling. */
123 template<typename SrcTab, typename DestTab>
124 class Lanczos2DBase
126 const SrcTab& src; DestTab& tgt;
128 // Note: In this vocabulary,
129 // y denotes outer loop and
130 // x denotes inner loop, but
131 // it could be vice versa.
132 int xinc_src, yinc_src;
133 int xinc_tgt, yinc_tgt;
134 int ylimit;
135 public:
136 Lanczos2DBase(
137 const SrcTab& src_, DestTab& tgt_,
138 int sxi,int syi, int txi,int tyi, int ylim)
139 : src(src_), tgt(tgt_),
140 xinc_src(sxi), yinc_src(syi),
141 xinc_tgt(txi), yinc_tgt(tyi),
142 ylimit(ylim)
145 void ScaleOne
146 (int srcpos, int tgtpos, int nmax,
147 const LanczosFloat contrib[], LanczosFloat density_rev) const
149 typename LanczosFloatValue
150 <typename DestTab::value_type>::result res = 0.0;
151 //LanczosFloat r=0, g=0, b=0;
153 int srctemp = srcpos;
155 //#pragma omp parallel for reduction(+:r,g,b)
156 for(int n=0; n<nmax; ++n)
158 res += contrib[n] * src.at(srctemp);
159 srctemp += xinc_src; // source x increment
162 // Multiplication is faster than division, so we use the reciprocal.
163 res *= density_rev;
165 tgt.at(tgtpos) = res;
168 void StripeLoop(int tx, int sx, int nmax,
169 const LanczosFloat contrib[], LanczosFloat density) const;
172 template<typename SrcTab, typename DestTab>
173 void Lanczos2DBase<SrcTab,DestTab>
174 ::StripeLoop(int tx, int sx, int nmax, const LanczosFloat contrib[], LanczosFloat density) const
176 int srcpos = sx * xinc_src; // source x pos at y = 0
177 int tgtpos = tx * xinc_tgt; // target x pos at y = 0
180 fprintf(stderr, "StripeLoop sx=%d, tx=%d, srcpos=%d, tgtpos=%d, srcsize=%d, tgtsize=%d, ylimit=%d\n",
181 sx,tx, srcpos, tgtpos,
182 (int)src.size(), (int)tgt.size(), ylimit);*/
184 const LanczosFloat density_rev = (density == 0.0 || density == 1.0)
185 ? 1.0
186 : (1.0 / density);
188 for(int y=ylimit; y-->0; )
190 /*fprintf(stderr, "- within: srcpos=%d, tgtpos=%d, y=%d\n", srcpos,tgtpos, y);*/
191 ScaleOne(srcpos, tgtpos, nmax, contrib, density_rev);
193 srcpos += yinc_src; // source y increment
194 tgtpos += yinc_tgt; // target y increment
198 template<typename SrcTab, typename DestTab>
199 class HorizScaler: public Lanczos2DBase<SrcTab,DestTab>
201 public:
203 <-------------->
204 <-------------->
205 <-------------->
206 <-------------->
207 <-------------->
208 <-------------->
210 For each output column (out_size = {ow}),
211 {h} rows (source and target) get processed
213 On each row,
214 {nmax} source columns get transformed
215 into 1 target column
217 Target:
218 New column stride = {1}
219 New row stride = {ow}
220 Source:
221 Next column stride = {1}
222 Next row stride = {iw}
225 HorizScaler(
226 int iw,int ow, int h,
227 const SrcTab& src, DestTab& tgt)
228 : Lanczos2DBase<SrcTab,DestTab>(
229 src,tgt,
230 1, // xinc_src ok
231 iw, // yinc_src ok
232 1, // xinc_tgt ok
233 ow, // yinc_tgt ok
234 h // ylimit ok
235 ) { }
238 template<typename SrcTab, typename DestTab>
239 class VertScaler: public Lanczos2DBase<SrcTab,DestTab>
241 public:
243 ^^^^^^^^^^^^^^^^
244 ||||||||||||||||
245 ||||||||||||||||
246 ||||||||||||||||
247 ||||||||||||||||
248 vvvvvvvvvvvvvvvv
250 For each output row (out_size = {oh}),
251 {w} columns (source and target) get processed
253 On each column,
254 {nmax} source rows get transformed
255 into 1 target row
257 Target:
258 New row stride = {w}
259 New column stride = {1}
260 Source:
261 Next row stride = {w}
262 Next column stride = {1}
265 VertScaler(
266 int w,
267 const SrcTab& src, DestTab& tgt)
269 : Lanczos2DBase<SrcTab,DestTab>(
270 src,tgt,
271 w, // xinc_src ok
272 1, // yinc_src ok
273 w, // xinc_tgt ok
274 1, // yinc_tgt ok
275 w // ylimit ok
276 ) { }
279 /*template<typename SrcTab, typename DestTab>
280 class ScalarScaler: private Lanczos2DBase<SrcTab, DestTab>
282 public:
283 ScalarScaler(const SrcTab& src, DestTab& tgt)
284 : Lanczos2DBase<SrcTab,DestTab>(src,tgt, 1,1,1,1,1) { }
286 void StripeLoop(int tx, int sx, int nmax,
287 const LanczosFloat contrib[], LanczosFloat density) const
289 const LanczosFloat density_rev = (density == 0.0 || density == 1.0)
290 ? 1.0
291 : (1.0 / density);
292 ScaleOne(sx, tx, nmax, contrib, density_rev);
294 };*/
296 struct LanczosCoreCalcRes
298 int start;
299 int nmax;
300 LanczosFloat density;
303 template<int FilterRadius>
304 inline LanczosCoreCalcRes LanczosCoreCalc
305 (int in_size,
306 LanczosFloat center, LanczosFloat support, LanczosFloat scale,
307 LanczosFloat contrib[])
309 const int start = std::max((int)(center-support+(LanczosFloat)0.5), 0);
310 const int end = std::min((int)(center+support+(LanczosFloat)0.5), in_size);
311 const int nmax = end-start;
313 const LanczosFloat scale_pi = scale * M_PI;
315 const LanczosFloat s_min = -FilterRadius*M_PI;
316 const LanczosFloat s_max = FilterRadius*M_PI;
318 LanczosFloat s_pi = (start-center+(LanczosFloat)0.5) * scale_pi;
320 LanczosFloat density = 0.0;
322 { int n=0;
323 for(; n < nmax && unlikely(s_pi < s_min); ++n, s_pi += scale_pi)
325 for(; n < nmax && likely(s_pi < s_max); ++n, s_pi += scale_pi)
327 LanczosFloat l = Lanczos_pi<FilterRadius,false> (s_pi);
328 contrib[n] = l;
329 density += l;
333 LanczosCoreCalcRes res;
334 res.start = start;
335 res.nmax = nmax;
336 res.density = density;
337 return res;
340 /* A generic Lanczos scaler suitable for
341 * converting something to something else
342 * at once.
343 * For image pixels, use Triplet<type>
344 * For stereo samples, use Triplet<type, 2>
345 * For mono samples, just use type
347 template<typename Handler>
348 void LanczosScale(int in_size, int out_size, Handler& target)
350 const int FilterRadius = 3;
351 const LanczosFloat blur = 1.0;
353 const LanczosFloat factor = out_size / (LanczosFloat)in_size;
354 const LanczosFloat scale = std::min(factor, (LanczosFloat)1.0) / blur;
355 const LanczosFloat support = FilterRadius / scale;
357 const size_t contrib_size = std::min(in_size, 5+int(2*support));
358 LanczosFloat contrib[contrib_size];
360 /*fprintf(stderr, "Scaling (%d->%d), contrib=%d\n",
361 in_size, out_size, (int)contrib_size);*/
363 /* FIXME: does private() really work for stack arrays, especially autosize ones? */
365 //#pragma omp parallel for schedule(static) private(contrib)
366 for(int outpos=0; outpos<out_size; ++outpos)
368 LanczosFloat center = (outpos+0.5) / factor;
369 LanczosCoreCalcRes res
370 = LanczosCoreCalc<FilterRadius>(in_size, center, support, scale, contrib);
371 target.StripeLoop(outpos, res.start, res.nmax, &contrib[0], res.density);
375 #if 0
376 /* Suitable for audio resampling
378 * For stereo samples, use Triplet<type, 2>
379 * For mono samples, just use type
381 template<typename SrcT = short, typename DestT = LanczosFloat>
382 class ContiguousLanczosScaler
384 static const int FilterRadius = 3;
386 std::vector<T> in_buffer;
387 double factor, scale, support;
390 private:
391 void SetRatio(int in_rate, int out_rate)
393 const LanczosFloat blur = 1.0;
395 factor = out_size / (LanczosFloat)in_size;
396 scale = std::min(factor, (LanczosFloat)1.0) / blur;
397 support = FilterRadius / scale;
400 #endif
402 template<typename SrcTab, typename DestTab>
403 void LanczosScale(int wid,int hei, int out_wid,int out_hei,
404 const SrcTab& src,
405 DestTab& tgt)
407 const size_t in_dim = wid*hei;
408 const size_t out_dim = out_wid*out_hei;
410 assert(src.size() == in_dim);
412 if(out_wid != wid || out_hei != hei)
414 #if 1
415 typedef SrcTab TmpSrcTab;
416 const SrcTab& tmpsrc = src;
417 #else
418 typedef DestTab TmpSrcTab;
419 DestTab tmpsrc(in_dim);
420 std::copy(&src[0],&src[in_dim], &tmpsrc[0]);
421 #endif
423 if(out_wid != wid)
425 // X scaling (sx*sy -> dx*sy)
426 tgt.resize(out_wid*hei);
428 HorizScaler<TmpSrcTab, DestTab> handler_x(wid,out_wid, hei, tmpsrc,tgt);
429 LanczosScale(wid, out_wid, handler_x);
431 if(out_hei != hei)
433 // Also Y scaling (dx*sy -> dx*dy)
434 typedef DestTab TempTab;
435 TempTab tmp(out_dim);
436 tmp.swap(tgt);
438 VertScaler<TempTab, DestTab> handler_y(out_wid, tmp,tgt);
439 LanczosScale(hei, out_hei, handler_y);
442 else if(out_hei != hei)
444 // Only Y scaling (x*sy -> x*dy)
445 tgt.resize(out_dim);
446 VertScaler<TmpSrcTab, DestTab> handler_y(wid, tmpsrc,tgt);
447 LanczosScale(hei, out_hei, handler_y);
450 else
452 tgt.resize(out_dim);
453 // No scalings
454 std::copy(&src[0],&src[in_dim], &tgt[0]);