6 # define likely(x) __builtin_expect(!!(x), 1)
7 # define unlikely(x) __builtin_expect(!!(x), 0)
10 typedef float LanczosFloat
;
11 #define LanczosSin sinf
13 //LanczosFloat LanczosSin(LanczosFloat x) { return 1.0f; }
15 typedef double LanczosFloat
;
16 #define LanczosSin sin
19 template<int Radius
, bool check
>
20 inline LanczosFloat
Lanczos_pi(LanczosFloat x_pi
)
22 if(unlikely(x_pi
== (LanczosFloat
)0.0)) return (LanczosFloat
)1.0;
25 if (x_pi
<= (LanczosFloat
)(-Radius
*M_PI
)
26 || x_pi
>= (LanczosFloat
)( Radius
*M_PI
)) return (LanczosFloat
)0.0;
29 LanczosFloat x_pi_div_Radius
= x_pi
/ Radius
;
31 //LanczosFloat a = sin(x_pi) / x_pi;
32 //LanczosFloat b = sin(x_pi_div_Radius) / x_pi_div_Radius;
35 return LanczosSin(x_pi
) * LanczosSin(x_pi_div_Radius
) / (x_pi
* x_pi_div_Radius
);
38 inline LanczosFloat
Lanczos(LanczosFloat x
)
40 return Lanczos_pi
<Radius
,true>(x
* (LanczosFloat
)M_PI
);
44 const int DefaultTripletCount
= 4; // Better for -ftree-vectorize
46 const int DefaultTripletCount
= 3; // Better when no ftree-vectorize
49 /* Triplet is a 3-element tuple. We use them to maximize
50 * the compiler's chances for vectorizing optimizations.
52 * It's kind of valarray, but with build-time specified size.
54 template<typename T
, int count
= DefaultTripletCount
>
61 { values
[0]=a
; values
[1]=b
; values
[2]=c
; }
64 Triplet(const Triplet
<V
>& b
)
65 { for(size_t n
=0; n
<count
; ++n
) values
[n
] = b
.values
[n
]; }
69 { T tmp
=a
; for(size_t n
=0; n
<count
; ++n
) values
[n
] = tmp
; }
73 T
& operator[] (size_t ind
) { return values
[ind
]; }
74 const T
& operator[] (size_t ind
) const { return values
[ind
]; }
77 template<typename B> \
78 Triplet<T>& operator op (const Triplet<B>& b) \
79 { for(size_t n=0; n<count; ++n) values[n] op b.values[n]; \
82 template<typename B> \
83 Triplet<T>& operator op (const B & b) \
84 { for(size_t n=0; n<count; ++n) values[n] op b; \
96 template<typename B> \
97 Triplet<T> operator op (const B& b) const \
98 { Triplet<T> res = *this; res op##= b; return res; }
108 template<typename A
, typename B
>
109 Triplet
<A
> operator>> (A a
, const Triplet
<B
>& b
)
110 { Triplet
<A
> res
= a
; res
>>= b
; return res
; }
112 template<typename A
, typename B
>
113 Triplet
<A
> operator* (A a
, const Triplet
<B
>& b
)
114 { Triplet
<A
> res
= a
; res
*= b
; return res
; }
117 struct LanczosFloatValue
{ typedef LanczosFloat result
; };
119 struct LanczosFloatValue
<Triplet
<T
> > { typedef Triplet
<LanczosFloat
> result
; };
121 /* The actual basis for scaling. */
123 template<typename SrcTab
, typename DestTab
>
126 const SrcTab
& src
; DestTab
& tgt
;
128 // Note: In this vocabulary,
129 // y denotes outer loop and
130 // x denotes inner loop, but
131 // it could be vice versa.
132 int xinc_src
, yinc_src
;
133 int xinc_tgt
, yinc_tgt
;
137 const SrcTab
& src_
, DestTab
& tgt_
,
138 int sxi
,int syi
, int txi
,int tyi
, int ylim
)
139 : src(src_
), tgt(tgt_
),
140 xinc_src(sxi
), yinc_src(syi
),
141 xinc_tgt(txi
), yinc_tgt(tyi
),
146 (int srcpos
, int tgtpos
, int nmax
,
147 const LanczosFloat contrib
[], LanczosFloat density_rev
) const
149 typename LanczosFloatValue
150 <typename
DestTab::value_type
>::result res
= 0.0;
151 //LanczosFloat r=0, g=0, b=0;
153 int srctemp
= srcpos
;
155 //#pragma omp parallel for reduction(+:r,g,b)
156 for(int n
=0; n
<nmax
; ++n
)
158 res
+= contrib
[n
] * src
.at(srctemp
);
159 srctemp
+= xinc_src
; // source x increment
162 // Multiplication is faster than division, so we use the reciprocal.
165 tgt
.at(tgtpos
) = res
;
168 void StripeLoop(int tx
, int sx
, int nmax
,
169 const LanczosFloat contrib
[], LanczosFloat density
) const;
172 template<typename SrcTab
, typename DestTab
>
173 void Lanczos2DBase
<SrcTab
,DestTab
>
174 ::StripeLoop(int tx
, int sx
, int nmax
, const LanczosFloat contrib
[], LanczosFloat density
) const
176 int srcpos
= sx
* xinc_src
; // source x pos at y = 0
177 int tgtpos
= tx
* xinc_tgt
; // target x pos at y = 0
180 fprintf(stderr, "StripeLoop sx=%d, tx=%d, srcpos=%d, tgtpos=%d, srcsize=%d, tgtsize=%d, ylimit=%d\n",
181 sx,tx, srcpos, tgtpos,
182 (int)src.size(), (int)tgt.size(), ylimit);*/
184 const LanczosFloat density_rev
= (density
== 0.0 || density
== 1.0)
188 for(int y
=ylimit
; y
-->0; )
190 /*fprintf(stderr, "- within: srcpos=%d, tgtpos=%d, y=%d\n", srcpos,tgtpos, y);*/
191 ScaleOne(srcpos
, tgtpos
, nmax
, contrib
, density_rev
);
193 srcpos
+= yinc_src
; // source y increment
194 tgtpos
+= yinc_tgt
; // target y increment
198 template<typename SrcTab
, typename DestTab
>
199 class HorizScaler
: public Lanczos2DBase
<SrcTab
,DestTab
>
210 For each output column (out_size = {ow}),
211 {h} rows (source and target) get processed
214 {nmax} source columns get transformed
218 New column stride = {1}
219 New row stride = {ow}
221 Next column stride = {1}
222 Next row stride = {iw}
226 int iw
,int ow
, int h
,
227 const SrcTab
& src
, DestTab
& tgt
)
228 : Lanczos2DBase
<SrcTab
,DestTab
>(
238 template<typename SrcTab
, typename DestTab
>
239 class VertScaler
: public Lanczos2DBase
<SrcTab
,DestTab
>
250 For each output row (out_size = {oh}),
251 {w} columns (source and target) get processed
254 {nmax} source rows get transformed
259 New column stride = {1}
261 Next row stride = {w}
262 Next column stride = {1}
267 const SrcTab
& src
, DestTab
& tgt
)
269 : Lanczos2DBase
<SrcTab
,DestTab
>(
279 /*template<typename SrcTab, typename DestTab>
280 class ScalarScaler: private Lanczos2DBase<SrcTab, DestTab>
283 ScalarScaler(const SrcTab& src, DestTab& tgt)
284 : Lanczos2DBase<SrcTab,DestTab>(src,tgt, 1,1,1,1,1) { }
286 void StripeLoop(int tx, int sx, int nmax,
287 const LanczosFloat contrib[], LanczosFloat density) const
289 const LanczosFloat density_rev = (density == 0.0 || density == 1.0)
292 ScaleOne(sx, tx, nmax, contrib, density_rev);
296 struct LanczosCoreCalcRes
300 LanczosFloat density
;
303 template<int FilterRadius
>
304 inline LanczosCoreCalcRes LanczosCoreCalc
306 LanczosFloat center
, LanczosFloat support
, LanczosFloat scale
,
307 LanczosFloat contrib
[])
309 const int start
= std::max((int)(center
-support
+(LanczosFloat
)0.5), 0);
310 const int end
= std::min((int)(center
+support
+(LanczosFloat
)0.5), in_size
);
311 const int nmax
= end
-start
;
313 const LanczosFloat scale_pi
= scale
* M_PI
;
315 const LanczosFloat s_min
= -FilterRadius
*M_PI
;
316 const LanczosFloat s_max
= FilterRadius
*M_PI
;
318 LanczosFloat s_pi
= (start
-center
+(LanczosFloat
)0.5) * scale_pi
;
320 LanczosFloat density
= 0.0;
323 for(; n
< nmax
&& unlikely(s_pi
< s_min
); ++n
, s_pi
+= scale_pi
)
325 for(; n
< nmax
&& likely(s_pi
< s_max
); ++n
, s_pi
+= scale_pi
)
327 LanczosFloat l
= Lanczos_pi
<FilterRadius
,false> (s_pi
);
333 LanczosCoreCalcRes res
;
336 res
.density
= density
;
340 /* A generic Lanczos scaler suitable for
341 * converting something to something else
343 * For image pixels, use Triplet<type>
344 * For stereo samples, use Triplet<type, 2>
345 * For mono samples, just use type
347 template<typename Handler
>
348 void LanczosScale(int in_size
, int out_size
, Handler
& target
)
350 const int FilterRadius
= 3;
351 const LanczosFloat blur
= 1.0;
353 const LanczosFloat factor
= out_size
/ (LanczosFloat
)in_size
;
354 const LanczosFloat scale
= std::min(factor
, (LanczosFloat
)1.0) / blur
;
355 const LanczosFloat support
= FilterRadius
/ scale
;
357 const size_t contrib_size
= std::min(in_size
, 5+int(2*support
));
358 LanczosFloat contrib
[contrib_size
];
360 /*fprintf(stderr, "Scaling (%d->%d), contrib=%d\n",
361 in_size, out_size, (int)contrib_size);*/
363 /* FIXME: does private() really work for stack arrays, especially autosize ones? */
365 //#pragma omp parallel for schedule(static) private(contrib)
366 for(int outpos
=0; outpos
<out_size
; ++outpos
)
368 LanczosFloat center
= (outpos
+0.5) / factor
;
369 LanczosCoreCalcRes res
370 = LanczosCoreCalc
<FilterRadius
>(in_size
, center
, support
, scale
, contrib
);
371 target
.StripeLoop(outpos
, res
.start
, res
.nmax
, &contrib
[0], res
.density
);
376 /* Suitable for audio resampling
378 * For stereo samples, use Triplet<type, 2>
379 * For mono samples, just use type
381 template<typename SrcT
= short, typename DestT
= LanczosFloat
>
382 class ContiguousLanczosScaler
384 static const int FilterRadius
= 3;
386 std::vector
<T
> in_buffer
;
387 double factor
, scale
, support
;
391 void SetRatio(int in_rate
, int out_rate
)
393 const LanczosFloat blur
= 1.0;
395 factor
= out_size
/ (LanczosFloat
)in_size
;
396 scale
= std::min(factor
, (LanczosFloat
)1.0) / blur
;
397 support
= FilterRadius
/ scale
;
402 template<typename SrcTab
, typename DestTab
>
403 void LanczosScale(int wid
,int hei
, int out_wid
,int out_hei
,
407 const size_t in_dim
= wid
*hei
;
408 const size_t out_dim
= out_wid
*out_hei
;
410 assert(src
.size() == in_dim
);
412 if(out_wid
!= wid
|| out_hei
!= hei
)
415 typedef SrcTab TmpSrcTab
;
416 const SrcTab
& tmpsrc
= src
;
418 typedef DestTab TmpSrcTab
;
419 DestTab
tmpsrc(in_dim
);
420 std::copy(&src
[0],&src
[in_dim
], &tmpsrc
[0]);
425 // X scaling (sx*sy -> dx*sy)
426 tgt
.resize(out_wid
*hei
);
428 HorizScaler
<TmpSrcTab
, DestTab
> handler_x(wid
,out_wid
, hei
, tmpsrc
,tgt
);
429 LanczosScale(wid
, out_wid
, handler_x
);
433 // Also Y scaling (dx*sy -> dx*dy)
434 typedef DestTab TempTab
;
435 TempTab
tmp(out_dim
);
438 VertScaler
<TempTab
, DestTab
> handler_y(out_wid
, tmp
,tgt
);
439 LanczosScale(hei
, out_hei
, handler_y
);
442 else if(out_hei
!= hei
)
444 // Only Y scaling (x*sy -> x*dy)
446 VertScaler
<TmpSrcTab
, DestTab
> handler_y(wid
, tmpsrc
,tgt
);
447 LanczosScale(hei
, out_hei
, handler_y
);
454 std::copy(&src
[0],&src
[in_dim
], &tgt
[0]);