4 Copyright (c) 2011, Attractive Chaos <attractor@live.co.uk>
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 This is a Lua library, more exactly a collection of Lua snippets, covering
27 utilities (e.g. getopt), string operations (e.g. split), statistics (e.g.
28 Fisher's exact test), special functions (e.g. logarithm gamma) and matrix
29 operations (e.g. Gauss-Jordan elimination). The routines are designed to be
30 as independent as possible, such that one can copy-paste relevant pieces of
31 code without worrying about additional library dependencies.
33 If you use routines from this library, please include the licensing
34 information above where appropriate.
38 Library functions and dependencies. "a>b" means "a is required by b"; "b<a"
39 means "b depends on a".
46 math.lgamma() >math.lbinom() >math.igamma()
47 math.igamma() <math.lgamma() >matrix.chi2()
49 math.lbinom() <math.lgamma() >math.fisher_exact()
50 math.bernstein_poly() <math.lbinom()
51 math.fisher_exact() <math.lbinom()
58 matrix.T() >matrix.mul()
59 matrix.mul() <matrix.T()
61 matrix.chi2() <math.igamma()
65 -- Description: getopt() translated from the BSD getopt(); compatible with the default Unix getopt()
67 for o, a in os.getopt(arg, 'a:b') do
71 function os
.getopt(args
, ostr
)
72 local arg
, place
= nil, 0;
74 if place
== 0 then -- update scanning pointer
76 if #args
== 0 or args
[1]:sub(1, 1) ~= '-' then place
= 0; return nil end
79 if args
[1]:sub(2, 2) == '-' then -- found "--"
81 table.remove(args
, 1);
86 local optopt
= args
[1]:sub(place
, place
);
88 local oli
= ostr
:find(optopt
);
89 if optopt
== ':' or oli
== nil then -- unknown option
90 if optopt
== '-' then return nil end
91 if place
> #args
[1] then
92 table.remove(args
, 1);
98 if ostr
:sub(oli
, oli
) ~= ':' then -- do not need argument
100 if place
> #args
[1] then
101 table.remove(args
, 1);
104 else -- need an argument
105 if place
<= #args
[1] then -- no white space
106 arg
= args
[1]:sub(place
);
108 table.remove(args
, 1);
109 if #args
== 0 then -- an option requiring argument is the last one
111 if ostr
:sub(1, 1) == ':' then return ':' end
113 else arg
= args
[1] end
115 table.remove(args
, 1);
122 -- Description: string split
123 function string:split(sep
, n
)
124 local a
, start
= {}, 1;
127 local b
, e
= self
:find(sep
, start
);
129 table.insert(a
, self
:sub(start
));
132 a
[#a
+1] = self
:sub(start
, b
- 1);
134 if n
and #a
== n
then
135 table.insert(a
, self
:sub(start
));
142 -- Description: smart file open
143 function io
.xopen(fn
, mode
)
145 if fn
== nil then return io
.stdin
;
146 elseif fn
== '-' then return (mode
== 'r' and io
.stdin
) or io
.stdout
;
147 elseif fn
:sub(-3) == '.gz' then return (mode
== 'r' and io
.popen('gzip -dc ' .. fn
, 'r')) or io
.popen('gzip > ' .. fn
, 'w');
148 elseif fn
:sub(-4) == '.bz2' then return (mode
== 'r' and io
.popen('bzip2 -dc ' .. fn
, 'r')) or io
.popen('bgzip2 > ' .. fn
, 'w');
149 else return io
.open(fn
, mode
) end
152 -- Description: find the k-th smallest element in an array (Ref. http://ndevilla.free.fr/median/)
153 function table.ksmall(arr
, k
)
154 local low
, high
= 1, #arr
;
156 if high
<= low
then return arr
[k
] end
157 if high
== low
+ 1 then
158 if arr
[high
] < arr
[low
] then arr
[high
], arr
[low
] = arr
[low
], arr
[high
] end;
161 local mid
= math
.floor((high
+ low
) / 2);
162 if arr
[high
] < arr
[mid
] then arr
[mid
], arr
[high
] = arr
[high
], arr
[mid
] end
163 if arr
[high
] < arr
[low
] then arr
[low
], arr
[high
] = arr
[high
], arr
[low
] end
164 if arr
[low
] < arr
[mid
] then arr
[low
], arr
[mid
] = arr
[mid
], arr
[low
] end
165 arr
[mid
], arr
[low
+1] = arr
[low
+1], arr
[mid
];
166 local ll
, hh
= low
+ 1, high
;
168 repeat ll
= ll
+ 1 until arr
[ll
] >= arr
[low
]
169 repeat hh
= hh
- 1 until arr
[low
] >= arr
[hh
]
170 if hh
< ll
then break end
171 arr
[ll
], arr
[hh
] = arr
[hh
], arr
[ll
];
173 arr
[low
], arr
[hh
] = arr
[hh
], arr
[low
];
174 if hh
<= k
then low
= ll
end
175 if hh
>= k
then high
= hh
- 1 end
179 -- Description: shuffle/permutate an array
180 function table.shuffle(a
)
182 local j
= math
.random(i
)
183 a
[j
], a
[i
] = a
[i
], a
[j
]
191 -- Description: log gamma function
192 -- Required by: math.lbinom()
193 -- Reference: AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
194 function math
.lgamma(z
)
196 x
= 0.1659470187408462e-06 / (z
+7);
197 x
= x
+ 0.9934937113930748e-05 / (z
+6);
198 x
= x
- 0.1385710331296526 / (z
+5);
199 x
= x
+ 12.50734324009056 / (z
+4);
200 x
= x
- 176.6150291498386 / (z
+3);
201 x
= x
+ 771.3234287757674 / (z
+2);
202 x
= x
- 1259.139216722289 / (z
+1);
203 x
= x
+ 676.5203681218835 / z
;
204 x
= x
+ 0.9999999999995183;
205 return math
.log(x
) - 5.58106146679532777 - z
+ (z
-0.5) * math
.log(z
+6.5);
208 -- Description: regularized incomplete gamma function
209 -- Dependent on: math.lgamma()
211 Formulas are taken from Wiki, with additional input from Numerical
212 Recipes in C (for modified Lentz's algorithm) and AS245
213 (http://lib.stat.cmu.edu/apstat/245).
215 A good online calculator is available at:
217 http://www.danielsoper.com/statcalc/calc23.aspx
219 It calculates upper incomplete gamma function, which equals
220 math.igamma(s,z,true)*math.exp(math.lgamma(s))
222 function math
.igamma(s
, z
, complement
)
224 local function _kf_gammap(s
, z
)
229 if x
/ sum
< 1e-14 then break end
231 return math
.exp(s
* math
.log(z
) - z
- math
.lgamma(s
+ 1.) + math
.log(sum
));
234 local function _kf_gammaq(s
, z
)
236 f
= 1. + z
- s
; C
= f
; D
= 0.; TINY
= 1e-290;
237 -- Modified Lentz's algorithm for computing continued fraction. See Numerical Recipes in C, 2nd edition, section 5.2
240 local a
, b
= j
* (s
- j
), j
*2 + 1 + z
- s
;
242 if D
< TINY
then D
= TINY
end
244 if C
< TINY
then C
= TINY
end
248 if math
.abs(d
- 1) < 1e-14 then break end
250 return math
.exp(s
* math
.log(z
) - z
- math
.lgamma(s
) - math
.log(f
));
254 return ((z
<= 1 or z
< s
) and 1 - _kf_gammap(s
, z
)) or _kf_gammaq(s
, z
);
256 return ((z
<= 1 or z
< s
) and _kf_gammap(s
, z
)) or (1 - _kf_gammaq(s
, z
));
260 math
.M_SQRT2
= 1.41421356237309504880 -- sqrt(2)
261 math
.M_SQRT1_2
= 0.70710678118654752440 -- 1/sqrt(2)
263 -- Description: complement error function erfc(x): \Phi(x) = 0.5 * erfc(-x/M_SQRT2)
264 function math
.erfc(x
)
265 local z
= math
.abs(x
) * math
.M_SQRT2
266 if z
> 37 then return (x
> 0 and 0) or 2 end
267 local expntl
= math
.exp(-0.5 * z
* z
)
269 if z
< 10. / math
.M_SQRT2
then -- for small z
270 p
= expntl
* ((((((.03526249659989109 * z
+ .7003830644436881) * z
+ 6.37396220353165) * z
+ 33.912866078383)
271 * z
+ 112.0792914978709) * z
+ 221.2135961699311) * z
+ 220.2068679123761)
272 / (((((((.08838834764831844 * z
+ 1.755667163182642) * z
+ 16.06417757920695) * z
+ 86.78073220294608)
273 * z
+ 296.5642487796737) * z
+ 637.3336333788311) * z
+ 793.8265125199484) * z
+ 440.4137358247522);
274 else p
= expntl
/ 2.506628274631001 / (z
+ 1. / (z
+ 2. / (z
+ 3. / (z
+ 4. / (z
+ .65))))) end
275 return (x
> 0 and 2 * p
) or 2 * (1 - p
)
278 -- Description: log binomial coefficient
279 -- Dependent on: math.lgamma()
280 -- Required by: math.fisher_exact()
281 function math
.lbinom(n
, m
)
285 local t
= math
.lgamma(n
+1);
286 for m
= 1, n
-1 do a
[m
] = t
- math
.lgamma(m
+1) - math
.lgamma(n
-m
+1) end
288 else return math
.lgamma(n
+1) - math
.lgamma(m
+1) - math
.lgamma(n
-m
+1) end
291 -- Description: Berstein polynomials (mainly for Bezier curves)
292 -- Dependent on: math.lbinom()
293 -- Note: to compute derivative: let beta_new[i]=beta[i+1]-beta[i]
294 function math
.bernstein_poly(beta
)
296 local lbc
= math
.lbinom(n
); -- log binomial coefficients
298 assert(t
>= 0 and t
<= 1);
299 if t
== 0 then return beta
[1] end
300 if t
== 1 then return beta
[n
+1] end
301 local sum
, logt
, logt1
= 0, math
.log(t
), math
.log(1-t
);
302 for i
= 0, n
do sum
= sum
+ beta
[i
+1] * math
.exp(lbc
[i
] + i
* logt
+ (n
-i
) * logt1
) end
307 -- Description: Fisher's exact test
308 -- Dependent on: math.lbinom()
309 -- Return: left-, right- and two-tail P-values
311 Fisher's exact test for 2x2 congintency tables:
318 Reference: http://www.langsrud.com/fisher.htm
320 function math
.fisher_exact(n11
, n12
, n21
, n22
)
321 local aux
; -- keep the states of n* for acceleration
323 -- Description: hypergeometric function
324 local function hypergeo(n11
, n1_
, n_1
, n
)
325 return math
.exp(math
.lbinom(n1_
, n11
) + math
.lbinom(n
-n1_
, n_1
-n11
) - math
.lbinom(n
, n_1
));
328 -- Description: incremental hypergeometric function
329 -- Note: aux = {n11, n1_, n_1, n, p}
330 local function hypergeo_inc(n11
, n1_
, n_1
, n
)
331 if n1_
~= 0 or n_1
~= 0 or n
~= 0 then
332 aux
= {n11
, n1_
, n_1
, n
, 1};
333 else -- then only n11 is changed
335 _
, mod = math
.modf(n11
/ 11);
336 if mod ~= 0 and n11
+ aux
[4] - aux
[2] - aux
[3] ~= 0 then
337 if n11
== aux
[1] + 1 then -- increase by 1
338 aux
[5] = aux
[5] * (aux
[2] - aux
[1]) / n11
* (aux
[3] - aux
[1]) / (n11
+ aux
[4] - aux
[2] - aux
[3]);
342 if n11
== aux
[1] - 1 then -- descrease by 1
343 aux
[5] = aux
[5] * aux
[1] / (aux
[2] - n11
) * (aux
[1] + aux
[4] - aux
[2] - aux
[3]) / (aux
[3] - n11
);
350 aux
[5] = hypergeo(aux
[1], aux
[2], aux
[3], aux
[4]);
354 -- Description: computing the P-value by Fisher's exact test
355 local max, min, left
, right
, n1_
, n_1
, n
, two
, p
, q
, i
, j
;
356 n1_
, n_1
, n
= n11
+ n12
, n11
+ n21
, n11
+ n12
+ n21
+ n22
;
357 max = (n_1
< n1_
and n_1
) or n1_
; -- max n11, for the right tail
359 if min < 0 then min = 0 end -- min n11, for the left tail
360 two
, left
, right
= 1, 1, 1;
361 if min == max then return 1 end -- no need to do test
362 q
= hypergeo_inc(n11
, n1_
, n_1
, n
); -- the probability of the current table
364 i
, left
, p
= min + 1, 0, hypergeo_inc(min, 0, 0, 0);
365 while p
< 0.99999999 * q
do
366 left
, p
, i
= left
+ p
, hypergeo_inc(i
, 0, 0, 0), i
+ 1;
369 if p
< 1.00000001 * q
then left
= left
+ p
;
372 j
, right
, p
= max - 1, 0, hypergeo_inc(max, 0, 0, 0);
373 while p
< 0.99999999 * q
do
374 right
, p
, j
= right
+ p
, hypergeo_inc(j
, 0, 0, 0), j
- 1;
377 if p
< 1.00000001 * q
then right
= right
+ p
;
381 if two
> 1 then two
= 1 end
382 -- adjust left and right
383 if math
.abs(i
- n11
) < math
.abs(j
- n11
) then right
= 1 - left
+ q
;
384 else left
= 1 - right
+ q
end
385 return left
, right
, two
;
388 -- Description: Delete-m Jackknife
390 Given g groups of values with a statistics estimated from m[i] samples in
391 i-th group being t[i], compute the mean and the variance. t0 below is the
392 estimate from all samples. Reference:
394 Busing et al. (1999) Delete-m Jackknife for unequal m. Statistics and Computing, 9:3-8.
396 function math
.jackknife(g
, m
, t
, t0
)
397 local h
, n
, sum
= {}, 0, 0;
398 for j
= 1, g
do n
= n
+ m
[j
] end
399 if t0
== nil then -- When t0 is absent, estimate it in a naive way
401 for j
= 1, g
do t0
= t0
+ m
[j
] * t
[j
] end
404 local mean
, var
= 0, 0;
407 mean
= mean
+ (1 - m
[j
] / n
) * t
[j
];
409 mean
= g
* t0
- mean
; -- Eq. (8)
411 local x
= h
[j
] * t0
- (h
[j
] - 1) * t
[j
] - mean
;
412 var
= var
+ 1 / (h
[j
] - 1) * x
* x
;
418 -- Description: Pearson correlation coefficient
419 -- Input: a is an n*2 table
420 function math
.pearson(a
)
423 for _
, v
in pairs(a
) do
424 x1
, y1
= x1
+ v
[1], y1
+ v
[2]
426 -- compute the coefficient
427 x1
, y1
= x1
/ #a
, y1
/ #a
428 local x2
, y2
, xy
= 0, 0, 0
429 for _
, v
in pairs(a
) do
430 local tx
, ty
= v
[1] - x1
, v
[2] - y1
431 xy
, x2
, y2
= xy
+ tx
* ty
, x2
+ tx
* tx
, y2
+ ty
* ty
433 return xy
/ math
.sqrt(x2
) / math
.sqrt(y2
)
436 -- Description: Spearman correlation coefficient
437 function math
.spearman(a
)
438 local function aux_func(t
) -- auxiliary function
439 return (t
== 1 and 0) or (t
*t
- 1) * t
/ 12
442 for _
, v
in pairs(a
) do v
.r
= {} end
446 table.sort(a
, function(u
,v
) return u
[k
]<v
[k
] end)
450 if i
<= #a
and a
[i
-1][k
] == a
[i
][k
] then same
= same
+ 1
452 local rank
= (i
-1) * 2 - same
+ 1
453 for j
= i
- same
, i
- 1 do a
[j
].r
[k
] = rank
end
454 if same
> 1 then T
[k
], same
= T
[k
] + aux_func(same
), 1 end
457 S
[k
] = aux_func(#a
) - T
[k
]
459 -- compute the coefficient
461 for _
, v
in pairs(a
) do -- TODO: use nested loops to reduce loss of precision
462 local t
= (v
.r
[1] - v
.r
[2]) / 2
465 return (S
[1] + S
[2] - sum
) / 2 / math
.sqrt(S
[1] * S
[2])
468 -- Description: Hooke-Jeeves derivative-free optimization
469 function math
.fmin(func
, x
, data
, r
, eps
, max_calls
)
470 local n
, n_calls
= #x
, 0;
473 max_calls
= max_calls
or 50000
475 function fmin_aux(x1
, data
, fx1
, dx
) -- auxiliary function
478 x1
[k
] = x1
[k
] + dx
[k
];
479 local ftmp
= func(x1
, data
); n_calls
= n_calls
+ 1;
480 if ftmp
< fx1
then fx1
= ftmp
;
481 else -- search the opposite direction
483 x1
[k
] = x1
[k
] + dx
[k
] + dx
[k
];
484 ftmp
= func(x1
, data
); n_calls
= n_calls
+ 1;
485 if ftmp
< fx1
then fx1
= ftmp
486 else x1
[k
] = x1
[k
] - dx
[k
] end -- back to the original x[k]
489 return fx1
; -- here: fx1=f(n,x1)
492 local dx
, x1
= {}, {};
493 for k
= 1, n
do -- initial directions, based on MGJ
494 dx
[k
] = math
.abs(x
[k
]) * r
;
495 if dx
[k
] == 0 then dx
[k
] = r
end;
499 fx
= func(x
, data
); fx1
= fx
; n_calls
= n_calls
+ 1;
501 for i
= 1, n
do x1
[i
] = x
[i
] end; -- x1 = x
502 fx1
= fmin_aux(x1
, data
, fx
, dx
);
506 dx
[k
] = (x1
[k
] > x
[k
] and math
.abs(dx
[k
])) or -math
.abs(dx
[k
]);
508 x1
[k
] = x1
[k
] + x1
[k
] - t
;
511 if n_calls
>= max_calls
then break end
512 fx1
= func(x1
, data
); n_calls
= n_calls
+ 1;
513 fx1
= fmin_aux(x1
, data
, fx1
, dx
);
514 if fx1
>= fx
then break end
517 if math
.abs(x1
[k
] - x
[k
]) > .5 * math
.abs(dx
[k
]) then
522 if kk
== n
then break end
524 if radius
>= eps
then
525 if n_calls
>= max_calls
then break end
527 for k
= 1, n
do dx
[k
] = dx
[k
] * r
end
539 -- Description: matrix transpose
540 -- Required by: matrix.mul()
542 local m
, n
, x
= #a
, #a
[1], {};
545 for j
= 1, m
do x
[i
][j
] = a
[j
][i
] end
550 -- Description: matrix add
551 function matrix
.add(a
, b
)
552 assert(#a
== #b
and #a
[1] == #b
[1]);
553 local m
, n
, x
= #a
, #a
[1], {};
556 local ai
, bi
, xi
= a
[i
], b
[i
], x
[i
];
557 for j
= 1, n
do xi
[j
] = ai
[j
] + bi
[j
] end
562 -- Description: matrix mul
563 -- Dependent on: matrix.T()
564 -- Note: much slower without transpose
565 function matrix
.mul(a
, b
)
567 local m
, n
, p
, x
= #a
, #a
[1], #b
[1], {};
568 local c
= matrix
.T(b
); -- transpose for efficiency
573 local sum
, ai
, cj
= 0, a
[i
], c
[j
];
574 for k
= 1, n
do sum
= sum
+ ai
[k
] * cj
[k
] end
581 -- Description: matrix print
582 function matrix
.tostring(a
)
585 z
[i
] = table.concat(a
[i
], "\t");
587 return table.concat(z
, "\n");
590 -- Description: chi^2 test for contingency tables
591 -- Dependent on: math.igamma()
592 function matrix
.chi2(a
)
593 if #a
== 2 and #a
[1] == 2 then -- 2x2 table
595 x
= (a
[1][1] + a
[1][2]) * (a
[2][1] + a
[2][2]) * (a
[1][1] + a
[2][1]) * (a
[1][2] + a
[2][2])
596 if x
== 0 then return 0, 1, false end
597 z
= a
[1][1] * a
[2][2] - a
[1][2] * a
[2][1]
598 z
= (a
[1][1] + a
[1][2] + a
[2][1] + a
[2][2]) * z
* z
/ x
599 return z
, math
.igamma(.5, .5 * z
, true), true
600 else -- generic table
601 local rs
, cs
, n
, m
, N
, z
= {}, {}, #a
, #a
[1], 0, 0
602 for i
= 1, n
do rs
[i
] = 0 end
603 for j
= 1, m
do cs
[j
] = 0 end
604 for i
= 1, n
do -- compute column sum and row sum
605 for j
= 1, m
do cs
[j
], rs
[i
] = cs
[j
] + a
[i
][j
], rs
[i
] + a
[i
][j
] end
607 for i
= 1, n
do N
= N
+ rs
[i
] end
608 for i
= 1, n
do -- compute the chi^2 statistics
610 local E
= rs
[i
] * cs
[j
] / N
;
611 z
= z
+ (a
[i
][j
] - E
) * (a
[i
][j
] - E
) / E
614 return z
, math
.igamma(.5 * (n
-1) * (m
-1), .5 * z
, true), true;
618 -- Description: Gauss-Jordan elimination (solving equations; computing inverse)
619 -- Note: on return, a[n][n] is the inverse; b[n][m] is the solution
620 -- Reference: Section 2.1, Numerical Recipes in C, 2nd edition
621 function matrix
.solve(a
, b
)
623 local n
, m
= #a
, (b
and #b
[1]) or 0;
624 local xc
, xr
, ipiv
= {}, {}, {};
627 for j
= 1, n
do ipiv
[j
] = 0 end
635 if math
.abs(aj
[k
]) >= big
then
636 big
= math
.abs(aj
[k
]);
639 elseif ipiv
[k
] > 1 then return -2 end -- singular matrix
643 ipiv
[ic
] = ipiv
[ic
] + 1;
645 for l
= 1, n
do a
[ir
][l
], a
[ic
][l
] = a
[ic
][l
], a
[ir
][l
] end
647 for l
= 1, m
do b
[ir
][l
], b
[ic
][l
] = b
[ic
][l
], b
[ir
][l
] end
650 xr
[i
], xc
[i
] = ir
, ic
;
651 if a
[ic
][ic
] == 0 then return -3 end -- singular matrix
652 local pivinv
= 1 / a
[ic
][ic
];
654 for l
= 1, n
do a
[ic
][l
] = a
[ic
][l
] * pivinv
end
656 for l
= 1, n
do b
[ic
][l
] = b
[ic
][l
] * pivinv
end
660 local tmp
= a
[ll
][ic
];
662 local all
, aic
= a
[ll
], a
[ic
];
663 for l
= 1, n
do all
[l
] = all
[l
] - aic
[l
] * tmp
end
665 local bll
, bic
= b
[ll
], b
[ic
];
666 for l
= 1, m
do bll
[l
] = bll
[l
] - bic
[l
] * tmp
end
672 if xr
[l
] ~= xc
[l
] then
673 for k
= 1, n
do a
[k
][xr
[l]]
, a
[k
][xc
[l]]
= a
[k
][xc
[l]]
, a
[k
][xr
[l]]
end