dlzma: added fix for very rare out-of-bounds write in LZMA encoder (it can happen...
[iv.d.git] / follin / utils.d
blob52becf9acd300d59e578144675228ae41e0ecc5e
1 /* Invisible Vector Library
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 module iv.follin.utils /*is aliced*/;
19 import iv.alice;
20 import iv.follin.ftrick;
23 // ////////////////////////////////////////////////////////////////////////// //
// Select the hand-written SIMD code paths.
// They are enabled only for 32-bit x86 builds that are not compiled as
// position-independent code; every other configuration uses the portable code.
version(D_PIC) {
  // position-independent build: leave both identifiers unset
} else version(X86) {
  version = follin_use_sse;
  version = follin_use_sse2;
}
32 // ////////////////////////////////////////////////////////////////////////// //
/** Convert signed 16-bit samples to normalized ([-1..1]) floats.
 *
 * Each output sample is the input sample multiplied by `1/32768`.
 * Exactly `input.length` elements of `output` are written; `output` is never
 * resized, and any elements past `input.length` are left untouched.
 *
 * Params:
 *   input = source samples
 *   output = destination buffer; must hold at least `input.length` elements
 *
 * Fails with `assert(0)` if `output` is shorter than `input`.
 */
public void tflShort2Float (in short[] input, float[] output) nothrow @trusted @nogc {
  if (input.length > output.length) assert(0, "invalid length");
  enum float scale = 1.0f/32768.0f;
  // raw pointer on the destination side: the length check above already guarantees room
  auto dst = output.ptr;
  foreach (immutable short smp; input) *dst++ = scale*smp;
}
// table of float->short scale factors: every slot holds 32768.0
private align(64) __gshared float[256] mvol = 32768.0;
// 64-byte-aligned pointer into `mvol`; set up by the module constructor below,
// and loaded with an aligned SSE move in `tflFloat2Short`
private __gshared ubyte* mvolptr = null;

// Module constructor: make `mvolptr` point at the first 64-byte-aligned
// address inside `mvol` (in case `align(64)` was not honored).
shared static this () {
  auto base = cast(ubyte*)mvol.ptr;
  // distance past the previous 64-byte boundary
  immutable uint misalign = cast(uint)base&0x3f;
  if (misalign != 0) {
    // step forward to the next boundary...
    base += 0x40-misalign;
    // ...and rewrite the scale factors at the adjusted address
    (cast(float*)base)[0..8] = 32768.0;
  }
  mvolptr = base;
}
/** Convert normalized ([-1..1]) floats to signed 16-bit samples, with clamping.
 *
 * Every input sample is scaled by 32768 and clamped to [-32768..32767].
 * Exactly `input.length` elements of `output` are written; `output` is never
 * resized.
 *
 * When `follin_use_sse` is set, groups of four samples are converted with
 * inline SSE code, and only the remaining 0..3 samples go through the scalar
 * converter; otherwise all samples use the scalar converter.
 *
 * Params:
 *   input = source samples
 *   output = destination buffer; must hold at least `input.length` elements
 *
 * Fails with `assert(0)` if `output` is shorter than `input`.
 */
public void tflFloat2Short (in float[] input, short[] output) nothrow @trusted @nogc {
  if (output.length < input.length) assert(0, "invalid length");
  auto s = input.ptr;
  auto d = output.ptr;
  // number of samples that still have to go through the scalar converter
  size_t tail = input.length;
  version(follin_use_sse) {
    auto blen = cast(uint)input.length;
    if (blen > 0) {
      //TODO: use aligned instructions
      // Upper clamp for the scaled samples. The float->int32 conversions below
      // return 0x80000000 for values that do not fit in int32, and `packssdw`
      // would saturate that to -32768, so huge positive samples must be
      // limited while they are still floats.
      float[4] clampHi = 32767.0f;
      auto clampHiPtr = clampHi.ptr;
      asm nothrow @safe @nogc {
        mov     EAX,[mvolptr];
        movaps  XMM4,[EAX];      // XMM4: four copies of the scale factor
        mov     EAX,[clampHiPtr];
        movups  XMM6,[EAX];      // XMM6: four copies of the upper clamp
        mov     EAX,[s];         // source
        mov     EBX,[d];         // dest
        mov     ECX,[blen];      // number of samples
        shr     ECX,2;           // number of 4-sample groups
        jz      skip4part;
        // process 4 floats per step
        align 8;
      finalloopmix:
        movups  XMM0,[EAX];
        mulps   XMM0,XMM4;       // scale to the 16-bit range
        minps   XMM0,XMM6;       // clamp the upper side before converting
      }
      version(follin_use_sse2) asm nothrow @safe @nogc {
        cvttps2dq XMM1,XMM0;     // XMM1 now contains four int32 values
        packssdw  XMM1,XMM1;     // saturate to int16
        movq      [EBX],XMM1;    // four s16 == one double
      } else asm nothrow @safe @nogc {
        cvtps2pi  MM0,XMM0;      // MM0 now contains two low int32 values
        movhlps   XMM5,XMM0;     // get high floats
        cvtps2pi  MM1,XMM5;      // MM1 now contains two high int32 values
        packssdw  MM0,MM1;       // MM0 now contains 4 int16 values
        movq      [EBX],MM0;
      }
      asm nothrow @safe @nogc {
        add     EAX,16;
        add     EBX,8;
        dec     ECX;
        jnz     finalloopmix;
      skip4part:;
        // hand the advanced pointers back to the D code
        mov     [s],EAX;
        mov     [d],EBX;
      }
      version(follin_use_sse2) {} else {
        // the MMX registers were used: restore the FPU state
        asm nothrow @safe @nogc { emms; }
      }
    }
    // the SIMD loop consumed all complete groups of four
    tail &= 3;
  }
  // scalar converter: used for the leftover samples of the SIMD path, and for
  // everything when the SIMD path is compiled out
  mixin(declfcvar!"temp");
  while (tail-- > 0) {
    mixin(FAST_SCALED_FLOAT_TO_INT!("*s", "15"));
    // a single unsigned compare detects both underflow and overflow
    if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
    *d++ = cast(short)v;
    ++s;
  }
}