2 * 2-channel UHJ Decoder
4 * Copyright (c) Chris Robinson <chris.kcat@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37 #include <string_view>
38 #include <system_error>
44 #include "alnumbers.h"
48 #include "opthelpers.h"
49 #include "phase_shifter.h"
53 #include "win_main_utf8.h"
59 void operator()(gsl::owner
<FILE*> file
) { fclose(file
); }
61 using FilePtr
= std::unique_ptr
<FILE,FileDeleter
>;
63 struct SndFileDeleter
{
64 void operator()(SNDFILE
*sndfile
) { sf_close(sndfile
); }
66 using SndFilePtr
= std::unique_ptr
<SNDFILE
,SndFileDeleter
>;
/* Short names for the unsigned integer types used when serializing values. */
using ubyte = unsigned char;
using ushort = unsigned short;
using uint = unsigned int;
/* Double-precision complex number. */
using complex_d = std::complex<double>;

/* The four raw bytes of one 32-bit sample, in file order. */
using byte4 = std::array<std::byte, 4>;
77 constexpr std::array
<ubyte
,16> SUBTYPE_BFORMAT_FLOAT
{
78 0x03, 0x00, 0x00, 0x00, 0x21, 0x07, 0xd3, 0x11, 0x86, 0x44, 0xc8, 0xc1,
79 0xca, 0x00, 0x00, 0x00
/* Writes the low 16 bits of val to f as two bytes, least significant byte
 * first. The result of fwrite is not checked here; callers detect failures
 * through ferror on the stream.
 */
void fwrite16le(ushort val, FILE *f)
{
    const unsigned char bytes[2]{
        static_cast<unsigned char>(val & 0xff),
        static_cast<unsigned char>((val >> 8) & 0xff)
    };
    fwrite(bytes, 1, sizeof(bytes), f);
}
/* Writes the low 32 bits of val to f as four bytes, least significant byte
 * first. The result of fwrite is not checked here; callers detect failures
 * through ferror on the stream.
 */
void fwrite32le(uint val, FILE *f)
{
    unsigned char bytes[4];
    for(unsigned int idx{0};idx < 4;++idx)
        bytes[idx] = static_cast<unsigned char>((val >> (idx*8)) & 0xff);
    fwrite(bytes, 1, sizeof(bytes), f);
}
95 byte4
f32AsLEBytes(const float &value
)
98 std::memcpy(ret
.data(), &value
, 4);
99 if constexpr(al::endian::native
== al::endian::big
)
101 std::swap(ret
[0], ret
[3]);
102 std::swap(ret
[1], ret
[2]);
108 constexpr uint BufferLineSize
{1024};
110 using FloatBufferLine
= std::array
<float,BufferLineSize
>;
111 using FloatBufferSpan
= al::span
<float,BufferLineSize
>;
115 constexpr static std::size_t sFilterDelay
{1024};
117 alignas(16) std::array
<float,BufferLineSize
+sFilterDelay
> mS
{};
118 alignas(16) std::array
<float,BufferLineSize
+sFilterDelay
> mD
{};
119 alignas(16) std::array
<float,BufferLineSize
+sFilterDelay
> mT
{};
120 alignas(16) std::array
<float,BufferLineSize
+sFilterDelay
> mQ
{};
122 /* History for the FIR filter. */
123 alignas(16) std::array
<float,sFilterDelay
-1> mDTHistory
{};
124 alignas(16) std::array
<float,sFilterDelay
-1> mSHistory
{};
126 alignas(16) std::array
<float,BufferLineSize
+ sFilterDelay
*2> mTemp
{};
128 void decode(const al::span
<const float> InSamples
, const std::size_t InChannels
,
129 const al::span
<FloatBufferLine
> OutSamples
, const std::size_t SamplesToDo
);
130 void decode2(const al::span
<const float> InSamples
, const al::span
<FloatBufferLine
> OutSamples
,
131 const std::size_t SamplesToDo
);
134 const PhaseShifterT
<UhjDecoder::sFilterDelay
*2> PShift
{};
/* Decoding UHJ is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T)
 * X = 0.418496*S - j(0.828331*D + 0.767820*T)
 * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
 * Z = 1.023332*Q
 *
 * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2-
 * channel excludes Q and T. The B-Format signal reconstructed from 2-channel
 * UHJ should not be run through a normal B-Format decoder, as it needs
 * different shelf filters.
 *
 * NOTE: Some sources specify
 *
 * S = (Left + Right)/2
 * D = (Left - Right)/2
 *
 * However, this is incorrect. It's halving Left and Right even though they
 * were already halved during encoding, causing S and D to be half what they
 * initially were at the encoding stage. This division is not present in
 * Gerzon's original paper for deriving Sigma (S) or Delta (D) from the L and R
 * signals. As proof, taking Y for example:
 *
 * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
 *
 * * Plug in the encoding parameters, using ? as a placeholder for whether S
 *   and D should receive an extra 0.5 factor
 * Y = 0.795968*(j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y)*? -
 *     0.676392*(j(-0.1432*W + 0.6512*X) - 0.7071068*Y) +
 *     0.186633*j(0.9396926*W + 0.1855740*X)*?
 *
 * * Move common factors in
 * Y = (j(-0.3420201*0.795968*?*W + 0.5098604*0.795968*?*X) + 0.6554516*0.795968*?*Y) -
 *     (j(-0.1432*0.676392*W + 0.6512*0.676392*X) - 0.7071068*0.676392*Y) +
 *     j(0.9396926*0.186633*?*W + 0.1855740*0.186633*?*X)
 *
 * * Clean up extraneous groupings
 * Y = j(-0.3420201*0.795968*?*W + 0.5098604*0.795968*?*X) + 0.6554516*0.795968*?*Y -
 *     j(-0.1432*0.676392*W + 0.6512*0.676392*X) + 0.7071068*0.676392*Y +
 *     j(0.9396926*0.186633*?*W + 0.1855740*0.186633*?*X)
 *
 * * Move phase shifts together and combine them
 * Y = j(-0.3420201*0.795968*?*W + 0.5098604*0.795968*?*X - -0.1432*0.676392*W -
 *       0.6512*0.676392*X + 0.9396926*0.186633*?*W + 0.1855740*0.186633*?*X) +
 *     0.6554516*0.795968*?*Y + 0.7071068*0.676392*Y
 *
 * * Reorder terms
 * Y = j(-0.3420201*0.795968*?*W + 0.1432*0.676392*W + 0.9396926*0.186633*?*W +
 *        0.5098604*0.795968*?*X + -0.6512*0.676392*X + 0.1855740*0.186633*?*X) +
 *     0.7071068*0.676392*Y + 0.6554516*0.795968*?*Y
 *
 * * Move common factors out
 * Y = j((-0.3420201*0.795968*? + 0.1432*0.676392 + 0.9396926*0.186633*?)*W +
 *       ( 0.5098604*0.795968*? + -0.6512*0.676392 + 0.1855740*0.186633*?)*X) +
 *     (0.7071068*0.676392 + 0.6554516*0.795968*?)*Y
 *
 * * Result w/ 0.5 factor:
 * -0.3420201*0.795968*0.5 + 0.1432*0.676392 + 0.9396926*0.186633*0.5 =  0.04843*W
 *  0.5098604*0.795968*0.5 + -0.6512*0.676392 + 0.1855740*0.186633*0.5 = -0.22023*X
 *  0.7071068*0.676392                        + 0.6554516*0.795968*0.5 =  0.73914*Y
 * -> Y = j(0.04843*W + -0.22023*X) + 0.73914*Y
 *
 * * Result w/o 0.5 factor:
 * -0.3420201*0.795968 + 0.1432*0.676392 + 0.9396926*0.186633 = 0.00000*W
 *  0.5098604*0.795968 + -0.6512*0.676392 + 0.1855740*0.186633 = 0.00000*X
 *  0.7071068*0.676392                    + 0.6554516*0.795968 = 1.00000*Y
 * -> Y = j(0.00000*W + 0.00000*X) + 1.00000*Y
 *
 * Not halving produces a result matching the original input.
 */
210 void UhjDecoder::decode(const al::span
<const float> InSamples
, const std::size_t InChannels
,
211 const al::span
<FloatBufferLine
> OutSamples
, const std::size_t SamplesToDo
)
213 ASSUME(SamplesToDo
> 0);
215 auto woutput
= al::span
{OutSamples
[0]};
216 auto xoutput
= al::span
{OutSamples
[1]};
217 auto youtput
= al::span
{OutSamples
[2]};
219 /* Add a delay to the input channels, to align it with the all-passed
223 /* S = Left + Right */
224 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
225 mS
[sFilterDelay
+i
] = InSamples
[i
*InChannels
+ 0] + InSamples
[i
*InChannels
+ 1];
227 /* D = Left - Right */
228 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
229 mD
[sFilterDelay
+i
] = InSamples
[i
*InChannels
+ 0] - InSamples
[i
*InChannels
+ 1];
234 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
235 mT
[sFilterDelay
+i
] = InSamples
[i
*InChannels
+ 2];
240 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
241 mQ
[sFilterDelay
+i
] = InSamples
[i
*InChannels
+ 3];
244 /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
245 auto tmpiter
= std::copy(mDTHistory
.cbegin(), mDTHistory
.cend(), mTemp
.begin());
246 std::transform(mD
.cbegin(), mD
.cbegin()+SamplesToDo
+sFilterDelay
, mT
.cbegin(), tmpiter
,
247 [](const float d
, const float t
) noexcept
{ return 0.828331f
*d
+ 0.767820f
*t
; });
248 std::copy_n(mTemp
.cbegin()+SamplesToDo
, mDTHistory
.size(), mDTHistory
.begin());
249 PShift
.process(xoutput
.first(SamplesToDo
), mTemp
);
251 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
253 /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
254 woutput
[i
] = 0.981532f
*mS
[i
] + 0.197484f
*xoutput
[i
];
255 /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
256 xoutput
[i
] = 0.418496f
*mS
[i
] - xoutput
[i
];
259 /* Precompute j*S and store in youtput. */
260 tmpiter
= std::copy(mSHistory
.cbegin(), mSHistory
.cend(), mTemp
.begin());
261 std::copy_n(mS
.cbegin(), SamplesToDo
+sFilterDelay
, tmpiter
);
262 std::copy_n(mTemp
.cbegin()+SamplesToDo
, mSHistory
.size(), mSHistory
.begin());
263 PShift
.process(youtput
.first(SamplesToDo
), mTemp
);
265 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
267 /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
268 youtput
[i
] = 0.795968f
*mD
[i
] - 0.676392f
*mT
[i
] + 0.186633f
*youtput
[i
];
271 if(OutSamples
.size() > 3)
273 auto zoutput
= al::span
{OutSamples
[3]};
275 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
276 zoutput
[i
] = 1.023332f
*mQ
[i
];
279 std::copy(mS
.begin()+SamplesToDo
, mS
.begin()+SamplesToDo
+sFilterDelay
, mS
.begin());
280 std::copy(mD
.begin()+SamplesToDo
, mD
.begin()+SamplesToDo
+sFilterDelay
, mD
.begin());
281 std::copy(mT
.begin()+SamplesToDo
, mT
.begin()+SamplesToDo
+sFilterDelay
, mT
.begin());
282 std::copy(mQ
.begin()+SamplesToDo
, mQ
.begin()+SamplesToDo
+sFilterDelay
, mQ
.begin());
/* This is an alternative equation for decoding 2-channel UHJ. Not sure what
 * the intended benefit is over the above equation as this slightly reduces the
 * amount of the original left response and has more of the phase-shifted
 * forward response on the left response.
 *
 * This decoding is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.981530*S + j*0.163585*D
 * X = 0.418504*S - j*0.828347*D
 * Y = 0.762956*D + j*0.384230*S
 *
 * where j is a +90 degree phase shift.
 *
 * NOTE: As above, S and D should not be halved. The only consequence of
 * halving here is merely a -6dB reduction in output, but it's still incorrect.
 */
304 void UhjDecoder::decode2(const al::span
<const float> InSamples
,
305 const al::span
<FloatBufferLine
> OutSamples
, const std::size_t SamplesToDo
)
307 ASSUME(SamplesToDo
> 0);
309 auto woutput
= al::span
{OutSamples
[0]};
310 auto xoutput
= al::span
{OutSamples
[1]};
311 auto youtput
= al::span
{OutSamples
[2]};
313 /* S = Left + Right */
314 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
315 mS
[sFilterDelay
+i
] = InSamples
[i
*2 + 0] + InSamples
[i
*2 + 1];
317 /* D = Left - Right */
318 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
319 mD
[sFilterDelay
+i
] = InSamples
[i
*2 + 0] - InSamples
[i
*2 + 1];
321 /* Precompute j*D and store in xoutput. */
322 auto tmpiter
= std::copy(mDTHistory
.cbegin(), mDTHistory
.cend(), mTemp
.begin());
323 std::copy_n(mD
.cbegin(), SamplesToDo
+sFilterDelay
, tmpiter
);
324 std::copy_n(mTemp
.cbegin()+SamplesToDo
, mDTHistory
.size(), mDTHistory
.begin());
325 PShift
.process(xoutput
.first(SamplesToDo
), mTemp
);
327 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
329 /* W = 0.981530*S + j*0.163585*D */
330 woutput
[i
] = 0.981530f
*mS
[i
] + 0.163585f
*xoutput
[i
];
331 /* X = 0.418504*S - j*0.828347*D */
332 xoutput
[i
] = 0.418504f
*mS
[i
] - 0.828347f
*xoutput
[i
];
335 /* Precompute j*S and store in youtput. */
336 tmpiter
= std::copy(mSHistory
.cbegin(), mSHistory
.cend(), mTemp
.begin());
337 std::copy_n(mS
.cbegin(), SamplesToDo
+sFilterDelay
, tmpiter
);
338 std::copy_n(mTemp
.cbegin()+SamplesToDo
, mSHistory
.size(), mSHistory
.begin());
339 PShift
.process(youtput
.first(SamplesToDo
), mTemp
);
341 for(std::size_t i
{0};i
< SamplesToDo
;++i
)
343 /* Y = 0.762956*D + j*0.384230*S */
344 youtput
[i
] = 0.762956f
*mD
[i
] + 0.384230f
*youtput
[i
];
347 std::copy(mS
.begin()+SamplesToDo
, mS
.begin()+SamplesToDo
+sFilterDelay
, mS
.begin());
348 std::copy(mD
.begin()+SamplesToDo
, mD
.begin()+SamplesToDo
+sFilterDelay
, mD
.begin());
352 int main(al::span
<std::string_view
> args
)
354 if(args
.size() < 2 || args
[1] == "-h" || args
[1] == "--help")
356 printf("Usage: %.*s <[options] filename.wav...>\n\n"
358 " --general Use the general equations for 2-channel UHJ (default).\n"
359 " --alternative Use the alternative equations for 2-channel UHJ.\n"
361 "Note: When decoding 2-channel UHJ to an .amb file, the result should not use\n"
362 "the normal B-Format shelf filters! Only 3- and 4-channel UHJ can accurately\n"
363 "reconstruct the original B-Format signal.",
364 al::sizei(args
[0]), args
[0].data());
368 std::size_t num_files
{0}, num_decoded
{0};
369 bool use_general
{true};
370 for(size_t fidx
{1};fidx
< args
.size();++fidx
)
372 if(args
[fidx
] == "--general")
377 if(args
[fidx
] == "--alternative")
384 SndFilePtr infile
{sf_open(std::string
{args
[fidx
]}.c_str(), SFM_READ
, &ininfo
)};
387 fprintf(stderr
, "Failed to open %.*s\n", al::sizei(args
[fidx
]), args
[fidx
].data());
390 if(sf_command(infile
.get(), SFC_WAVEX_GET_AMBISONIC
, nullptr, 0) == SF_AMBISONIC_B_FORMAT
)
392 fprintf(stderr
, "%.*s is already B-Format\n", al::sizei(args
[fidx
]),
397 if(ininfo
.channels
== 2)
399 else if(ininfo
.channels
== 3 || ininfo
.channels
== 4)
400 outchans
= static_cast<uint
>(ininfo
.channels
);
403 fprintf(stderr
, "%.*s is not a 2-, 3-, or 4-channel file\n", al::sizei(args
[fidx
]),
407 printf("Converting %.*s from %d-channel UHJ%s...\n", al::sizei(args
[fidx
]),
408 args
[fidx
].data(), ininfo
.channels
,
409 (ininfo
.channels
== 2) ? use_general
? " (general)" : " (alternative)" : "");
411 std::string outname
{args
[fidx
]};
412 auto lastslash
= outname
.find_last_of('/');
413 if(lastslash
!= std::string::npos
)
414 outname
.erase(0, lastslash
+1);
415 auto lastdot
= outname
.find_last_of('.');
416 if(lastdot
!= std::string::npos
)
417 outname
.resize(lastdot
+1);
420 FilePtr outfile
{fopen(outname
.c_str(), "wb")};
423 fprintf(stderr
, "Failed to create %s\n", outname
.c_str());
427 fputs("RIFF", outfile
.get());
428 fwrite32le(0xFFFFFFFF, outfile
.get()); // 'RIFF' header len; filled in at close
430 fputs("WAVE", outfile
.get());
432 fputs("fmt ", outfile
.get());
433 fwrite32le(40, outfile
.get()); // 'fmt ' header len; 40 bytes for EXTENSIBLE
435 // 16-bit val, format type id (extensible: 0xFFFE)
436 fwrite16le(0xFFFE, outfile
.get());
437 // 16-bit val, channel count
438 fwrite16le(static_cast<ushort
>(outchans
), outfile
.get());
439 // 32-bit val, frequency
440 fwrite32le(static_cast<uint
>(ininfo
.samplerate
), outfile
.get());
441 // 32-bit val, bytes per second
442 fwrite32le(static_cast<uint
>(ininfo
.samplerate
)*outchans
*uint
{sizeof(float)}, outfile
.get());
443 // 16-bit val, frame size
444 fwrite16le(static_cast<ushort
>(sizeof(float)*outchans
), outfile
.get());
445 // 16-bit val, bits per sample
446 fwrite16le(static_cast<ushort
>(sizeof(float)*8), outfile
.get());
447 // 16-bit val, extra byte count
448 fwrite16le(22, outfile
.get());
449 // 16-bit val, valid bits per sample
450 fwrite16le(static_cast<ushort
>(sizeof(float)*8), outfile
.get());
451 // 32-bit val, channel mask
452 fwrite32le(0, outfile
.get());
453 // 16 byte GUID, sub-type format
454 fwrite(SUBTYPE_BFORMAT_FLOAT
.data(), 1, SUBTYPE_BFORMAT_FLOAT
.size(), outfile
.get());
456 fputs("data", outfile
.get());
457 fwrite32le(0xFFFFFFFF, outfile
.get()); // 'data' header len; filled in at close
458 if(ferror(outfile
.get()))
460 fprintf(stderr
, "Error writing wave file header: %s (%d)\n",
461 std::generic_category().message(errno
).c_str(), errno
);
465 auto DataStart
= ftell(outfile
.get());
467 auto decoder
= std::make_unique
<UhjDecoder
>();
468 auto inmem
= std::vector
<float>(size_t{BufferLineSize
}*static_cast<uint
>(ininfo
.channels
));
469 auto decmem
= al::vector
<std::array
<float,BufferLineSize
>, 16>(outchans
);
470 auto outmem
= std::vector
<byte4
>(size_t{BufferLineSize
}*outchans
);
472 /* A number of initial samples need to be skipped to cut the lead-in
473 * from the all-pass filter delay. The same number of samples need to
474 * be fed through the decoder after reaching the end of the input file
475 * to ensure none of the original input is lost.
477 std::size_t LeadIn
{UhjDecoder::sFilterDelay
};
478 sf_count_t LeadOut
{UhjDecoder::sFilterDelay
};
481 sf_count_t sgot
{sf_readf_float(infile
.get(), inmem
.data(), BufferLineSize
)};
482 sgot
= std::max
<sf_count_t
>(sgot
, 0);
483 if(sgot
< BufferLineSize
)
485 const sf_count_t remaining
{std::min(BufferLineSize
- sgot
, LeadOut
)};
486 std::fill_n(inmem
.begin() + sgot
*ininfo
.channels
, remaining
*ininfo
.channels
, 0.0f
);
488 LeadOut
-= remaining
;
491 auto got
= static_cast<std::size_t>(sgot
);
492 if(ininfo
.channels
> 2 || use_general
)
493 decoder
->decode(inmem
, static_cast<uint
>(ininfo
.channels
), decmem
, got
);
495 decoder
->decode2(inmem
, decmem
, got
);
503 for(std::size_t i
{0};i
< got
;++i
)
505 /* Attenuate by -3dB for FuMa output levels. */
506 constexpr auto inv_sqrt2
= static_cast<float>(1.0/al::numbers::sqrt2
);
507 for(std::size_t j
{0};j
< outchans
;++j
)
508 outmem
[i
*outchans
+ j
] = f32AsLEBytes(decmem
[j
][LeadIn
+i
] * inv_sqrt2
);
512 std::size_t wrote
{fwrite(outmem
.data(), sizeof(byte4
)*outchans
, got
, outfile
.get())};
515 fprintf(stderr
, "Error writing wave data: %s (%d)\n",
516 std::generic_category().message(errno
).c_str(), errno
);
521 auto DataEnd
= ftell(outfile
.get());
522 if(DataEnd
> DataStart
)
524 long dataLen
{DataEnd
- DataStart
};
525 if(fseek(outfile
.get(), 4, SEEK_SET
) == 0)
526 fwrite32le(static_cast<uint
>(DataEnd
-8), outfile
.get()); // 'WAVE' header len
527 if(fseek(outfile
.get(), DataStart
-4, SEEK_SET
) == 0)
528 fwrite32le(static_cast<uint
>(dataLen
), outfile
.get()); // 'data' header len
530 fflush(outfile
.get());
534 fprintf(stderr
, "Failed to decode any input files\n");
535 else if(num_decoded
< num_files
)
536 fprintf(stderr
, "Decoded %zu of %zu files\n", num_decoded
, num_files
);
538 printf("Decoded %zu file%s\n", num_decoded
, (num_decoded
==1)?"":"s");
544 int main(int argc
, char *argv
[])
547 auto args
= std::vector
<std::string_view
>(static_cast<unsigned int>(argc
));
548 std::copy_n(argv
, args
.size(), args
.begin());
549 return main(al::span
{args
});