audioformats.wav source code

1 /**
2 Supports Microsoft WAV audio file format.
3 
4 Copyright: Guillaume Piolat 2015-2020.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module audioformats.wav;
8 
9 import core.stdc.math: round, floor, fabs;
10 import core.stdc.stdlib: rand, RAND_MAX;
11 import dplug.core.nogc;
12 import dplug.core.vec;
13 import audioformats.io;
14 
15 
version(decodeWAV)
{
    /// Decoder for Microsoft RIFF/WAVE streams, used both for scanning
    /// (parsing the chunks) and for decoding the samples.
    ///
    /// Usage: construct, call `scan()` once, then use `readSamples`,
    /// `seekPosition` and `tellPosition`.
    final class WAVDecoder
    {
    public:
    @nogc:

        /// SubFormat GUID that announces IEEE float samples in a WAVE_FORMAT_EXTENSIBLE 'fmt ' chunk.
        static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_IEEE_FLOAT = 
        [3, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113];

        /// SubFormat GUID that announces integer PCM samples in a WAVE_FORMAT_EXTENSIBLE 'fmt ' chunk.
        static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_PCM = 
        [1, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113];

        /// Params:
        ///     io       = I/O callbacks used for every stream access.
        ///     userData = Opaque pointer passed back to each callback.
        this(IOCallbacks* io, void* userData) nothrow
        {
            _io = io;
            _userData = userData;
        }

        /// Parses every chunk of the stream.
        /// After scan, we know _sampleRate, _lengthInFrames, and _channels, and can call `readSamples`.
        /// The stream is left positioned on the first sample.
        /// Throws: a `mallocNew`-allocated `Exception` if the stream is not a supported WAV file.
        void scan()
        {
            // check RIFF header
            {
                uint chunkId, chunkSize;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize);
                if (chunkId != RIFFChunkId!"RIFF")
                    throw mallocNew!Exception("Expected RIFF chunk.");

                if (chunkSize < 4)
                    throw mallocNew!Exception("RIFF chunk is too small to contain a format.");

                if (_io.read_uint_BE(_userData) !=  RIFFChunkId!"WAVE")
                    throw mallocNew!Exception("Expected WAVE format.");
            }

            bool foundFmt = false;
            bool foundData = false;

            int bitsPerSample;

            while (!_io.nothingToReadAnymore(_userData))
            {
                // Some corrupted WAV files in the wild finish with one
                // extra 0 byte after an AFAn chunk, very odd
                if (_io.remainingBytesToRead(_userData) == 1)
                {
                    if (_io.peek_ubyte(_userData) == 0)
                        break;
                }

                // Question: is there any reason to parse the whole WAV file? This prevents streaming.

                uint chunkId, chunkSize;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize); 
                if (chunkId == RIFFChunkId!"fmt ")
                {
                    if (foundFmt)
                        throw mallocNew!Exception("Found several 'fmt ' chunks in RIFF file.");

                    foundFmt = true;

                    if (chunkSize < 16)
                        throw mallocNew!Exception("Expected at least 16 bytes in 'fmt ' chunk."); // found in real-world for the moment: 16 or 40 bytes

                    _audioFormat = _io.read_ushort_LE(_userData);
                    bool isWFE = _audioFormat == WAVE_FORMAT_EXTENSIBLE;

                    if (_audioFormat != LinearPCM && _audioFormat != FloatingPointIEEE && !isWFE)
                        throw mallocNew!Exception("Unsupported audio format, only PCM and IEEE float and WAVE_FORMAT_EXTENSIBLE are supported.");

                    _channels = _io.read_ushort_LE(_userData);

                    // A frame of zero channels has a size of zero bytes, which would
                    // lead to a division by zero when sizing the 'data' chunk.
                    if (_channels == 0)
                        throw mallocNew!Exception("Unsupported number of channels.");

                    _sampleRate = _io.read_uint_LE(_userData);
                    if (_sampleRate <= 0)
                        throw mallocNew!Exception("Unsupported sample-rate."); // we do not support sample-rate higher than 2^31hz

                    uint bytesPerSec = _io.read_uint_LE(_userData); // read to advance the stream, value unused
                    int bytesPerFrame = _io.read_ushort_LE(_userData);
                    bitsPerSample = _io.read_ushort_LE(_userData);

                    if (bitsPerSample != 8 && bitsPerSample != 16 && bitsPerSample != 24 && bitsPerSample != 32 && bitsPerSample != 64) 
                        throw mallocNew!Exception("Unsupported bitdepth");

                    if (bytesPerFrame != (bitsPerSample / 8) * _channels)
                        throw mallocNew!Exception("Invalid bytes-per-frame, data might be corrupted.");

                    // Sometimes there is no cbSize
                    if (chunkSize >= 18)
                    {
                        ushort cbSize = _io.read_ushort_LE(_userData);

                        if (isWFE)
                        {
                            // The extension holds 22 bytes, and all of them must fit in the
                            // chunk: else the final skip would underflow into a huge offset.
                            if (cbSize < 22 || chunkSize < 18 + 2 + 4 + 16)
                                throw mallocNew!Exception("Unsupported WAVE_FORMAT_EXTENSIBLE.");

                            ushort wReserved = _io.read_ushort_LE(_userData);   // read to advance the stream, value unused
                            uint dwChannelMask = _io.read_uint_LE(_userData);   // ditto
                            ubyte[16] SubFormat = _io.read_guid(_userData);

                            // The GUID gives the actual sample encoding.
                            if (SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
                                _audioFormat = FloatingPointIEEE;
                            else if (SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
                                _audioFormat = LinearPCM;
                            else
                                throw mallocNew!Exception("Unsupported GUID in WAVE_FORMAT_EXTENSIBLE.");

                            _io.skip(chunkSize - (18 + 2 + 4 + 16), _userData);
                        }
                        else
                        {
                            _io.skip(chunkSize - 18, _userData);
                        }
                    }
                    else
                    {
                        _io.skip(chunkSize - 16, _userData);
                    }

                    // Reject here the bit-depths that `readSamples` cannot decode, instead
                    // of letting every later read silently return 0 frames.
                    if (_audioFormat == FloatingPointIEEE)
                    {
                        if (bitsPerSample != 32 && bitsPerSample != 64)
                            throw mallocNew!Exception("Unsupported bit-depth for floating point data, should be 32 or 64.");
                    }
                    else if (bitsPerSample == 64)
                    {
                        throw mallocNew!Exception("Unsupported bit-depth for integer PCM data, should be 8, 16, 24 or 32 bits.");
                    }
                }
                else if (chunkId == RIFFChunkId!"data")
                {
                    if (foundData)
                        throw mallocNew!Exception("Found several 'data' chunks in RIFF file.");

                    if (!foundFmt)
                        throw mallocNew!Exception("'fmt ' chunk expected before the 'data' chunk.");

                    _bytePerSample = bitsPerSample / 8;
                    uint frameSize = _channels * _bytePerSample; // never 0: both factors were validated in 'fmt '
                    if (chunkSize % frameSize != 0)
                        throw mallocNew!Exception("Remaining bytes in 'data' chunk, inconsistent with audio data type.");

                    uint numFrames = chunkSize / frameSize;
                    _lengthInFrames = numFrames;

                    _samplesOffsetInFile = _io.tell(_userData);

                    _io.skip(chunkSize, _userData); // skip, will read later
                    foundData = true;
                }
                else
                {
                    // ignore unknown chunks
                    _io.skip(chunkSize, _userData);
                }

                // RIFF chunks are word-aligned: an odd-sized chunk is followed by one zero
                // pad byte that is not counted in its size. A chunk identifier never starts
                // with a zero byte, hence only skip when the next byte is 0, which also
                // tolerates files written without the padding.
                if ((chunkSize & 1) != 0
                    && !_io.nothingToReadAnymore(_userData)
                    && _io.peek_ubyte(_userData) == 0)
                {
                    _io.skip(1, _userData);
                }
            }

            if (!foundFmt)
                throw mallocNew!Exception("'fmt ' chunk not found.");

            if (!foundData)
                throw mallocNew!Exception("'data' chunk not found.");

            // Get ready to decode
            _io.seek(_samplesOffsetInFile, false, _userData);
            _framePosition = 0; // seek to start
        }

        /// Moves the decoding position to `absoluteFrame`, counted from the first frame.
        /// Seeking to exactly `_lengthInFrames` (end of data) is allowed.
        /// Returns: false in case of failure.
        bool seekPosition(int absoluteFrame)
        {
            if (absoluteFrame < 0)
                return false;
            if (absoluteFrame > _lengthInFrames)
                return false;

            // 64-bit arithmetic, so that the byte offset cannot wrap.
            long frameSize = cast(long)_channels * _bytePerSample;
            long pos = _samplesOffsetInFile + absoluteFrame * frameSize;
            _io.seek(pos, false, _userData);
            _framePosition = absoluteFrame;
            return true;
        }

        /// Returns: position in absolute number of frames since beginning.
        int tellPosition()
        {
            return _framePosition;
        }

        /// Reads interleaved samples, converted to `T`.
        /// Integer PCM is scaled by 1/127, 1/32767, 1/8388607 or 1/2147483648
        /// for 8, 16, 24 and 32-bit data respectively.
        /// Params:
        ///     outData   = Destination, should have enough room for `maxFrames * _channels` samples.
        ///     maxFrames = Maximum number of frames to read. Zero or negative reads nothing.
        /// Returns: Frames actually read. 0 if a read failed, in which case the decoding
        ///          position is not advanced; the stream itself may have moved, so call
        ///          `seekPosition` before reading again.
        int readSamples(T)(T* outData, int maxFrames) nothrow
        {
            assert(_framePosition <= _lengthInFrames);

            // A negative count must neither move the position backwards nor be
            // reinterpreted as a huge unsigned number of samples.
            if (maxFrames <= 0 || _channels <= 0)
                return 0;

            // How much frames can we decode?
            // Unsigned, since the file may hold more than int.max frames.
            uint available = _lengthInFrames - _framePosition;
            int frames = maxFrames;
            if (cast(uint)frames > available)
                frames = cast(int)available; // fits: available < maxFrames <= int.max

            // Keep `frames * _channels` representable as an int.
            int mostFrames = int.max / _channels;
            if (frames > mostFrames)
                frames = mostFrames;

            int numSamples = frames * _channels;

            int n = 0;

            try
            {
                if (_audioFormat == FloatingPointIEEE)
                {
                    if (_bytePerSample == 4)
                    {
                        for (n = 0; n < numSamples; ++n)
                            outData[n] = _io.read_float_LE(_userData);
                    }
                    else if (_bytePerSample == 8)
                    {
                        for (n = 0; n < numSamples; ++n)
                            outData[n] = _io.read_double_LE(_userData);
                    }
                    else
                        throw mallocNew!Exception("Unsupported bit-depth for floating point data, should be 32 or 64.");
                }
                else if (_audioFormat == LinearPCM)
                {
                    if (_bytePerSample == 1)
                    {
                        // 8-bit WAV data is unsigned, centered on 128
                        for (n = 0; n < numSamples; ++n)
                        {
                            ubyte b = _io.read_ubyte(_userData);
                            outData[n] = (b - 128) / 127.0;
                        }
                    }
                    else if (_bytePerSample == 2)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            short s = _io.read_ushort_LE(_userData); // same bits, seen as signed
                            outData[n] = s / 32767.0;
                        }
                    }
                    else if (_bytePerSample == 3)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            int s = _io.read_24bits_LE(_userData);
                            // duplicate sign bit
                            s = (s << 8) >> 8;
                            outData[n] = s / 8388607.0;
                        }
                    }
                    else if (_bytePerSample == 4)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            int s = _io.read_uint_LE(_userData); // same bits, seen as signed
                            outData[n] = s / 2147483648.0;
                        }
                    }
                    else
                        throw mallocNew!Exception("Unsupported bit-depth for integer PCM data, should be 8, 16, 24 or 32 bits.");
                }
                else
                    assert(false); // should have been handled earlier, crash
            }
            catch(Exception e)
            {
                destroyFree(e); // well this is really unexpected, since no read should fail in this loop
                return 0;
            }

            // Only count the frames once they were all decoded.
            _framePosition += frames;

            // Return number of frames read
            return frames;
        }

    package:
        int _sampleRate;            // in Hz, from the 'fmt ' chunk
        int _channels;              // channels per frame, from the 'fmt ' chunk
        int _audioFormat;           // LinearPCM or FloatingPointIEEE once scanned
        int _bytePerSample;         // bytes per sample of one channel
        long _samplesOffsetInFile;  // stream offset of the first sample
        uint _lengthInFrames;       // number of frames in the 'data' chunk
        uint _framePosition;        // next frame to be read

    private:
        void* _userData;
        IOCallbacks* _io;
    }
}
299 
300 
version(encodeWAV)
{
    /// Encoder that writes a Microsoft RIFF/WAVE stream.
    ///
    /// Usage: construct (writes the headers), call `writeSamples` any number
    /// of times, then `finalizeEncoding` to patch the chunk sizes.
    final class WAVEncoder
    {
    public:
    @nogc:
        /// Sample encodings that can be written.
        enum Format
        {
            s8,
            s16le,
            s24le,
            fp32le,
            fp64le,
        }

        /// Returns: true if `fmt` is an integer PCM format, false if floating-point.
        static bool isFormatLinearPCM(Format fmt)
        {
            return fmt <= Format.s24le;
        }

        /// Writes the 'RIFF', 'fmt ' and 'data' headers, with placeholder sizes
        /// that `finalizeEncoding` overwrites.
        /// Params:
        ///     io           = I/O callbacks used for every stream access.
        ///     userData     = Opaque pointer passed back to each callback.
        ///     sampleRate   = Sample rate written in the 'fmt ' chunk.
        ///     numChannels  = Channels per frame, must be in 1 .. 1024.
        ///     format       = Sample encoding.
        ///     enableDither = Whether integer formats get dithered before quantization.
        /// Throws: a `mallocNew`-allocated `Exception` if `numChannels` is unsupported.
        this(IOCallbacks* io, void* userData, int sampleRate, int numChannels, Format format, bool enableDither)
        {
            _io = io;
            _userData = userData;
            _channels = numChannels;
            _format = format;
            _enableDither = enableDither;

            // Avoids a number of edge cases.
            // Zero channels is rejected too: such a file has zero-sized frames.
            if (_channels <= 0 || _channels > 1024)
                throw mallocNew!Exception("Can't save a WAV with this numnber of channels.");

            // RIFF header
            // its size will be overwritten at finalizing
            _riffLengthOffset = _io.tell(_userData) + 4;
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"RIFF", 0);
            _io.write_uint_BE(_userData, RIFFChunkId!"WAVE");

            // 'fmt ' sub-chunk
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"fmt ", 0x10);
            _io.write_ushort_LE(_userData, isFormatLinearPCM(format) ? LinearPCM : FloatingPointIEEE);
            _io.write_ushort_LE(_userData, cast(ushort)(_channels));
            _io.write_uint_LE(_userData, sampleRate);

            size_t bytesPerSec = sampleRate * cast(size_t) frameSize();
            _io.write_uint_LE(_userData,  cast(uint)(bytesPerSec));

            int bytesPerFrame = frameSize();
            _io.write_ushort_LE(_userData, cast(ushort)bytesPerFrame);

            _io.write_ushort_LE(_userData, cast(ushort)(sampleSize() * 8));

            // data sub-chunk
            _dataLengthOffset = _io.tell(_userData) + 4;
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"data", 0); // write 0 but temporarily, this will be overwritten at finalizing
            _writtenFrames = 0;
        }

        /// Releases the dithering buffer, which is not allocated by the GC.
        ~this()
        {
            _ditherBuf.reallocBuffer(0);
        }

        /// Writes interleaved samples.
        /// For integer formats, input is clamped to [-1, 1] (NaN becomes 0) before quantization.
        /// Params:
        ///     inSamples = Source, should hold `frames * _channels` samples.
        ///     frames    = Number of frames to write. Zero or negative writes nothing.
        /// Returns: Number of samples (not frames) written. If a write fails, this is the
        ///          count reached before the failure, and no frame of this call is
        ///          counted in the final 'data' chunk size.
        int writeSamples(T)(T* inSamples, int frames) nothrow
        {
            // A negative count would decrement the written frames, and be
            // seen as a huge unsigned size by the dithering buffer.
            if (frames <= 0)
                return 0;

            int n = 0;
            try
            {
                int samples = frames * _channels;
                
                final switch(_format)
                {
                    case Format.s8:
                        ditherInput(inSamples, samples, 127.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // 8-bit WAV is unsigned, centered on 128; the cast only keeps the low byte
                            int b = cast(int)(128.5 + x * 127.0); 
                            _io.write_byte(_userData, cast(byte)b);
                        }
                        break;

                    case Format.s16le:
                        ditherInput(inSamples, samples, 32767.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // offset so that the truncating cast rounds to nearest
                            int s = cast(int)(32768.5 + x * 32767.0);
                            s -= 32768;
                            assert(s >= -32767 && s <= 32767);
                            _io.write_short_LE(_userData, cast(short)s);
                        }
                        break;

                    case Format.s24le:
                        ditherInput(inSamples, samples, 8388607.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // offset so that the truncating cast rounds to nearest
                            int s = cast(int)(8388608.5 + x * 8388607.0);
                            s -= 8388608;
                            assert(s >= -8388607 && s <= 8388607);
                            _io.write_24bits_LE(_userData, s);
                        }
                        break;

                    case Format.fp32le:
                        for ( ; n < samples; ++n)
                        {
                            _io.write_float_LE(_userData, inSamples[n]);
                        }
                        break;
                    case Format.fp64le:
                        for ( ; n < samples; ++n)
                        {
                            _io.write_double_LE(_userData, inSamples[n]);
                        }
                        break;
                }
                _writtenFrames += frames;
            }
            catch(Exception e)
            {
                destroyFree(e);
            }
            return n;
        }

        /// Returns: Size in bytes of one sample of one channel.
        int sampleSize()
        {
            final switch(_format)
            {
                case Format.s8:     return 1;
                case Format.s16le:  return 2;
                case Format.s24le:  return 3;
                case Format.fp32le: return 4;
                case Format.fp64le: return 8;
            }
        }

        /// Returns: Size in bytes of one frame, that is one sample for each channel.
        int frameSize()
        {
            return sampleSize() * _channels;
        }

        /// Patches the sizes of the 'RIFF' and 'data' chunks, now that the amount
        /// of written audio is known. To call once all samples are written.
        void finalizeEncoding() 
        {
            // 64-bit product, so that the byte count cannot wrap before being used.
            ulong bytesOfData = cast(ulong)frameSize() * cast(ulong)_writtenFrames;

            // RIFF chunks are word-aligned: an odd-sized 'data' chunk is followed by
            // one zero pad byte, counted in the 'RIFF' size but not in the 'data' size.
            uint padBytes = 0;
            if ((bytesOfData & 1) != 0)
            {
                padBytes = 1;
                // samples start right after the 4-byte size field of the 'data' chunk
                _io.seek(_dataLengthOffset + 4 + cast(long)bytesOfData, false, _userData);
                _io.write_byte(_userData, cast(byte)0);
            }

            // write final number of bytes for the 'RIFF' chunk
            {
                uint riffLength = cast(uint)( 4 + (4 + 4 + 16) + (4 + 4 + bytesOfData + padBytes) );
                _io.seek(_riffLengthOffset, false, _userData);
                _io.write_uint_LE(_userData, riffLength);
            }

            // write final number of bytes for the 'data' chunk
            {
                _io.seek(_dataLengthOffset, false, _userData);
                _io.write_uint_LE(_userData, cast(uint)bytesOfData );
            }
        }

    private:
        void* _userData;
        IOCallbacks* _io;
        Format _format;
        int _channels;
        int _writtenFrames;                         // frames successfully written so far
        long _riffLengthOffset, _dataLengthOffset;  // stream offsets of the size fields to patch

        bool _enableDither;
        double[] _ditherBuf;    // allocated with `reallocBuffer`, freed in the destructor
        TPDFDither _tpdf;

        /// Copies `numSamples` samples to `_ditherBuf` as doubles, sanitized to [-1, 1],
        /// then dithers them for a quantization to `scaleFactor` levels if enabled.
        void ditherInput(T)(T* inSamples, int numSamples, double scaleFactor)
        {
            if (_ditherBuf.length < numSamples)
                _ditherBuf.reallocBuffer(numSamples);

            for (int n = 0; n < numSamples; ++n)
            {
                double x = inSamples[n];

                // Out-of-range input would overflow the integer sample, and NaN
                // has no defined integer conversion: sanitize both.
                if (x != x)
                    x = 0.0;
                else if (x < -1.0)
                    x = -1.0;
                else if (x > 1.0)
                    x = 1.0;

                _ditherBuf[n] = x;
            }

            if (_enableDither)
                _tpdf.process(_ditherBuf.ptr, numSamples, scaleFactor);
        }
    }
}
489 
490 
491 private:
492 
// wFormatTag: values of the format field that starts a 'fmt ' chunk.
immutable
{
    int LinearPCM = 0x0001;              // integer PCM samples
    int FloatingPointIEEE = 0x0003;      // IEEE floating-point samples
    int WAVE_FORMAT_EXTENSIBLE = 0xFFFE; // actual encoding is given by a SubFormat GUID
}
497 
498 
499 /+
500 MIT License
501 
502 Copyright (c) 2018 Chris Johnson
503 
504 Permission is hereby granted, free of charge, to any person obtaining a copy
505 of this software and associated documentation files (the "Software"), to deal
506 in the Software without restriction, including without limitation the rights
507 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
508 copies of the Software, and to permit persons to whom the Software is
509 furnished to do so, subject to the following conditions:
510 
511 The above copyright notice and this permission notice shall be included in all
512 copies or substantial portions of the Software.
513 
514 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
515 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
516 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
517 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
518 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
519 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
520 SOFTWARE.
521 +/
/// TPDF dither, after Chris Johnson / AirWindows.
/// Modified from the original, whose sound was not judged particularly
/// good on 8-bit material.
struct TPDFDither
{
nothrow:
@nogc:

    /// Dithers `frames` samples in place.
    /// Each sample is scaled by `scaleFactor`, offset, summed with two uniform
    /// noises drawn with `rand()`, floored to an integer, scaled back, and
    /// clamped to [-1, 1].
    void process(double* inoutSamples, int frames, double scaleFactor)
    {
        enum double TUNE0 = 0.25;                         // could probably be better if tuned interactively
        enum double TUNE1 = TUNE0*0.5;                    // ditto
        enum double OFFSET = 0.5 - 0.5 * (TUNE0+TUNE1);

        foreach (i; 0 .. frames)
        {
            // once scaled, 0-1 is one bit
            double v = inoutSamples[i];
            v *= scaleFactor;

            // TPDF: two 0-1 random noises
            v += OFFSET;
            v += TUNE0 * (rand()/cast(double)RAND_MAX);
            v += TUNE1 * (rand()/cast(double)RAND_MAX);

            // quantize, then come back to the normalized range
            v = floor(v);
            v /= scaleFactor;

            inoutSamples[i] = (v < -1.0) ? -1.0
                            : (v > 1.0)  ?  1.0
                            : v;
        }
    }
}