audioformats.wav source code

1 /**
2 Supports Microsoft WAV audio file format.
3 
4 Copyright: Guillaume Piolat 2015-2020.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module audioformats.wav;
8 
9 import core.stdc.math: round, floor, fabs;
10 import core.stdc.stdlib: rand, RAND_MAX;
11 import audioformats.io;
12 import audioformats.internals;
13 
14 
15 version(decodeWAV)
16 {
17     /// Use both for scanning and decoding
18 
19     final class WAVDecoder
20     {
21     public:
22     @nogc:
23 
24         static struct WAVError
25         {
26         @nogc nothrow pure @safe:
27             string reason; /// What went wrong; empty in the value returned by `none`.
28             string file;   /// Defaults to `__FILE__` in the constructor.
29             size_t line;   /// Defaults to `__LINE__` in the constructor.
30 
31             this(string reason, string file = __FILE__, size_t line = __LINE__)
32             {
33                 this.line = line;
34                 this.file = file;
35                 this.reason = reason;
36             }
37             static WAVError none() { return WAVError("", "", 0); } /// The "no error" value.
38         }
39 
40         static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_IEEE_FLOAT = 
41         [3, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113]; // compared with the SubFormat GUID read by scan()
42 
43         this(IOCallbacks* io, void* userData) nothrow
44         {
45             _userData = userData; // handed back to every `_io` callback
46             _io = io;             // callbacks used for all reads, seeks and skips
47         }
48 
49         // Parses the RIFF chunks; returns WAVError.none on success. After scan, we know _sampleRate, _lengthInFrames, and _channels, and can call `readSamples`
50         WAVError scan()
51         {
52             // check RIFF header
53             {
54                 uint chunkId, chunkSize;
55                 _io.readRIFFChunkHeader(_userData, chunkId, chunkSize);
56                 if (chunkId != RIFFChunkId!"RIFF")
57                     return WAVError("Expected RIFF chunk.");
58 
59                 if (chunkSize < 4)
60                     return WAVError("RIFF chunk is too small to contain a format.");
61 
62                 if (_io.read_uint_BE(_userData) !=  RIFFChunkId!"WAVE")
63                     return WAVError("Expected WAVE format.");
64             }
65 
66             bool foundFmt = false;
67             bool foundData = false;
68 
69             int bitsPerSample; // set by the 'fmt ' branch, consumed by the 'data' branch
70 
71             while (!_io.nothingToReadAnymore(_userData))
72             {
73                 // Some corrupted WAV files in the wild finish with one
74                 // extra 0 byte after an AFAn chunk, very odd
75                 if (_io.remainingBytesToRead(_userData) == 1)
76                 {
77                     if (_io.peek_ubyte(_userData) == 0)
78                         break;
79                 }
80 
81                 // Question: is there any reason to parse the whole WAV file? This prevents streaming.
82 
83                 uint chunkId, chunkSize;
84                 _io.readRIFFChunkHeader(_userData, chunkId, chunkSize); 
85                 if (chunkId == RIFFChunkId!"fmt ")
86                 {
87                     if (foundFmt)
88                         return WAVError("Found several 'fmt ' chunks in RIFF file.");
89 
90                     foundFmt = true;
91 
92                     if (chunkSize < 16)
93                         return WAVError("Expected at least 16 bytes in 'fmt ' chunk."); // found in real-world for the moment: 16 or 40 bytes
94 
95                     _audioFormat = _io.read_ushort_LE(_userData);
96                     bool isWFE = _audioFormat == WAVE_FORMAT_EXTENSIBLE;
97 
98                     if (_audioFormat != LinearPCM && _audioFormat != FloatingPointIEEE && !isWFE)
99                         return WAVError("Unsupported audio format, only PCM and IEEE float and WAVE_FORMAT_EXTENSIBLE are supported.");
100 
101                     _channels = _io.read_ushort_LE(_userData);
102                     if (_channels == 0) // a zero frame size would divide by zero in the 'data' branch
103                         return WAVError("Unsupported number of channels.");
104 
105                     _sampleRate = _io.read_uint_LE(_userData);
106                     if (_sampleRate <= 0)
107                         return WAVError("Unsupported sample-rate."); // we do not support sample-rate higher than 2^31hz
108 
109                     uint bytesPerSec = _io.read_uint_LE(_userData);
110                     int bytesPerFrame = _io.read_ushort_LE(_userData);
111                     bitsPerSample = _io.read_ushort_LE(_userData);
112 
113                     if (bitsPerSample != 8 && bitsPerSample != 16 && bitsPerSample != 24 && bitsPerSample != 32 && bitsPerSample != 64) 
114                         return WAVError("Unsupported bitdepth");
115 
116                     if (bytesPerFrame != (bitsPerSample / 8) * _channels)
117                         return WAVError("Invalid bytes-per-second, data might be corrupted.");
118 
119                     // Sometimes there is no cbSize
120                     if (chunkSize >= 18)
121                     {
122                         ushort cbSize = _io.read_ushort_LE(_userData);
123 
124                         if (isWFE)
125                         {
126                             if (cbSize >= 22 && chunkSize >= 18 + 2 + 4 + 16) // the 22 extension bytes must lie inside this chunk
127                             {
128                                 ushort wReserved = _io.read_ushort_LE(_userData);
129                                 uint dwChannelMask = _io.read_uint_LE(_userData);
130                                 ubyte[16] SubFormat = _io.read_guid(_userData);
131 
132                                 if (SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
133                                 {
134                                     _audioFormat = FloatingPointIEEE;
135                                 }
136                                 else
137                                     return WAVError("Unsupported GUID in WAVE_FORMAT_EXTENSIBLE.");
138                             }
139                             else
140                                 return WAVError("Unsupported WAVE_FORMAT_EXTENSIBLE.");
141 
142                             _io.skip(chunkSize - (18 + 2 + 4 + 16), _userData);
143                         }
144                         else
145                         {
146                             _io.skip(chunkSize - 18, _userData);
147                         }
148                     }
149                     else
150                     {
151                         _io.skip(chunkSize - 16, _userData);
152                     }
153 
154                 }
155                 else if (chunkId == RIFFChunkId!"data")
156                 {
157                     if (foundData)
158                         return WAVError("Found several 'data' chunks in RIFF file.");
159 
160                     if (!foundFmt)
161                         return WAVError("'fmt ' chunk expected before the 'data' chunk.");
162 
163                     _bytePerSample = bitsPerSample / 8;
164                     uint frameSize = _channels * _bytePerSample; // never 0: channels and bit depth were validated in 'fmt '
165                     if (chunkSize % frameSize != 0)
166                         return WAVError("Remaining bytes in 'data' chunk, inconsistent with audio data type.");
167 
168                     uint numFrames = chunkSize / frameSize;
169                     _lengthInFrames = numFrames;
170 
171                     _samplesOffsetInFile = _io.tell(_userData);
172 
173                     _io.skip(chunkSize, _userData); // skip, will read later
174                     foundData = true;
175                 }
176                 else
177                 {
178                     // ignore unknown chunks
179                     _io.skip(chunkSize, _userData);
180                 }
181             }
182 
183             if (!foundFmt)
184                 return WAVError("'fmt ' chunk not found.");
185 
186             if (!foundData)
187                 return WAVError("'data' chunk not found.");
188 
189             // Get ready to decode
190             _io.seek(_samplesOffsetInFile, false, _userData);
191             _framePosition = 0; // seek to start
192 
193             return WAVError.none;
194         }
195 
196         /// Moves the read cursor to `absoluteFrame`, counted from the first frame of the 'data' chunk.
197         /// Returns: false, without seeking, if the frame is negative or past the end; true otherwise.
198         bool seekPosition(int absoluteFrame)
199         {
200             bool outOfRange = absoluteFrame < 0 || absoluteFrame > _lengthInFrames;
201             if (outOfRange)
202                 return false;
203             uint bytesPerFrame = _channels * _bytePerSample;
204             uint byteOffset = absoluteFrame * bytesPerFrame;
205             _io.seek(_samplesOffsetInFile + byteOffset, false, _userData);
206             _framePosition = absoluteFrame;
207             return true;
208         }
209 
210         /// Returns: position in absolute number of frames since beginning, i.e. the value last set by `scan`, `seekPosition` or `readSamples`.
211         int tellPosition()
212         {
213             return _framePosition;
214         }
215 
216         // Decodes up to `maxFrames` interleaved frames into `outData`, assigning each decoded sample to a T.
217         // `outData` should have enough room for frames * _channels
218         // Returns: Frames actually read; 0 when `maxFrames` <= 0, at the end of the data, or when a read throws.
219         int readSamples(T)(T* outData, int maxFrames)
220         {
221             assert(_framePosition <= _lengthInFrames);
222             uint available = _lengthInFrames - _framePosition; // kept unsigned: may exceed int.max
223 
224             // How many frames can we decode? Negative requests are clamped to 0 so that
225             // _framePosition never moves backwards and the loops below never see a negative count.
226             int frames = maxFrames < 0 ? 0 : maxFrames;
227             if (cast(uint) frames > available)
228                 frames = cast(int) available;
229             _framePosition += frames; // NOTE: advanced before decoding, also when a read throws below
230             int numSamples = frames * _channels;
231 
232             int n = 0;
233 
234             try
235             {
236                 if (_audioFormat == FloatingPointIEEE)
237                 {
238                     if (_bytePerSample == 4)
239                     {
240                         for (n = 0; n < numSamples; ++n)
241                             outData[n] = _io.read_float_LE(_userData);
242                     }
243                     else if (_bytePerSample == 8)
244                     {
245                         for (n = 0; n < numSamples; ++n)
246                             outData[n] = _io.read_double_LE(_userData);
247                     }
248                     else
249                         throw mallocNew!AudioFormatsException("Unsupported bit-depth for floating point data, should be 32 or 64.");
250                 }
251                 else if (_audioFormat == LinearPCM)
252                 {
253                     if (_bytePerSample == 1)
254                     {
255                         for (n = 0; n < numSamples; ++n)
256                         {
257                             ubyte b = _io.read_ubyte(_userData);
258                             outData[n] = (b - 128) / 127.0;
259                         }
260                     }
261                     else if (_bytePerSample == 2)
262                     {
263                         for (n = 0; n < numSamples; ++n)
264                         {
265                             short s = _io.read_ushort_LE(_userData);
266                             outData[n] = s / 32767.0;
267                         }
268                     }
269                     else if (_bytePerSample == 3)
270                     {
271                         for (n = 0; n < numSamples; ++n)
272                         {
273                             int s = _io.read_24bits_LE(_userData);
274                             // duplicate sign bit
275                             s = (s << 8) >> 8;
276                             outData[n] = s / 8388607.0;
277                         }
278                     }
279                     else if (_bytePerSample == 4)
280                     {
281                         for (n = 0; n < numSamples; ++n)
282                         {
283                             int s = _io.read_uint_LE(_userData);
284                             outData[n] = s / 2147483648.0;
285                         }
286                     }
287                     else
288                         throw mallocNew!AudioFormatsException("Unsupported bit-depth for integer PCM data, should be 8, 16, 24 or 32 bits.");
289                 }
290                 else
291                     assert(false); // should have been handled earlier, crash
292             }
293             catch(AudioFormatsException e)
294             {
295                 destroyFree(e); // well this is really unexpected, since no read should fail in this loop
296                 return 0;
297             }
298 
299             // Return number of frames decoded
300             return frames;
301         }
302 
303     package:
304         int _sampleRate;           // read from the 'fmt ' chunk by scan()
305         int _channels;             // read from the 'fmt ' chunk by scan()
306         int _audioFormat;          // wFormatTag; scan() replaces it with FloatingPointIEEE for the float WAVE_FORMAT_EXTENSIBLE GUID
307         int _bytePerSample;        // bitsPerSample / 8, set when scan() reaches the 'data' chunk
308         long _samplesOffsetInFile; // _io.tell() taken right after the 'data' chunk header
309         uint _lengthInFrames;      // 'data' chunk size divided by the frame size
310         uint _framePosition;       // advanced by readSamples, set by seekPosition, reset to 0 by scan()
311 
312     private:
313         void* _userData;           // passed to every `_io` callback
314         IOCallbacks* _io;
315     }
316 }
317 
318 
319 version(encodeWAV)
320 {
321     /// Encodes interleaved samples into a WAV stream through `IOCallbacks`.
322     final class WAVEncoder
323     {
324     public:
325     @nogc:
326         enum Format
327         {
328             s8,     // 1 byte per sample, written with the LinearPCM format tag
329             s16le,  // 2 bytes per sample, written with the LinearPCM format tag
330             s24le,  // 3 bytes per sample, written with the LinearPCM format tag
331             fp32le, // 4 bytes per sample, written with the FloatingPointIEEE format tag
332             fp64le, // 8 bytes per sample, written with the FloatingPointIEEE format tag
333         }
334 
335         static bool isFormatLinearPCM(Format fmt)
336         {
337             return !(fmt > Format.s24le); // s8, s16le and s24le come first in the enum; the float formats follow
338         }
339 
340         /// Writes the 'RIFF'/'WAVE' header, a 16-byte 'fmt ' chunk and a 'data' header; both size fields stay 0 until finalizeEncoding().
341         /// Throws: AudioFormatsException when `numChannels` is outside 1..1024.
342         this(IOCallbacks* io, void* userData, int sampleRate, int numChannels, Format format, bool enableDither)
343         {
344             _io = io;
345             _userData = userData;
346             _channels = numChannels;
347             _format = format;
348             _enableDither = enableDither;
349 
350             // Avoids a number of edge cases; zero channels would give a zero frame size.
351             if (_channels <= 0 || _channels > 1024)
352                 throw mallocNew!AudioFormatsException("Can't save a WAV with this number of channels.");
353 
354             // RIFF header
355             // its size will be overwritten at finalizing
356             _riffLengthOffset = _io.tell(_userData) + 4;
357             _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"RIFF", 0);
358             _io.write_uint_BE(_userData, RIFFChunkId!"WAVE");
359 
360             // 'fmt ' sub-chunk
361             _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"fmt ", 0x10);
362             _io.write_ushort_LE(_userData, isFormatLinearPCM(format) ? LinearPCM : FloatingPointIEEE);
363             _io.write_ushort_LE(_userData, cast(ushort)(_channels));
364             _io.write_uint_LE(_userData, sampleRate);
365 
366             // bytes per second, then bytes per frame, then bits per sample
367             size_t bytesPerSec = sampleRate * cast(size_t) frameSize();
368             _io.write_uint_LE(_userData,  cast(uint)(bytesPerSec));
369             _io.write_ushort_LE(_userData, cast(ushort) frameSize());
370             _io.write_ushort_LE(_userData, cast(ushort)(sampleSize() * 8));
371 
372             // data sub-chunk
373             _dataLengthOffset = _io.tell(_userData) + 4;
374             _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"data", 0); // write 0 but temporarily, this will be overwritten at finalizing
375             _writtenFrames = 0;
376         }
377 
378         // Encodes `frames` interleaved frames from `inSamples` in the stream's format; integer formats saturate
379         // input outside [-1, 1]. `inSamples` should have enough room for frames * _channels.
380         // Returns: the number of samples (not frames) written; 0 when `frames` <= 0.
381         int writeSamples(T)(T* inSamples, int frames) nothrow
382         {
383             // Saturating keeps the integer conversions below inside their asserted ranges.
384             static double clampUnit(double v) pure nothrow @nogc { return v < -1.0 ? -1.0 : (v > 1.0 ? 1.0 : v); }
385             int n = 0;
386             if (frames <= 0)
387                 return n; // a negative count would corrupt _writtenFrames and the dither buffer size
388             try
389             {
390                 int samples = frames * _channels;
391                 final switch(_format)
392                 {
393                     case Format.s8:
394                         ditherInput(inSamples, samples, 127.0f);
395                         for ( ; n < samples; ++n)
396                         {
397                             double x = clampUnit(_ditherBuf[n]);
398                             int b = cast(int)(128.5 + x * 127.0); 
399                             _io.write_byte(_userData, cast(byte)b);
400                         }
401                         break;
402 
403                     case Format.s16le:
404                         ditherInput(inSamples, samples, 32767.0f);
405                         for ( ; n < samples; ++n)
406                         {
407                             double x = clampUnit(_ditherBuf[n]);
408                             int s = cast(int)(32768.5 + x * 32767.0);
409                             s -= 32768;
410                             assert(s >= -32767 && s <= 32767);
411                             _io.write_short_LE(_userData, cast(short)s);
412                         }
413                         break;
414 
415                     case Format.s24le:
416                         ditherInput(inSamples, samples, 8388607.0f);
417                         for ( ; n < samples; ++n)
418                         {
419                             double x = clampUnit(_ditherBuf[n]);
420                             int s = cast(int)(8388608.5 + x * 8388607.0);
421                             s -= 8388608;
422                             assert(s >= -8388607 && s <= 8388607);
423                             _io.write_24bits_LE(_userData, s);
424                         }
425                         break;
426 
427                     case Format.fp32le:
428                         for ( ; n < samples; ++n)
429                             _io.write_float_LE(_userData, inSamples[n]);
430                         break;
431                     case Format.fp64le:
432                         for ( ; n < samples; ++n)
433                             _io.write_double_LE(_userData, inSamples[n]);
434                         break;
435                 }
436                 _writtenFrames += frames;
437             }
438             catch(AudioFormatsException e)
439             {
440                 destroyFree(e); // `n` keeps the count of samples written before the failure
441             }
442             catch(Exception e)
443             {
444                 assert(false); // disallow
445             }
446             return n;
447         }
448 
449         int sampleSize() // bytes taken by one sample of one channel in `_format`
450         {
451             final switch(_format)
452             {
453                 case Format.fp64le: return 8;
454                 case Format.fp32le: return 4;
455                 case Format.s24le:  return 3;
456                 case Format.s16le:  return 2;
457                 case Format.s8:     return 1;
458             }
459         }
460 
461         int frameSize() // bytes taken by one interleaved frame: one sample for each channel
462         {
463             return _channels * sampleSize();
464         }
465 
466         // Seeks back to the two size fields whose offsets the constructor recorded and fills them in,
467         // using the number of frames accumulated by writeSamples.
468         void finalizeEncoding()
469         {
470             size_t bytesOfData = frameSize() * _writtenFrames;
471 
472             // 'RIFF' size: "WAVE" tag (4), 'fmt ' header and body (4 + 4 + 16),
473             // 'data' header and payload (4 + 4 + bytesOfData)
474             size_t riffPayload = 4 + (4 + 4 + 16) + (4 + 4 + bytesOfData);
475             _io.seek(_riffLengthOffset, false, _userData);
476             _io.write_uint_LE(_userData, cast(uint) riffPayload);
477 
478             // 'data' size: the sample payload only
479             uint dataLength = cast(uint) bytesOfData;
480             _io.seek(_dataLengthOffset, false, _userData);
481             _io.write_uint_LE(_userData, dataLength);
482         }
483 
484     private:
485         void* _userData;    // passed to every `_io` callback
486         IOCallbacks* _io;
487         Format _format;
488         int _channels;
489         int _writtenFrames; // frames accepted by writeSamples so far; sizes the chunks in finalizeEncoding
490         long _riffLengthOffset, _dataLengthOffset; // stream offsets of the two size fields patched by finalizeEncoding
491 
492         bool _enableDither;
493         double[] _ditherBuf; // copy of the input as doubles, filled by ditherInput
494         TPDFDither _tpdf;
495 
496         // Copies `frames` values (callers pass a sample count) into `_ditherBuf` as doubles, resizing it through
497         // reallocBuffer when too short, then runs the TPDF dither in place if `_enableDither` is set.
498         void ditherInput(T)(T* inSamples, int frames, double scaleFactor)
499         {
500             if (_ditherBuf.length < frames)
501                 _ditherBuf.reallocBuffer(frames);
502 
503             foreach (i; 0 .. frames)
504                 _ditherBuf[i] = inSamples[i];
505 
506             if (_enableDither)
507                 _tpdf.process(_ditherBuf.ptr, frames, scaleFactor);
508         }
509     }
510 }
511 
512 
513 private:
514 
515 // wFormatTag values compared against the first field of the 'fmt ' chunk
516 immutable int LinearPCM = 0x0001;              // decoded as 8/16/24/32-bit integer samples by readSamples
517 immutable int FloatingPointIEEE = 0x0003;      // decoded as 32-bit or 64-bit floats by readSamples
518 immutable int WAVE_FORMAT_EXTENSIBLE = 0xFFFE; // scan() then takes the format from the SubFormat GUID
519 
520 
521 /+
522 MIT License
523 
524 Copyright (c) 2018 Chris Johnson
525 
526 Permission is hereby granted, free of charge, to any person obtaining a copy
527 of this software and associated documentation files (the "Software"), to deal
528 in the Software without restriction, including without limitation the rights
529 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
530 copies of the Software, and to permit persons to whom the Software is
531 furnished to do so, subject to the following conditions:
532 
533 The above copyright notice and this permission notice shall be included in all
534 copies or substantial portions of the Software.
535 
536 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
537 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
538 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
539 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
540 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
541 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
542 SOFTWARE.
543 +/
544 /// TPDF-style dither derived from TPDF Dither by Chris Johnson / AirWindows;
545 /// the algorithm changed quite a bit and was tuned on 8-bit dither by ear.
546 struct TPDFDither
547 {
548 nothrow:
549 @nogc:
550 
551     /// Dithers and quantizes `frames` values of `inoutSamples` in place: each value is scaled by
552     /// `scaleFactor`, offset, perturbed by two `rand()` draws, floored, scaled back, clamped to [-1, 1].
553     void process(double* inoutSamples, int frames, double scaleFactor)
554     {
555         enum double TUNE0 = 0.25; // could probably be better if tuned interactively
556         enum double TUNE1 = TUNE0*0.5; // ditto
557 
558         foreach (i; 0 .. frames)
559         {
560             // After scaling, 0-1 is one bit of the target format.
561             double v = inoutSamples[i];
562             v *= scaleFactor;
563             v += (0.5 - 0.5 * (TUNE0+TUNE1)); // fixed offset applied before flooring
564             v += TUNE0 * (rand()/cast(double)RAND_MAX); // first 0-1 random noise
565             v += TUNE1 * (rand()/cast(double)RAND_MAX); // second 0-1 random noise
566             v = floor(v);
567 
568             v /= scaleFactor;
569             if (v < -1.0) v = -1.0;
570             else if (v > 1.0) v = 1.0;
571             inoutSamples[i] = v;
572         }
573     }
574 }