/**
Supports Microsoft WAV audio file format.

Copyright: Guillaume Piolat 2015-2020.
License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module audioformats.wav;

import core.stdc.math: round, floor, fabs;
import core.stdc.stdlib: rand, RAND_MAX;
import audioformats.io;
import audioformats.internals;


version(decodeWAV)
{
    /// WAV decoder. Use both for scanning (`scan`) and decoding (`readSamples`).
    final class WAVDecoder
    {
    public:
    @nogc:

        /// Result of `scan`: an error reason plus the source location that
        /// produced it. `WAVError.none` (empty reason) is returned on success.
        static struct WAVError
        {
        @nogc nothrow pure @safe:
            string reason;
            string file;
            size_t line;
            static WAVError none() {return WAVError("","",0);}

            this(string reason, string file = __FILE__, size_t line = __LINE__)
            {
                this.reason = reason;
                this.file = file;
                this.line = line;
            }
        }

        /// SubFormat GUID accepted inside a WAVE_FORMAT_EXTENSIBLE 'fmt ' chunk.
        static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_IEEE_FLOAT =
        [3, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113];

        /// Stores the I/O callbacks and the opaque user pointer handed to
        /// every callback. Nothing is read until `scan` is called.
        this(IOCallbacks* io, void* userData) nothrow
        {
            _io = io;
            _userData = userData;
        }

        /// Parses the RIFF container: validates the header, reads the 'fmt '
        /// chunk, locates the 'data' chunk and skips every other chunk.
        /// After a successful scan, `_sampleRate`, `_lengthInFrames` and
        /// `_channels` are known, the stream is positioned on the first
        /// sample, and `readSamples` can be called.
        /// Returns: `WAVError.none` on success, else the first error found.
        WAVError scan()
        {
            // check RIFF header
            {
                uint chunkId, chunkSize;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize);
                if (chunkId != RIFFChunkId!"RIFF")
                    return WAVError("Expected RIFF chunk.");

                if (chunkSize < 4)
                    return WAVError("RIFF chunk is too small to contain a format.");

                if (_io.read_uint_BE(_userData) != RIFFChunkId!"WAVE")
                    return WAVError("Expected WAVE format.");
            }

            bool foundFmt = false;
            bool foundData = false;

            int bitsPerSample;

            while (!_io.nothingToReadAnymore(_userData))
            {
                // Some corrupted WAV files in the wild finish with one
                // extra 0 byte after an AFAn chunk, very odd
                if (_io.remainingBytesToRead(_userData) == 1)
                {
                    if (_io.peek_ubyte(_userData) == 0)
                        break;
                }

                // Question: is there any reason to parse the whole WAV file? This prevents streaming.

                uint chunkId, chunkSize;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize);
                if (chunkId == RIFFChunkId!"fmt ")
                {
                    if (foundFmt)
                        return WAVError("Found several 'fmt ' chunks in RIFF file.");

                    foundFmt = true;

                    if (chunkSize < 16)
                        return WAVError("Expected at least 16 bytes in 'fmt ' chunk."); // found in real-world for the moment: 16 or 40 bytes

                    _audioFormat = _io.read_ushort_LE(_userData);
                    bool isWFE = _audioFormat == WAVE_FORMAT_EXTENSIBLE;

                    if (_audioFormat != LinearPCM && _audioFormat != FloatingPointIEEE && !isWFE)
                        return WAVError("Unsupported audio format, only PCM and IEEE float and WAVE_FORMAT_EXTENSIBLE are supported.");

                    _channels = _io.read_ushort_LE(_userData);

                    // A zero channel count would make the frame size zero and
                    // lead to a division by zero when the 'data' chunk is parsed.
                    if (_channels == 0)
                        return WAVError("Number of channels cannot be zero.");

                    _sampleRate = _io.read_uint_LE(_userData);
                    if (_sampleRate <= 0)
                        return WAVError("Unsupported sample-rate."); // we do not support sample-rate higher than 2^31hz

                    // Read only to advance the stream; the value is not used.
                    uint bytesPerSec = _io.read_uint_LE(_userData);
                    int bytesPerFrame = _io.read_ushort_LE(_userData);
                    bitsPerSample = _io.read_ushort_LE(_userData);

                    if (bitsPerSample != 8 && bitsPerSample != 16 && bitsPerSample != 24 && bitsPerSample != 32 && bitsPerSample != 64)
                        return WAVError("Unsupported bitdepth");

                    if (bytesPerFrame != (bitsPerSample / 8) * _channels)
                        return WAVError("Invalid bytes-per-frame, data might be corrupted.");

                    // Sometimes there is no cbSize
                    if (chunkSize >= 18)
                    {
                        ushort cbSize = _io.read_ushort_LE(_userData);

                        if (isWFE)
                        {
                            // The extension (2 + 4 + 16 bytes) must be both
                            // declared by cbSize and actually present in the
                            // chunk; otherwise `chunkSize - 40` below would
                            // underflow as an unsigned value.
                            if (cbSize < 22 || chunkSize < 18 + 2 + 4 + 16)
                                return WAVError("Unsupported WAVE_FORMAT_EXTENSIBLE.");

                            // wReserved and dwChannelMask are read only to
                            // advance the stream.
                            ushort wReserved = _io.read_ushort_LE(_userData);
                            uint dwChannelMask = _io.read_uint_LE(_userData);
                            ubyte[16] SubFormat = _io.read_guid(_userData);

                            if (SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
                            {
                                _audioFormat = FloatingPointIEEE;
                            }
                            else
                                return WAVError("Unsupported GUID in WAVE_FORMAT_EXTENSIBLE.");

                            _io.skip(chunkSize - (18 + 2 + 4 + 16), _userData);
                        }
                        else
                        {
                            _io.skip(chunkSize - 18, _userData);
                        }
                    }
                    else
                    {
                        // Without the extension the actual sample format is
                        // unknown, and `readSamples` cannot decode it.
                        if (isWFE)
                            return WAVError("Unsupported WAVE_FORMAT_EXTENSIBLE.");

                        _io.skip(chunkSize - 16, _userData);
                    }

                }
                else if (chunkId == RIFFChunkId!"data")
                {
                    if (foundData)
                        return WAVError("Found several 'data' chunks in RIFF file.");

                    if (!foundFmt)
                        return WAVError("'fmt ' chunk expected before the 'data' chunk.");

                    _bytePerSample = bitsPerSample / 8;

                    // Non-zero: _channels >= 1 and bitsPerSample >= 8 were
                    // both enforced while parsing the 'fmt ' chunk.
                    uint frameSize = _channels * _bytePerSample;
                    if (chunkSize % frameSize != 0)
                        return WAVError("Remaining bytes in 'data' chunk, inconsistent with audio data type.");

                    uint numFrames = chunkSize / frameSize;
                    _lengthInFrames = numFrames;

                    _samplesOffsetInFile = _io.tell(_userData);

                    _io.skip(chunkSize, _userData); // skip, will read later
                    foundData = true;
                }
                else
                {
                    // ignore unknown chunks
                    _io.skip(chunkSize, _userData);
                }
            }

            if (!foundFmt)
                return WAVError("'fmt ' chunk not found.");

            if (!foundData)
                return WAVError("'data' chunk not found.");

            // Get ready to decode
            _io.seek(_samplesOffsetInFile, false, _userData);
            _framePosition = 0; // seek to start

            return WAVError.none;
        }

        /// Seeks to the given frame, counted from the start of the audio data.
        /// Seeking to exactly `_lengthInFrames` (end of data) is allowed.
        /// Returns: false in case of failure.
        bool seekPosition(int absoluteFrame)
        {
            if (absoluteFrame < 0)
                return false;
            if (absoluteFrame > _lengthInFrames)
                return false;
            uint frameSize = _channels * _bytePerSample;

            // Compute the byte offset in 64-bit arithmetic.
            long pos = _samplesOffsetInFile + cast(long)absoluteFrame * frameSize;
            _io.seek(pos, false, _userData);
            _framePosition = absoluteFrame;
            return true;
        }

        /// Returns: position in absolute number of frames since beginning.
        int tellPosition()
        {
            return _framePosition;
        }

        /// Reads interleaved samples, converted to `T`.
        /// `outData` should have enough room for `maxFrames * _channels` items.
        /// Integer PCM is scaled to floating point by a constant divisor per
        /// bit depth (127, 32767, 8388607 or 2147483648).
        /// Returns: Frames actually read. 0 if nothing is left, if `maxFrames`
        ///          is not positive, or if reading failed.
        int readSamples(T)(T* outData, int maxFrames)
        {
            assert(_framePosition <= _lengthInFrames);
            int available = _lengthInFrames - _framePosition;

            // How much frames can we decode?
            int frames = maxFrames;
            if (frames > available)
                frames = available;

            // A negative count would turn into a huge unsigned loop bound
            // below and wrap `_framePosition`; there is nothing to do anyway.
            if (frames <= 0)
                return 0;

            int numSamples = frames * _channels;

            uint n = 0;

            try
            {
                if (_audioFormat == FloatingPointIEEE)
                {
                    if (_bytePerSample == 4)
                    {
                        for (n = 0; n < numSamples; ++n)
                            outData[n] = _io.read_float_LE(_userData);
                    }
                    else if (_bytePerSample == 8)
                    {
                        for (n = 0; n < numSamples; ++n)
                            outData[n] = _io.read_double_LE(_userData);
                    }
                    else
                        throw mallocNew!AudioFormatsException("Unsupported bit-depth for floating point data, should be 32 or 64.");
                }
                else if (_audioFormat == LinearPCM)
                {
                    if (_bytePerSample == 1)
                    {
                        // 8-bit samples are stored unsigned, centered on 128.
                        for (n = 0; n < numSamples; ++n)
                        {
                            ubyte b = _io.read_ubyte(_userData);
                            outData[n] = (b - 128) / 127.0;
                        }
                    }
                    else if (_bytePerSample == 2)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            // Reinterpret the 16 raw bits as a signed value.
                            short s = _io.read_ushort_LE(_userData);
                            outData[n] = s / 32767.0;
                        }
                    }
                    else if (_bytePerSample == 3)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            int s = _io.read_24bits_LE(_userData);
                            // duplicate sign bit
                            s = (s << 8) >> 8;
                            outData[n] = s / 8388607.0;
                        }
                    }
                    else if (_bytePerSample == 4)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            // Reinterpret the 32 raw bits as a signed value.
                            int s = _io.read_uint_LE(_userData);
                            outData[n] = s / 2147483648.0;
                        }
                    }
                    else
                        throw mallocNew!AudioFormatsException("Unsupported bit-depth for integer PCM data, should be 8, 16, 24 or 32 bits.");
                }
                else
                    assert(false); // should have been handled earlier, crash
            }
            catch(AudioFormatsException e)
            {
                destroyFree(e); // well this is really unexpected, since no read should fail in this loop
                return 0;
            }

            // Only advance the logical position once the read succeeded, so
            // that a failure (which reports 0 frames) leaves it untouched.
            _framePosition += frames;

            // Return number of frames read
            return frames;
        }

    package:
        int _sampleRate;
        int _channels;
        int _audioFormat;           // LinearPCM or FloatingPointIEEE after a successful scan
        int _bytePerSample;
        long _samplesOffsetInFile;  // result of `_io.tell` at the start of the 'data' payload
        uint _lengthInFrames;
        uint _framePosition;

    private:
        void* _userData;
        IOCallbacks* _io;
    }
}


version(encodeWAV)
{
    /// WAV encoder. The header is written at construction, samples are
    /// appended with `writeSamples`, and `finalizeEncoding` patches the chunk
    /// sizes once the amount of data is known.
    final class WAVEncoder
    {
    public:
    @nogc:
        /// Sample format written to the file (see `sampleSize` for byte sizes).
        enum Format
        {
            s8,
            s16le,
            s24le,
            fp32le,
            fp64le,
        }

        /// Returns: true for the integer formats (s8, s16le, s24le).
        static bool isFormatLinearPCM(Format fmt)
        {
            return fmt <= Format.s24le;
        }

        /// Writes the RIFF/WAVE header, the 16-byte 'fmt ' chunk and an empty
        /// 'data' chunk header. Both the RIFF and 'data' sizes are written as
        /// 0 and must be fixed up by calling `finalizeEncoding`.
        /// Throws: `AudioFormatsException` if `numChannels` is negative or
        ///         above 1024.
        this(IOCallbacks* io, void* userData, int sampleRate, int numChannels, Format format, bool enableDither)
        {
            _io = io;
            _userData = userData;
            _channels = numChannels;
            _format = format;
            _enableDither = enableDither;

            // Avoids a number of edge cases.
            // NOTE(review): zero channels is accepted here and yields a file
            // with an empty frame size; confirm whether that is intended.
            if (_channels < 0 || _channels > 1024)
                throw mallocNew!AudioFormatsException("Can't save a WAV with this numnber of channels.");

            // RIFF header
            // its size will be overwritten at finalizing
            _riffLengthOffset = _io.tell(_userData) + 4;
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"RIFF", 0);
            _io.write_uint_BE(_userData, RIFFChunkId!"WAVE");

            // 'fmt ' sub-chunk
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"fmt ", 0x10);
            _io.write_ushort_LE(_userData, isFormatLinearPCM(format) ? LinearPCM : FloatingPointIEEE);
            _io.write_ushort_LE(_userData, cast(ushort)(_channels));
            _io.write_uint_LE(_userData, sampleRate);

            size_t bytesPerSec = sampleRate * cast(size_t) frameSize();
            _io.write_uint_LE(_userData,  cast(uint)(bytesPerSec));

            int bytesPerFrame = frameSize();
            _io.write_ushort_LE(_userData, cast(ushort)bytesPerFrame);

            _io.write_ushort_LE(_userData, cast(ushort)(sampleSize() * 8));

            // data sub-chunk
            _dataLengthOffset = _io.tell(_userData) + 4;
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"data", 0); // write 0 but temporarily, this will be overwritten at finalizing
            _writtenFrames = 0;
        }

        /// Writes interleaved samples.
        /// `inSamples` should hold `frames * _channels` items.
        /// Integer formats go through `ditherInput` first (optional dither,
        /// values limited to [-1, 1]); floating-point formats are written as is.
        /// Returns: the value of the internal sample counter, i.e. the number
        ///          of individual samples (not frames) written.
        /// NOTE(review): callers may expect a frame count here; confirm
        /// against call sites before changing, since it is interface-visible.
        int writeSamples(T)(T* inSamples, int frames) nothrow
        {
            int n = 0;
            try
            {
                int samples = frames * _channels;

                final switch(_format)
                {
                    case Format.s8:
                        ditherInput(inSamples, samples, 127.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // Unsigned 8-bit, centered on 128, rounded to nearest.
                            int b = cast(int)(128.5 + x * 127.0);
                            _io.write_byte(_userData, cast(byte)b);
                        }
                        break;

                    case Format.s16le:
                        ditherInput(inSamples, samples, 32767.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // Offset keeps the operand positive so that the
                            // truncating cast rounds to nearest.
                            int s = cast(int)(32768.5 + x * 32767.0);
                            s -= 32768;
                            assert(s >= -32767 && s <= 32767);
                            _io.write_short_LE(_userData, cast(short)s);
                        }
                        break;

                    case Format.s24le:
                        ditherInput(inSamples, samples, 8388607.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // Same rounding trick as for 16-bit.
                            int s = cast(int)(8388608.5 + x * 8388607.0);
                            s -= 8388608;
                            assert(s >= -8388607 && s <= 8388607);
                            _io.write_24bits_LE(_userData, s);
                        }
                        break;

                    case Format.fp32le:
                        for ( ; n < samples; ++n)
                        {
                            _io.write_float_LE(_userData, inSamples[n]);
                        }
                        break;
                    case Format.fp64le:
                        for ( ; n < samples; ++n)
                        {
                            _io.write_double_LE(_userData, inSamples[n]);
                        }
                        break;
                }
                _writtenFrames += frames;
            }
            catch(AudioFormatsException e)
            {
                destroyFree(e);
            }
            catch(Exception e)
            {
                assert(false); // disallow
            }
            return n;
        }

        /// Returns: size in bytes of one sample in the current format.
        int sampleSize()
        {
            final switch(_format)
            {
                case Format.s8:     return 1;
                case Format.s16le:  return 2;
                case Format.s24le:  return 3;
                case Format.fp32le: return 4;
                case Format.fp64le: return 8;
            }
        }

        /// Returns: size in bytes of one frame (one sample per channel).
        int frameSize()
        {
            return sampleSize() * _channels;
        }

        /// Seeks back and overwrites the placeholder sizes of the 'RIFF' and
        /// 'data' chunks with values derived from the frames written so far.
        void finalizeEncoding()
        {
            // Widen before multiplying, as done for bytesPerSec in the constructor.
            size_t bytesOfData = cast(size_t) frameSize() * _writtenFrames;

            // write final number of samples for the 'RIFF' chunk
            {
                // "WAVE" + ('fmt ' header + 16-byte body) + ('data' header + payload)
                uint riffLength = cast(uint)( 4 + (4 + 4 + 16) + (4 + 4 + bytesOfData) );
                _io.seek(_riffLengthOffset, false, _userData);
                _io.write_uint_LE(_userData, riffLength);
            }

            // write final number of samples for the 'data' chunk
            {
                _io.seek(_dataLengthOffset, false, _userData);
                _io.write_uint_LE(_userData, cast(uint)bytesOfData );
            }
        }

    private:
        void* _userData;
        IOCallbacks* _io;
        Format _format;
        int _channels;
        int _writtenFrames;
        long _riffLengthOffset, _dataLengthOffset;

        bool _enableDither;
        double[] _ditherBuf;
        TPDFDither _tpdf;

        /// Copies `frames` items from `inSamples` into `_ditherBuf` (grown on
        /// demand) as doubles. With dithering enabled the buffer is processed
        /// by `TPDFDither`, which also limits values to [-1, 1]; otherwise the
        /// values are limited to [-1, 1] here, so that the integer conversions
        /// in `writeSamples` cannot leave their range in either mode.
        /// Note: `writeSamples` passes a sample count as `frames`.
        void ditherInput(T)(T* inSamples, int frames, double scaleFactor)
        {
            if (_ditherBuf.length < frames)
                _ditherBuf.reallocBuffer(frames);

            for (int n = 0; n < frames; ++n)
            {
                _ditherBuf[n] = inSamples[n];
            }

            if (_enableDither)
            {
                _tpdf.process(_ditherBuf.ptr, frames, scaleFactor);
            }
            else
            {
                for (int n = 0; n < frames; ++n)
                {
                    double x = _ditherBuf[n];
                    if (x < -1.0) x = -1.0;
                    if (x >  1.0) x =  1.0;
                    _ditherBuf[n] = x;
                }
            }
        }
    }
}


private:

// wFormatTag
immutable int LinearPCM = 0x0001;
immutable int FloatingPointIEEE = 0x0003;
immutable int WAVE_FORMAT_EXTENSIBLE = 0xFFFE;


/+
MIT License

Copyright (c) 2018 Chris Johnson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+/
/// This is based upon TPDF Dither by Chris Johnson / AirWindows
/// though the algorithm changed quite a bit, tuned on 8-bit dither by ear.
struct TPDFDither
{
nothrow:
@nogc:

    /// Dithers `frames` values in place. Each value is scaled by
    /// `scaleFactor`, offset, perturbed by two `rand()`-based noise terms,
    /// floored to an integer step, scaled back, then limited to [-1, 1].
    void process(double* inoutSamples, int frames, double scaleFactor)
    {
        for (int n = 0; n < frames; ++n)
        {
            double x = inoutSamples[n];

            x *= scaleFactor;
            //0-1 is now one bit, now we dither

            enum double TUNE0 = 0.25; // could probably be better if tuned interactively
            enum double TUNE1 = TUNE0*0.5; // ditto

            // Offset chosen so that offset + mean noise equals 0.5.
            x += (0.5 - 0.5 * (TUNE0+TUNE1));
            x += TUNE0 * (rand()/cast(double)RAND_MAX);
            x += TUNE1 * (rand()/cast(double)RAND_MAX);
            x = floor(x);
            //TPDF: two 0-1 random noises
            x /= scaleFactor;
            if (x < -1.0) x = -1.0;
            if (x >  1.0) x =  1.0;
            inoutSamples[n] = x;
        }
    }
}