/**
Supports Microsoft WAV audio file format.

Copyright: Guillaume Piolat 2015-2020.
License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module audioformats.wav;

import core.stdc.math: round, floor, fabs;
import core.stdc.stdlib: rand, RAND_MAX;
import dplug.core.nogc;
import dplug.core.vec;
import audioformats.io;


version(decodeWAV)
{
    /// WAV decoder working through user-provided I/O callbacks.
    /// Used both for scanning (`scan`) and decoding (`readSamples`).
    final class WAVDecoder
    {
    public:
    @nogc:

        /// SubFormat GUID announcing IEEE float samples in a WAVE_FORMAT_EXTENSIBLE 'fmt ' chunk.
        static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_IEEE_FLOAT =
        [3, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113];

        /// SubFormat GUID announcing integer PCM samples in a WAVE_FORMAT_EXTENSIBLE 'fmt ' chunk.
        static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_PCM =
        [1, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113];

        /// Params:
        ///     io = I/O callbacks used for every read/seek.
        ///     userData = opaque pointer handed back to each callback.
        this(IOCallbacks* io, void* userData) nothrow
        {
            _io = io;
            _userData = userData;
        }

        /// Parses the RIFF structure, validating the 'fmt ' chunk and locating the 'data' chunk.
        /// After scan, we know _sampleRate, _lengthInFrames, and _channels, and can call `readSamples`.
        /// Throws: a `mallocNew`-allocated `Exception` on malformed or unsupported input.
        void scan()
        {
            // check RIFF header
            {
                uint chunkId, chunkSize;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize);
                if (chunkId != RIFFChunkId!"RIFF")
                    throw mallocNew!Exception("Expected RIFF chunk.");

                if (chunkSize < 4)
                    throw mallocNew!Exception("RIFF chunk is too small to contain a format.");

                if (_io.read_uint_BE(_userData) != RIFFChunkId!"WAVE")
                    throw mallocNew!Exception("Expected WAVE format.");
            }

            bool foundFmt = false;
            bool foundData = false;

            int bitsPerSample;

            while (!_io.nothingToReadAnymore(_userData))
            {
                // Some corrupted WAV files in the wild finish with one
                // extra 0 byte after an AFAn chunk, very odd
                if (_io.remainingBytesToRead(_userData) == 1)
                {
                    if (_io.peek_ubyte(_userData) == 0)
                        break;
                }

                // Question: is there any reason to parse the whole WAV file? This prevents streaming.

                uint chunkId, chunkSize;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize);
                if (chunkId == RIFFChunkId!"fmt ")
                {
                    if (foundFmt)
                        throw mallocNew!Exception("Found several 'fmt ' chunks in RIFF file.");

                    foundFmt = true;

                    if (chunkSize < 16)
                        throw mallocNew!Exception("Expected at least 16 bytes in 'fmt ' chunk."); // found in real-world for the moment: 16 or 40 bytes

                    _audioFormat = _io.read_ushort_LE(_userData);
                    bool isWFE = _audioFormat == WAVE_FORMAT_EXTENSIBLE;

                    if (_audioFormat != LinearPCM && _audioFormat != FloatingPointIEEE && !isWFE)
                        throw mallocNew!Exception("Unsupported audio format, only PCM and IEEE float and WAVE_FORMAT_EXTENSIBLE are supported.");

                    _channels = _io.read_ushort_LE(_userData);

                    // A zero channel count would make the frame size zero, and the
                    // 'data' chunk handling divides by the frame size.
                    if (_channels == 0)
                        throw mallocNew!Exception("Number of channels cannot be zero.");

                    _sampleRate = _io.read_uint_LE(_userData);
                    if (_sampleRate <= 0)
                        throw mallocNew!Exception("Unsupported sample-rate."); // we do not support sample-rate higher than 2^31hz

                    uint bytesPerSec = _io.read_uint_LE(_userData); // read to advance the stream, value unused
                    int bytesPerFrame = _io.read_ushort_LE(_userData);
                    bitsPerSample = _io.read_ushort_LE(_userData);

                    if (bitsPerSample != 8 && bitsPerSample != 16 && bitsPerSample != 24 && bitsPerSample != 32 && bitsPerSample != 64)
                        throw mallocNew!Exception("Unsupported bitdepth");

                    if (bytesPerFrame != (bitsPerSample / 8) * _channels)
                        throw mallocNew!Exception("Invalid bytes-per-frame, data might be corrupted.");

                    // Sometimes there is no cbSize
                    if (chunkSize >= 18)
                    {
                        ushort cbSize = _io.read_ushort_LE(_userData);

                        if (isWFE)
                        {
                            // The extension holds 2 + 4 + 16 = 22 bytes. The chunk must be
                            // large enough for it, else the skip below would underflow.
                            if (cbSize < 22 || chunkSize < 18 + 2 + 4 + 16)
                                throw mallocNew!Exception("Unsupported WAVE_FORMAT_EXTENSIBLE.");

                            ushort wReserved = _io.read_ushort_LE(_userData);
                            uint dwChannelMask = _io.read_uint_LE(_userData);
                            ubyte[16] SubFormat = _io.read_guid(_userData);

                            // Resolve the actual sample encoding from the sub-format GUID.
                            if (SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
                            {
                                _audioFormat = FloatingPointIEEE;
                            }
                            else if (SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
                            {
                                _audioFormat = LinearPCM;
                            }
                            else
                                throw mallocNew!Exception("Unsupported GUID in WAVE_FORMAT_EXTENSIBLE.");

                            _io.skip(chunkSize - (18 + 2 + 4 + 16), _userData);
                        }
                        else
                        {
                            _io.skip(chunkSize - 18, _userData);
                        }
                    }
                    else
                    {
                        // Without the extension there is no sub-format, hence no way
                        // to know the sample encoding of an extensible file.
                        if (isWFE)
                            throw mallocNew!Exception("Unsupported WAVE_FORMAT_EXTENSIBLE.");

                        _io.skip(chunkSize - 16, _userData);
                    }

                }
                else if (chunkId == RIFFChunkId!"data")
                {
                    if (foundData)
                        throw mallocNew!Exception("Found several 'data' chunks in RIFF file.");

                    if (!foundFmt)
                        throw mallocNew!Exception("'fmt ' chunk expected before the 'data' chunk.");

                    _bytePerSample = bitsPerSample / 8;
                    uint frameSize = _channels * _bytePerSample; // non-zero: channels and bit-depth were validated
                    if (chunkSize % frameSize != 0)
                        throw mallocNew!Exception("Remaining bytes in 'data' chunk, inconsistent with audio data type.");

                    uint numFrames = chunkSize / frameSize;
                    _lengthInFrames = numFrames;

                    _samplesOffsetInFile = _io.tell(_userData);

                    _io.skip(chunkSize, _userData); // skip, will read later
                    foundData = true;
                }
                else
                {
                    // ignore unknown chunks
                    _io.skip(chunkSize, _userData);
                }
            }

            if (!foundFmt)
                throw mallocNew!Exception("'fmt ' chunk not found.");

            if (!foundData)
                throw mallocNew!Exception("'data' chunk not found.");

            // Get ready to decode
            _io.seek(_samplesOffsetInFile, false, _userData);
            _framePosition = 0; // seek to start
        }

        /// Seeks to the given frame, counted from the start of the audio data.
        /// Returns: false in case of failure (position out of range).
        bool seekPosition(int absoluteFrame)
        {
            if (absoluteFrame < 0)
                return false;
            if (absoluteFrame > _lengthInFrames)
                return false;
            uint frameSize = _channels * _bytePerSample;
            long pos = _samplesOffsetInFile + cast(long)absoluteFrame * frameSize;
            _io.seek(pos, false, _userData);
            _framePosition = absoluteFrame;
            return true;
        }

        /// Returns: position in absolute number of frames since beginning.
        int tellPosition()
        {
            return _framePosition;
        }

        /// Reads interleaved samples, converted to `T`.
        /// `outData` should have enough room for frames * _channels
        /// Returns: Frames actually read, 0 if a read failed or nothing can be read.
        int readSamples(T)(T* outData, int maxFrames) nothrow
        {
            assert(_framePosition <= _lengthInFrames);

            // 64-bit arithmetic: the remaining length may not fit in an `int`.
            long available = cast(long)_lengthInFrames - cast(long)_framePosition;

            // How much frames can we decode?
            int frames = maxFrames;
            if (frames > available)
                frames = cast(int)available;

            // Nothing to decode. Also rejects negative requests, which would otherwise
            // corrupt the frame position and the loop bounds below.
            if (frames <= 0)
                return 0;

            _framePosition += frames;

            int numSamples = frames * _channels;

            int n = 0;

            try
            {
                if (_audioFormat == FloatingPointIEEE)
                {
                    if (_bytePerSample == 4)
                    {
                        for (n = 0; n < numSamples; ++n)
                            outData[n] = _io.read_float_LE(_userData);
                    }
                    else if (_bytePerSample == 8)
                    {
                        for (n = 0; n < numSamples; ++n)
                            outData[n] = _io.read_double_LE(_userData);
                    }
                    else
                        throw mallocNew!Exception("Unsupported bit-depth for floating point data, should be 32 or 64.");
                }
                else if (_audioFormat == LinearPCM)
                {
                    if (_bytePerSample == 1)
                    {
                        // 8-bit samples are stored unsigned, centered on 128
                        for (n = 0; n < numSamples; ++n)
                        {
                            ubyte b = _io.read_ubyte(_userData);
                            outData[n] = (b - 128) / 127.0;
                        }
                    }
                    else if (_bytePerSample == 2)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            short s = _io.read_ushort_LE(_userData);
                            outData[n] = s / 32767.0;
                        }
                    }
                    else if (_bytePerSample == 3)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            int s = _io.read_24bits_LE(_userData);
                            // duplicate sign bit
                            s = (s << 8) >> 8;
                            outData[n] = s / 8388607.0;
                        }
                    }
                    else if (_bytePerSample == 4)
                    {
                        for (n = 0; n < numSamples; ++n)
                        {
                            int s = _io.read_uint_LE(_userData);
                            outData[n] = s / 2147483648.0;
                        }
                    }
                    else
                        throw mallocNew!Exception("Unsupported bit-depth for integer PCM data, should be 8, 16, 24 or 32 bits.");
                }
                else
                    assert(false); // should have been handled earlier, crash
            }
            catch(Exception e)
            {
                destroyFree(e); // well this is really unexpected, since no read should fail in this loop
                return 0;
            }

            // Return number of frames read
            return frames;
        }

    package:
        int _sampleRate;
        int _channels;
        int _audioFormat;
        int _bytePerSample;
        long _samplesOffsetInFile;
        uint _lengthInFrames;
        uint _framePosition;

    private:
        void* _userData;
        IOCallbacks* _io;
    }
}


version(encodeWAV)
{
    /// WAV encoder working through user-provided I/O callbacks.
    /// The headers are written at construction, samples are appended with `writeSamples`,
    /// and `finalizeEncoding` patches the chunk sizes.
    final class WAVEncoder
    {
    public:
    @nogc:
        /// Sample encodings this encoder can write.
        enum Format
        {
            s8,
            s16le,
            s24le,
            fp32le,
            fp64le,
        }

        /// Returns: true if `fmt` is one of the integer formats (written with the PCM format tag).
        static bool isFormatLinearPCM(Format fmt)
        {
            return fmt <= Format.s24le;
        }

        /// Writes the RIFF, 'fmt ' and 'data' headers; sizes are left at 0 until `finalizeEncoding`.
        /// Params:
        ///     io = I/O callbacks used for every write/seek.
        ///     userData = opaque pointer handed back to each callback.
        ///     sampleRate = sample rate written in the 'fmt ' chunk.
        ///     numChannels = channel count, must be in 1..1024.
        ///     format = sample encoding.
        ///     enableDither = whether integer formats are dithered before quantization.
        /// Throws: a `mallocNew`-allocated `Exception` on an invalid channel count.
        this(IOCallbacks* io, void* userData, int sampleRate, int numChannels, Format format, bool enableDither)
        {
            _io = io;
            _userData = userData;
            _channels = numChannels;
            _format = format;
            _enableDither = enableDither;

            // Avoids a number of edge cases.
            // Zero channels is rejected too: it would produce a WAV with a zero frame size.
            if (_channels <= 0 || _channels > 1024)
                throw mallocNew!Exception("Can't save a WAV with this numnber of channels.");

            // RIFF header
            // its size will be overwritten at finalizing
            _riffLengthOffset = _io.tell(_userData) + 4;
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"RIFF", 0);
            _io.write_uint_BE(_userData, RIFFChunkId!"WAVE");

            // 'fmt ' sub-chunk
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"fmt ", 0x10);
            _io.write_ushort_LE(_userData, isFormatLinearPCM(format) ? LinearPCM : FloatingPointIEEE);
            _io.write_ushort_LE(_userData, cast(ushort)(_channels));
            _io.write_uint_LE(_userData, sampleRate);

            size_t bytesPerSec = sampleRate * cast(size_t) frameSize();
            _io.write_uint_LE(_userData,  cast(uint)(bytesPerSec));

            int bytesPerFrame = frameSize();
            _io.write_ushort_LE(_userData, cast(ushort)bytesPerFrame);

            _io.write_ushort_LE(_userData, cast(ushort)(sampleSize() * 8));

            // data sub-chunk
            _dataLengthOffset = _io.tell(_userData) + 4;
            _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"data", 0); // write 0 but temporarily, this will be overwritten at finalizing
            _writtenFrames = 0;
        }

        /// Releases the intermediate buffer used for dithering.
        ~this()
        {
            _ditherBuf.reallocBuffer(0);
        }

        /// Writes interleaved samples.
        /// `inSamples` should hold frames * _channels samples.
        /// For integer formats, input is clamped to [-1, 1] before quantization.
        /// Returns: the number of samples (not frames) written. If a write fails this is the
        ///          count written before the failure, and the written frame count is not updated.
        int writeSamples(T)(T* inSamples, int frames) nothrow
        {
            // Nothing to write. Also rejects negative counts, which would otherwise
            // be turned into a huge buffer size.
            if (frames <= 0)
                return 0;

            int n = 0;
            try
            {
                int samples = frames * _channels;

                final switch(_format)
                {
                    case Format.s8:
                        ditherInput(inSamples, samples, 127.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // 8-bit samples are stored unsigned, centered on 128
                            int b = cast(int)(128.5 + x * 127.0);
                            _io.write_byte(_userData, cast(byte)b);
                        }
                        break;

                    case Format.s16le:
                        ditherInput(inSamples, samples, 32767.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // offset to positive before truncation so that it rounds to nearest
                            int s = cast(int)(32768.5 + x * 32767.0);
                            s -= 32768;
                            assert(s >= -32767 && s <= 32767);
                            _io.write_short_LE(_userData, cast(short)s);
                        }
                        break;

                    case Format.s24le:
                        ditherInput(inSamples, samples, 8388607.0f);
                        for ( ; n < samples; ++n)
                        {
                            double x = _ditherBuf[n];
                            // offset to positive before truncation so that it rounds to nearest
                            int s = cast(int)(8388608.5 + x * 8388607.0);
                            s -= 8388608;
                            assert(s >= -8388607 && s <= 8388607);
                            _io.write_24bits_LE(_userData, s);
                        }
                        break;

                    case Format.fp32le:
                        for ( ; n < samples; ++n)
                        {
                            _io.write_float_LE(_userData, inSamples[n]);
                        }
                        break;
                    case Format.fp64le:
                        for ( ; n < samples; ++n)
                        {
                            _io.write_double_LE(_userData, inSamples[n]);
                        }
                        break;
                }
                _writtenFrames += frames;
            }
            catch(Exception e)
            {
                destroyFree(e);
            }
            return n;
        }

        /// Returns: size in bytes of one sample in the chosen format.
        int sampleSize()
        {
            final switch(_format)
            {
                case Format.s8:     return 1;
                case Format.s16le:  return 2;
                case Format.s24le:  return 3;
                case Format.fp32le: return 4;
                case Format.fp64le: return 8;
            }
        }

        /// Returns: size in bytes of one frame (one sample for each channel).
        int frameSize()
        {
            return sampleSize() * _channels;
        }

        /// Seeks back into the headers and overwrites the 'RIFF' and 'data' chunk sizes
        /// with their final values. To be called once all samples are written.
        void finalizeEncoding()
        {
            // Multiply in 64-bit, the byte count may exceed what an `int` can hold.
            ulong bytesOfData = cast(ulong)frameSize() * cast(ulong)_writtenFrames;

            // write final number of samples for the 'RIFF' chunk
            {
                // "WAVE" + whole 'fmt ' chunk + whole 'data' chunk; the size field is 32-bit
                uint riffLength = cast(uint)( 4 + (4 + 4 + 16) + (4 + 4 + bytesOfData) );
                _io.seek(_riffLengthOffset, false, _userData);
                _io.write_uint_LE(_userData, riffLength);
            }

            // write final number of samples for the 'data' chunk
            {
                _io.seek(_dataLengthOffset, false, _userData);
                _io.write_uint_LE(_userData, cast(uint)bytesOfData );
            }
        }

    private:
        void* _userData;
        IOCallbacks* _io;
        Format _format;
        int _channels;
        int _writtenFrames;
        long _riffLengthOffset, _dataLengthOffset;

        bool _enableDither;
        double[] _ditherBuf;
        TPDFDither _tpdf;

        /// Copies `frames` input samples into `_ditherBuf` as doubles, clamped to [-1, 1],
        /// then dithers them in place if dithering is enabled.
        /// Note: `frames` is a count of samples here, the callers pass frames * _channels.
        void ditherInput(T)(T* inSamples, int frames, double scaleFactor)
        {
            if (_ditherBuf.length < frames)
                _ditherBuf.reallocBuffer(frames);

            for (int n = 0; n < frames; ++n)
            {
                double x = inSamples[n];

                // Out-of-range input would overflow the integer conversion done by
                // the callers, so clip it. NaN is replaced by silence.
                if (x != x) x = 0.0;
                if (x < -1.0) x = -1.0;
                if (x > 1.0) x = 1.0;

                _ditherBuf[n] = x;
            }

            if (_enableDither)
                _tpdf.process(_ditherBuf.ptr, frames, scaleFactor);
        }
    }
}


private:

// wFormatTag
immutable int LinearPCM = 0x0001;
immutable int FloatingPointIEEE = 0x0003;
immutable int WAVE_FORMAT_EXTENSIBLE = 0xFFFE;


/+
MIT License

Copyright (c) 2018 Chris Johnson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+/
/// This is TPDF Dither by Chris Johnson / AirWindows
/// However it was modified since I didn't think the sound was particularly good on 8-bit material.
struct TPDFDither
{
nothrow:
@nogc:

    /// Dithers and quantizes samples in place.
    /// Params:
    ///     inoutSamples = samples to process, overwritten with the result.
    ///     frames = number of samples to process.
    ///     scaleFactor = scale applied before quantization and undone afterwards,
    ///                   so that one quantization step is 1.0 while dithering.
    /// The result is clamped to [-1, 1].
    void process(double* inoutSamples, int frames, double scaleFactor)
    {
        for (int n = 0; n < frames; ++n)
        {
            double x = inoutSamples[n];

            x *= scaleFactor;
            //0-1 is now one bit, now we dither

            enum double TUNE0 = 0.25; // could probably be better if tuned interactively
            enum double TUNE1 = TUNE0*0.5; // ditto

            // re-center so that the added noise plus `floor` does not bias the signal
            x += (0.5 - 0.5 * (TUNE0+TUNE1));
            x += TUNE0 * (rand()/cast(double)RAND_MAX);
            x += TUNE1 * (rand()/cast(double)RAND_MAX);
            x = floor(x);
            //TPDF: two 0-1 random noises
            x /= scaleFactor;
            if (x < -1.0) x = -1.0;
            if (x > 1.0) x = 1.0;
            inoutSamples[n] = x;
        }
    }
}