/**
Supports Microsoft WAV audio file format.

Copyright: Guillaume Piolat 2015-2020.
License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
*/
module audioformats.wav;

import core.stdc.math: round, floor, fabs;
import core.stdc.stdlib: rand, RAND_MAX;
import audioformats.io;
import audioformats.internals;

@nogc:
nothrow:

version(decodeWAV)
{
    /// WAV decoder. Use both for scanning and decoding.
    ///
    /// Typical usage: construct with I/O callbacks, call `scan()` once, then
    /// call `readSamples` / `seekPosition` / `tellPosition`.
    final class WAVDecoder
    {
    public:
    @nogc:
    nothrow:

        /// Error descriptor returned by `scan`.
        /// An empty `reason` (see `none`) is what `scan` returns on success.
        static struct WAVError
        {
        @nogc nothrow pure @safe:
            string reason; /// Human-readable description of the failure.
            string file;   /// Source file where the error was created.
            size_t line;   /// Source line where the error was created.

            /// Returns: the "no error" value (all fields empty / zero).
            static WAVError none() {return WAVError("","",0);}

            this(string reason, string file = __FILE__, size_t line = __LINE__)
            {
                this.reason = reason;
                this.file = file;
                this.line = line;
            }
        }

        /// SubFormat GUID that `scan` accepts inside a WAVE_FORMAT_EXTENSIBLE 'fmt ' chunk.
        static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_IEEE_FLOAT =
        [3, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113];

        /// Params:
        ///     io       = I/O callbacks used for every read/seek/skip.
        ///     userData = Opaque pointer passed back to each callback.
        this(IOCallbacks* io, void* userData) nothrow
        {
            _io = io;
            _userData = userData;
        }

        /// Parses the RIFF structure of the stream.
        /// After scan, we know _sampleRate, _lengthInFrames, and _channels, and can call `readSamples`.
        /// Returns: `WAVError.none` on success, else an error with a non-empty `reason`.
        WAVError scan()
        {
            // check RIFF header
            {
                uint chunkId, chunkSize;
                bool err;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize, &err);
                if (err)
                    return WAVError("Cannot read RIFF header");
                if (chunkId != RIFFChunkId!"RIFF")
                    return WAVError("Expected RIFF chunk.");

                if (chunkSize < 4)
                    return WAVError("RIFF chunk is too small to contain a format.");

                uint riffFormat = _io.read_uint_BE(_userData, &err);
                if (err)
                    return WAVError("Cannot read RIFF format");
                if (riffFormat != RIFFChunkId!"WAVE")
                    return WAVError("Expected WAVE format.");
            }

            bool foundFmt = false;
            bool foundData = false;

            int bitsPerSample;

            while (!_io.nothingToReadAnymore(_userData))
            {
                // Some corrupted WAV files in the wild finish with one
                // extra 0 byte after an AFAn chunk, very odd
                if (_io.remainingBytesToRead(_userData) == 1)
                {
                    bool peekErr;
                    ubyte res = _io.peek_ubyte(_userData, &peekErr);
                    if (peekErr)
                        return WAVError("cannot read ubyte");
                    if (res == 0)
                        break;
                }

                // Question: is there any reason to parse the whole WAV file? This prevents streaming.

                uint chunkId, chunkSize;
                bool err;
                _io.readRIFFChunkHeader(_userData, chunkId, chunkSize, &err);
                if (err)
                    return WAVError("Cannot read RIFF header");

                if (chunkId == RIFFChunkId!"fmt ")
                {
                    if (foundFmt)
                        return WAVError("Found several 'fmt ' chunks in RIFF file.");

                    foundFmt = true;

                    if (chunkSize < 16)
                        return WAVError("Expected at least 16 bytes in 'fmt ' chunk."); // found in real-world for the moment: 16 or 40 bytes

                    _audioFormat = _io.read_ushort_LE(_userData, &err);
                    if (err) return WAVError("Cannot read WAV format");
                    bool isWFE = _audioFormat == WAVE_FORMAT_EXTENSIBLE;

                    if (_audioFormat != LinearPCM && _audioFormat != FloatingPointIEEE && !isWFE)
                        return WAVError("Unsupported audio format, only PCM and IEEE float and WAVE_FORMAT_EXTENSIBLE are supported.");

                    _channels = _io.read_ushort_LE(_userData, &err);
                    if (err) return WAVError("Cannot read number of channels");

                    // A frame of zero channels has zero size, which would later
                    // divide by zero when computing the number of frames.
                    if (_channels == 0)
                        return WAVError("Invalid number of channels.");

                    _sampleRate = _io.read_uint_LE(_userData, &err);
                    if (err) return WAVError("Cannot read sample-rate");
                    if (_sampleRate <= 0)
                        return WAVError("Unsupported sample-rate."); // we do not support sample-rate higher than 2^31hz

                    // Read only to advance in the stream, the value itself is not used.
                    uint bytesPerSec = _io.read_uint_LE(_userData, &err);
                    if (err) return WAVError("Cannot read bytesPerSec");
                    int bytesPerFrame = _io.read_ushort_LE(_userData, &err);
                    if (err) return WAVError("Cannot read bytesPerFrame");
                    bitsPerSample = _io.read_ushort_LE(_userData, &err);
                    if (err) return WAVError("Cannot read bitsPerSample");

                    if (bitsPerSample != 8 && bitsPerSample != 16 && bitsPerSample != 24 && bitsPerSample != 32 && bitsPerSample != 64)
                        return WAVError("Unsupported bitdepth");

                    if (bytesPerFrame != (bitsPerSample / 8) * _channels)
                        return WAVError("Invalid bytes-per-second, data might be corrupted.");

                    // Sometimes there is no cbSize
                    if (chunkSize >= 18)
                    {
                        ushort cbSize = _io.read_ushort_LE(_userData, &err);
                        if (err) return WAVError("Cannot read cbSize");

                        if (isWFE)
                        {
                            // The extension (2 + 4 + 16 bytes) must be announced by cbSize AND
                            // actually fit in the chunk, else the skip below would underflow.
                            if (cbSize < 22 || chunkSize < (18 + 2 + 4 + 16))
                                return WAVError("Unsupported WAVE_FORMAT_EXTENSIBLE.");

                            // wReserved and dwChannelMask are read to advance, but not used.
                            ushort wReserved = _io.read_ushort_LE(_userData, &err);
                            if (err) return WAVError("Cannot read wReserved");
                            uint dwChannelMask = _io.read_uint_LE(_userData, &err);
                            if (err) return WAVError("Cannot read dwChannelMask");
                            ubyte[16] SubFormat = _io.read_guid(_userData, &err);
                            if (err) return WAVError("Cannot read SubFormat");

                            if (SubFormat != KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
                                return WAVError("Unsupported GUID in WAVE_FORMAT_EXTENSIBLE.");

                            _audioFormat = FloatingPointIEEE;

                            _io.skip(chunkSize - (18 + 2 + 4 + 16), _userData);
                        }
                        else
                        {
                            _io.skip(chunkSize - 18, _userData);
                        }
                    }
                    else
                    {
                        // Without the extension the real sample format is unknown,
                        // and `readSamples` could not decode anything.
                        if (isWFE)
                            return WAVError("Unsupported WAVE_FORMAT_EXTENSIBLE.");

                        _io.skip(chunkSize - 16, _userData);
                    }
                }
                else if (chunkId == RIFFChunkId!"data")
                {
                    if (foundData)
                        return WAVError("Found several 'data' chunks in RIFF file.");

                    if (!foundFmt)
                        return WAVError("'fmt ' chunk expected before the 'data' chunk.");

                    _bytePerSample = bitsPerSample / 8;

                    // Non-zero: 'fmt ' validation guarantees _channels >= 1 and bitsPerSample >= 8.
                    uint frameSize = _channels * _bytePerSample;
                    if (chunkSize % frameSize != 0)
                        return WAVError("Remaining bytes in 'data' chunk, inconsistent with audio data type.");

                    uint numFrames = chunkSize / frameSize;
                    _lengthInFrames = numFrames;

                    _samplesOffsetInFile = _io.tell(_userData);

                    _io.skip(chunkSize, _userData); // skip, will read later
                    foundData = true;
                }
                else
                {
                    // ignore unknown chunks
                    _io.skip(chunkSize, _userData);
                }
            }

            if (!foundFmt)
                return WAVError("'fmt ' chunk not found.");

            if (!foundData)
                return WAVError("'data' chunk not found.");

            // Get ready to decode
            if (!_io.seek(_samplesOffsetInFile, false, _userData))
                return WAVError("Cannot seek to the audio data.");
            _framePosition = 0; // seek to start

            return WAVError.none;
        }

        /// Seeks to a frame, counted from the start of the audio data.
        /// Returns: false in case of failure (out-of-range frame, or failed seek).
        bool seekPosition(int absoluteFrame)
        {
            if (absoluteFrame < 0)
                return false;
            if (absoluteFrame > _lengthInFrames)
                return false;

            // 64-bit arithmetic: the byte offset can exceed 32 bits on large files.
            long frameSize = cast(long) _channels * _bytePerSample;
            long pos = _samplesOffsetInFile + absoluteFrame * frameSize;
            if (!_io.seek(pos, false, _userData))
                return false;
            _framePosition = absoluteFrame;
            return true;
        }

        /// Returns: position in absolute number of frames since beginning.
        int tellPosition()
        {
            return _framePosition;
        }

        /// Reads interleaved samples, converting them to `T`.
        /// `outData` should have enough room for frames * _channels.
        /// Params:
        ///     outData   = Destination for interleaved samples.
        ///     maxFrames = Maximum number of frames to read. Negative values read nothing.
        ///     err       = Set to true on read failure or unsupported bit-depth, false otherwise.
        /// Returns: Frames actually read, 0 if `*err` was set.
        int readSamples(T)(T* outData, int maxFrames, bool* err)
        {
            *err = false;

            assert(_framePosition <= _lengthInFrames);
            int available = _lengthInFrames - _framePosition;

            // How much frames can we decode?
            int frames = maxFrames;
            if (frames > available)
                frames = available;

            // A negative count would become a huge unsigned loop bound below.
            if (frames < 0)
                frames = 0;

            _framePosition += frames;

            int numSamples = frames * _channels;

            uint n = 0;

            if (_audioFormat == FloatingPointIEEE)
            {
                if (_bytePerSample == 4)
                {
                    for (n = 0; n < numSamples; ++n)
                    {
                        float sample = _io.read_float_LE(_userData, err);
                        if (*err)
                            return 0; // could return n, but well
                        outData[n] = sample;
                    }
                }
                else if (_bytePerSample == 8)
                {
                    for (n = 0; n < numSamples; ++n)
                    {
                        double sample = _io.read_double_LE(_userData, err);
                        if (*err)
                            return 0; // ditto
                        outData[n] = sample;
                    }
                }
                else
                {
                    *err = true;
                    return 0; // Unsupported bit-depth for floating point data, should be 32 or 64.
                }
            }
            else if (_audioFormat == LinearPCM)
            {
                if (_bytePerSample == 1)
                {
                    // 8-bit samples are read as unsigned bytes, centered on 128.
                    for (n = 0; n < numSamples; ++n)
                    {
                        ubyte b = _io.read_ubyte(_userData, err);
                        if (*err)
                            return 0; // ditto
                        outData[n] = (b - 128) / 127.0;
                    }
                }
                else if (_bytePerSample == 2)
                {
                    for (n = 0; n < numSamples; ++n)
                    {
                        short s = _io.read_ushort_LE(_userData, err);
                        if (*err)
                            return 0; // ditto
                        outData[n] = s / 32767.0;
                    }
                }
                else if (_bytePerSample == 3)
                {
                    for (n = 0; n < numSamples; ++n)
                    {
                        int s = _io.read_24bits_LE(_userData, err);
                        if (*err)
                            return 0; // ditto
                        // duplicate sign bit
                        s = (s << 8) >> 8;
                        outData[n] = s / 8388607.0;
                    }
                }
                else if (_bytePerSample == 4)
                {
                    for (n = 0; n < numSamples; ++n)
                    {
                        int s = _io.read_uint_LE(_userData, err);
                        if (*err)
                            return 0; // ditto
                        outData[n] = s / 2147483648.0;
                    }
                }
                else
                {
                    // Unsupported bit-depth for integer PCM data, should be 8, 16, 24 or 32 bits.
                    *err = true;
                    return 0;
                }
            }
            else
                assert(false); // should have been handled earlier, crash

            // Return number of frames read
            return frames;
        }

    package:
        int _sampleRate;
        int _channels;
        int _audioFormat;
        int _bytePerSample;
        long _samplesOffsetInFile;
        uint _lengthInFrames;
        uint _framePosition;

    private:
        void* _userData;
        IOCallbacks* _io;
    }
}


version(encodeWAV)
{
    /// WAV encoder. Writes the RIFF header at construction, samples with
    /// `writeSamples`, and patches the chunk lengths in `finalizeEncoding`.
    final class WAVEncoder
    {
    public:
    @nogc:
    nothrow:
        /// Sample formats that can be written.
        enum Format
        {
            s8,
            s16le,
            s24le,
            fp32le,
            fp64le,
        }

        /// Returns: true for the integer formats (`s8`, `s16le`, `s24le`).
        /// Relies on the declaration order of `Format`.
        static bool isFormatLinearPCM(Format fmt)
        {
            return fmt <= Format.s24le;
        }

        /// Writes the 'RIFF', 'fmt ' and 'data' chunk headers.
        /// Params:
        ///     io           = I/O callbacks used for every write/seek/tell.
        ///     userData     = Opaque pointer passed back to each callback.
        ///     sampleRate   = Sample rate written in the 'fmt ' chunk.
        ///     numChannels  = Number of interleaved channels, must be in 0..1024.
        ///     format       = Sample format of the written file.
        ///     enableDither = Whether integer formats are dithered before quantization.
        ///     err          = Set to true if the channel count is rejected or a write fails.
        this(IOCallbacks* io,
             void* userData,
             int sampleRate,
             int numChannels,
             Format format,
             bool enableDither,
             bool* err)
        {
            *err = false;
            _io = io;
            _userData = userData;
            _channels = numChannels;
            _format = format;
            _enableDither = enableDither;

            // Avoids a number of edge cases.
            if (_channels < 0 || _channels > 1024)
            {
                // Can't save a WAV with this number of channels.
                *err = true;
                return;
            }

            // RIFF header
            // its size will be overwritten at finalizing
            _riffLengthOffset = _io.tell(_userData) + 4;
            if (! _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"RIFF", 0))
            {
                *err = true;
                return;
            }
            if (! _io.write_uint_BE(_userData, RIFFChunkId!"WAVE"))
            {
                *err = true;
                return;
            }

            // 'fmt ' sub-chunk
            if (! _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"fmt ", 0x10))
            {
                *err = true;
                return;
            }
            if (! _io.write_ushort_LE(_userData, isFormatLinearPCM(format) ? LinearPCM : FloatingPointIEEE))
            {
                *err = true;
                return;
            }
            if (! _io.write_ushort_LE(_userData, cast(ushort)(_channels)))
            {
                *err = true;
                return;
            }
            if (! _io.write_uint_LE(_userData, sampleRate))
            {
                *err = true;
                return;
            }

            size_t bytesPerSec = sampleRate * cast(size_t) frameSize();
            if (!_io.write_uint_LE(_userData, cast(uint)(bytesPerSec)))
            {
                *err = true;
                return;
            }

            int bytesPerFrame = frameSize();
            if (!_io.write_ushort_LE(_userData, cast(ushort)bytesPerFrame))
            {
                *err = true;
                return;
            }

            if (!_io.write_ushort_LE(_userData, cast(ushort)(sampleSize() * 8)))
            {
                *err = true;
                return;
            }

            // data sub-chunk
            _dataLengthOffset = _io.tell(_userData) + 4;
            if (! _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"data", 0)) // write 0 but temporarily, this will be overwritten at finalizing
            {
                *err = true;
                return;
            }
            _writtenFrames = 0;
        }

        /// Writes interleaved samples.
        /// `inSamples` should hold frames * _channels samples.
        /// Params:
        ///     inSamples = Interleaved input samples.
        ///     frames    = Number of frames to write.
        ///     err       = Set to true if a write failed, false otherwise.
        /// Returns: the number of individual samples written (frames * _channels),
        ///          or 0 if a write failed.
        int writeSamples(T)(T* inSamples, int frames, bool* err) nothrow
        {
            int n = 0;
            int samples = frames * _channels;

            final switch(_format)
            {
                case Format.s8:
                    ditherInput(inSamples, samples, 127.0f);
                    for ( ; n < samples; ++n)
                    {
                        double x = _ditherBuf[n];
                        int b = cast(int)(128.5 + x * 127.0);
                        if (!_io.write_byte(_userData, cast(byte)b))
                        {
                            *err = true;
                            return 0;
                        }
                    }
                    break;

                case Format.s16le:
                    ditherInput(inSamples, samples, 32767.0f);
                    for ( ; n < samples; ++n)
                    {
                        double x = _ditherBuf[n];
                        // Offset to positive before truncating, so that it rounds to nearest.
                        int s = cast(int)(32768.5 + x * 32767.0);
                        s -= 32768;
                        assert(s >= -32767 && s <= 32767);
                        if (!_io.write_short_LE(_userData, cast(short)s))
                        {
                            *err = true;
                            return 0;
                        }
                    }
                    break;

                case Format.s24le:
                    ditherInput(inSamples, samples, 8388607.0f);
                    for ( ; n < samples; ++n)
                    {
                        double x = _ditherBuf[n];
                        // Offset to positive before truncating, so that it rounds to nearest.
                        int s = cast(int)(8388608.5 + x * 8388607.0);
                        s -= 8388608;
                        assert(s >= -8388607 && s <= 8388607);
                        if (!_io.write_24bits_LE(_userData, s))
                        {
                            *err = true;
                            return 0;
                        }
                    }
                    break;

                case Format.fp32le:
                    for ( ; n < samples; ++n)
                    {
                        if (!_io.write_float_LE(_userData, inSamples[n]))
                        {
                            *err = true;
                            return 0;
                        }
                    }
                    break;

                case Format.fp64le:
                    for ( ; n < samples; ++n)
                    {
                        if (!_io.write_double_LE(_userData, inSamples[n]))
                        {
                            *err = true;
                            return 0;
                        }
                    }
                    break;
            }
            _writtenFrames += frames;
            *err = false;

            return n;
        }

        /// Returns: size in bytes of one sample in the current format.
        int sampleSize()
        {
            final switch(_format)
            {
                case Format.s8:     return 1;
                case Format.s16le:  return 2;
                case Format.s24le:  return 3;
                case Format.fp32le: return 4;
                case Format.fp64le: return 8;
            }
        }

        /// Returns: size in bytes of one frame (one sample for each channel).
        int frameSize()
        {
            return sampleSize() * _channels;
        }

        /// Patches the 'RIFF' and 'data' chunk lengths now that the number of
        /// written frames is known.
        /// Params:
        ///     err = Set to true if a seek or write failed, false on success.
        void finalizeEncoding(bool* err)
        {
            // Widen before multiplying so the product isn't computed in 32-bit `int`.
            size_t bytesOfData = cast(size_t) frameSize() * cast(size_t) _writtenFrames;

            // write final number of samples for the 'RIFF' chunk
            {
                // "WAVE" tag + whole 'fmt ' chunk + whole 'data' chunk
                uint riffLength = cast(uint)( 4 + (4 + 4 + 16) + (4 + 4 + bytesOfData) );
                if (!_io.seek(_riffLengthOffset, false, _userData))
                {
                    *err = true;
                    return;
                }
                if (!_io.write_uint_LE(_userData, riffLength))
                {
                    *err = true;
                    return;
                }
            }

            // write final number of samples for the 'data' chunk
            {
                if (!_io.seek(_dataLengthOffset, false, _userData))
                {
                    *err = true;
                    return;
                }
                if (!_io.write_uint_LE(_userData, cast(uint)bytesOfData ))
                {
                    *err = true;
                    return;
                }
            }

            // Everything was written: report success.
            *err = false;
        }

        ~this()
        {
            // Release the dither scratch buffer.
            _ditherBuf.reallocBuffer(0);
        }

    private:
        void* _userData;
        IOCallbacks* _io;
        Format _format;
        int _channels;
        int _writtenFrames;
        long _riffLengthOffset, _dataLengthOffset;

        bool _enableDither;
        double[] _ditherBuf;
        TPDFDither _tpdf;

        // Copies `frames` input values to `_ditherBuf` as doubles, and dithers them
        // in place if dithering is enabled. Callers pass a count of individual
        // samples here, not a count of multi-channel frames.
        void ditherInput(T)(T* inSamples, int frames, double scaleFactor)
        {
            // The scratch buffer only ever grows.
            if (_ditherBuf.length < frames)
                _ditherBuf.reallocBuffer(frames);

            for (int n = 0; n < frames; ++n)
            {
                _ditherBuf[n] = inSamples[n];
            }

            if (_enableDither)
                _tpdf.process(_ditherBuf.ptr, frames, scaleFactor);
        }
    }
}


private:

// wFormatTag
immutable int LinearPCM = 0x0001;
immutable int FloatingPointIEEE = 0x0003;
immutable int WAVE_FORMAT_EXTENSIBLE = 0xFFFE;


/+
MIT License

Copyright (c) 2018 Chris Johnson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+/
/// This is based upon TPDF Dither by Chris Johnson / AirWindows
/// though the algorithm changed quite a bit, tuned on 8-bit dither by ear.
public struct TPDFDither
{
nothrow:
@nogc:

    /// Dithers and quantizes samples in place.
    ///
    /// Each sample is scaled by `scaleFactor`, offset, perturbed by two
    /// `rand()`-based noises, floored to an integer, scaled back, and finally
    /// clamped to [-1.0, 1.0].
    ///
    /// Params:
    ///     inoutSamples = Samples to process, overwritten with the result.
    ///     frames       = Number of values to process in `inoutSamples`.
    ///     scaleFactor  = Quantization scale, such that one integer step is one bit.
    void process(double* inoutSamples, int frames, double scaleFactor)
    {
        enum double NOISE_AMOUNT_A = 0.25; // could probably be better if tuned interactively
        enum double NOISE_AMOUNT_B = NOISE_AMOUNT_A*0.5; // ditto

        // Constant offset applied before the two noises are added.
        enum double BIAS = (0.5 - 0.5 * (NOISE_AMOUNT_A+NOISE_AMOUNT_B));

        foreach (int i; 0 .. frames)
        {
            // 0-1 is now one bit, now we dither
            double value = inoutSamples[i];
            value *= scaleFactor;

            // TPDF: two 0-1 random noises
            value += BIAS;
            value += NOISE_AMOUNT_A * (rand()/cast(double)RAND_MAX);
            value += NOISE_AMOUNT_B * (rand()/cast(double)RAND_MAX);
            value = floor(value);

            // Back to the original scale, then keep within full-scale.
            value /= scaleFactor;
            if (value < -1.0)
                value = -1.0;
            else if (value > 1.0)
                value = 1.0;

            inoutSamples[i] = value;
        }
    }
}