audioformats.wav source code

1 /**
2 Supports Microsoft WAV audio file format.
3 
4 Copyright: Guillaume Piolat 2015-2020.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module audioformats.wav;
8 
9 import core.stdc.math: round, floor, fabs;
10 import core.stdc.stdlib: rand, RAND_MAX;
11 import audioformats.io;
12 import audioformats.internals;
13 
14 @nogc:
15 nothrow:
16 
17 version(decodeWAV)
18 {
19     /// Use both for scanning and decoding
20 
21     final class WAVDecoder
22     {
23     public:
24     @nogc:
25     nothrow:
26 
27         static struct WAVError
28         {
29             @nogc nothrow pure @safe:
30             string reason;
31             string file;
32             size_t line;
33             static WAVError none() {return WAVError("","",0);}
34 
35             this(string reason, string file = __FILE__, size_t line = __LINE__)
36             {
37                 this.reason = reason;
38                 this.file = file;
39                 this.line = line;
40             }
41         }
42 
43         static immutable ubyte[16] KSDATAFORMAT_SUBTYPE_IEEE_FLOAT = 
44         [3, 0, 0, 0, 0, 0, 16, 0, 128, 0, 0, 170, 0, 56, 155, 113];
45 
46         this(IOCallbacks* io, void* userData) nothrow
47         {
48             _io = io;
49             _userData = userData;
50         }
51 
52         // After scan, we know _sampleRate, _lengthInFrames, and _channels, and can call `readSamples`
53         WAVError scan()
54         {
55             // check RIFF header
56             {
57                 uint chunkId, chunkSize;
58                 bool err;
59                 _io.readRIFFChunkHeader(_userData, chunkId, chunkSize, &err);
60                 if (err)
61                     return WAVError("Cannot read RIFF header");
62                 if (chunkId != RIFFChunkId!"RIFF")
63                     return WAVError("Expected RIFF chunk.");
64 
65                 if (chunkSize < 4)
66                     return WAVError("RIFF chunk is too small to contain a format.");
67 
68                 if (_io.read_uint_BE(_userData, &err) !=  RIFFChunkId!"WAVE")
69                     return WAVError("Expected WAVE format.");
70             }
71 
72             bool foundFmt = false;
73             bool foundData = false;
74 
75             int byteRate;
76             int blockAlign;
77             int bitsPerSample;
78 
79             while (!_io.nothingToReadAnymore(_userData))
80             {
81                 // Some corrupted WAV files in the wild finish with one
82                 // extra 0 byte after an AFAn chunk, very odd
83                 if (_io.remainingBytesToRead(_userData) == 1)
84                 {
85                     bool err;
86                     ubyte res = _io.peek_ubyte(_userData, &err);
87                     if (err)
88                         return WAVError("cannot read ubyte");
89                     if (res == 0)
90                         break;                    
91                 }
92 
93                 // Question: is there any reason to parse the whole WAV file? This prevents streaming.
94 
95                 uint chunkId, chunkSize;
96                 bool err;
97                 _io.readRIFFChunkHeader(_userData, chunkId, chunkSize, &err); 
98                 if (err)
99                     return WAVError("Cannot read RIFF header");
100 
101                 if (chunkId == RIFFChunkId!"fmt ")
102                 {
103                     if (foundFmt)
104                         return WAVError("Found several 'fmt ' chunks in RIFF file.");
105 
106                     foundFmt = true;
107 
108                     if (chunkSize < 16)
109                         return WAVError("Expected at least 16 bytes in 'fmt ' chunk."); // found in real-world for the moment: 16 or 40 bytes
110 
111                     _audioFormat = _io.read_ushort_LE(_userData, &err);
112                     if (err) return WAVError("Cannot read WAV format");
113                     bool isWFE = _audioFormat == WAVE_FORMAT_EXTENSIBLE;
114 
115                     if (_audioFormat != LinearPCM && _audioFormat != FloatingPointIEEE && !isWFE)
116                         return WAVError("Unsupported audio format, only PCM and IEEE float and WAVE_FORMAT_EXTENSIBLE are supported.");
117 
118                     _channels = _io.read_ushort_LE(_userData, &err);
119                     if (err) return WAVError("Cannot read number of channels");
120 
121                     _sampleRate = _io.read_uint_LE(_userData, &err);
122                     if (_sampleRate <= 0)
123                         return WAVError("Unsupported sample-rate."); // we do not support sample-rate higher than 2^31hz
124 
125                     uint bytesPerSec = _io.read_uint_LE(_userData, &err);
126                     if (err) return WAVError("Cannot read bytesPerSec");
127                     int bytesPerFrame = _io.read_ushort_LE(_userData, &err);
128                     if (err) return WAVError("Cannot read bytesPerFrame");
129                     bitsPerSample = _io.read_ushort_LE(_userData, &err);
130                     if (err) return WAVError("Cannot read bitsPerSample");
131 
132                     if (bitsPerSample != 8 && bitsPerSample != 16 && bitsPerSample != 24 && bitsPerSample != 32 && bitsPerSample != 64) 
133                         return WAVError("Unsupported bitdepth");
134 
135                     if (bytesPerFrame != (bitsPerSample / 8) * _channels)
136                         return WAVError("Invalid bytes-per-second, data might be corrupted.");
137 
138                     // Sometimes there is no cbSize
139                     if (chunkSize >= 18)
140                     {
141                         ushort cbSize = _io.read_ushort_LE(_userData, &err);
142                         if (err) return WAVError("Cannot read cbSize");
143 
144                         if (isWFE)
145                         {
146                             if (cbSize >= 22)
147                             {
148                                 ushort wReserved = _io.read_ushort_LE(_userData, &err);
149                                 if (err) return WAVError("Cannot read wReserved");
150                                 uint dwChannelMask = _io.read_uint_LE(_userData, &err);
151                                 if (err) return WAVError("Cannot read dwChannelMask");
152                                 ubyte[16] SubFormat = _io.read_guid(_userData, &err);
153                                 if (err) return WAVError("Cannot read SubFormat");
154 
155                                 if (SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
156                                 {
157                                     _audioFormat = FloatingPointIEEE;
158                                 }
159                                 else
160                                     return WAVError("Unsupported GUID in WAVE_FORMAT_EXTENSIBLE.");
161                             }
162                             else
163                                 return WAVError("Unsupported WAVE_FORMAT_EXTENSIBLE.");
164 
165                             _io.skip(chunkSize - (18 + 2 + 4 + 16), _userData);
166                         }
167                         else
168                         {
169                             _io.skip(chunkSize - 18, _userData);
170                         }
171                     }
172                     else
173                     {
174                         _io.skip(chunkSize - 16, _userData);
175                     }
176 
177                 }
178                 else if (chunkId == RIFFChunkId!"data")
179                 {
180                     if (foundData)
181                         return WAVError("Found several 'data' chunks in RIFF file.");
182 
183                     if (!foundFmt)
184                         return WAVError("'fmt ' chunk expected before the 'data' chunk.");
185 
186                     _bytePerSample = bitsPerSample / 8;
187                     uint frameSize = _channels * _bytePerSample;
188                     if (chunkSize % frameSize != 0)
189                         return WAVError("Remaining bytes in 'data' chunk, inconsistent with audio data type.");
190 
191                     uint numFrames = chunkSize / frameSize;
192                     _lengthInFrames = numFrames;
193 
194                     _samplesOffsetInFile = _io.tell(_userData);
195 
196                     _io.skip(chunkSize, _userData); // skip, will read later
197                     foundData = true;
198                 }
199                 else
200                 {
201                     // ignore unknown chunks
202                     _io.skip(chunkSize, _userData);
203                 }
204             }
205 
206             if (!foundFmt)
207                 return WAVError("'fmt ' chunk not found.");
208 
209             if (!foundData)
210                 return WAVError("'data' chunk not found.");
211 
212             // Get ready to decode
213             _io.seek(_samplesOffsetInFile, false, _userData);
214             _framePosition = 0; // seek to start
215 
216             return WAVError.none;
217         }
218 
219         /// Returns: false in case of failure.
220         bool seekPosition(int absoluteFrame)
221         {
222             if (absoluteFrame < 0)
223                 return false;
224             if (absoluteFrame > _lengthInFrames)
225                 return false;
226             uint frameSize = _channels * _bytePerSample;
227             long pos = _samplesOffsetInFile + absoluteFrame * frameSize;
228             _io.seek(pos, false, _userData);
229             _framePosition = absoluteFrame;
230             return true;
231         }
232 
233         /// Returns: position in absolute number of frames since beginning.
234         int tellPosition()
235         {
236             return _framePosition;
237         }
238 
239         // read interleaved samples
240         // `outData` should have enough room for frames * _channels
241         // Returs: Frames actually read.
242         int readSamples(T)(T* outData, int maxFrames, bool* err)
243         {
244             *err = false;
245 
246             assert(_framePosition <= _lengthInFrames);
247             int available = _lengthInFrames - _framePosition;
248 
249             // How much frames can we decode?
250             int frames = maxFrames;
251             if (frames > available)
252                 frames = available;
253             _framePosition += frames;
254 
255             int numSamples = frames * _channels;
256 
257             uint n = 0;
258 
259            
260                 if (_audioFormat == FloatingPointIEEE)
261             {
262                 if (_bytePerSample == 4)
263                 {
264                     for (n = 0; n < numSamples; ++n)
265                     {
266                         float sample = _io.read_float_LE(_userData, err);
267                         if (*err)
268                             return 0; // could return n, but well
269                         outData[n] = sample;                        
270                     }
271                 }
272                 else if (_bytePerSample == 8)
273                 {
274                     for (n = 0; n < numSamples; ++n)
275                     {
276                         double sample = _io.read_double_LE(_userData, err);
277                         if (*err)
278                             return 0; // ditto
279                         outData[n] = sample;
280                     }
281                 }
282                 else
283                 {
284                     *err = true;
285                     return 0; // Unsupported bit-depth for floating point data, should be 32 or 64.
286                 }
287             }
288             else if (_audioFormat == LinearPCM)
289             {
290                 if (_bytePerSample == 1)
291                 {
292                     for (n = 0; n < numSamples; ++n)
293                     {
294                         ubyte b = _io.read_ubyte(_userData, err);
295                         if (*err)
296                             return 0; // ditto
297                         outData[n] = (b - 128) / 127.0;
298                     }
299                 }
300                 else if (_bytePerSample == 2)
301                 {
302                     for (n = 0; n < numSamples; ++n)
303                     {
304                         short s = _io.read_ushort_LE(_userData, err);
305                         if (*err)
306                             return 0; // ditto
307                         outData[n] = s / 32767.0;
308                     }
309                 }
310                 else if (_bytePerSample == 3)
311                 {
312                     for (n = 0; n < numSamples; ++n)
313                     {
314                         int s = _io.read_24bits_LE(_userData, err);
315                         if (*err)
316                             return 0; // ditto
317                         // duplicate sign bit
318                         s = (s << 8) >> 8;
319                         outData[n] = s / 8388607.0;
320                     }
321                 }
322                 else if (_bytePerSample == 4)
323                 {
324                     for (n = 0; n < numSamples; ++n)
325                     {
326                         int s = _io.read_uint_LE(_userData, err);
327                         if (*err)
328                             return 0; // ditto
329                         outData[n] = s / 2147483648.0;
330                     }
331                 }
332                 else
333                 {
334                     // Unsupported bit-depth for integer PCM data, should be 8, 16, 24 or 32 bits.
335                     *err = true;
336                     return 0;
337                 }
338             }
339             else
340                 assert(false); // should have been handled earlier, crash
341 
342             // Return number of integer samples read
343             return frames;
344         }
345 
346     package:
347         int _sampleRate;
348         int _channels;
349         int _audioFormat;
350         int _bytePerSample;
351         long _samplesOffsetInFile;
352         uint _lengthInFrames;
353         uint _framePosition;
354 
355     private:
356         void* _userData;
357         IOCallbacks* _io;
358     }
359 }
360 
361 
362 version(encodeWAV)
363 {
    /// Encodes interleaved audio samples into a WAV (RIFF/WAVE) stream.
365     final class WAVEncoder
366     {
367     public:
368     @nogc:
369     nothrow:
370         enum Format
371         {
372             s8,
373             s16le,
374             s24le,
375             fp32le,
376             fp64le,
377         }
378 
379         static bool isFormatLinearPCM(Format fmt)
380         {
381             return fmt <= Format.s24le;
382         }
383 
384         this(IOCallbacks* io, 
385              void* userData, 
386              int sampleRate, 
387              int numChannels, 
388              Format format, 
389              bool enableDither,
390              bool* err)
391         {
392             *err = false;
393             _io = io;
394             _userData = userData;
395             _channels = numChannels;
396             _format = format;
397             _enableDither = enableDither;
398 
399             // Avoids a number of edge cases.
400             if (_channels < 0 || _channels > 1024)
401             {
402                 // Can't save a WAV with this number of channels.
403                 *err = true;
404                 return;
405             }
406 
407             // RIFF header
408             // its size will be overwritten at finalizing
409             _riffLengthOffset = _io.tell(_userData) + 4;
410             if (! _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"RIFF", 0))
411             {
412                 *err = true;
413                 return;
414             }
415             if (! _io.write_uint_BE(_userData, RIFFChunkId!"WAVE"))
416             {
417                 *err = true;
418                 return;
419             }
420 
421             // 'fmt ' sub-chunk
422             if (! _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"fmt ", 0x10))
423             {
424                 *err = true;
425                 return;
426             }
427             if (! _io.write_ushort_LE(_userData, isFormatLinearPCM(format) ? LinearPCM : FloatingPointIEEE))
428             {
429                 *err = true;
430                 return;
431             }
432             if (! _io.write_ushort_LE(_userData, cast(ushort)(_channels)))
433             {
434                 *err = true;
435                 return;
436             }
437             if (! _io.write_uint_LE(_userData, sampleRate))
438             {
439                 *err = true;
440                 return;
441             }
442 
443             size_t bytesPerSec = sampleRate * cast(size_t) frameSize();
444             if (!_io.write_uint_LE(_userData,  cast(uint)(bytesPerSec)))
445             {
446                 *err = true;
447                 return;
448             }
449 
450             int bytesPerFrame = frameSize();
451             if (!_io.write_ushort_LE(_userData, cast(ushort)bytesPerFrame))
452             {
453                 *err = true;
454                 return;
455             }
456 
457             if (!_io.write_ushort_LE(_userData, cast(ushort)(sampleSize() * 8)))
458             {
459                 *err = true;
460                 return;
461             }
462 
463             // data sub-chunk
464             _dataLengthOffset = _io.tell(_userData) + 4;
465             if(! _io.writeRIFFChunkHeader(_userData, RIFFChunkId!"data", 0)) // write 0 but temporarily, this will be overwritten at finalizing
466             {
467                 *err = true;
468                 return;
469             }
470             _writtenFrames = 0;
471         }
472 
473         // write interleaved samples
474         // `inSamples` should have enough room for frames * _channels
475         int writeSamples(T)(T* inSamples, int frames, bool* err) nothrow
476         {
477             int n = 0;
478             int samples = frames * _channels;
479                 
480             final switch(_format)
481             {
482                 case Format.s8:
483                     ditherInput(inSamples, samples, 127.0f);
484                     for ( ; n < samples; ++n)
485                     {
486                         double x = _ditherBuf[n];
487                         int b = cast(int)(128.5 + x * 127.0); 
488                         if (!_io.write_byte(_userData, cast(byte)b))
489                         {
490                             *err = true;
491                             return 0;
492                         }
493                     }
494                     break;
495 
496                 case Format.s16le:
497                     ditherInput(inSamples, samples, 32767.0f);
498                     for ( ; n < samples; ++n)
499                     {
500                         double x = _ditherBuf[n];
501                         int s = cast(int)(32768.5 + x * 32767.0);
502                         s -= 32768;
503                         assert(s >= -32767 && s <= 32767);
504                         if (!_io.write_short_LE(_userData, cast(short)s))
505                         {
506                             *err = true;
507                             return 0;
508                         }
509                     }
510                     break;
511 
512                 case Format.s24le:
513                     ditherInput(inSamples, samples, 8388607.0f);
514                     for ( ; n < samples; ++n)
515                     {
516                         double x = _ditherBuf[n];
517                         int s = cast(int)(8388608.5 + x * 8388607.0);
518                         s -= 8388608;
519                         assert(s >= -8388607 && s <= 8388607);
520                         if (!_io.write_24bits_LE(_userData, s))
521                         {
522                             *err = true;
523                             return 0;
524                         }
525                     }
526                     break;
527 
528                 case Format.fp32le:
529                     for ( ; n < samples; ++n)
530                     {
531                         if (!_io.write_float_LE(_userData, inSamples[n]))
532                         {
533                             *err = true;
534                             return 0;
535                         }
536                     }
537                     break;
538                 case Format.fp64le:
539                     for ( ; n < samples; ++n)
540                     {
541                         if (!_io.write_double_LE(_userData, inSamples[n]))
542                         {
543                             *err = true;
544                             return 0;
545                         }
546                     }
547                     break;
548             }
549             _writtenFrames += frames;
550             *err = false;
551 
552             return n;
553         }
554 
555         int sampleSize()
556         {
557             final switch(_format)
558             {
559                 case Format.s8:     return 1;
560                 case Format.s16le:  return 2;
561                 case Format.s24le:  return 3;
562                 case Format.fp32le: return 4;
563                 case Format.fp64le: return 8;
564             }
565         }
566 
567         int frameSize()
568         {
569             return sampleSize() * _channels;
570         }
571 
572         void finalizeEncoding(bool* err) 
573         {
574             size_t bytesOfData = frameSize() * _writtenFrames;
575 
576             // write final number of samples for the 'RIFF' chunk
577             {
578                 uint riffLength = cast(uint)( 4 + (4 + 4 + 16) + (4 + 4 + bytesOfData) );
579                 if (!_io.seek(_riffLengthOffset, false, _userData))
580                 {
581                     *err = true;
582                     return;
583                 }
584                 if (!_io.write_uint_LE(_userData, riffLength))
585                 {
586                     *err = true;
587                     return;
588                 }
589             }
590 
591             // write final number of samples for the 'data' chunk
592             {
593                 if (!_io.seek(_dataLengthOffset, false, _userData))
594                 {
595                     *err = true;
596                     return;
597                 }
598                 if (!_io.write_uint_LE(_userData, cast(uint)bytesOfData ))
599                 {
600                     *err = true;
601                     return;
602                 }
603             }
604             *err = true;
605         }
606 
607         ~this()
608         {
609              _ditherBuf.reallocBuffer(0);
610         }
611 
612     private:
613         void* _userData;
614         IOCallbacks* _io;
615         Format _format;
616         int _channels;
617         int _writtenFrames;
618         long _riffLengthOffset, _dataLengthOffset;
619 
620         bool _enableDither;
621         double[] _ditherBuf;
622         TPDFDither _tpdf;
623 
624         void ditherInput(T)(T* inSamples, int frames, double scaleFactor)
625         {
626             if (_ditherBuf.length < frames)
627                 _ditherBuf.reallocBuffer(frames);
628 
629             for (int n = 0; n < frames; ++n)
630             {
631                 _ditherBuf[n] = inSamples[n];
632             }
633 
634             if (_enableDither)
635                 _tpdf.process(_ditherBuf.ptr, frames, scaleFactor);
636         }
637     }
638 }
639 
640 
641 private:
642 
643 // wFormatTag
immutable int LinearPCM              = 0x0001; // integer PCM samples
immutable int FloatingPointIEEE      = 0x0003; // IEEE floating-point samples
immutable int WAVE_FORMAT_EXTENSIBLE = 0xFFFE; // actual format given by the SubFormat GUID
647 
648 
649 /+
650 MIT License
651 
652 Copyright (c) 2018 Chris Johnson
653 
654 Permission is hereby granted, free of charge, to any person obtaining a copy
655 of this software and associated documentation files (the "Software"), to deal
656 in the Software without restriction, including without limitation the rights
657 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
658 copies of the Software, and to permit persons to whom the Software is
659 furnished to do so, subject to the following conditions:
660 
661 The above copyright notice and this permission notice shall be included in all
662 copies or substantial portions of the Software.
663 
664 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
665 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
666 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
667 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
668 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
669 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
670 SOFTWARE.
671 +/
/// Dither applied before quantization to an integer format.
/// This is based upon TPDF Dither by Chris Johnson / AirWindows
/// though the algorithm changed quite a bit, tuned on 8-bit dither by ear.
public struct TPDFDither
{
nothrow:
@nogc:

    /// Dithers and quantizes `frames` values of `inoutSamples` in place.
    /// Each value is scaled by `scaleFactor`, offset, perturbed by two
    /// `rand()` draws, floored, scaled back and clamped to [-1, 1].
    void process(double* inoutSamples, int frames, double scaleFactor)
    {
        enum double TUNE0 = 0.25;         // could probably be better if tuned interactively
        enum double TUNE1 = TUNE0 * 0.5;  // ditto

        // Constant offset added before the two noise terms.
        enum double OFFSET = (0.5 - 0.5 * (TUNE0 + TUNE1));

        foreach (i; 0 .. frames)
        {
            double v = inoutSamples[i];

            // 0-1 is now one bit, now we dither
            v *= scaleFactor;

            v += OFFSET;

            // TPDF: two 0-1 random noises
            v += TUNE0 * (rand() / cast(double) RAND_MAX);
            v += TUNE1 * (rand() / cast(double) RAND_MAX);

            v = floor(v);
            v /= scaleFactor;

            if (v < -1.0)
                v = -1.0;
            if (v > 1.0)
                v = 1.0;

            inoutSamples[i] = v;
        }
    }
}
The OpenD Programming Language