The OpenD Programming Language

1 /**
2 
3 Copyright (c) 2023, Dominic Szablewski - https://phoboslab.org
4 SPDX-License-Identifier: MIT
5 
6 QOA - The "Quite OK Audio" format for fast, lossy audio compression
7 
8 
9 -- Data Format
10 
11 A QOA file has an 8 byte file header, followed by a number of frames. Each frame 
12 consists of an 8 byte frame header, the current 16 byte en-/decoder state per
13 channel and 256 slices per channel. Each slice is 8 bytes wide and encodes 20 
14 samples of audio data.
15 
Note that the last frame of a file may contain fewer than 256 slices per channel.
The last slice (per channel) in the last frame may contain fewer than 20 samples,
but the slice will still be 8 bytes wide, with the unused samples zeroed out.
19 
20 The samplerate and number of channels is only stated in the frame headers, but
21 not in the file header. A decoder may peek into the first frame of the file to 
22 find these values.
23 
24 In a valid QOA file all frames have the same number of channels and the same
25 samplerate. These restrictions may be relaxed for streaming. This remains to 
26 be decided.
27 
28 All values in a QOA file are BIG ENDIAN. Luckily, EVERYTHING in a QOA file,
29 including the headers, is 64 bit aligned, so it's possible to read files with 
30 just a read_u64() that does the byte swapping if necessary.
31 
32 In pseudocode, the file layout is as follows:
33 
34 struct {
35     struct {
36         char     magic[4];         // magic bytes 'qoaf'
37         uint32_t samples;          // number of samples per channel in this file
38     } file_header;                 // = 64 bits
39 
40     struct {
41         struct {
42             uint8_t  num_channels; // number of channels
43             uint24_t samplerate;   // samplerate in hz
44             uint16_t fsamples;     // sample count per channel in this frame
45             uint16_t fsize;        // frame size (including the frame header)
46         } frame_header;            // = 64 bits
47 
48         struct {
49             int16_t history[4];    // = 64 bits
50             int16_t weights[4];    // = 64 bits
51         } lms_state[num_channels]; 
52 
53         qoa_slice_t slices[256][num_channels]; // = 64 bits each
54     } frames[samples * channels / qoa_max_framesize()];
55 } qoa_file;
56 
Whereas the 64-bit qoa_slice_t is defined as follows:
58 
59 .- QOA_SLICE -- 64 bits, 20 samples --------------------------/  /------------.
60 |        Byte[0]         |        Byte[1]         |  Byte[2]  \  \  Byte[7]   |
61 | 7  6  5  4  3  2  1  0 | 7  6  5  4  3  2  1  0 | 7  6  5   /  /    2  1  0 |
62 |------------+--------+--------+--------+---------+---------+-\  \--+---------|
63 |  sf_index  |  r00   |   r01  |   r02  |  r03    |   r04   | /  /  |   r19   |
64 `-------------------------------------------------------------\  \------------`
65 
66 `sf_index` defines the scalefactor to use for this slice as an index into the
67 qoa_scalefactor_tab[16]
68 
69 `r00`--`r19` are the residuals for the individual samples, divided by the
70 scalefactor and quantized by the qoa_quant_tab[].
71 
72 In the decoder, a prediction of the next sample is computed by multiplying the 
73 state (the last four output samples) with the predictor. The residual from the 
74 slice is then dequantized using the qoa_dequant_tab[] and added to the 
75 prediction. The result is clamped to int16 to form the final output sample.
76 
77 */
78 /*
79 MIT License
80 
81 Copyright (c) 2022-2023 Dominic Szablewski
82 Copyright (c) 2023 Guillaume Piolat
83 
84 Permission is hereby granted, free of charge, to any person obtaining a copy
85 of this software and associated documentation files (the "Software"), to deal
86 in the Software without restriction, including without limitation the rights
87 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
88 copies of the Software, and to permit persons to whom the Software is
89 furnished to do so, subject to the following conditions:
90 
91 The above copyright notice and this permission notice shall be included in all
92 copies or substantial portions of the Software.
93 
94 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
95 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
96     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
97 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
98 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
99     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
100 SOFTWARE.
101 */
102 /**
103 Note: was extended to support seeking (input only), 
104      - chunk decoding and encoding to avoid having the whole song in memory
105 */
106 module audioformats.qoa;
107 
108 import audioformats.io;
109 import audioformats.internals;
110 import core.stdc.stdlib: malloc, free;
// Allocation hooks used for every heap allocation in this module (descriptors
// and sample buffers). They currently forward to the C runtime.
alias QOA_MALLOC = malloc;
alias QOA_FREE = free;
113 
114 nothrow @nogc private:
115 
/// Minimum number of readable bytes required before a header is parsed
/// (qoa_decode_header reads two 64-bit words: file header + first frame header).
enum int QOA_MIN_FILESIZE = 16;
/// Capacity of the per-channel LMS state array in qoa_desc.
enum int QOA_MAX_CHANNELS = 8;
/// Number of samples encoded by one 64-bit slice.
enum int QOA_SLICE_LEN  = 20;
/// Number of slices per channel in a full frame.
enum int QOA_SLICES_PER_FRAME = 256;
/// Number of samples per channel in a full frame.
enum int QOA_FRAME_LEN = QOA_SLICES_PER_FRAME * QOA_SLICE_LEN;
/// Number of taps (history samples / weights) of the LMS predictor.
enum int QOA_LMS_LEN = 4;
/// Upper 32 bits of the file header.
enum uint QOA_MAGIC = 0x716f6166; /* 'qoaf' in BE*/
123 
/// Size in bytes of a frame that stores `slices` slices for each of `channels`
/// channels: the frame header, one LMS state block per channel, then the slices.
uint QOA_FRAME_SIZE(uint channels, uint slices) pure
{
    enum uint headerBytes = 8;                          // frame header
    const uint lmsBytes   = QOA_LMS_LEN * 4 * channels; // history + weights per channel
    const uint sliceBytes = 8 * slices * channels;      // 8 bytes per slice
    return headerBytes + lmsBytes + sliceBytes;
}
128 
/// State of the LMS predictor for one channel.
struct qoa_lms_t
{
    /// The most recent reconstructed samples, oldest first
    /// (qoa_lms_update shifts the array left and appends at the end).
    int[QOA_LMS_LEN] history;
    /// Predictor weights; qoa_lms_predict shifts the weighted sum right by 13.
    int[QOA_LMS_LEN] weights;
}
134 
/// Description of a QOA stream plus the running LMS state of each channel.
public struct qoa_desc
{
    uint channels;   /// Channel count, as read from / written to frame headers.
    uint samplerate; /// Sample rate, as read from / written to frame headers.
    uint samples;    /// Sample count taken from the lower 32 bits of the file header.
    qoa_lms_t[QOA_MAX_CHANNELS] lms; /// One predictor state per channel.
}
142 
/// 64-bit word type used for headers, packed LMS state and slices.
alias qoa_uint64_t = ulong;
144 
/* The quant_tab provides an index into the dequant_tab for residuals in the
range of -8 .. 8. It maps this range to just 3 bits and becomes less accurate at
the higher end. Note that the residual zero is identical to the lowest positive
value. This is mostly fine, since the qoa_div() function always rounds away
from zero.

The table is indexed with (clamped residual + 8). */
static immutable int[17] qoa_quant_tab =
[
    7, 7, 7, 5, 5, 3, 3, 1, /* -8..-1 */
    0,                      /*  0     */
    0, 2, 2, 4, 4, 6, 6, 6  /*  1.. 8 */
];
156 
157 
/* We have 16 different scalefactors. Like the quantized residuals these become
less accurate at the higher end. In theory, the highest scalefactor that we
would need to encode the highest 16 bit residual is (2**16)/8 = 8192. However we
rely on the LMS filter to predict samples accurately enough that a maximum
residual of one quarter of the 16 bit range is sufficient. I.e. with the
scalefactor 2048 times the quant range of 8 we can encode residuals up to 2**14.

The scalefactor values are computed as:
scalefactor_tab[s] <- round(pow(s + 1, 2.75))

Note: this table is kept for reference; the encoder and decoder in this module
use the precomputed qoa_reciprocal_tab and qoa_dequant_tab instead. */

static immutable int[16] qoa_scalefactor_tab =
[
    1, 7, 21, 45, 84, 138, 211, 304, 421, 562, 731, 928, 1157, 1419, 1715, 2048
];
172 
173 
/* The reciprocal_tab maps each of the 16 scalefactors to their rounded
reciprocals 1/scalefactor. This allows us to calculate the scaled residuals in
the encoder with just one multiplication instead of an expensive division. We
do this in .16 fixed point with integers, instead of floats.

The reciprocal_tab is computed as:
reciprocal_tab[s] <- ((1<<16) + scalefactor_tab[s] - 1) / scalefactor_tab[s]

It is read by qoa_div(), indexed by the scalefactor index (0..15). */

static immutable int[16] qoa_reciprocal_tab = 
[
    65536, 9363, 3121, 1457, 781, 475, 311, 216, 156, 117, 90, 71, 57, 47, 39, 32
];
186 
187 
/* The dequant_tab maps each of the scalefactors and quantized residuals to
their unscaled & dequantized version.

Since qoa_div rounds away from zero, the smallest entries are mapped to 3/4
instead of 1. The dequant_tab assumes the following dequantized values for each
of the quant_tab indices and is computed as:
float dqt[8] = {0.75, -0.75, 2.5, -2.5, 4.5, -4.5, 7, -7};
dequant_tab[s][q] <- round(scalefactor_tab[s] * dqt[q])

First index: scalefactor index (0..15). Second index: 3-bit quantized
residual (0..7). */

static immutable int[8][16] qoa_dequant_tab = 
[
    [   1,    -1,    3,    -3,    5,    -5,     7,     -7],
    [   5,    -5,   18,   -18,   32,   -32,    49,    -49],
    [  16,   -16,   53,   -53,   95,   -95,   147,   -147],
    [  34,   -34,  113,  -113,  203,  -203,   315,   -315],
    [  63,   -63,  210,  -210,  378,  -378,   588,   -588],
    [ 104,  -104,  345,  -345,  621,  -621,   966,   -966],
    [ 158,  -158,  528,  -528,  950,  -950,  1477,  -1477],
    [ 228,  -228,  760,  -760, 1368, -1368,  2128,  -2128],
    [ 316,  -316, 1053, -1053, 1895, -1895,  2947,  -2947],
    [ 422,  -422, 1405, -1405, 2529, -2529,  3934,  -3934],
    [ 548,  -548, 1828, -1828, 3290, -3290,  5117,  -5117],
    [ 696,  -696, 2320, -2320, 4176, -4176,  6496,  -6496],
    [ 868,  -868, 2893, -2893, 5207, -5207,  8099,  -8099],
    [1064, -1064, 3548, -3548, 6386, -6386,  9933,  -9933],
    [1286, -1286, 4288, -4288, 7718, -7718, 12005, -12005],
    [1536, -1536, 5120, -5120, 9216, -9216, 14336, -14336],
];
216 
217 
218 /* The Least Mean Squares Filter is the heart of QOA. It predicts the next
219 sample based on the previous 4 reconstructed samples. It does so by continuously
220 adjusting 4 weights based on the residual of the previous prediction.
221 
222 The next sample is predicted as the sum of (weight[i] * history[i]).
223 
224 The adjustment of the weights is done with a "Sign-Sign-LMS" that adds or
225 subtracts the residual to each weight, based on the corresponding sample from 
226 the history. This, surprisingly, is sufficient to get worthwhile predictions.
227 
228 This is all done with fixed point integers. Hence the right-shifts when updating
229 the weights and calculating the prediction. */
230 
/// Predicts the next sample as the weighted sum of the history samples,
/// scaled back from fixed point by an arithmetic right shift of 13 bits.
int qoa_lms_predict(qoa_lms_t *lms) pure
{
    int acc = 0;
    foreach (tap; 0 .. QOA_LMS_LEN)
        acc += lms.weights[tap] * lms.history[tap];
    return acc >> 13;
}
240 
/// Adapts the weights from the dequantized `residual` and pushes `sample`
/// into the history.
///
/// Each weight moves by (residual >> 4): downwards when the matching history
/// sample is negative, upwards otherwise. The history is then shifted by one
/// position (the oldest entry is dropped) and `sample` is stored last.
void qoa_lms_update(qoa_lms_t *lms, int sample, int residual) pure
{
    const int step = residual >> 4;

    foreach (tap; 0 .. QOA_LMS_LEN)
    {
        if (lms.history[tap] < 0)
            lms.weights[tap] -= step;
        else
            lms.weights[tap] += step;
    }

    foreach (tap; 1 .. QOA_LMS_LEN)
        lms.history[tap - 1] = lms.history[tap];

    lms.history[QOA_LMS_LEN - 1] = sample;
}
255 
256 
257 /* qoa_div() implements a rounding division, but avoids rounding to zero for 
258 small numbers. E.g. 0.1 will be rounded to 1. Note that 0 itself still 
259 returns as 0, which is handled in the qoa_quant_tab[].
260 qoa_div() takes an index into the .16 fixed point qoa_reciprocal_tab as an
261 argument, so it can do the division with a cheaper integer multiplication. */
262 
/// Divides `v` by the scalefactor with index `scalefactor` using the .16 fixed
/// point reciprocal table, then nudges the result away from zero so that a
/// non-zero `v` never yields 0.
int qoa_div(int v, int scalefactor) pure
{
    const int recip   = qoa_reciprocal_tab[scalefactor];
    const int rounded = (v * recip + (1 << 15)) >> 16;

    const int signOfV       = (v > 0) - (v < 0);
    const int signOfRounded = (rounded > 0) - (rounded < 0);

    /* round away from 0 */
    return rounded + signOfV - signOfRounded;
}
270 
/// Limits `v` to the range [min, max]. The lower bound is tested first.
int qoa_clamp(int v, int min, int max) pure
{
    return v < min ? min
         : v > max ? max
         : v;
}
277 
/// Limits `v` to the signed 16 bit range [-32768, 32767].
int qoa_clamp_s16(int v) pure
{
    // Single unsigned comparison for the common case of an in-range value.
    const bool inRange = cast(uint)(v + 32768) <= 65535;
    if (inRange)
        return v;

    return v < -32768 ? -32768
         : v >  32767 ?  32767
         : v;
}
287 
288 
289 
290 
291 /* -----------------------------------------------------------------------------
292     Encoder */
293 
294 
/**
Encodes one frame and writes it through `io`.

Params:
    io          = I/O callbacks; only `write_ulong_BE` is used here.
    userData    = opaque pointer handed to every callback.
    sample_data = interleaved 16 bit samples; `frame_len * desc.channels`
                  values are read.
    desc        = stream description. `channels` and `samplerate` go into the
                  frame header; `lms` is written as the frame's initial
                  predictor state and is updated while encoding.
    frame_len   = number of samples per channel in this frame.

Returns: `true` when every write succeeded, `false` as soon as one fails
         (the output is then left partially written).
*/
bool qoa_encode_frame(IOCallbacks* io, 
                      void* userData, 
                      const(short)* sample_data, 
                      qoa_desc *desc, 
                      uint frame_len)
{
    uint channels = desc.channels;

    // Round up: a partially filled last slice still occupies 8 bytes.
    uint slices = (frame_len + QOA_SLICE_LEN - 1) / QOA_SLICE_LEN;
    uint frame_size = QOA_FRAME_SIZE(channels, slices);

    // Frame header: channels (8 bits) | samplerate (24) | frame_len (16) | frame_size (16)
    if (!io.write_ulong_BE(userData, 
        cast(qoa_uint64_t)desc.channels   << 56 |
        cast(qoa_uint64_t)desc.samplerate << 32 |
        cast(qoa_uint64_t)frame_len       << 16 |
        cast(qoa_uint64_t)frame_size
    ))
        return false;

    /* Write the current LMS state */
    for (int c = 0; c < channels; c++) {
        qoa_uint64_t weights = 0;
        qoa_uint64_t history = 0;
        // Pack the four values as 16 bit fields, element 0 in the most significant bits.
        for (int i = 0; i < QOA_LMS_LEN; i++) {
            history = (history << 16) | (desc.lms[c].history[i] & 0xffff);
            weights = (weights << 16) | (desc.lms[c].weights[i] & 0xffff);
        }
        if (!io.write_ulong_BE(userData, history))
            return false;
        if (!io.write_ulong_BE(userData, weights))
            return false;
    }

    /* We encode all samples with the channels interleaved on a slice level.
    E.g. for stereo: (ch-0, slice 0), (ch 1, slice 0), (ch 0, slice 1), ...*/
    for (int sample_index = 0; sample_index < frame_len; sample_index += QOA_SLICE_LEN) 
    {
        for (int c = 0; c < channels; c++) 
        {
            // Number of samples in this slice: QOA_SLICE_LEN, or what is left of the frame.
            int slice_len = qoa_clamp(QOA_SLICE_LEN, 0, frame_len - sample_index);
            int slice_start = sample_index * channels + c;
            int slice_end = (sample_index + slice_len) * channels + c;          

            /* Brute force search for the best scalefactor. Just go through all
            16 scalefactors, encode all samples for the current slice and 
            measure the total squared error. */
            qoa_uint64_t best_error = -1; // wraps to the largest ulong value
            qoa_uint64_t best_slice;
            qoa_lms_t best_lms;

            for (int scalefactor = 0; scalefactor < 16; scalefactor++) 
            {
                /* We have to reset the LMS state to the last known good one
                before trying each scalefactor, as each pass updates the LMS
                state when encoding. */
                qoa_lms_t lms = desc.lms[c];
                qoa_uint64_t slice = scalefactor;
                qoa_uint64_t current_error = 0;

                for (int si = slice_start; si < slice_end; si += channels) 
                {
                    int sample = sample_data[si];
                    int predicted = qoa_lms_predict(&lms);

                    int residual = sample - predicted;
                    int scaled = qoa_div(residual, scalefactor);
                    int clamped = qoa_clamp(scaled, -8, 8);
                    int quantized = qoa_quant_tab[clamped + 8];
                    int dequantized = qoa_dequant_tab[scalefactor][quantized];
                    // Same reconstruction the decoder performs, so both predictors stay in sync.
                    int reconstructed = qoa_clamp_s16(predicted + dequantized);

                    long error = (sample - reconstructed);
                    current_error += error * error;
                    // This scalefactor is already worse than the best one: abandon it early.
                    if (current_error > best_error) 
                    {
                        break;
                    }

                    qoa_lms_update(&lms, reconstructed, dequantized);
                    slice = (slice << 3) | quantized;
                }

                if (current_error < best_error) 
                {
                    best_error = current_error;
                    best_slice = slice;
                    best_lms = lms;
                }
            }

            // Continue with the predictor state produced by the winning scalefactor.
            desc.lms[c] = best_lms;
            
            /* If this slice was shorter than QOA_SLICE_LEN, we have to left-
            shift all encoded data, to ensure the rightmost bits are the empty
            ones. This should only happen in the last frame of a file as all
            slices are completely filled otherwise. */
            best_slice <<= (QOA_SLICE_LEN - slice_len) * 3;

            if (!io.write_ulong_BE(userData, best_slice))
                return false;
        }
    }
    
    return true;
}
400 
401 
402 
403 
404 /* -----------------------------------------------------------------------------
405     Decoder */
406 
/// Byte size of a full frame (QOA_SLICES_PER_FRAME slices per channel) for the
/// channel count stored in `qoa`.
uint qoa_max_frame_size(qoa_desc *qoa) 
{
    const uint channelCount = qoa.channels;
    return QOA_FRAME_SIZE(channelCount, QOA_SLICES_PER_FRAME);
}
411 
// Note: was changed, qoa_desc is allocated on heap
/**
Reads the file header and peeks into the first frame header.

On return the stream has been advanced by up to 16 bytes (file header plus
first frame header); the caller has to seek back to byte 8 before decoding
frames.

Params:
    io       = I/O callbacks (`remainingBytesToRead`, `read_ulong_BE`).
    userData = opaque pointer handed to every callback.
    qoadesc  = receives a descriptor allocated with QOA_MALLOC. It is written
               as soon as the magic number has been verified, even if a later
               check fails, and it is null if the allocation failed. Whenever
               it was written, the caller owns it and must release it with
               QOA_FREE. It is left untouched when the function fails before
               the magic number check.

Returns: 8 on success, 0 on failure (too little data, read error, wrong magic,
         allocation failure, or a sample count / sample rate / channel count
         of zero, or more channels than QOA_MAX_CHANNELS).
*/
uint qoa_decode_header(IOCallbacks* io, void* userData, qoa_desc** qoadesc) 
{
    if (io.remainingBytesToRead(userData) < QOA_MIN_FILESIZE) 
    {
        return 0;
    }

    bool err;

    /* Read the file header, verify the magic number ('qoaf') and read the 
    total number of samples. */
    qoa_uint64_t file_header = io.read_ulong_BE(userData, &err);
    if (err)
        return 0;

    if ((file_header >> 32) != QOA_MAGIC) {
        return 0;
    }

    qoa_desc* desc = cast(qoa_desc*) QOA_MALLOC(qoa_desc.sizeof);
    *qoadesc = desc;
    if (desc is null)
        return 0; // out of memory

    // malloc returns uninitialized memory: start from a well-defined state so
    // that early returns below do not leave garbage in the descriptor.
    *desc = qoa_desc.init;

    desc.samples = file_header & 0xffffffff;
    if (!(desc.samples))
        return 0;

    /* Peek into the first frame header to get the number of channels and
    the samplerate. */
    qoa_uint64_t frame_header = io.read_ulong_BE(userData, &err);
    if (err)
        return 0;
    desc.channels   = (frame_header >> 56) & 0x0000ff;
    desc.samplerate = (frame_header >> 32) & 0xffffff;

    // The channel field can hold up to 255, but desc.lms only has room for
    // QOA_MAX_CHANNELS entries; reject anything that would index past it.
    if (desc.channels == 0 || 
        desc.channels > QOA_MAX_CHANNELS || 
        desc.samples == 0 || 
        desc.samplerate == 0) 
    {
        return 0;
    }

    return 8;
}
454 
/**
Decodes one frame from `io` into `sample_data`.

Params:
    io          = I/O callbacks (`remainingBytesToRead`, `read_ulong_BE`).
    userData    = opaque pointer handed to every callback.
    qoa         = stream description; the frame must match its channel count
                  and sample rate. `qoa.lms` is overwritten with the state
                  stored in the frame and then updated while decoding.
    sample_data = receives interleaved 16 bit samples. Must have room for
                  `QOA_FRAME_LEN * qoa.channels` values; frames announcing
                  more than QOA_FRAME_LEN samples are rejected.
    frame_len   = receives the number of samples per channel that were
                  decoded; set to 0 on failure.

Returns: the number of bytes consumed from `io` on success, 0 on failure.
         On failure part of `sample_data` and `qoa.lms` may already have been
         modified.
*/
uint qoa_decode_frame(IOCallbacks* io, void* userData, qoa_desc *qoa, short *sample_data, uint *frame_len) 
{
    *frame_len = 0;

    if (io.remainingBytesToRead(userData) < 8 + QOA_LMS_LEN * 4 * qoa.channels)
        return 0;

    /* Read and verify the frame header */
    bool err;
    qoa_uint64_t frame_header = io.read_ulong_BE(userData, &err);
    if (err)
        return 0;
    uint consumed = 8;

    int channels   = (frame_header >> 56) & 0x0000ff;
    int samplerate = (frame_header >> 32) & 0xffffff;
    int samples    = (frame_header >> 16) & 0x00ffff;
    int frame_size = (frame_header      ) & 0x00ffff;

    int data_size = frame_size - 8 - QOA_LMS_LEN * 4 * channels;
    int num_slices = data_size / 8;
    int max_total_samples = num_slices * QOA_SLICE_LEN;

    if (io.remainingBytesToRead(userData) < frame_size - 8)
        return 0;
    if (
        channels != qoa.channels || 
        channels > QOA_MAX_CHANNELS ||   // qoa.lms has only QOA_MAX_CHANNELS entries
        samplerate != qoa.samplerate ||
        samples > QOA_FRAME_LEN ||       // would overrun a one-frame output buffer
        samples * channels > max_total_samples
    ) 
    {
        return 0;
    }

    /* Read the LMS state: 4 x 2 bytes history, 4 x 2 bytes weights per channel */
    for (int c = 0; c < channels; c++) 
    {
        qoa_uint64_t history = io.read_ulong_BE(userData, &err);
        if (err) 
            return 0;
        qoa_uint64_t weights = io.read_ulong_BE(userData, &err);
        if (err) 
            return 0;
        consumed += 16;

        for (int i = 0; i < QOA_LMS_LEN; i++) {
            // The top 16 bits hold the next value; the cast to short sign-extends it.
            qoa.lms[c].history[i] = (cast(short)(history >> 48));
            history <<= 16;
            qoa.lms[c].weights[i] = (cast(short)(weights >> 48));
            weights <<= 16;
        }
    }

    /* Decode all slices for all channels in this frame */
    for (int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN) 
    {
        for (int c = 0; c < channels; c++) 
        {
            qoa_uint64_t slice = io.read_ulong_BE(userData, &err);
            if (err) 
                return 0;
            consumed += 8;

            int scalefactor = (slice >> 60) & 0xf;
            int slice_start = sample_index * channels + c;
            int slice_end = qoa_clamp(sample_index + QOA_SLICE_LEN, 0, samples) * channels + c;

            for (int si = slice_start; si < slice_end; si += channels) {
                int predicted = qoa_lms_predict(&qoa.lms[c]);
                // The first shift below drops the 4 bit scalefactor, after
                // that the next 3 bit residual always sits in bits 59..57.
                int quantized = (slice >> 57) & 0x7;
                int dequantized = qoa_dequant_tab[scalefactor][quantized];
                int reconstructed = qoa_clamp_s16(predicted + dequantized);
                
                sample_data[si] = cast(short)reconstructed;
                slice <<= 3;

                qoa_lms_update(&qoa.lms[c], reconstructed, dequantized);
            }
        }
    }

    *frame_len = samples;
    return consumed;
}
535 
536 
// Streaming encoder for QOA. Queues samples until a full frame can be produced.
//
// Usage: initialize(), any number of writeSamples() calls, then
// finalizeEncoding(), which flushes the queue and patches the file header.
public struct QOAEncoder
{
nothrow @nogc:
    IOCallbacks* io;
    void* userData;
    int sampleRate;
    int numChannels;

    qoa_desc* desc;

    short* buffer; // buffer[0..count] is the staging area before encoding
    int count;     // frames currently queued in `buffer`
    uint framesEncoded; // frames (samples per channel) written so far

    /// Prepares the encoder and writes a placeholder file header.
    ///
    /// Params:
    ///     io          = I/O callbacks used for all output.
    ///     userData    = opaque pointer handed to every callback.
    ///     sampleRate  = must be in 1 .. 0xffffff (24 bit field of the frame header).
    ///     numChannels = must be in 1 .. QOA_MAX_CHANNELS.
    ///     err         = set to `true` on invalid arguments, allocation failure
    ///                   or write failure, `false` otherwise.
    void initialize(IOCallbacks* io, void* userData, int sampleRate, int numChannels, bool* err)
    {        
        this.io = io;
        this.userData = userData;
        this.sampleRate = sampleRate;
        this.numChannels = numChannels;

        framesEncoded = 0;
        count = 0;

        // Validate first: the LMS setup below indexes desc.lms by channel, and
        // the buffer size is derived from numChannels.
        if (sampleRate <= 0 || sampleRate > 0xffffff || numChannels <= 0 || numChannels > QOA_MAX_CHANNELS)
        {
            *err = true;
            return;
        }

        desc = cast(qoa_desc*) QOA_MALLOC(qoa_desc.sizeof);
        if (!desc)
        {
            *err = true;
            return;
        }
        desc.channels = numChannels;
        desc.samplerate = sampleRate;
        desc.samples = 0;

        for (int c = 0; c < numChannels; c++) 
        {
            /* Set the initial LMS weights to {0, 0, -1, 2}. This helps with the 
            prediction of the first few ms of a file. */
            desc.lms[c].weights[0] = 0;
            desc.lms[c].weights[1] = 0;
            desc.lms[c].weights[2] = -(1<<13);
            desc.lms[c].weights[3] =  (1<<14);

            /* Explicitly set the history samples to 0, as we might have some
            garbage in there. */
            for (int i = 0; i < QOA_LMS_LEN; i++)
            {
                desc.lms[c].history[i] = 0;
            }
        }

        // We need a single QOA_FRAME_LEN buffer for encoding a full frame.
        buffer = cast(short*) QOA_MALLOC(short.sizeof * QOA_FRAME_LEN * numChannels);
        if (!buffer)
        {
            *err = true;
            return;
        }

        // Skip QOA header for now; finalizeEncoding() overwrites it.
        if (!io.write_ulong_BE(userData, 0))
        {
            *err = true;
            return;
        }

        *err = false;
    }

    ~this()
    {
        QOA_FREE(buffer);
        buffer = null;

        QOA_FREE(desc);
        desc = null;
    }

    /// Queues `frames` interleaved frames from `inSamples` and encodes a QOA
    /// frame every time QOA_FRAME_LEN frames have accumulated.
    ///
    /// Input values are converted with a scale of 32767 and are expected to
    /// lie in [-1, 1] (checked with an assert).
    ///
    /// Returns: the number of frames accounted for before an error occurred,
    ///          or `frames` on success. `*err` reports failure.
    int writeSamples(T)(const(T)* inSamples, int frames, bool* err)
    {
        int enqueued = 0; // frames put in buffer

        while (enqueued < frames)
        {
            int maxToEnqueue = frames - enqueued;
            int storeRoom = QOA_FRAME_LEN - count;
            int toEnqueue = storeRoom < maxToEnqueue ? storeRoom : maxToEnqueue;

            // Continue after the frames consumed by earlier iterations;
            // otherwise input spanning a frame boundary would be re-read
            // from its beginning.
            const(T)* src = inSamples + cast(size_t)enqueued * numChannels;

            for (int n = 0; n < toEnqueue; ++n)
            {
                for (int ch = 0; ch < numChannels; ++ch)
                {
                    int index = n*numChannels+ch;
                    double x = src[index];
                    // Offset by 32768 so the truncating cast rounds to nearest.
                    int s = cast(int)(32768.5 + x * 32767.0);
                    s -= 32768;
                    assert(s >= -32767 && s <= 32767);
                    buffer[(count+n)*numChannels+ch] = cast(short)s;
                }
            }
            count += toEnqueue;
            
            if (count == QOA_FRAME_LEN)
            {
                bool success = outputFrame(QOA_FRAME_LEN);
                if (!success)
                {
                    *err = true;
                    return enqueued; // was an error
                }
            }

            enqueued += toEnqueue;
        }
        *err = false;
        return enqueued;
    }

    /// Encodes the first `frames` queued frames as one QOA frame and empties
    /// the queue. Returns `false` if the total frame count would overflow the
    /// 32 bit header field or if writing fails.
    bool outputFrame(int frames)
    {
        assert(frames > 0);
        if (frames + framesEncoded < framesEncoded) // overflow, QOA too long
            return false;

        bool success = qoa_encode_frame(io, userData, buffer, desc, frames);
        if (!success)
            return false;

        framesEncoded += frames;
        count = 0;
        return true;
    }

    // true on success.
    bool finalizeEncoding()
    {
        // 1. Encode remaining queued samples.
        if (count > 0)
        {
            if (!outputFrame(count))
                return false;
        }

        // 2. Finalize file.
        long end = io.tell(userData);
        
        // Overwrite `samples` value in QOA header.
        if (!io.seek(0, false, userData))
            return false;
       
        if (!io.write_ulong_BE(userData, (cast(qoa_uint64_t)QOA_MAGIC << 32) | framesEncoded))
            return false;

        // Put back cursor at the end.
        // Note: finalizeEncoding could technically be called several time, and encoding could continue.
        // But not supported by audio-formats API.
        if (!io.seek(end, false, userData))
            return false;

        return true;
    }
}
701 
// Streaming decoder for QOA.
//
// Holds one decoded frame at a time in `buffer` and hands it out in pieces
// through readSamples().
public struct QOADecoder
{
nothrow @nogc:
    IOCallbacks* io;
    void* userData;
    short* buffer = null;
    qoa_desc* desc;

    int numChannels;
    int totalFrames;
    float samplerate;

    int bufStart; // start of buffer
    int bufStop; // end of buffer (bufStop - bufStart) is the number of frames in buffer

    int currentPositionFrame = -1;

    /// Moves the read position to `positionFrame` (in frames, i.e. samples
    /// per channel). Returns `false` on a negative position, on seek failure,
    /// or when the target cannot be reached by decoding.
    bool seekPosition(int positionFrame)
    {
        if (positionFrame < 0)
            return false;

        if (currentPositionFrame == positionFrame)
            return true;

        // A QOA file has an 8 byte file header, followed by a number of frames. Each frame 
        // consists of an 8 byte frame header, the current 16 byte en-/decoder state per
        // channel and 256 slices per channel. Each slice is 8 bytes wide and encodes 20 
        // samples of audio data.

        // Forget current decoding buffer content.
        bufStop = 0;
        bufStart = 0;

        // Every frame before the target one is a full frame, so its byte
        // offset can be computed directly.
        int frameIndex = positionFrame / QOA_FRAME_LEN;
        int remain = positionFrame % QOA_FRAME_LEN;

        uint byteSizeOfFullFrame = QOA_FRAME_SIZE(numChannels, QOA_SLICES_PER_FRAME);

        // 64 bit arithmetic: the product exceeds 32 bits for long multichannel files.
        long frameOffset = 8 + cast(long)byteSizeOfFullFrame * frameIndex;

        // goto this frame
        if (!io.seek(frameOffset, false, userData))
            return false;

        if (remain > 0)
        {
            // Decode the whole frame, refill buffer.
            uint frameLen;
            qoa_decode_frame(io, userData, desc, buffer, &frameLen);
            bufStart = 0;
            bufStop = frameLen;

            // Then read some sample to advance.
            bool err;
            int res = readSamples!float(null, remain, &err);
            if (res != remain || err)
                return false; // Note: in this case currentPositionFrame is left invalid...
        }   

        currentPositionFrame = positionFrame;
        return true;
    }

    int tellPosition()
    {
        return currentPositionFrame;
    }

    // return true if this is a QOA. Taint io.
    // Also returns false when the decoding buffer cannot be allocated.
    bool initialize(IOCallbacks* io, void* userData)
    {
        this.io = io;
        this.userData = userData;

        if (qoa_decode_header(io, userData, &desc) != 8)
            return false;

        this.numChannels = desc.channels;
        this.totalFrames = desc.samples;
        this.samplerate = desc.samplerate;

        // Note: we've read 16 bytes, so we seek to byte 8 (begin of first frame).
        if (!io.seek(8, false, userData))
            return false;
        currentPositionFrame = 0;

        // We need a single QOA_FRAME_LEN buffer for decoding.
        buffer = cast(short*) QOA_MALLOC(short.sizeof * QOA_FRAME_LEN * numChannels);
        if (buffer is null)
            return false;

        bufStart = 0; // Nothing in buffer
        bufStop = 0;

        return true;
    }

    ~this()
    {
        QOA_FREE(buffer);
        buffer = null;

        QOA_FREE(desc);
        desc = null;
    }

    /// Reads up to `frames` frames, converting each 16 bit sample to `T` by
    /// multiplying with 1 / short.max.
    ///
    /// Params:
    ///     outData = receives interleaved samples; may be null, in which case
    ///               the frames are skipped.
    ///     frames  = number of frames requested.
    ///     err     = if non-null, set to `false`. A frame that cannot be
    ///               decoded simply ends the read, exactly like end of
    ///               stream; the two cases are not distinguished.
    ///
    /// Returns: the number of frames delivered (or skipped), which is less
    ///          than `frames` when no further frame could be decoded.
    int readSamples(T)(T* outData, int frames, bool* err)
    {
        // Give the out-parameter a defined value.
        if (err !is null)
            *err = false;

        int offsetFrames = 0;
        while (frames > 0)
        {
            // If no more data in buffer, read a frame
            if (bufStop - bufStart == 0)
            {
                uint frameLen;
                qoa_decode_frame(io, userData, desc, buffer, &frameLen);

                if (frameLen == 0)
                    return offsetFrames;

                bufStart = 0;
                bufStop = frameLen;
            }

            // How many samples we have in buffers? Take them.
            int inStore = bufStop - bufStart;
            if (inStore > frames)
                inStore = frames;

            if (outData !is null)
            {
                enum float F = 1.0f / short.max;

                for (int n = 0; n < inStore; ++n)
                {
                    for (int ch = 0; ch < numChannels; ++ch)
                    {
                        int index = n*numChannels+ch;
                        outData[offsetFrames*numChannels + index] = buffer[bufStart*numChannels + index] * F;
                    }
                }
            }

            bufStart += inStore;
            offsetFrames += inStore;
            currentPositionFrame += inStore;
            frames -= inStore;
            assert(bufStart <= bufStop);
        }
        return offsetFrames;
    }
}