/**

Copyright (c) 2023, Dominic Szablewski - https://phoboslab.org
SPDX-License-Identifier: MIT

QOA - The "Quite OK Audio" format for fast, lossy audio compression


-- Data Format

A QOA file has an 8 byte file header, followed by a number of frames. Each frame
consists of an 8 byte frame header, the current 16 byte en-/decoder state per
channel and 256 slices per channel. Each slice is 8 bytes wide and encodes 20
samples of audio data.

Note that the last frame of a file may contain fewer than 256 slices per
channel. The last slice (per channel) in the last frame may contain fewer than
20 samples, but the slice will still be 8 bytes wide, with the unused samples
zeroed out.

The samplerate and number of channels are only stated in the frame headers, but
not in the file header. A decoder may peek into the first frame of the file to
find these values.

In a valid QOA file all frames have the same number of channels and the same
samplerate. These restrictions may be relaxed for streaming. This remains to
be decided.

All values in a QOA file are BIG ENDIAN. Luckily, EVERYTHING in a QOA file,
including the headers, is 64 bit aligned, so it's possible to read files with
just a read_u64() that does the byte swapping if necessary.

In pseudocode, the file layout is as follows:

struct {
    struct {
        char     magic[4];         // magic bytes 'qoaf'
        uint32_t samples;          // number of samples per channel in this file
    } file_header;                 // = 64 bits

    struct {
        struct {
            uint8_t  num_channels; // number of channels
            uint24_t samplerate;   // samplerate in hz
            uint16_t fsamples;     // sample count per channel in this frame
            uint16_t fsize;        // frame size (including the frame header)
        } frame_header;            // = 64 bits

        struct {
            int16_t history[4];    // = 64 bits
            int16_t weights[4];    // = 64 bits
        } lms_state[num_channels];

        qoa_slice_t slices[256][num_channels]; // = 64 bits each
    } frames[samples * channels / qoa_max_framesize()];
} qoa_file;

Whereas the 64 bit qoa_slice_t is defined as follows:

.- QOA_SLICE -- 64 bits, 20 samples --------------------------/  /------------.
|        Byte[0]         |        Byte[1]         |  Byte[2]  \  \  Byte[7]   |
| 7  6  5  4  3  2  1  0 | 7  6  5  4  3  2  1  0 | 7  6  5   /  /    2  1  0 |
|------------+--------+--------+--------+---------+---------+-\  \--+---------|
|  sf_index  |  r00   |   r01  |   r02  |  r03    |   r04   | /  /  |   r19   |
`-------------------------------------------------------------\  \------------`

`sf_index` defines the scalefactor to use for this slice as an index into the
qoa_scalefactor_tab[16]

`r00`--`r19` are the residuals for the individual samples, divided by the
scalefactor and quantized by the qoa_quant_tab[].

In the decoder, a prediction of the next sample is computed by multiplying the
state (the last four output samples) with the predictor. The residual from the
slice is then dequantized using the qoa_dequant_tab[] and added to the
prediction. The result is clamped to int16 to form the final output sample.
76 77 */ 78 /* 79 MIT License 80 81 Copyright (c) 2022-2023 Dominic Szablewski 82 Copyright (c) 2023 Guillaume Piolat 83 84 Permission is hereby granted, free of charge, to any person obtaining a copy 85 of this software and associated documentation files (the "Software"), to deal 86 in the Software without restriction, including without limitation the rights 87 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 88 copies of the Software, and to permit persons to whom the Software is 89 furnished to do so, subject to the following conditions: 90 91 The above copyright notice and this permission notice shall be included in all 92 copies or substantial portions of the Software. 93 94 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 95 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 96 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 97 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 98 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 99 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 100 SOFTWARE. 
*/
/**
  Note: this port was extended to support
    - seeking (input only),
    - chunked decoding and encoding, to avoid having the whole song in memory.
*/
module audioformats.qoa;

import audioformats.io;
import audioformats.internals;
import core.stdc.stdlib: malloc, free;
alias QOA_MALLOC = malloc;
alias QOA_FREE = free;

nothrow @nogc private:

/* Format constants. */
enum int  QOA_MIN_FILESIZE     = 16;   /* file header + first frame header */
enum int  QOA_MAX_CHANNELS     = 8;    /* capacity of qoa_desc.lms */
enum int  QOA_SLICE_LEN        = 20;   /* samples per slice */
enum int  QOA_SLICES_PER_FRAME = 256;  /* slices per channel in a full frame */
enum int  QOA_FRAME_LEN        = QOA_SLICES_PER_FRAME * QOA_SLICE_LEN;
enum int  QOA_LMS_LEN          = 4;    /* history/weights entries per channel */
enum uint QOA_MAGIC            = 0x716f6166; /* 'qoaf' in BE */

/* Size in bytes of a frame holding `slices` slices for each of `channels`
channels: an 8 byte frame header, QOA_LMS_LEN * 4 bytes of LMS state per
channel, and 8 bytes per slice. */
uint QOA_FRAME_SIZE(uint channels, uint slices) pure
{
    enum uint headerBytes = 8;
    uint lmsBytes   = QOA_LMS_LEN * 4 * channels;
    uint sliceBytes = 8 * slices * channels;
    return headerBytes + lmsBytes + sliceBytes;
}

/* Predictor state of one channel. */
struct qoa_lms_t
{
    int[QOA_LMS_LEN] history;
    int[QOA_LMS_LEN] weights;
}

/* Stream description plus the per-channel predictor state. */
public struct qoa_desc
{
    uint channels;
    uint samplerate;
    uint samples;
    qoa_lms_t[QOA_MAX_CHANNELS] lms;
}

alias qoa_uint64_t = ulong;

/* The quant_tab provides an index into the dequant_tab for residuals in the
range of -8 .. 8. It maps this range to just 3 bits and becomes less accurate at
the higher end. Note that the residual zero is identical to the lowest positive
value. This is mostly fine, since the qoa_div() function always rounds away
from zero. */
static immutable int[17] qoa_quant_tab =
[
    7, 7, 7, 5, 5, 3, 3, 1, /* -8..-1 */
    0,                      /*  0     */
    0, 2, 2, 4, 4, 6, 6, 6  /*  1.. 8 */
];


/* We have 16 different scalefactors. Like the quantized residuals these become
less accurate at the higher end. In theory, the highest scalefactor that we
would need to encode the highest 16 bit residual is (2**16)/8 = 8192. However we
rely on the LMS filter to predict samples accurately enough that a maximum
residual of one quarter of the 16 bit range is sufficient. I.e. with the
scalefactor 2048 times the quant range of 8 we can encode residuals up to 2**14.

The scalefactor values are computed as:
scalefactor_tab[s] <- round(pow(s + 1, 2.75)) */

static immutable int[16] qoa_scalefactor_tab =
[
    1, 7, 21, 45, 84, 138, 211, 304,
    421, 562, 731, 928, 1157, 1419, 1715, 2048
];


/* The reciprocal_tab maps each of the 16 scalefactors to their rounded
reciprocals 1/scalefactor. This allows us to calculate the scaled residuals in
the encoder with just one multiplication instead of an expensive division. We
do this in .16 fixed point with integers, instead of floats.

The reciprocal_tab is computed as:
reciprocal_tab[s] <- ((1<<16) + scalefactor_tab[s] - 1) / scalefactor_tab[s] */

static immutable int[16] qoa_reciprocal_tab =
[
    65536, 9363, 3121, 1457, 781, 475, 311, 216,
    156, 117, 90, 71, 57, 47, 39, 32
];


/* The dequant_tab maps each of the scalefactors and quantized residuals to
their unscaled & dequantized version.

Since qoa_div rounds away from the zero, the smallest entries are mapped to 3/4
instead of 1. The dequant_tab assumes the following dequantized values for each
of the quant_tab indices and is computed as:
float dqt[8] = {0.75, -0.75, 2.5, -2.5, 4.5, -4.5, 7, -7};
dequant_tab[s][q] <- round(scalefactor_tab[s] * dqt[q]) */

static immutable int[8][16] qoa_dequant_tab =
[
    [   1,    -1,    3,    -3,    5,    -5,     7,     -7],
    [   5,    -5,   18,   -18,   32,   -32,    49,    -49],
    [  16,   -16,   53,   -53,   95,   -95,   147,   -147],
    [  34,   -34,  113,  -113,  203,  -203,   315,   -315],
    [  63,   -63,  210,  -210,  378,  -378,   588,   -588],
    [ 104,  -104,  345,  -345,  621,  -621,   966,   -966],
    [ 158,  -158,  528,  -528,  950,  -950,  1477,  -1477],
    [ 228,  -228,  760,  -760, 1368, -1368,  2128,  -2128],
    [ 316,  -316, 1053, -1053, 1895, -1895,  2947,  -2947],
    [ 422,  -422, 1405, -1405, 2529, -2529,  3934,  -3934],
    [ 548,  -548, 1828, -1828, 3290, -3290,  5117,  -5117],
    [ 696,  -696, 2320, -2320, 4176, -4176,  6496,  -6496],
    [ 868,  -868, 2893, -2893, 5207, -5207,  8099,  -8099],
    [1064, -1064, 3548, -3548, 6386, -6386,  9933,  -9933],
    [1286, -1286, 4288, -4288, 7718, -7718, 12005, -12005],
    [1536, -1536, 5120, -5120, 9216, -9216, 14336, -14336],
];


/* The Least Mean Squares Filter is the heart of QOA. It predicts the next
sample based on the previous 4 reconstructed samples. It does so by continuously
adjusting 4 weights based on the residual of the previous prediction.

The next sample is predicted as the sum of (weight[i] * history[i]).

The adjustment of the weights is done with a "Sign-Sign-LMS" that adds or
subtracts the residual to each weight, based on the corresponding sample from
the history. This, surprisingly, is sufficient to get worthwhile predictions.

This is all done with fixed point integers. Hence the right-shifts when updating
the weights and calculating the prediction.
*/

/* Predict the next sample: sum of weights[i] * history[i], shifted right by
13 bits to undo the fixed point scaling of the weights. */
int qoa_lms_predict(qoa_lms_t *lms) pure
{
    int prediction = 0;
    for (int i = 0; i < QOA_LMS_LEN; i++)
    {
        prediction += lms.weights[i] * lms.history[i];
    }
    return prediction >> 13;
}

/* Adapt the weights by +/- (residual >> 4), the sign being chosen from the
sign of the matching history entry, then shift `sample` into the history. */
void qoa_lms_update(qoa_lms_t *lms, int sample, int residual) pure
{
    int delta = residual >> 4;
    for (int i = 0; i < QOA_LMS_LEN; i++)
    {
        lms.weights[i] += lms.history[i] < 0 ? -delta : delta;
    }

    for (int i = 0; i < QOA_LMS_LEN-1; i++)
    {
        lms.history[i] = lms.history[i+1];
    }
    lms.history[QOA_LMS_LEN-1] = sample;
}


/* qoa_div() implements a rounding division, but avoids rounding to zero for
small numbers. E.g. 0.1 will be rounded to 1. Note that 0 itself still
returns as 0, which is handled in the qoa_quant_tab[].
qoa_div() takes an index into the .16 fixed point qoa_reciprocal_tab as an
argument, so it can do the division with a cheaper integer multiplication. */

int qoa_div(int v, int scalefactor) pure
{
    int reciprocal = qoa_reciprocal_tab[scalefactor];
    int n = (v * reciprocal + (1 << 15)) >> 16;
    n = n + ((v > 0) - (v < 0)) - ((n > 0) - (n < 0)); /* round away from 0 */
    return n;
}

/* Clamp v to [min, max]. The lower bound is tested first, so `min` wins when
min > max. */
int qoa_clamp(int v, int min, int max) pure
{
    if (v < min) { return min; }
    if (v > max) { return max; }
    return v;
}

/* Clamp v to the int16 range. A single unsigned compare detects the (rare)
out-of-range case before the two signed compares are evaluated. */
int qoa_clamp_s16(int v) pure
{
    if (cast(uint)(v + 32768) > 65535)
    {
        if (v < -32768) { return -32768; }
        if (v >  32767) { return  32767; }
    }
    return v;
}




/* -----------------------------------------------------------------------------
    Encoder */


/* Encode one frame and write it through `io`.

   sample_data  interleaved 16 bit samples, frame_len samples per channel
   desc         channels/samplerate to put in the frame header; desc.lms is
                read as the starting predictor state and updated in place
   frame_len    samples per channel in this frame

   Returns false as soon as a write fails, true otherwise. */
bool qoa_encode_frame(IOCallbacks* io,
                      void* userData,
                      const(short)* sample_data,
                      qoa_desc *desc,
                      uint frame_len)
{
    uint channels = desc.channels;

    uint slices = (frame_len + QOA_SLICE_LEN - 1) / QOA_SLICE_LEN;
    uint frame_size = QOA_FRAME_SIZE(channels, slices);

    /* Frame header: channels, samplerate, samples in frame, frame size. */
    if (!io.write_ulong_BE(userData,
        cast(qoa_uint64_t)desc.channels   << 56 |
        cast(qoa_uint64_t)desc.samplerate << 32 |
        cast(qoa_uint64_t)frame_len       << 16 |
        cast(qoa_uint64_t)frame_size
    ))
        return false;

    /* Write the current LMS state */
    for (int c = 0; c < channels; c++) {
        qoa_uint64_t weights = 0;
        qoa_uint64_t history = 0;
        for (int i = 0; i < QOA_LMS_LEN; i++) {
            history = (history << 16) | (desc.lms[c].history[i] & 0xffff);
            weights = (weights << 16) | (desc.lms[c].weights[i] & 0xffff);
        }
        if (!io.write_ulong_BE(userData, history))
            return false;
        if (!io.write_ulong_BE(userData, weights))
            return false;
    }

    /* We encode all samples with the channels interleaved on a slice level.
    E.g. for stereo: (ch-0, slice 0), (ch 1, slice 0), (ch 0, slice 1), ...*/
    for (int sample_index = 0; sample_index < frame_len; sample_index += QOA_SLICE_LEN)
    {
        for (int c = 0; c < channels; c++)
        {
            int slice_len = qoa_clamp(QOA_SLICE_LEN, 0, frame_len - sample_index);
            int slice_start = sample_index * channels + c;
            int slice_end = (sample_index + slice_len) * channels + c;

            /* Brute force search for the best scalefactor. Just go through all
            16 scalefactors, encode all samples for the current slice and
            measure the total squared error. */
            qoa_uint64_t best_error = qoa_uint64_t.max;
            qoa_uint64_t best_slice;
            qoa_lms_t best_lms;

            for (int scalefactor = 0; scalefactor < 16; scalefactor++)
            {
                /* We have to reset the LMS state to the last known good one
                before trying each scalefactor, as each pass updates the LMS
                state when encoding. */
                qoa_lms_t lms = desc.lms[c];
                qoa_uint64_t slice = scalefactor;
                qoa_uint64_t current_error = 0;

                for (int si = slice_start; si < slice_end; si += channels)
                {
                    int sample = sample_data[si];
                    int predicted = qoa_lms_predict(&lms);

                    int residual = sample - predicted;
                    int scaled = qoa_div(residual, scalefactor);
                    int clamped = qoa_clamp(scaled, -8, 8);
                    int quantized = qoa_quant_tab[clamped + 8];
                    int dequantized = qoa_dequant_tab[scalefactor][quantized];
                    int reconstructed = qoa_clamp_s16(predicted + dequantized);

                    long error = (sample - reconstructed);
                    current_error += error * error;
                    /* Already worse than the best candidate: give up early. */
                    if (current_error > best_error)
                    {
                        break;
                    }

                    qoa_lms_update(&lms, reconstructed, dequantized);
                    slice = (slice << 3) | quantized;
                }

                if (current_error < best_error)
                {
                    best_error = current_error;
                    best_slice = slice;
                    best_lms = lms;
                }
            }

            desc.lms[c] = best_lms;

            /* If this slice was shorter than QOA_SLICE_LEN, we have to left-
            shift all encoded data, to ensure the rightmost bits are the empty
            ones. This should only happen in the last frame of a file as all
            slices are completely filled otherwise. */
            best_slice <<= (QOA_SLICE_LEN - slice_len) * 3;

            if (!io.write_ulong_BE(userData, best_slice))
                return false;
        }
    }

    return true;
}




/* -----------------------------------------------------------------------------
    Decoder */

/* Size in bytes of a full frame (QOA_SLICES_PER_FRAME slices per channel). */
uint qoa_max_frame_size(qoa_desc *qoa)
{
    return QOA_FRAME_SIZE(qoa.channels, QOA_SLICES_PER_FRAME);
}

/* Read the 8 byte file header and the first 8 byte frame header.

   On success, returns 8 and stores a freshly QOA_MALLOC'ed descriptor in
   *qoadesc (the caller releases it with QOA_FREE); 16 bytes have been consumed
   from `io` at that point. On failure, returns 0 and leaves *qoadesc untouched.

   Headers announcing more than QOA_MAX_CHANNELS channels are rejected, since
   qoa_desc.lms cannot hold their predictor state. */
uint qoa_decode_header(IOCallbacks* io, void* userData, qoa_desc** qoadesc)
{
    if (io.remainingBytesToRead(userData) < QOA_MIN_FILESIZE)
    {
        return 0;
    }

    bool err;

    /* Read the file header, verify the magic number ('qoaf') and read the
    total number of samples. */
    qoa_uint64_t file_header = io.read_ulong_BE(userData, &err);
    if (err)
        return 0;

    if ((file_header >> 32) != QOA_MAGIC) {
        return 0;
    }

    uint samples = file_header & 0xffffffff;
    if (samples == 0)
        return 0;

    /* Peek into the first frame header to get the number of channels and
    the samplerate. */
    qoa_uint64_t frame_header = io.read_ulong_BE(userData, &err);
    if (err)
        return 0;
    uint channels   = (frame_header >> 56) & 0x0000ff;
    uint samplerate = (frame_header >> 32) & 0xffffff;

    if (channels == 0 || channels > QOA_MAX_CHANNELS || samplerate == 0) {
        return 0;
    }

    /* Allocate only once the header is known to be usable. */
    qoa_desc* desc = cast(qoa_desc*) QOA_MALLOC(qoa_desc.sizeof);
    if (desc is null)
        return 0;

    desc.channels   = channels;
    desc.samplerate = samplerate;
    desc.samples    = samples;
    *qoadesc = desc;

    return 8;
}

/* Decode one frame from `io` into sample_data (interleaved).

   sample_data must have room for QOA_FRAME_LEN * qoa.channels samples; frames
   announcing more than QOA_FRAME_LEN samples per channel are rejected so that
   this bound holds.

   *frame_len receives the number of samples per channel decoded, or 0 when
   the frame is missing, truncated or inconsistent with `qoa`. The return
   value is always 0; callers must look at *frame_len. */
uint qoa_decode_frame(IOCallbacks* io, void* userData, qoa_desc *qoa, short *sample_data, uint *frame_len)
{
    *frame_len = 0;

    if (io.remainingBytesToRead(userData) < 8 + QOA_LMS_LEN * 4 * qoa.channels)
        return 0;

    /* Read and verify the frame header */
    bool err;
    qoa_uint64_t frame_header = io.read_ulong_BE(userData, &err);
    if (err)
        return 0;
    int channels   = (frame_header >> 56) & 0x0000ff;
    int samplerate = (frame_header >> 32) & 0xffffff;
    int samples    = (frame_header >> 16) & 0x00ffff;
    int frame_size = (frame_header      ) & 0x00ffff;

    int data_size = frame_size - 8 - QOA_LMS_LEN * 4 * channels;
    int num_slices = data_size / 8;
    int max_total_samples = num_slices * QOA_SLICE_LEN;

    if (io.remainingBytesToRead(userData) < frame_size - 8)
        return 0;
    if (
        channels != qoa.channels ||
        channels > QOA_MAX_CHANNELS ||      /* qoa.lms has QOA_MAX_CHANNELS entries */
        samplerate != qoa.samplerate ||
        samples > QOA_FRAME_LEN ||          /* would overrun sample_data */
        samples * channels > max_total_samples
    )
    {
        return 0;
    }

    /* Read the LMS state: 4 x 2 bytes history, 4 x 2 bytes weights per channel */
    for (int c = 0; c < channels; c++)
    {
        qoa_uint64_t history = io.read_ulong_BE(userData, &err);
        if (err)
            return 0;
        qoa_uint64_t weights = io.read_ulong_BE(userData, &err);
        if (err)
            return 0;
        for (int i = 0; i < QOA_LMS_LEN; i++) {
            qoa.lms[c].history[i] = (cast(short)(history >> 48));
            history <<= 16;
            qoa.lms[c].weights[i] = (cast(short)(weights >> 48));
            weights <<= 16;
        }
    }

    /* Decode all slices for all channels in this frame */
    for (int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN)
    {
        for (int c = 0; c < channels; c++)
        {
            qoa_uint64_t slice = io.read_ulong_BE(userData, &err);
            if (err)
                return 0;

            int scalefactor = (slice >> 60) & 0xf;
            int slice_start = sample_index * channels + c;
            int slice_end = qoa_clamp(sample_index + QOA_SLICE_LEN, 0, samples) * channels + c;

            for (int si = slice_start; si < slice_end; si += channels) {
                int predicted = qoa_lms_predict(&qoa.lms[c]);
                int quantized = (slice >> 57) & 0x7;
                int dequantized = qoa_dequant_tab[scalefactor][quantized];
                int reconstructed = qoa_clamp_s16(predicted + dequantized);

                sample_data[si] = cast(short)reconstructed;
                slice <<= 3;

                qoa_lms_update(&qoa.lms[c], reconstructed, dequantized);
            }
        }
    }

    *frame_len = samples;
    return 0;
}


// Streaming encoder for QOA. Queues samples until a full frame can be produced.
public struct QOAEncoder
{
nothrow @nogc:
    IOCallbacks* io;
    void* userData;
    int sampleRate;
    int numChannels;

    qoa_desc* desc;

    short* buffer;  // buffer[0..count] is the staging area before encoding
    int count;      // frames currently staged in `buffer`
    uint framesEncoded;

    // Prepares the encoder and writes a placeholder file header (patched later
    // by `finalizeEncoding`). `*err` is set to true on invalid parameters,
    // allocation failure or write failure, and to false on success.
    void initialize(IOCallbacks* io, void* userData, int sampleRate, int numChannels, bool* err)
    {
        this.io = io;
        this.userData = userData;
        this.sampleRate = sampleRate;
        this.numChannels = numChannels;

        framesEncoded = 0;
        count = 0;

        // Validate before anything is sized or indexed by the channel count:
        // the LMS array only has QOA_MAX_CHANNELS entries.
        if (sampleRate <= 0 || sampleRate > 0xffffff || numChannels <= 0 || numChannels > QOA_MAX_CHANNELS)
        {
            *err = true;
            return;
        }

        desc = cast(qoa_desc*) QOA_MALLOC(qoa_desc.sizeof);
        if (!desc)
        {
            *err = true;
            return;
        }
        desc.channels = numChannels;
        desc.samplerate = sampleRate;
        desc.samples = 0;

        for (int c = 0; c < desc.channels; c++)
        {
            /* Set the initial LMS weights to {0, 0, -1, 2}. This helps with the
            prediction of the first few ms of a file. */
            desc.lms[c].weights[0] = 0;
            desc.lms[c].weights[1] = 0;
            desc.lms[c].weights[2] = -(1<<13);
            desc.lms[c].weights[3] =  (1<<14);

            /* Explicitly set the history samples to 0, as we might have some
            garbage in there. */
            for (int i = 0; i < QOA_LMS_LEN; i++)
            {
                desc.lms[c].history[i] = 0;
            }
        }

        // We need a single QOA_FRAME_LEN buffer for encoding a full frame.
        buffer = cast(short*) QOA_MALLOC(short.sizeof * QOA_FRAME_LEN * numChannels);
        if (!buffer)
        {
            *err = true;
            return;
        }

        // Skip QOA header for now
        if (!io.write_ulong_BE(userData, 0))
        {
            *err = true;
            return;
        }

        *err = false;
    }

    ~this()
    {
        QOA_FREE(buffer);
        buffer = null;

        QOA_FREE(desc);
        desc = null;
    }

    // Queues `frames` interleaved frames from `inSamples`, emitting a QOA frame
    // each time QOA_FRAME_LEN frames are staged. Each sample is scaled by 32767
    // and rounded; the assert expects the result to stay within +/-32767.
    // Returns the number of frames consumed; `*err` is true if a frame could
    // not be written.
    int writeSamples(T)(const(T)* inSamples, int frames, bool* err)
    {
        int enqueued = 0; // frames put in buffer

        while (enqueued < frames)
        {
            int maxToEnqueue = frames - enqueued;
            int storeRoom = QOA_FRAME_LEN - count;
            int toEnqueue = storeRoom < maxToEnqueue ? storeRoom : maxToEnqueue;

            for (int n = 0; n < toEnqueue; ++n)
            {
                for (int ch = 0; ch < numChannels; ++ch)
                {
                    // Read past what earlier iterations of this call already
                    // consumed, otherwise every chunk would restart at
                    // inSamples[0].
                    double x = inSamples[(enqueued + n)*numChannels + ch];
                    int s = cast(int)(32768.5 + x * 32767.0);
                    s -= 32768;
                    assert(s >= -32767 && s <= 32767);
                    buffer[(count+n)*numChannels+ch] = cast(short)s;
                }
            }
            count += toEnqueue;

            if (count == QOA_FRAME_LEN)
            {
                bool success = outputFrame(QOA_FRAME_LEN);
                if (!success)
                {
                    *err = true;
                    return enqueued; // was an error
                }
            }

            enqueued += toEnqueue;
        }
        *err = false;
        return enqueued;
    }

    // Encodes the first `frames` staged frames as one QOA frame and empties
    // the staging buffer. Returns false on write failure or if the total frame
    // count would overflow.
    bool outputFrame(int frames)
    {
        assert(frames > 0);
        if (frames + framesEncoded < framesEncoded) // overflow, QOA too long
            return false;

        bool success = qoa_encode_frame(io, userData, buffer, desc, frames);
        if (!success)
            return false;

        framesEncoded += frames;
        count = 0;
        return true;
    }

    // true on success.
    bool finalizeEncoding()
    {
        // 1. Encode remaining queued samples.
        if (count > 0)
        {
            if (!outputFrame(count))
                return false;
        }

        // 2. Finalize file.
        long end = io.tell(userData);

        // Overwrite `samples` value in QOA header.
        if (!io.seek(0, false, userData))
            return false;

        if (!io.write_ulong_BE(userData, (cast(qoa_uint64_t)QOA_MAGIC << 32) | framesEncoded))
            return false;

        // Put back cursor at the end.
        // Note: finalizeEncoding could technically be called several time, and encoding could continue.
        // But not supported by audio-formats API.
        if (!io.seek(end, false, userData))
            return false;

        return true;
    }
}

// Streaming decoder for QOA.
public struct QOADecoder
{
nothrow @nogc:
    IOCallbacks* io;
    void* userData;
    short* buffer = null;   // one decoded frame, interleaved
    qoa_desc* desc;

    int numChannels;
    int totalFrames;
    float samplerate;

    int bufStart; // start of buffer
    int bufStop;  // end of buffer (bufStop - bufStart) is the number of frames in buffer

    int currentPositionFrame = -1;

    // Moves the read position to `positionFrame` (in frames, i.e. samples per
    // channel). Returns false if the position is negative, the underlying seek
    // fails, or the samples leading up to the position cannot be decoded.
    bool seekPosition(int positionFrame)
    {
        if (currentPositionFrame == positionFrame)
            return true;

        if (positionFrame < 0)
            return false;

        // A QOA file has an 8 byte file header, followed by a number of frames. Each frame
        // consists of an 8 byte frame header, the current 16 byte en-/decoder state per
        // channel and 256 slices per channel. Each slice is 8 bytes wide and encodes 20
        // samples of audio data.

        // Forget current decoding buffer content.
        bufStop = 0;
        bufStart = 0;

        // Index of the QOA frame holding the position, and offset inside it.
        int frameIndex = positionFrame / QOA_FRAME_LEN;
        int remain = positionFrame % QOA_FRAME_LEN;
        assert(remain >= 0);

        // Computed in 64 bit: with many channels and a long file the byte
        // offset does not fit in 32 bits.
        uint byteSizeOfFullFrame = QOA_FRAME_SIZE(numChannels, QOA_SLICES_PER_FRAME);
        long frameOffset = 8 + cast(long)byteSizeOfFullFrame * frameIndex;

        // goto this frame
        if (!io.seek(frameOffset, false, userData))
            return false;

        if (remain > 0)
        {
            // Decode the whole frame, refill buffer.
            uint frameLen;
            qoa_decode_frame(io, userData, desc, buffer, &frameLen);
            bufStart = 0;
            bufStop = frameLen;

            // Then read some sample to advance.
            bool err;
            int res = readSamples!float(null, remain, &err);
            if (res != remain || err)
                return false; // Note: in this case currentPositionFrame is left invalid...
        }

        currentPositionFrame = positionFrame;
        return true;
    }

    int tellPosition()
    {
        return currentPositionFrame;
    }

    // return true if this is a QOA. Taint io.
    bool initialize(IOCallbacks* io, void* userData)
    {
        this.io = io;
        this.userData = userData;

        if (qoa_decode_header(io, userData, &desc) != 8)
            return false;

        // Do not trust the header further than qoa_desc.lms can hold.
        if (desc is null || desc.channels == 0 || desc.channels > QOA_MAX_CHANNELS)
            return false;

        this.numChannels = desc.channels;
        this.totalFrames = desc.samples;
        this.samplerate = desc.samplerate;

        // The header parser consumed 16 bytes (file header + first frame
        // header); go back to byte 8, the beginning of the first frame.
        if (!io.seek(8, false, userData))
            return false;
        currentPositionFrame = 0;

        // We need a single QOA_FRAME_LEN buffer for decoding.
        buffer = cast(short*) QOA_MALLOC(short.sizeof * QOA_FRAME_LEN * numChannels);
        if (buffer is null)
            return false;

        bufStart = 0; // Nothing in buffer
        bufStop = 0;

        return true;
    }

    ~this()
    {
        QOA_FREE(buffer);
        buffer = null;

        QOA_FREE(desc);
        desc = null;
    }

    // Reads up to `frames` frames into `outData` (interleaved, scaled by
    // 1 / short.max). When `outData` is null the frames are decoded and
    // skipped. Returns the number of frames delivered, which is lower than
    // `frames` when no further frame can be decoded. `*err` is always set to
    // false: a frame that fails to decode is reported as a short read.
    int readSamples(T)(T* outData, int frames, bool* err)
    {
        if (err !is null)
            *err = false;

        int offsetFrames = 0;
        while (frames > 0)
        {
            // If no more data in buffer, read a frame
            if (bufStop - bufStart == 0)
            {
                uint frameLen;
                qoa_decode_frame(io, userData, desc, buffer, &frameLen);

                if (frameLen == 0)
                    return offsetFrames;

                bufStart = 0;
                bufStop = frameLen;
            }

            // How many samples we have in buffers? Take them.
            int inStore = bufStop - bufStart;
            if (inStore > frames)
                inStore = frames;

            if (outData !is null)
            {
                enum float F = 1.0f / short.max;

                for (int n = 0; n < inStore; ++n)
                {
                    for (int ch = 0; ch < numChannels; ++ch)
                    {
                        int index = n*numChannels+ch;
                        outData[offsetFrames*numChannels + index] = buffer[bufStart*numChannels + index] * F;
                    }
                }
            }

            bufStart += inStore;
            offsetFrames += inStore;
            currentPositionFrame += inStore;
            frames -= inStore;
            assert(bufStart <= bufStop);
        }
        return offsetFrames;
    }
}