1 module audioformats.stb_vorbis2; 2 3 // Ogg Vorbis audio decoder - v1.22 - public domain 4 // http://nothings.org/stb_vorbis/ 5 // 6 // Original version written by Sean Barrett in 2007. 7 // 8 // Originally sponsored by RAD Game Tools. Seeking implementation 9 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker, 10 // Elias Software, Aras Pranckevicius, and Sean Barrett. 11 // 12 // Translated to D by Guillaume Piolat. 13 // 14 // LICENSE 15 // 16 // See end of file for license information. 17 // 18 // Limitations: 19 // 20 // - floor 0 not supported (used in old ogg vorbis files pre-2004) 21 // - lossless sample-truncation at beginning ignored 22 // - cannot concatenate multiple vorbis streams 23 // - sample positions are 32-bit, limiting seekable 192Khz 24 // files to around 6 hours (Ogg supports 64-bit) 25 // 26 // Feature contributors: 27 // Dougall Johnson (sample-exact seeking) 28 // 29 // Bugfix/warning contributors: 30 // Terje Mathisen Niklas Frykholm Andy Hill 31 // Casey Muratori John Bolton Gargaj 32 // Laurent Gomila Marc LeBlanc Ronny Chevalier 33 // Bernhard Wodo Evan Balster github:alxprd 34 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot 35 // Phillip Bennefall Rohit Thiago Goulart 36 // github:manxorist Saga Musix github:infatum 37 // Timur Gagiev Maxwell Koo Peter Waller 38 // github:audinowho Dougall Johnson David Reid 39 // github:Clownacy Pedro J. Estebanez Remi Verschelde 40 // AnthoFoxo github:morlat Gabriel Ravier 41 // 42 // Partial history: 43 // 1.22d - 2021-10-31 - translated to D to update the one in audio-formats, and perhaps get seeking. 44 // 1.22 - 2021-07-11 - various small fixes 45 // 1.21 - 2021-07-02 - fix bug for files with no comments 46 // 1.20 - 2020-07-11 - several small fixes 47 // 1.19 - 2020-02-05 - warnings 48 // 1.18 - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc. 49 // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure) 50 // 1.16 - 2019-03-04 - fix warnings 51 // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found 52 // 1.14 - 2018-02-11 - delete bogus dealloca usage 53 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully) 54 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files 55 // 1.11 - 2017-07-23 - fix MinGW compilation 56 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory 57 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version 58 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame 59 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const 60 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson) 61 // some crash fixes when out of memory or with corrupt files 62 // fix some inappropriately signed shifts 63 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant 64 // 1.04 - 2014-08-27 - fix missing const-correct case in API 65 // 1.03 - 2014-08-07 - warning fixes 66 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows 67 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct) 68 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel; 69 // (API change) report sample rate for decode-full-file funcs 70 // 71 // See end of file for full version history. 72 // 73 // Notes about the D translation: removed push data API. 74 75 76 /////////// THREAD SAFETY 77 78 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from 79 // them from multiple threads at the same time. However, you can have multiple 80 // stb_vorbis* handles and decode from them independently in multiple thrads. 81 82 83 /////////// MEMORY ALLOCATION 84 85 // normally stb_vorbis uses malloc() to allocate memory at startup, 86 // and alloca() to allocate temporary memory during a frame on the 87 // stack. (Memory consumption will depend on the amount of setup 88 // data in the file and how you set the compile flags for speed 89 // vs. size. In my test files the maximal-size usage is ~150KB.) 90 // 91 // You can modify the wrapper functions in the source (setup_malloc, 92 // setup_temp_malloc, temp_malloc) to change this behavior, or you 93 // can use a simpler allocation model: you pass in a buffer from 94 // which stb_vorbis will allocate _all_ its memory (including the 95 // temp memory). "open" may fail with a VORBIS_outofmem if you 96 // do not pass in enough data; there is no way to determine how 97 // much you do need except to succeed (at which point you can 98 // query get_info to find the exact amount required. yes I know 99 // this is lame). 100 // 101 // If you pass in a non-NULL buffer of the type below, allocation 102 // will occur from it as described above. Otherwise just pass NULL 103 // to use malloc()/alloca() 104 105 import core.stdc.stdlib: malloc, free, qsort, abs; 106 import core.stdc.string: memset, memcmp, memcpy; 107 import std.math: ldexp, sin, cos, floor, exp, log, pow; 108 import audioformats.io; 109 110 nothrow: 111 @nogc: 112 113 struct stb_vorbis_alloc 114 { 115 ubyte* alloc_buffer; 116 int alloc_buffer_length_in_bytes; 117 } 118 119 120 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES 121 122 struct stb_vorbis_info 123 { 124 uint sample_rate; 125 int channels; 126 127 uint setup_memory_required; 128 uint setup_temp_memory_required; 129 uint temp_memory_required; 130 131 int max_frame_size; 132 } 133 134 struct stb_vorbis_comment 135 { 136 char *vendor; 137 138 int comment_list_length; 139 char **comment_list; 140 } 141 142 //////// ERROR CODES 143 144 alias STBVorbisError = int; 145 enum : STBVorbisError 146 { 147 VORBIS__no_error, 148 149 VORBIS_need_more_data=1, // not a real error 150 151 VORBIS_invalid_api_mixing, // can't mix API modes 152 VORBIS_outofmem, // not enough memory 153 VORBIS_feature_not_supported, // uses floor 0 154 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small 155 VORBIS_file_open_failure, // fopen() failed 156 VORBIS_seek_without_length, // can't seek in unknown-length file 157 158 VORBIS_unexpected_eof=10, // file is truncated? 159 VORBIS_seek_invalid, // seek past EOF 160 161 // decoding errors (corrupt/invalid stream) -- you probably 162 // don't care about the exact details of these 163 164 // vorbis errors: 165 VORBIS_invalid_setup=20, 166 VORBIS_invalid_stream, 167 168 // ogg errors: 169 VORBIS_missing_capture_pattern=30, 170 VORBIS_invalid_stream_structure_version, 171 VORBIS_continued_packet_flag_invalid, 172 VORBIS_incorrect_stream_serial_number, 173 VORBIS_invalid_first_page, 174 VORBIS_bad_packet_type, 175 VORBIS_cant_find_last_page, 176 VORBIS_seek_failed, 177 VORBIS_ogg_skeleton_not_supported 178 } 179 180 // 181 // HEADER ENDS HERE 182 // 183 ////////////////////////////////////////////////////////////////////////////// 184 185 // global configuration settings (e.g. set these in the project/makefile), 186 // or just set them in this file at the top (although ideally the first few 187 // should be visible when the header file is compiled too, although it's not 188 // crucial) 189 190 // STB_VORBIS_NO_STDIO 191 // does not compile the code for the APIs that use FILE *s internally 192 // or externally (implied by STB_VORBIS_NO_PULLDATA_API) 193 // #define STB_VORBIS_NO_STDIO 194 195 // STB_VORBIS_NO_INTEGER_CONVERSION 196 // does not compile the code for converting audio sample data from 197 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API) 198 // #define STB_VORBIS_NO_INTEGER_CONVERSION 199 200 // STB_VORBIS_NO_FAST_SCALED_FLOAT 201 // does not use a fast float-to-int trick to accelerate float-to-int on 202 // most platforms which requires endianness be defined correctly. 203 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT 204 205 206 // STB_VORBIS_MAX_CHANNELS [number] 207 // globally define this to the maximum number of channels you need. 208 // The spec does not put a restriction on channels except that 209 // the count is stored in a byte, so 255 is the hard limit. 210 // Reducing this saves about 16 bytes per value, so using 16 saves 211 // (255-16)*16 or around 4KB. Plus anything other memory usage 212 // I forgot to account for. Can probably go as low as 8 (7.1 audio), 213 // 6 (5.1 audio), or 2 (stereo only). 214 enum STB_VORBIS_MAX_CHANNELS = 16; 215 216 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number] 217 // sets the log size of the huffman-acceleration table. Maximum 218 // supported value is 24. with larger numbers, more decodings are O(1), 219 // but the table size is larger so worse cache missing, so you'll have 220 // to probe (and try multiple ogg vorbis files) to find the sweet spot. 221 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10; 222 223 // STB_VORBIS_FAST_BINARY_LENGTH [number] 224 // sets the log size of the binary-search acceleration table. this 225 // is used in similar fashion to the fast-huffman size to set initial 226 // parameters for the binary search 227 228 // STB_VORBIS_FAST_HUFFMAN_INT 229 // The fast huffman tables are much more efficient if they can be 230 // stored as 16-bit results instead of 32-bit results. This restricts 231 // the codebooks to having only 65535 possible outcomes, though. 232 // (At least, accelerated by the huffman table.) 233 enum STB_VORBIS_FAST_HUFFMAN_SHORT = true; 234 235 enum STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH = false; 236 237 enum STB_VORBIS_DIVIDES_IN_CODEBOOK = false; // cannot be true, support for this not translated 238 enum STB_VORBIS_DIVIDES_IN_RESIDUE = false; 239 240 enum STB_VORBIS_DIVIDE_TABLE = false; 241 enum STB_VORBIS_NO_DEFER_FLOOR = false; // STB_VORBIS_NO_DEFER_FLOOR not defined 242 enum STB_VORBIS_NO_CRT = false; 243 enum STB_VORBIS_NO_STDIO = false; 244 enum STB_VORBIS_NO_INTEGER_CONVERSION = true; 245 enum STB_VORBIS_NO_FAST_SCALED_FLOAT = true; 246 247 static assert(STB_VORBIS_MAX_CHANNELS <= 256); 248 static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24); 249 250 enum MAX_BLOCKSIZE_LOG = 13; // from specification 251 enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG); 252 253 alias uint8 = ubyte; 254 alias int8 = byte; 255 alias uint16 = ushort; 256 alias int16 = short; 257 alias uint32 = uint; 258 alias int32 = int; 259 260 enum TRUE = 1; 261 enum FALSE = 0; 262 enum void* NULL = null; 263 264 alias codetype = float; 265 266 // @NOTE 267 // 268 // Some arrays below are tagged "//varies", which means it's actually 269 // a variable-sized piece of data, but rather than malloc I assume it's 270 // small enough it's better to just allocate it all together with the 271 // main thing 272 // 273 // Most of the variables are specified with the smallest size I could pack 274 // them into. It might give better performance to make them all full-sized 275 // integers. It should be safe to freely rearrange the structures or change 276 // the sizes larger--nothing relies on silently truncating etc., nor the 277 // order of variables. 278 279 enum FAST_HUFFMAN_TABLE_SIZE = (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH); 280 enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE - 1); 281 282 struct Codebook 283 { 284 int dimensions, entries; 285 uint8 *codeword_lengths; 286 float minimum_value; 287 float delta_value; 288 uint8 value_bits; 289 uint8 lookup_type; 290 uint8 sequence_p; 291 uint8 sparse; 292 uint32 lookup_values; 293 codetype *multiplicands; 294 uint32 *codewords; 295 int16[FAST_HUFFMAN_TABLE_SIZE] fast_huffman; 296 uint32 *sorted_codewords; 297 int *sorted_values; 298 int sorted_entries; 299 } ; 300 301 struct Floor0 302 { 303 uint8 order; 304 uint16 rate; 305 uint16 bark_map_size; 306 uint8 amplitude_bits; 307 uint8 amplitude_offset; 308 uint8 number_of_books; 309 uint8[16] book_list; // varies 310 } 311 312 struct Floor1 313 { 314 uint8 partitions; 315 uint8[32] partition_class_list; // varies 316 uint8[16] class_dimensions; // varies 317 uint8[16] class_subclasses; // varies 318 uint8[16] class_masterbooks; // varies 319 int16[8][16] subclass_books; // varies 320 uint16[31*8+2] Xlist; // varies 321 uint8[31*8+2] sorted_order; 322 uint8[2][31*8+2] neighbors; 323 uint8 floor1_multiplier; 324 uint8 rangebits; 325 int values; 326 } 327 328 union Floor 329 { 330 Floor0 floor0; 331 Floor1 floor1; 332 } 333 334 struct Residue 335 { 336 uint32 begin, end; 337 uint32 part_size; 338 uint8 classifications; 339 uint8 classbook; 340 uint8 **classdata; 341 int16[8]*residue_books; 342 } ; 343 344 struct MappingChannel 345 { 346 uint8 magnitude; 347 uint8 angle; 348 uint8 mux; 349 } ; 350 351 struct Mapping 352 { 353 uint16 coupling_steps; 354 MappingChannel *chan; 355 uint8 submaps; 356 uint8[15] submap_floor; // varies 357 uint8[15] submap_residue; // varies 358 } ; 359 360 struct Mode 361 { 362 uint8 blockflag; 363 uint8 mapping; 364 uint16 windowtype; 365 uint16 transformtype; 366 } 367 368 struct CRCscan 369 { 370 uint32 goal_crc; // expected crc if match 371 int bytes_left; // bytes left in packet 372 uint32 crc_so_far; // running crc 373 int bytes_done; // bytes processed in _current_ chunk 374 uint32 sample_loc; // granule pos encoded in page 375 } 376 377 struct ProbedPage 378 { 379 uint32 page_start, page_end; 380 uint32 last_decoded_sample; 381 } 382 383 struct stb_vorbis 384 { 385 // user-accessible info 386 uint sample_rate; 387 int channels; 388 389 uint setup_memory_required; 390 uint temp_memory_required; 391 uint setup_temp_memory_required; 392 393 char *vendor; 394 int comment_list_length; 395 char **comment_list; 396 397 // input config 398 IOCallbacks* _io; 399 void* _userData; 400 401 /*uint8 *stream; 402 uint8 *stream_start; 403 uint8 *stream_end;*/ 404 405 uint32 stream_len; 406 407 // the page to seek to when seeking to start, may be zero 408 uint32 first_audio_page_offset; 409 410 // p_first is the page on which the first audio packet ends 411 // (but not necessarily the page on which it starts) 412 ProbedPage p_first, p_last; 413 414 // memory management 415 stb_vorbis_alloc alloc; 416 int setup_offset; 417 int temp_offset; 418 419 // run-time results 420 int eof; 421 STBVorbisError error; 422 423 // user-useful data 424 425 // header info 426 int[2] blocksize; 427 int blocksize_0, blocksize_1; 428 int codebook_count; 429 Codebook *codebooks; 430 int floor_count; 431 uint16[64] floor_types; // varies 432 Floor *floor_config; 433 int residue_count; 434 uint16[64] residue_types; // varies 435 Residue *residue_config; 436 int mapping_count; 437 Mapping *mapping; 438 int mode_count; 439 Mode[64] mode_config; // varies 440 441 uint32 total_samples; 442 443 // decode buffer 444 float*[STB_VORBIS_MAX_CHANNELS] channel_buffers; 445 float*[STB_VORBIS_MAX_CHANNELS] outputs ; 446 447 float*[STB_VORBIS_MAX_CHANNELS] previous_window; 448 int previous_length; 449 450 int16*[STB_VORBIS_MAX_CHANNELS] finalY; 451 452 long current_sample_loc; // sample location of next sample to decode 453 454 455 uint32 current_loc; // sample location of next frame to decode 456 int current_loc_valid; 457 458 // per-blocksize precomputed data 459 460 // twiddle factors 461 float*[2] A, B, C; 462 float*[2] window; 463 uint16*[2] bit_reverse; 464 465 // current page/packet/segment streaming info 466 uint32 serial; // stream serial number for verification 467 int last_page; 468 int segment_count; 469 uint8[255] segments; 470 uint8 page_flag; 471 uint8 bytes_in_seg; 472 uint8 first_decode; 473 int next_seg; 474 int last_seg; // flag that we're on the last segment 475 int last_seg_which; // what was the segment number of the last seg? 476 uint32 acc; 477 int valid_bits; 478 int packet_bytes; 479 int end_seg_with_known_loc; 480 uint32 known_loc_for_packet; 481 int discard_samples_deferred; 482 uint32 samples_output; 483 484 // sample-access 485 int channel_buffer_start; 486 int channel_buffer_end; 487 488 uint32[256] crc_table; // was a global, moved here 489 }; 490 491 alias vorb = stb_vorbis; 492 493 static int error(vorb *f, STBVorbisError e) 494 { 495 f.error = e; 496 if (!f.eof && e != VORBIS_need_more_data) { 497 f.error=e; // breakpoint for debugging 498 } 499 return 0; 500 } 501 502 503 // these functions are used for allocating temporary memory 504 // while decoding. if you can afford the stack space, use 505 // alloca(); otherwise, provide a temp buffer and it will 506 // allocate out of those. 507 508 size_t array_size_required(size_t count, size_t size) 509 { 510 return (count * ((void *).sizeof + size)); 511 } 512 513 void* temp_alloc(vorb *f, size_t size) 514 { 515 assert(f.alloc.alloc_buffer); 516 return setup_temp_malloc(f, cast(int)size); // note: never use alloca 517 } 518 519 void temp_free(vorb *f, void* p) 520 { 521 } 522 523 int temp_alloc_save(vorb *f) 524 { 525 return f.temp_offset; 526 } 527 528 void temp_alloc_restore(vorb *f, int p) 529 { 530 f.temp_offset = p; 531 } 532 533 void* temp_block_array(vorb *f, size_t count, size_t size) 534 { 535 return make_block_array(temp_alloc(f, cast(int) array_size_required(count,size)), cast(int) count, cast(int)size); 536 } 537 538 // given a sufficiently large block of memory, make an array of pointers to subblocks of it 539 static void *make_block_array(void *mem, int count, int size) 540 { 541 int i; 542 void ** p = cast(void **) mem; 543 char *q = cast(char *) (p + count); 544 for (i=0; i < count; ++i) { 545 p[i] = q; 546 q += size; 547 } 548 return p; 549 } 550 551 void *setup_malloc(vorb *f, size_t sz) 552 { 553 return setup_malloc(f, cast(int)sz); 554 } 555 556 void *setup_malloc(vorb *f, int sz) 557 { 558 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs. 559 f.setup_memory_required += sz; 560 if (f.alloc.alloc_buffer) { 561 void *p = cast(char *) f.alloc.alloc_buffer + f.setup_offset; 562 if (f.setup_offset + sz > f.temp_offset) return NULL; 563 f.setup_offset += sz; 564 return p; 565 } 566 return sz ? malloc(sz) : NULL; 567 } 568 569 static void setup_free(vorb *f, void *p) 570 { 571 if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack 572 free(p); 573 } 574 575 void *setup_temp_malloc(vorb *f, int sz) 576 { 577 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs. 578 if (f.alloc.alloc_buffer) { 579 if (f.temp_offset - sz < f.setup_offset) 580 return NULL; 581 f.temp_offset -= sz; 582 return cast(char *) f.alloc.alloc_buffer + f.temp_offset; 583 } 584 return malloc(sz); 585 } 586 587 void setup_temp_free(vorb *f, void *p, size_t sz) 588 { 589 if (f.alloc.alloc_buffer) 590 { 591 f.temp_offset += (sz+7)&~7; 592 return; 593 } 594 free(p); 595 } 596 597 enum uint CRC32_POLY = 0x04c11db7; // from spec 598 599 void crc32_init(vorb* f) 600 { 601 int i,j; 602 uint32 s; 603 for(i=0; i < 256; i++) { 604 for (s=cast(uint32) i << 24, j=0; j < 8; ++j) 605 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0); 606 f.crc_table[i] = s; 607 } 608 } 609 610 uint32 crc32_update(vorb* f, uint32 crc, uint8 byte_) 611 { 612 return (crc << 8) ^ f.crc_table[byte_ ^ (crc >> 24)]; 613 } 614 615 616 // used in setup, and for huffman that doesn't go fast path 617 static uint bit_reverse(uint n) 618 { 619 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1); 620 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2); 621 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4); 622 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8); 623 return (n >> 16) | (n << 16); 624 } 625 626 static float square(float x) 627 { 628 return x*x; 629 } 630 631 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3 632 // as required by the specification. fast(?) implementation from stb.h 633 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup 634 static int ilog(int32 n) 635 { 636 static immutable byte[16] log2_4 = [ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 ]; 637 638 if (n < 0) return 0; // signed n returns 0 639 640 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29) 641 if (n < (1 << 14)) 642 if (n < (1 << 4)) return 0 + log2_4[n ]; 643 else if (n < (1 << 9)) return 5 + log2_4[n >> 5]; 644 else return 10 + log2_4[n >> 10]; 645 else if (n < (1 << 24)) 646 if (n < (1 << 19)) return 15 + log2_4[n >> 15]; 647 else return 20 + log2_4[n >> 20]; 648 else if (n < (1 << 29)) return 25 + log2_4[n >> 25]; 649 else return 30 + log2_4[n >> 30]; 650 } 651 652 enum M_PI = 3.14159265358979323846264f; // from CRC 653 654 // code length assigned to a value with no huffman encoding 655 enum NO_CODE = 255; 656 657 /////////////////////// LEAF SETUP FUNCTIONS ////////////////////////// 658 // 659 // these functions are only called at setup, and only a few times 660 // per file 661 662 static float float32_unpack(uint32 x) 663 { 664 // from the specification 665 uint32 mantissa = x & 0x1fffff; 666 uint32 sign = x & 0x80000000; 667 uint32 exp = (x & 0x7fe00000) >> 21; 668 double res = sign ? -cast(double)mantissa : cast(double)mantissa; 669 return cast(float) ldexp(cast(float)res, cast(int)exp-788); 670 } 671 672 673 // zlib & jpeg huffman tables assume that the output symbols 674 // can either be arbitrarily arranged, or have monotonically 675 // increasing frequencies--they rely on the lengths being sorted; 676 // this makes for a very simple generation algorithm. 677 // vorbis allows a huffman table with non-sorted lengths. This 678 // requires a more sophisticated construction, since symbols in 679 // order do not map to huffman codes "in order". 680 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values) 681 { 682 if (!c.sparse) { 683 c.codewords [symbol] = huff_code; 684 } else { 685 c.codewords [count] = huff_code; 686 c.codeword_lengths[count] = cast(ubyte)len; 687 values [count] = symbol; 688 } 689 } 690 691 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) 692 { 693 int i,k,m=0; 694 uint32[32] available; 695 696 memset(available.ptr, 0, 32); 697 // find the first entry 698 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break; 699 if (k == n) { assert(c.sorted_entries == 0); return TRUE; } 700 assert(len[k] < 32); // no error return required, code reading lens checks this 701 // add to the list 702 add_entry(c, 0, k, m++, len[k], values); 703 // add all available leaves 704 for (i=1; i <= len[k]; ++i) 705 available[i] = 1U << (32-i); 706 // note that the above code treats the first case specially, 707 // but it's really the same as the following code, so they 708 // could probably be combined (except the initial code is 0, 709 // and I use 0 in available[] to mean 'empty') 710 for (i=k+1; i < n; ++i) { 711 uint32 res; 712 int z = len[i], y; 713 if (z == NO_CODE) continue; 714 assert(z < 32); // no error return required, code reading lens checks this 715 // find lowest available leaf (should always be earliest, 716 // which is what the specification calls for) 717 // note that this property, and the fact we can never have 718 // more than one free leaf at a given level, isn't totally 719 // trivial to prove, but it seems true and the assert never 720 // fires, so! 721 while (z > 0 && !available[z]) --z; 722 if (z == 0) { return FALSE; } 723 res = available[z]; 724 available[z] = 0; 725 add_entry(c, bit_reverse(res), i, m++, len[i], values); 726 // propagate availability up the tree 727 if (z != len[i]) { 728 for (y=len[i]; y > z; --y) { 729 assert(available[y] == 0); 730 available[y] = res + (1 << (32-y)); 731 } 732 } 733 } 734 return TRUE; 735 } 736 737 // accelerated huffman table allows fast O(1) match of all symbols 738 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH 739 static void compute_accelerated_huffman(Codebook *c) 740 { 741 int i, len; 742 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i) 743 c.fast_huffman[i] = -1; 744 745 len = c.sparse ? c.sorted_entries : c.entries; 746 if (len > 32767) len = 32767; // largest possible value we can encode! 747 for (i=0; i < len; ++i) { 748 if (c.codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) { 749 uint32 z = c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]; 750 // set table entries for all bit combinations in the higher bits 751 while (z < FAST_HUFFMAN_TABLE_SIZE) { 752 c.fast_huffman[z] = cast(short)i; 753 z += 1 << c.codeword_lengths[i]; 754 } 755 } 756 } 757 } 758 759 extern(C) int uint32_compare_2(const void *p, const void *q) 760 { 761 uint32 x = *cast(uint32 *) p; 762 uint32 y = *cast(uint32 *) q; 763 return x < y ? -1 : x > y; 764 } 765 766 static int include_in_sort(Codebook *c, uint8 len) 767 { 768 if (c.sparse) { assert(len != NO_CODE); return TRUE; } 769 if (len == NO_CODE) return FALSE; 770 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE; 771 return FALSE; 772 } 773 774 // if the fast table above doesn't work, we want to binary 775 // search them... need to reverse the bits 776 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values) 777 { 778 int i, len; 779 // build a list of all the entries 780 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN. 781 // this is kind of a frivolous optimization--I don't see any performance improvement, 782 // but it's like 4 extra lines of code, so. 783 if (!c.sparse) { 784 int k = 0; 785 for (i=0; i < c.entries; ++i) 786 if (include_in_sort(c, lengths[i])) 787 c.sorted_codewords[k++] = bit_reverse(c.codewords[i]); 788 assert(k == c.sorted_entries); 789 } else { 790 for (i=0; i < c.sorted_entries; ++i) 791 c.sorted_codewords[i] = bit_reverse(c.codewords[i]); 792 } 793 794 qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare_2); 795 c.sorted_codewords[c.sorted_entries] = 0xffffffff; 796 797 len = c.sparse ? c.sorted_entries : c.entries; 798 // now we need to indicate how they correspond; we could either 799 // #1: sort a different data structure that says who they correspond to 800 // #2: for each sorted entry, search the original list to find who corresponds 801 // #3: for each original entry, find the sorted entry 802 // #1 requires extra storage, #2 is slow, #3 can use binary search! 803 for (i=0; i < len; ++i) { 804 ubyte huff_len = c.sparse ? lengths[values[i]] : lengths[i]; 805 if (include_in_sort(c, huff_len)) { 806 uint32 code = bit_reverse(c.codewords[i]); 807 int x=0, n=c.sorted_entries; 808 while (n > 1) { 809 // invariant: sc[x] <= code < sc[x+n] 810 int m = x + (n >> 1); 811 if (c.sorted_codewords[m] <= code) { 812 x = m; 813 n -= (n>>1); 814 } else { 815 n >>= 1; 816 } 817 } 818 assert(c.sorted_codewords[x] == code); 819 if (c.sparse) { 820 c.sorted_values[x] = values[i]; 821 c.codeword_lengths[x] = huff_len; 822 } else { 823 c.sorted_values[x] = i; 824 } 825 } 826 } 827 } 828 829 // only run while parsing the header (3 times) 830 static int vorbis_validate(uint8 *data) 831 { 832 static immutable char[6] vorbis = "vorbis"; 833 return memcmp(data, vorbis.ptr, 6) == 0; 834 } 835 836 // called from setup only, once per code book 837 // (formula implied by specification) 838 static int lookup1_values(int entries, int dim) 839 { 840 int r = cast(int) floor(exp(cast(float) log(cast(float) entries) / dim)); 841 if (cast(int) floor(pow(cast(float) r+1, dim)) <= entries) // (int) cast for MinGW warning; 842 ++r; // floor() to avoid _ftol() when non-CRT 843 if (pow(cast(float) r+1, dim) <= entries) 844 return -1; 845 if (cast(int) floor(pow(cast(float) r, dim)) > entries) 846 return -1; 847 return r; 848 } 849 850 // called twice per file 851 static void compute_twiddle_factors(int n, float *A, float *B, float *C) 852 { 853 int n4 = n >> 2, n8 = n >> 3; 854 int k,k2; 855 856 for (k=k2=0; k < n4; ++k,k2+=2) { 857 A[k2 ] = cast(float) cos(4*k*M_PI/n); 858 A[k2+1] = cast(float) -sin(4*k*M_PI/n); 859 B[k2 ] = cast(float) cos((k2+1)*M_PI/n/2) * 0.5f; 860 B[k2+1] = cast(float) sin((k2+1)*M_PI/n/2) * 0.5f; 861 } 862 for (k=k2=0; k < n8; ++k,k2+=2) { 863 C[k2 ] = cast(float) cos(2*(k2+1)*M_PI/n); 864 C[k2+1] = cast(float) -sin(2*(k2+1)*M_PI/n); 865 } 866 } 867 868 static void compute_window(int n, float *window) 869 { 870 int n2 = n >> 1, i; 871 for (i=0; i < n2; ++i) 872 window[i] = cast(float) sin(0.5 * M_PI * square(cast(float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI))); 873 } 874 875 static void compute_bitreverse(int n, uint16 *rev) 876 { 877 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions 878 int i, n8 = n >> 3; 879 for (i=0; i < n8; ++i) 880 rev[i] = cast(ushort) ( (bit_reverse(i) >> (32-ld+3)) << 2 ); 881 } 882 883 static int init_blocksize(vorb *f, int b, int n) 884 { 885 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3; 886 f.A[b] = cast(float *) setup_malloc(f, 4 * n2); 887 f.B[b] = cast(float *) setup_malloc(f, 4 * n2); 888 f.C[b] = cast(float *) setup_malloc(f, 4 * n4); 889 if (!f.A[b] || !f.B[b] || !f.C[b]) return error(f, VORBIS_outofmem); 890 compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]); 891 f.window[b] = cast(float *) setup_malloc(f, 4 * n2); 892 if (!f.window[b]) return error(f, VORBIS_outofmem); 893 compute_window(n, f.window[b]); 894 f.bit_reverse[b] = cast(uint16 *) setup_malloc(f, 2 * n8); 895 if (!f.bit_reverse[b]) return error(f, VORBIS_outofmem); 896 compute_bitreverse(n, f.bit_reverse[b]); 897 return TRUE; 898 } 899 900 static void neighbors(uint16 *x, int n, int *plow, int *phigh) 901 { 902 int low = -1; 903 int high = 65536; 904 int i; 905 for (i=0; i < n; ++i) { 906 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; } 907 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; } 908 } 909 } 910 911 // this has been repurposed so y is now the original index instead of y 912 struct stbv__floor_ordering 913 { 914 uint16 x,id; 915 } 916 917 extern(C) int point_compare_2(const void *p, const void *q) 918 { 919 stbv__floor_ordering *a = cast(stbv__floor_ordering *) p; 920 stbv__floor_ordering *b = cast(stbv__floor_ordering *) q; 921 return a.x < b.x ? -1 : a.x > b.x; 922 } 923 924 // 925 /////////////////////// END LEAF SETUP FUNCTIONS ////////////////////////// 926 927 uint8 get8(vorb *z) 928 { 929 ubyte r; 930 if (z._io.read(&r, 1, z._userData) == 1) 931 return r; 932 else 933 { 934 z.eof = TRUE; 935 return 0; 936 } 937 } 938 939 static uint32 get32(vorb *f) 940 { 941 uint32 x; 942 x = get8(f); 943 x += get8(f) << 8; 944 x += get8(f) << 16; 945 x += cast(uint32) get8(f) << 24; 946 return x; 947 } 948 949 static int getn(vorb *z, uint8 *data, int n) 950 { 951 ubyte r; 952 if (z._io.read(data, n, z._userData) == n) 953 return 1; 954 else 955 { 956 z.eof = 1; 957 return 0; 958 } 959 } 960 961 void skip(vorb *z, int n, bool* err) 962 { 963 bool success = z._io.skip(n, z._userData); 964 *err = !success; 965 } 966 967 static int set_file_offset(stb_vorbis *f, uint loc) 968 { 969 if (f._io.seek( loc, false, f._userData)) 970 { 971 return 1; 972 } 973 else 974 { 975 f.eof = 1; 976 f._io.seek(f.stream_len, false, f._userData); // stb_vorbis set to end of the file, for some reason. 977 return 0; 978 } 979 } 980 981 982 static immutable uint8[4] ogg_page_header = [ 0x4f, 0x67, 0x67, 0x53 ]; 983 984 static int capture_pattern(vorb *f) 985 { 986 if (0x4f != get8(f)) return FALSE; 987 if (0x67 != get8(f)) return FALSE; 988 if (0x67 != get8(f)) return FALSE; 989 if (0x53 != get8(f)) return FALSE; 990 return TRUE; 991 } 992 993 enum PAGEFLAG_continued_packet = 1; 994 enum PAGEFLAG_first_page = 2; 995 enum PAGEFLAG_last_page = 4; 996 997 static int start_page_no_capturepattern(vorb *f) 998 { 999 uint32 loc0,loc1,n; 1000 if (f.first_decode) { 1001 f.p_first.page_start = stb_vorbis_get_file_offset(f) - 4; 1002 } 1003 // stream structure version 1004 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version); 1005 // header flag 1006 f.page_flag = get8(f); 1007 // absolute granule position 1008 loc0 = get32(f); 1009 loc1 = get32(f); 1010 // @TODO: validate loc0,loc1 as valid positions? 1011 // stream serial number -- vorbis doesn't interleave, so discard 1012 get32(f); 1013 //if (f.serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number); 1014 // page sequence number 1015 n = get32(f); 1016 f.last_page = n; 1017 // CRC32 1018 get32(f); 1019 // page_segments 1020 f.segment_count = get8(f); 1021 if (!getn(f, f.segments.ptr, f.segment_count)) 1022 return error(f, VORBIS_unexpected_eof); 1023 // assume we _don't_ know any the sample position of any segments 1024 f.end_seg_with_known_loc = -2; 1025 if (loc0 != ~0U || loc1 != ~0U) { 1026 int i; 1027 // determine which packet is the last one that will complete 1028 for (i=f.segment_count-1; i >= 0; --i) 1029 if (f.segments[i] < 255) 1030 break; 1031 // 'i' is now the index of the _last_ segment of a packet that ends 1032 if (i >= 0) { 1033 f.end_seg_with_known_loc = i; 1034 f.known_loc_for_packet = loc0; 1035 } 1036 } 1037 if (f.first_decode) { 1038 int i,len; 1039 len = 0; 1040 for (i=0; i < f.segment_count; ++i) 1041 len += f.segments[i]; 1042 len += 27 + f.segment_count; 1043 f.p_first.page_end = f.p_first.page_start + len; 1044 f.p_first.last_decoded_sample = loc0; 1045 } 1046 f.next_seg = 0; 1047 return TRUE; 1048 } 1049 1050 static int start_page(vorb *f) 1051 { 1052 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern); 1053 return start_page_no_capturepattern(f); 1054 } 1055 1056 static int start_packet(vorb *f) 1057 { 1058 while (f.next_seg == -1) { 1059 if (!start_page(f)) return FALSE; 1060 if (f.page_flag & PAGEFLAG_continued_packet) 1061 return error(f, VORBIS_continued_packet_flag_invalid); 1062 } 1063 f.last_seg = FALSE; 1064 f.valid_bits = 0; 1065 f.packet_bytes = 0; 1066 f.bytes_in_seg = 0; 1067 // f.next_seg is now valid 1068 return TRUE; 1069 } 1070 1071 int maybe_start_packet(vorb *f) 1072 { 1073 if (f.next_seg == -1) { 1074 int x = get8(f); 1075 if (f.eof) return FALSE; // EOF at page boundary is not an error! 1076 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern); 1077 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1078 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1079 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1080 if (!start_page_no_capturepattern(f)) return FALSE; 1081 if (f.page_flag & PAGEFLAG_continued_packet) { 1082 // set up enough state that we can read this packet if we want, 1083 // e.g. during recovery 1084 f.last_seg = FALSE; 1085 f.bytes_in_seg = 0; 1086 return error(f, VORBIS_continued_packet_flag_invalid); 1087 } 1088 } 1089 return start_packet(f); 1090 } 1091 1092 static int next_segment(vorb *f) 1093 { 1094 int len; 1095 if (f.last_seg) return 0; 1096 if (f.next_seg == -1) { 1097 f.last_seg_which = f.segment_count-1; // in case start_page fails 1098 if (!start_page(f)) { f.last_seg = 1; return 0; } 1099 if (!(f.page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid); 1100 } 1101 len = f.segments[f.next_seg++]; 1102 if (len < 255) { 1103 f.last_seg = TRUE; 1104 f.last_seg_which = f.next_seg-1; 1105 } 1106 if (f.next_seg >= f.segment_count) 1107 f.next_seg = -1; 1108 assert(f.bytes_in_seg == 0); 1109 f.bytes_in_seg = cast(ubyte)len; 1110 return len; 1111 } 1112 1113 enum EOP = (-1); 1114 enum INVALID_BITS = (-1); 1115 1116 static int get8_packet_raw(vorb *f) 1117 { 1118 if (!f.bytes_in_seg) { // CLANG! 1119 if (f.last_seg) return EOP; 1120 else if (!next_segment(f)) return EOP; 1121 } 1122 assert(f.bytes_in_seg > 0); 1123 --f.bytes_in_seg; 1124 ++f.packet_bytes; 1125 return get8(f); 1126 } 1127 1128 static int get8_packet(vorb *f) 1129 { 1130 int x = get8_packet_raw(f); 1131 f.valid_bits = 0; 1132 return x; 1133 } 1134 1135 static int get32_packet(vorb *f) 1136 { 1137 uint32 x; 1138 x = get8_packet(f); 1139 x += get8_packet(f) << 8; 1140 x += get8_packet(f) << 16; 1141 x += cast(uint32) get8_packet(f) << 24; 1142 return x; 1143 } 1144 1145 static void flush_packet(vorb *f) 1146 { 1147 while (get8_packet_raw(f) != EOP) 1148 { 1149 } 1150 } 1151 1152 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important 1153 // as the huffman decoder? 1154 static uint32 get_bits(vorb *f, int n) 1155 { 1156 uint32 z; 1157 1158 if (f.valid_bits < 0) return 0; 1159 if (f.valid_bits < n) { 1160 if (n > 24) { 1161 // the accumulator technique below would not work correctly in this case 1162 z = get_bits(f, 24); 1163 z += get_bits(f, n-24) << 24; 1164 return z; 1165 } 1166 if (f.valid_bits == 0) f.acc = 0; 1167 while (f.valid_bits < n) { 1168 int z2 = get8_packet_raw(f); 1169 if (z2 == EOP) { 1170 f.valid_bits = INVALID_BITS; 1171 return 0; 1172 } 1173 f.acc += z2 << f.valid_bits; 1174 f.valid_bits += 8; 1175 } 1176 } 1177 1178 assert(f.valid_bits >= n); 1179 z = f.acc & ((1 << n)-1); 1180 f.acc >>= n; 1181 f.valid_bits -= n; 1182 return z; 1183 } 1184 1185 // @OPTIMIZE: primary accumulator for huffman 1186 // expand the buffer to as many bits as possible without reading off end of packet 1187 // it might be nice to allow f.valid_bits and f.acc to be stored in registers, 1188 // e.g. cache them locally and decode locally 1189 static void prep_huffman(vorb *f) 1190 { 1191 if (f.valid_bits <= 24) { 1192 if (f.valid_bits == 0) f.acc = 0; 1193 do { 1194 int z; 1195 if (f.last_seg && !f.bytes_in_seg) return; 1196 z = get8_packet_raw(f); 1197 if (z == EOP) return; 1198 f.acc += cast(uint) z << f.valid_bits; 1199 f.valid_bits += 8; 1200 } while (f.valid_bits <= 24); 1201 } 1202 } 1203 1204 enum 1205 { 1206 VORBIS_packet_id = 1, 1207 VORBIS_packet_comment = 3, 1208 VORBIS_packet_setup = 5 1209 }; 1210 1211 static int codebook_decode_scalar_raw(vorb *f, Codebook *c) 1212 { 1213 int i; 1214 prep_huffman(f); 1215 1216 if (c.codewords == NULL && c.sorted_codewords == NULL) 1217 return -1; 1218 1219 // cases to use binary search: sorted_codewords && !c.codewords 1220 // sorted_codewords && c.entries > 8 1221 if (c.entries > 8 ? c.sorted_codewords!=NULL : !c.codewords) { 1222 // binary search 1223 uint32 code = bit_reverse(f.acc); 1224 int x=0, n=c.sorted_entries, len; 1225 1226 while (n > 1) { 1227 // invariant: sc[x] <= code < sc[x+n] 1228 int m = x + (n >> 1); 1229 if (c.sorted_codewords[m] <= code) { 1230 x = m; 1231 n -= (n>>1); 1232 } else { 1233 n >>= 1; 1234 } 1235 } 1236 // x is now the sorted index 1237 if (!c.sparse) x = c.sorted_values[x]; 1238 // x is now sorted index if sparse, or symbol otherwise 1239 len = c.codeword_lengths[x]; 1240 if (f.valid_bits >= len) { 1241 f.acc >>= len; 1242 f.valid_bits -= len; 1243 return x; 1244 } 1245 1246 f.valid_bits = 0; 1247 return -1; 1248 } 1249 1250 // if small, linear search 1251 assert(!c.sparse); 1252 for (i=0; i < c.entries; ++i) { 1253 if (c.codeword_lengths[i] == NO_CODE) continue; 1254 if (c.codewords[i] == (f.acc & ((1 << c.codeword_lengths[i])-1))) { 1255 if (f.valid_bits >= c.codeword_lengths[i]) { 1256 f.acc >>= c.codeword_lengths[i]; 1257 f.valid_bits -= c.codeword_lengths[i]; 1258 return i; 1259 } 1260 f.valid_bits = 0; 1261 return -1; 1262 } 1263 } 1264 1265 error(f, VORBIS_invalid_stream); 1266 f.valid_bits = 0; 1267 return -1; 1268 } 1269 1270 1271 static int codebook_decode_scalar(vorb *f, Codebook *c) 1272 { 1273 int i; 1274 if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) 1275 prep_huffman(f); 1276 // fast huffman table lookup 1277 i = f.acc & FAST_HUFFMAN_TABLE_MASK; 1278 i = c.fast_huffman[i]; 1279 if (i >= 0) { 1280 f.acc >>= c.codeword_lengths[i]; 1281 f.valid_bits -= c.codeword_lengths[i]; 1282 if (f.valid_bits < 0) { f.valid_bits = 0; return -1; } 1283 return i; 1284 } 1285 return codebook_decode_scalar_raw(f,c); 1286 } 1287 1288 void DECODE_RAW(ref int var, vorb* f, Codebook* c) 1289 { 1290 var = codebook_decode_scalar(f,c); 1291 } 1292 1293 void DECODE(ref int var, vorb* f, Codebook* c) 1294 { 1295 var = codebook_decode_scalar(f,c); 1296 if (c.sparse) 1297 var = c.sorted_values[var]; 1298 } 1299 1300 void DECODE_VQ(ref int var, vorb* f, Codebook* c) 1301 { 1302 DECODE_RAW(var, f, c); 1303 } 1304 1305 1306 1307 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case 1308 // where we avoid one addition 1309 codetype CODEBOOK_ELEMENT(Codebook* c, int off) 1310 { 1311 return (c.multiplicands[off]); 1312 } 1313 1314 codetype CODEBOOK_ELEMENT_FAST(Codebook* c, int off) 1315 { 1316 return (c.multiplicands[off]); 1317 } 1318 1319 codetype CODEBOOK_ELEMENT_BASE(Codebook* c) 1320 { 1321 return 0; 1322 } 1323 1324 static int codebook_decode_start(vorb *f, Codebook *c) 1325 { 1326 int z = -1; 1327 1328 // type 0 is only legal in a scalar context 1329 if (c.lookup_type == 0) 1330 error(f, VORBIS_invalid_stream); 1331 else { 1332 DECODE_VQ(z,f,c); 1333 if (c.sparse) assert(z < c.sorted_entries); 1334 if (z < 0) { // check for EOP 1335 if (!f.bytes_in_seg) 1336 if (f.last_seg) 1337 return z; 1338 error(f, VORBIS_invalid_stream); 1339 } 1340 } 1341 return z; 1342 } 1343 1344 static int codebook_decode(vorb *f, Codebook *c, float *output, int len) 1345 { 1346 int i,z = codebook_decode_start(f,c); 1347 if (z < 0) return FALSE; 1348 if (len > c.dimensions) len = c.dimensions; 1349 1350 z *= c.dimensions; 1351 if (c.sequence_p) 1352 { 1353 float last = CODEBOOK_ELEMENT_BASE(c); 1354 for (i=0; i < len; ++i) 1355 { 1356 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1357 output[i] += val; 1358 last = val + c.minimum_value; 1359 } 1360 } 1361 else 1362 { 1363 float last = CODEBOOK_ELEMENT_BASE(c); 1364 for (i=0; i < len; ++i) 1365 { 1366 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1367 } 1368 } 1369 return TRUE; 1370 } 1371 1372 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step) 1373 { 1374 int i,z = codebook_decode_start(f,c); 1375 float last = CODEBOOK_ELEMENT_BASE(c); 1376 if (z < 0) return FALSE; 1377 if (len > c.dimensions) len = c.dimensions; 1378 1379 z *= c.dimensions; 1380 for (i=0; i < len; ++i) { 1381 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1382 output[i*step] += val; 1383 if (c.sequence_p) last = val; 1384 } 1385 1386 return TRUE; 1387 } 1388 1389 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode) 1390 { 1391 int c_inter = *c_inter_p; 1392 int p_inter = *p_inter_p; 1393 int i,z, effective = c.dimensions; 1394 1395 // type 0 is only legal in a scalar context 1396 if (c.lookup_type == 0) return error(f, VORBIS_invalid_stream); 1397 1398 while (total_decode > 0) { 1399 float last = CODEBOOK_ELEMENT_BASE(c); 1400 DECODE_VQ(z,f,c); 1401 static if (!STB_VORBIS_DIVIDES_IN_CODEBOOK) 1402 { 1403 assert(!c.sparse || z < c.sorted_entries); 1404 } 1405 if (z < 0) { 1406 if (!f.bytes_in_seg) 1407 if (f.last_seg) return FALSE; 1408 return error(f, VORBIS_invalid_stream); 1409 } 1410 1411 // if this will take us off the end of the buffers, stop short! 1412 // we check by computing the length of the virtual interleaved 1413 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter), 1414 // and the length we'll be using (effective) 1415 if (c_inter + p_inter*ch + effective > len * ch) { 1416 effective = len*ch - (p_inter*ch - c_inter); 1417 } 1418 1419 { 1420 z *= c.dimensions; 1421 if (c.sequence_p) { 1422 for (i=0; i < effective; ++i) { 1423 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1424 if (outputs[c_inter]) 1425 outputs[c_inter][p_inter] += val; 1426 if (++c_inter == ch) { c_inter = 0; ++p_inter; } 1427 last = val; 1428 } 1429 } else { 1430 for (i=0; i < effective; ++i) { 1431 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1432 if (outputs[c_inter]) 1433 outputs[c_inter][p_inter] += val; 1434 if (++c_inter == ch) { c_inter = 0; ++p_inter; } 1435 } 1436 } 1437 } 1438 1439 total_decode -= effective; 1440 } 1441 *c_inter_p = c_inter; 1442 *p_inter_p = p_inter; 1443 return TRUE; 1444 } 1445 1446 static int predict_point(int x, int x0, int x1, int y0, int y1) 1447 { 1448 int dy = y1 - y0; 1449 int adx = x1 - x0; 1450 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86? 1451 int err = abs(dy) * (x - x0); 1452 int off = err / adx; 1453 return dy < 0 ? y0 - off : y0 + off; 1454 } 1455 1456 // the following table is block-copied from the specification 1457 static immutable float[256] inverse_db_table = 1458 [ 1459 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, 1460 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, 1461 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, 1462 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, 1463 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, 1464 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, 1465 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, 1466 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, 1467 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, 1468 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, 1469 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, 1470 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, 1471 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, 1472 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, 1473 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, 1474 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, 1475 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, 1476 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, 1477 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, 1478 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, 1479 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, 1480 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, 1481 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, 1482 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, 1483 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, 1484 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, 1485 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, 1486 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, 1487 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, 1488 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, 1489 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, 1490 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, 1491 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, 1492 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, 1493 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, 1494 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, 1495 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f, 1496 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f, 1497 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f, 1498 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f, 1499 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f, 1500 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f, 1501 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f, 1502 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f, 1503 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f, 1504 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f, 1505 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f, 1506 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f, 1507 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f, 1508 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f, 1509 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f, 1510 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f, 1511 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f, 1512 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f, 1513 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f, 1514 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f, 1515 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f, 1516 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f, 1517 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f, 1518 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f, 1519 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f, 1520 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f, 1521 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f, 1522 0.82788260f, 0.88168307f, 0.9389798f, 1.0f 1523 ]; 1524 1525 1526 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine, 1527 // note that you must produce bit-identical output to decode correctly; 1528 // this specific sequence of operations is specified in the spec (it's 1529 // drawing integer-quantized frequency-space lines that the encoder 1530 // expects to be exactly the same) 1531 // ... also, isn't the whole point of Bresenham's algorithm to NOT 1532 // have to divide in the setup? sigh. 1533 1534 static void draw_line(float *output, int x0, int y0, int x1, int y1, int n) 1535 { 1536 int dy = y1 - y0; 1537 int adx = x1 - x0; 1538 int ady = abs(dy); 1539 int base; 1540 int x=x0,y=y0; 1541 int err = 0; 1542 int sy; 1543 1544 base = dy / adx; 1545 if (dy < 0) 1546 sy = base - 1; 1547 else 1548 sy = base+1; 1549 ady -= abs(base) * adx; 1550 if (x1 > n) x1 = n; 1551 if (x < x1) { 1552 output[x] *= inverse_db_table[y&255]; 1553 for (++x; x < x1; ++x) { 1554 err += ady; 1555 if (err >= adx) { 1556 err -= adx; 1557 y += sy; 1558 } else 1559 y += base; 1560 output[x] *= inverse_db_table[y&255]; 1561 } 1562 } 1563 } 1564 1565 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype) 1566 { 1567 int k; 1568 if (rtype == 0) { 1569 int step = n / book.dimensions; 1570 for (k=0; k < step; ++k) 1571 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) 1572 return FALSE; 1573 } else { 1574 for (k=0; k < n; ) { 1575 if (!codebook_decode(f, book, target+offset, n-k)) 1576 return FALSE; 1577 k += book.dimensions; 1578 offset += book.dimensions; 1579 } 1580 } 1581 return TRUE; 1582 } 1583 1584 // n is 1/2 of the blocksize -- 1585 // specification: "Correct per-vector decode length is [n]/2" 1586 static void decode_residue(vorb *f, float **residue_buffers, int ch, int n, int rn, uint8 *do_not_decode) 1587 { 1588 int i,j,pass; 1589 Residue *r = f.residue_config + rn; 1590 int rtype = f.residue_types[rn]; 1591 int c = r.classbook; 1592 int classwords = f.codebooks[c].dimensions; 1593 uint actual_size = rtype == 2 ? n*2 : n; 1594 uint limit_r_begin = (r.begin < actual_size ? r.begin : actual_size); 1595 uint limit_r_end = (r.end < actual_size ? r.end : actual_size); 1596 int n_read = limit_r_end - limit_r_begin; 1597 int part_read = n_read / r.part_size; 1598 int temp_alloc_point = temp_alloc_save(f); 1599 uint8 ***part_classdata = cast(uint8 ***) temp_block_array(f,f.channels, part_read * (uint8*).sizeof); 1600 1601 for (i=0; i < ch; ++i) 1602 if (!do_not_decode[i]) 1603 memset(residue_buffers[i], 0, float.sizeof * n); 1604 1605 if (rtype == 2 && ch != 1) { 1606 for (j=0; j < ch; ++j) 1607 if (!do_not_decode[j]) 1608 break; 1609 if (j == ch) 1610 goto done; 1611 1612 for (pass=0; pass < 8; ++pass) { 1613 int pcount = 0, class_set = 0; 1614 if (ch == 2) { 1615 while (pcount < part_read) { 1616 int z = r.begin + pcount*r.part_size; 1617 int c_inter = (z & 1), p_inter = z>>1; 1618 if (pass == 0) { 1619 Codebook *c2 = f.codebooks+r.classbook; 1620 int q; 1621 DECODE(q,f,c2); 1622 if (q == EOP) goto done; 1623 part_classdata[0][class_set] = r.classdata[q]; 1624 } 1625 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) 1626 { 1627 int z2 = r.begin + pcount*r.part_size; 1628 int c2 = part_classdata[0][class_set][i]; 1629 int b = r.residue_books[c2][pass]; 1630 if (b >= 0) { 1631 Codebook *book = f.codebooks + b; 1632 1633 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) 1634 goto done; 1635 1636 } 1637 else 1638 { 1639 z2 += r.part_size; 1640 c_inter = z2 & 1; 1641 p_inter = z2 >> 1; 1642 } 1643 } 1644 ++class_set; 1645 } 1646 } else if (ch > 2) { 1647 while (pcount < part_read) { 1648 int z = r.begin + pcount*r.part_size; 1649 int c_inter = z % ch, p_inter = z/ch; 1650 if (pass == 0) { 1651 Codebook *c3 = f.codebooks+r.classbook; 1652 int q; 1653 DECODE(q,f,c3); 1654 if (q == EOP) goto done; 1655 part_classdata[0][class_set] = r.classdata[q]; 1656 } 1657 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 1658 int z3 = r.begin + pcount*r.part_size; 1659 int c3 = part_classdata[0][class_set][i]; 1660 int b = r.residue_books[c3][pass]; 1661 if (b >= 0) { 1662 Codebook *book = f.codebooks + b; 1663 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) 1664 goto done; 1665 } else { 1666 z3 += r.part_size; 1667 c_inter = z3 % ch; 1668 p_inter = z3 / ch; 1669 } 1670 } 1671 ++class_set; 1672 } 1673 } 1674 } 1675 goto done; 1676 } 1677 1678 for (pass=0; pass < 8; ++pass) { 1679 int pcount = 0, class_set=0; 1680 while (pcount < part_read) { 1681 if (pass == 0) { 1682 for (j=0; j < ch; ++j) { 1683 if (!do_not_decode[j]) { 1684 Codebook *c4 = f.codebooks+r.classbook; 1685 int temp; 1686 DECODE(temp,f,c4); 1687 if (temp == EOP) goto done; 1688 part_classdata[j][class_set] = r.classdata[temp]; 1689 } 1690 } 1691 } 1692 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 1693 for (j=0; j < ch; ++j) { 1694 if (!do_not_decode[j]) { 1695 int c5 = part_classdata[j][class_set][i]; 1696 int b = r.residue_books[c5][pass]; 1697 if (b >= 0) { 1698 float *target = residue_buffers[j]; 1699 int offset = r.begin + pcount * r.part_size; 1700 int n2 = r.part_size; 1701 Codebook *book = f.codebooks + b; 1702 if (!residue_decode(f, book, target, offset, n2, rtype)) 1703 goto done; 1704 } 1705 } 1706 } 1707 } 1708 ++class_set; 1709 } 1710 } 1711 done: 1712 1713 temp_free(f,part_classdata); 1714 temp_alloc_restore(f,temp_alloc_point); 1715 } 1716 1717 // the following were split out into separate functions while optimizing; 1718 // they could be pushed back up but eh. __forceinline showed no change; 1719 // they're probably already being inlined. 1720 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A) 1721 { 1722 float *ee0 = e + i_off; 1723 float *ee2 = ee0 + k_off; 1724 int i; 1725 1726 assert((n & 3) == 0); 1727 for (i=(n>>2); i > 0; --i) { 1728 float k00_20, k01_21; 1729 k00_20 = ee0[ 0] - ee2[ 0]; 1730 k01_21 = ee0[-1] - ee2[-1]; 1731 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0]; 1732 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1]; 1733 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1]; 1734 ee2[-1] = k01_21 * A[0] + k00_20 * A[1]; 1735 A += 8; 1736 1737 k00_20 = ee0[-2] - ee2[-2]; 1738 k01_21 = ee0[-3] - ee2[-3]; 1739 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2]; 1740 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3]; 1741 ee2[-2] = k00_20 * A[0] - k01_21 * A[1]; 1742 ee2[-3] = k01_21 * A[0] + k00_20 * A[1]; 1743 A += 8; 1744 1745 k00_20 = ee0[-4] - ee2[-4]; 1746 k01_21 = ee0[-5] - ee2[-5]; 1747 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4]; 1748 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5]; 1749 ee2[-4] = k00_20 * A[0] - k01_21 * A[1]; 1750 ee2[-5] = k01_21 * A[0] + k00_20 * A[1]; 1751 A += 8; 1752 1753 k00_20 = ee0[-6] - ee2[-6]; 1754 k01_21 = ee0[-7] - ee2[-7]; 1755 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6]; 1756 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7]; 1757 ee2[-6] = k00_20 * A[0] - k01_21 * A[1]; 1758 ee2[-7] = k01_21 * A[0] + k00_20 * A[1]; 1759 A += 8; 1760 ee0 -= 8; 1761 ee2 -= 8; 1762 } 1763 } 1764 1765 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1) 1766 { 1767 int i; 1768 float k00_20, k01_21; 1769 1770 float *e0 = e + d0; 1771 float *e2 = e0 + k_off; 1772 1773 for (i=lim >> 2; i > 0; --i) { 1774 k00_20 = e0[-0] - e2[-0]; 1775 k01_21 = e0[-1] - e2[-1]; 1776 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0]; 1777 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1]; 1778 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1]; 1779 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1]; 1780 1781 A += k1; 1782 1783 k00_20 = e0[-2] - e2[-2]; 1784 k01_21 = e0[-3] - e2[-3]; 1785 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2]; 1786 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3]; 1787 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1]; 1788 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1]; 1789 1790 A += k1; 1791 1792 k00_20 = e0[-4] - e2[-4]; 1793 k01_21 = e0[-5] - e2[-5]; 1794 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4]; 1795 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5]; 1796 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1]; 1797 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1]; 1798 1799 A += k1; 1800 1801 k00_20 = e0[-6] - e2[-6]; 1802 k01_21 = e0[-7] - e2[-7]; 1803 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6]; 1804 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7]; 1805 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1]; 1806 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1]; 1807 1808 e0 -= 8; 1809 e2 -= 8; 1810 1811 A += k1; 1812 } 1813 } 1814 1815 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0) 1816 { 1817 int i; 1818 float A0 = A[0]; 1819 float A1 = A[0+1]; 1820 float A2 = A[0+a_off]; 1821 float A3 = A[0+a_off+1]; 1822 float A4 = A[0+a_off*2+0]; 1823 float A5 = A[0+a_off*2+1]; 1824 float A6 = A[0+a_off*3+0]; 1825 float A7 = A[0+a_off*3+1]; 1826 1827 float k00,k11; 1828 1829 float *ee0 = e +i_off; 1830 float *ee2 = ee0+k_off; 1831 1832 for (i=n; i > 0; --i) { 1833 k00 = ee0[ 0] - ee2[ 0]; 1834 k11 = ee0[-1] - ee2[-1]; 1835 ee0[ 0] = ee0[ 0] + ee2[ 0]; 1836 ee0[-1] = ee0[-1] + ee2[-1]; 1837 ee2[ 0] = (k00) * A0 - (k11) * A1; 1838 ee2[-1] = (k11) * A0 + (k00) * A1; 1839 1840 k00 = ee0[-2] - ee2[-2]; 1841 k11 = ee0[-3] - ee2[-3]; 1842 ee0[-2] = ee0[-2] + ee2[-2]; 1843 ee0[-3] = ee0[-3] + ee2[-3]; 1844 ee2[-2] = (k00) * A2 - (k11) * A3; 1845 ee2[-3] = (k11) * A2 + (k00) * A3; 1846 1847 k00 = ee0[-4] - ee2[-4]; 1848 k11 = ee0[-5] - ee2[-5]; 1849 ee0[-4] = ee0[-4] + ee2[-4]; 1850 ee0[-5] = ee0[-5] + ee2[-5]; 1851 ee2[-4] = (k00) * A4 - (k11) * A5; 1852 ee2[-5] = (k11) * A4 + (k00) * A5; 1853 1854 k00 = ee0[-6] - ee2[-6]; 1855 k11 = ee0[-7] - ee2[-7]; 1856 ee0[-6] = ee0[-6] + ee2[-6]; 1857 ee0[-7] = ee0[-7] + ee2[-7]; 1858 ee2[-6] = (k00) * A6 - (k11) * A7; 1859 ee2[-7] = (k11) * A6 + (k00) * A7; 1860 1861 ee0 -= k0; 1862 ee2 -= k0; 1863 } 1864 } 1865 1866 static void iter_54(float *z) 1867 { 1868 float k00,k11,k22,k33; 1869 float y0,y1,y2,y3; 1870 1871 k00 = z[ 0] - z[-4]; 1872 y0 = z[ 0] + z[-4]; 1873 y2 = z[-2] + z[-6]; 1874 k22 = z[-2] - z[-6]; 1875 1876 z[-0] = y0 + y2; // z0 + z4 + z2 + z6 1877 z[-2] = y0 - y2; // z0 + z4 - z2 - z6 1878 1879 // done with y0,y2 1880 1881 k33 = z[-3] - z[-7]; 1882 1883 z[-4] = k00 + k33; // z0 - z4 + z3 - z7 1884 z[-6] = k00 - k33; // z0 - z4 - z3 + z7 1885 1886 // done with k33 1887 1888 k11 = z[-1] - z[-5]; 1889 y1 = z[-1] + z[-5]; 1890 y3 = z[-3] + z[-7]; 1891 1892 z[-1] = y1 + y3; // z1 + z5 + z3 + z7 1893 z[-3] = y1 - y3; // z1 + z5 - z3 - z7 1894 z[-5] = k11 - k22; // z1 - z5 + z2 - z6 1895 z[-7] = k11 + k22; // z1 - z5 - z2 + z6 1896 } 1897 1898 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n) 1899 { 1900 int a_off = base_n >> 3; 1901 float A2 = A[0+a_off]; 1902 float *z = e + i_off; 1903 float *base = z - 16 * n; 1904 1905 while (z > base) { 1906 float k00,k11; 1907 float l00,l11; 1908 1909 k00 = z[-0] - z[ -8]; 1910 k11 = z[-1] - z[ -9]; 1911 l00 = z[-2] - z[-10]; 1912 l11 = z[-3] - z[-11]; 1913 z[ -0] = z[-0] + z[ -8]; 1914 z[ -1] = z[-1] + z[ -9]; 1915 z[ -2] = z[-2] + z[-10]; 1916 z[ -3] = z[-3] + z[-11]; 1917 z[ -8] = k00; 1918 z[ -9] = k11; 1919 z[-10] = (l00+l11) * A2; 1920 z[-11] = (l11-l00) * A2; 1921 1922 k00 = z[ -4] - z[-12]; 1923 k11 = z[ -5] - z[-13]; 1924 l00 = z[ -6] - z[-14]; 1925 l11 = z[ -7] - z[-15]; 1926 z[ -4] = z[ -4] + z[-12]; 1927 z[ -5] = z[ -5] + z[-13]; 1928 z[ -6] = z[ -6] + z[-14]; 1929 z[ -7] = z[ -7] + z[-15]; 1930 z[-12] = k11; 1931 z[-13] = -k00; 1932 z[-14] = (l11-l00) * A2; 1933 z[-15] = (l00+l11) * -A2; 1934 1935 iter_54(z); 1936 iter_54(z-8); 1937 z -= 16; 1938 } 1939 } 1940 1941 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) 1942 { 1943 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; 1944 int ld; 1945 // @OPTIMIZE: reduce register pressure by using fewer variables? 1946 int save_point = temp_alloc_save(f); 1947 float *buf2 = cast(float *) temp_alloc(f, n2 * float.sizeof); 1948 float* u = null; 1949 float* v = null; 1950 // twiddle factors 1951 float *A = f.A[blocktype]; 1952 1953 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" 1954 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function. 1955 1956 // kernel from paper 1957 1958 1959 // merged: 1960 // copy and reflect spectral data 1961 // step 0 1962 1963 // note that it turns out that the items added together during 1964 // this step are, in fact, being added to themselves (as reflected 1965 // by step 0). inexplicable inefficiency! this became obvious 1966 // once I combined the passes. 1967 1968 // so there's a missing 'times 2' here (for adding X to itself). 1969 // this propagates through linearly to the end, where the numbers 1970 // are 1/2 too small, and need to be compensated for. 1971 1972 { 1973 float *d, e, AA, e_stop; 1974 d = &buf2[n2-2]; 1975 AA = A; 1976 e = &buffer[0]; 1977 e_stop = &buffer[n2]; 1978 while (e != e_stop) { 1979 d[1] = (e[0] * AA[0] - e[2]*AA[1]); 1980 d[0] = (e[0] * AA[1] + e[2]*AA[0]); 1981 d -= 2; 1982 AA += 2; 1983 e += 4; 1984 } 1985 1986 e = &buffer[n2-3]; 1987 while (d >= buf2) { 1988 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]); 1989 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]); 1990 d -= 2; 1991 AA += 2; 1992 e -= 4; 1993 } 1994 } 1995 1996 // now we use symbolic names for these, so that we can 1997 // possibly swap their meaning as we change which operations 1998 // are in place 1999 2000 u = buffer; 2001 v = buf2; 2002 2003 // step 2 (paper output is w, now u) 2004 // this could be in place, but the data ends up in the wrong 2005 // place... _somebody_'s got to swap it, so this is nominated 2006 { 2007 float *AA = &A[n2-8]; 2008 float *d0, d1, e0, e1; 2009 2010 e0 = &v[n4]; 2011 e1 = &v[0]; 2012 2013 d0 = &u[n4]; 2014 d1 = &u[0]; 2015 2016 while (AA >= A) { 2017 float v40_20, v41_21; 2018 2019 v41_21 = e0[1] - e1[1]; 2020 v40_20 = e0[0] - e1[0]; 2021 d0[1] = e0[1] + e1[1]; 2022 d0[0] = e0[0] + e1[0]; 2023 d1[1] = v41_21*AA[4] - v40_20*AA[5]; 2024 d1[0] = v40_20*AA[4] + v41_21*AA[5]; 2025 2026 v41_21 = e0[3] - e1[3]; 2027 v40_20 = e0[2] - e1[2]; 2028 d0[3] = e0[3] + e1[3]; 2029 d0[2] = e0[2] + e1[2]; 2030 d1[3] = v41_21*AA[0] - v40_20*AA[1]; 2031 d1[2] = v40_20*AA[0] + v41_21*AA[1]; 2032 2033 AA -= 8; 2034 2035 d0 += 4; 2036 d1 += 4; 2037 e0 += 4; 2038 e1 += 4; 2039 } 2040 } 2041 2042 // step 3 2043 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions 2044 2045 // optimized step 3: 2046 2047 // the original step3 loop can be nested r inside s or s inside r; 2048 // it's written originally as s inside r, but this is dumb when r 2049 // iterates many times, and s few. So I have two copies of it and 2050 // switch between them halfway. 2051 2052 // this is iteration 0 of step 3 2053 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A); 2054 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A); 2055 2056 // this is iteration 1 of step 3 2057 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16); 2058 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16); 2059 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16); 2060 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16); 2061 2062 l=2; 2063 for (; l < (ld-3)>>1; ++l) { 2064 int k0 = n >> (l+2), k0_2 = k0>>1; 2065 int lim = 1 << (l+1); 2066 int i; 2067 for (i=0; i < lim; ++i) 2068 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3)); 2069 } 2070 2071 for (; l < ld-6; ++l) { 2072 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1; 2073 int rlim = n >> (l+6), r; 2074 int lim = 1 << (l+1); 2075 int i_off; 2076 float *A0 = A; 2077 i_off = n2-1; 2078 for (r=rlim; r > 0; --r) { 2079 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0); 2080 A0 += k1*4; 2081 i_off -= 8; 2082 } 2083 } 2084 2085 // iterations with count: 2086 // ld-6,-5,-4 all interleaved together 2087 // the big win comes from getting rid of needless flops 2088 // due to the constants on pass 5 & 4 being all 1 and 0; 2089 // combining them to be simultaneous to improve cache made little difference 2090 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n); 2091 2092 // output is u 2093 2094 // step 4, 5, and 6 2095 // cannot be in-place because of step 5 2096 { 2097 uint16 *bitrev = f.bit_reverse[blocktype]; 2098 // weirdly, I'd have thought reading sequentially and writing 2099 // erratically would have been better than vice-versa, but in 2100 // fact that's not what my testing showed. (That is, with 2101 // j = bitreverse(i), do you read i and write j, or read j and write i.) 2102 2103 float *d0 = &v[n4-4]; 2104 float *d1 = &v[n2-4]; 2105 while (d0 >= v) { 2106 int k4; 2107 2108 k4 = bitrev[0]; 2109 d1[3] = u[k4+0]; 2110 d1[2] = u[k4+1]; 2111 d0[3] = u[k4+2]; 2112 d0[2] = u[k4+3]; 2113 2114 k4 = bitrev[1]; 2115 d1[1] = u[k4+0]; 2116 d1[0] = u[k4+1]; 2117 d0[1] = u[k4+2]; 2118 d0[0] = u[k4+3]; 2119 2120 d0 -= 4; 2121 d1 -= 4; 2122 bitrev += 2; 2123 } 2124 } 2125 // (paper output is u, now v) 2126 2127 2128 // data must be in buf2 2129 assert(v == buf2); 2130 2131 // step 7 (paper output is v, now v) 2132 // this is now in place 2133 { 2134 float *C = f.C[blocktype]; 2135 float *d, e; 2136 2137 d = v; 2138 e = v + n2 - 4; 2139 2140 while (d < e) { 2141 float a02,a11,b0,b1,b2,b3; 2142 2143 a02 = d[0] - e[2]; 2144 a11 = d[1] + e[3]; 2145 2146 b0 = C[1]*a02 + C[0]*a11; 2147 b1 = C[1]*a11 - C[0]*a02; 2148 2149 b2 = d[0] + e[ 2]; 2150 b3 = d[1] - e[ 3]; 2151 2152 d[0] = b2 + b0; 2153 d[1] = b3 + b1; 2154 e[2] = b2 - b0; 2155 e[3] = b1 - b3; 2156 2157 a02 = d[2] - e[0]; 2158 a11 = d[3] + e[1]; 2159 2160 b0 = C[3]*a02 + C[2]*a11; 2161 b1 = C[3]*a11 - C[2]*a02; 2162 2163 b2 = d[2] + e[ 0]; 2164 b3 = d[3] - e[ 1]; 2165 2166 d[2] = b2 + b0; 2167 d[3] = b3 + b1; 2168 e[0] = b2 - b0; 2169 e[1] = b1 - b3; 2170 2171 C += 4; 2172 d += 4; 2173 e -= 4; 2174 } 2175 } 2176 2177 // data must be in buf2 2178 2179 2180 // step 8+decode (paper output is X, now buffer) 2181 // this generates pairs of data a la 8 and pushes them directly through 2182 // the decode kernel (pushing rather than pulling) to avoid having 2183 // to make another pass later 2184 2185 // this cannot POSSIBLY be in place, so we refer to the buffers directly 2186 2187 { 2188 float *d0, d1, d2, d3; 2189 2190 float *B = f.B[blocktype] + n2 - 8; 2191 float *e = buf2 + n2 - 8; 2192 d0 = &buffer[0]; 2193 d1 = &buffer[n2-4]; 2194 d2 = &buffer[n2]; 2195 d3 = &buffer[n-4]; 2196 while (e >= v) { 2197 float p0,p1,p2,p3; 2198 2199 p3 = e[6]*B[7] - e[7]*B[6]; 2200 p2 = -e[6]*B[6] - e[7]*B[7]; 2201 2202 d0[0] = p3; 2203 d1[3] = - p3; 2204 d2[0] = p2; 2205 d3[3] = p2; 2206 2207 p1 = e[4]*B[5] - e[5]*B[4]; 2208 p0 = -e[4]*B[4] - e[5]*B[5]; 2209 2210 d0[1] = p1; 2211 d1[2] = - p1; 2212 d2[1] = p0; 2213 d3[2] = p0; 2214 2215 p3 = e[2]*B[3] - e[3]*B[2]; 2216 p2 = -e[2]*B[2] - e[3]*B[3]; 2217 2218 d0[2] = p3; 2219 d1[1] = - p3; 2220 d2[2] = p2; 2221 d3[1] = p2; 2222 2223 p1 = e[0]*B[1] - e[1]*B[0]; 2224 p0 = -e[0]*B[0] - e[1]*B[1]; 2225 2226 d0[3] = p1; 2227 d1[0] = - p1; 2228 d2[3] = p0; 2229 d3[0] = p0; 2230 2231 B -= 8; 2232 e -= 8; 2233 d0 += 4; 2234 d2 += 4; 2235 d1 -= 4; 2236 d3 -= 4; 2237 } 2238 } 2239 2240 temp_free(f,buf2); 2241 temp_alloc_restore(f,save_point); 2242 } 2243 2244 2245 static float *get_window(vorb *f, int len) 2246 { 2247 len <<= 1; 2248 if (len == f.blocksize_0) return f.window[0]; 2249 if (len == f.blocksize_1) return f.window[1]; 2250 return null; 2251 } 2252 2253 alias YTYPE = int16; 2254 2255 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag) 2256 { 2257 int n2 = n >> 1; 2258 int s = map.chan[i].mux, floor; 2259 floor = map.submap_floor[s]; 2260 if (f.floor_types[floor] == 0) { 2261 return error(f, VORBIS_invalid_stream); 2262 } else { 2263 Floor1 *g = &f.floor_config[floor].floor1; 2264 int j,q; 2265 int lx = 0, ly = finalY[0] * g.floor1_multiplier; 2266 for (q=1; q < g.values; ++q) { 2267 j = g.sorted_order[q]; 2268 if (finalY[j] >= 0) 2269 { 2270 int hy = finalY[j] * g.floor1_multiplier; 2271 int hx = g.Xlist[j]; 2272 if (lx != hx) 2273 draw_line(target, lx,ly, hx,hy, n2); 2274 lx = hx, ly = hy; 2275 } 2276 } 2277 if (lx < n2) { 2278 // optimization of: draw_line(target, lx,ly, n,ly, n2); 2279 for (j=lx; j < n2; ++j) 2280 target[j] *= inverse_db_table[ly]; 2281 } 2282 } 2283 return TRUE; 2284 } 2285 2286 // The meaning of "left" and "right" 2287 // 2288 // For a given frame: 2289 // we compute samples from 0..n 2290 // window_center is n/2 2291 // we'll window and mix the samples from left_start to left_end with data from the previous frame 2292 // all of the samples from left_end to right_start can be output without mixing; however, 2293 // this interval is 0-length except when transitioning between short and long frames 2294 // all of the samples from right_start to right_end need to be mixed with the next frame, 2295 // which we don't have, so those get saved in a buffer 2296 // frame N's right_end-right_start, the number of samples to mix with the next frame, 2297 // has to be the same as frame N+1's left_end-left_start (which they are by 2298 // construction) 2299 2300 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) 2301 { 2302 Mode *m; 2303 int i, n, prev, next, window_center; 2304 f.channel_buffer_start = f.channel_buffer_end = 0; 2305 2306 retry: 2307 if (f.eof) return FALSE; 2308 if (!maybe_start_packet(f)) 2309 return FALSE; 2310 // check packet type 2311 if (get_bits(f,1) != 0) { 2312 while (EOP != get8_packet(f)) {} 2313 goto retry; 2314 } 2315 2316 if (f.alloc.alloc_buffer) 2317 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2318 2319 i = get_bits(f, ilog(f.mode_count-1)); 2320 if (i == EOP) return FALSE; 2321 if (i >= f.mode_count) return FALSE; 2322 *mode = i; 2323 m = f.mode_config.ptr + i; 2324 if (m.blockflag) { 2325 n = f.blocksize_1; 2326 prev = get_bits(f,1); 2327 next = get_bits(f,1); 2328 } else { 2329 prev = next = 0; 2330 n = f.blocksize_0; 2331 } 2332 2333 // WINDOWING 2334 2335 window_center = n >> 1; 2336 if (m.blockflag && !prev) { 2337 *p_left_start = (n - f.blocksize_0) >> 2; 2338 *p_left_end = (n + f.blocksize_0) >> 2; 2339 } else { 2340 *p_left_start = 0; 2341 *p_left_end = window_center; 2342 } 2343 if (m.blockflag && !next) { 2344 *p_right_start = (n*3 - f.blocksize_0) >> 2; 2345 *p_right_end = (n*3 + f.blocksize_0) >> 2; 2346 } else { 2347 *p_right_start = window_center; 2348 *p_right_end = n; 2349 } 2350 2351 return TRUE; 2352 } 2353 2354 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left) 2355 { 2356 Mapping *map; 2357 int i,j,k,n,n2; 2358 int[256] zero_channel; 2359 int[256] really_zero_channel; 2360 2361 // WINDOWING 2362 2363 n = f.blocksize[m.blockflag]; 2364 map = &f.mapping[m.mapping]; 2365 2366 // FLOORS 2367 n2 = n >> 1; 2368 2369 for (i=0; i < f.channels; ++i) { 2370 int s = map.chan[i].mux, floor; 2371 zero_channel[i] = FALSE; 2372 floor = map.submap_floor[s]; 2373 if (f.floor_types[floor] == 0) { 2374 return error(f, VORBIS_invalid_stream); 2375 } else { 2376 Floor1 *g = &f.floor_config[floor].floor1; 2377 if (get_bits(f, 1)) { 2378 short *finalY; 2379 uint8[256] step2_flag; 2380 static immutable int[4] range_list = [ 256, 128, 86, 64 ]; 2381 int range = range_list[g.floor1_multiplier-1]; 2382 int offset = 2; 2383 finalY = f.finalY[i]; 2384 finalY[0] = cast(short) get_bits(f, ilog(range)-1); 2385 finalY[1] = cast(short) get_bits(f, ilog(range)-1); 2386 for (j=0; j < g.partitions; ++j) { 2387 int pclass = g.partition_class_list[j]; 2388 int cdim = g.class_dimensions[pclass]; 2389 int cbits = g.class_subclasses[pclass]; 2390 int csub = (1 << cbits)-1; 2391 int cval = 0; 2392 if (cbits) { 2393 Codebook *c = f.codebooks + g.class_masterbooks[pclass]; 2394 DECODE(cval,f,c); 2395 } 2396 for (k=0; k < cdim; ++k) { 2397 int book = g.subclass_books[pclass][cval & csub]; 2398 cval = cval >> cbits; 2399 if (book >= 0) { 2400 int temp; 2401 Codebook *c = f.codebooks + book; 2402 DECODE(temp,f,c); 2403 finalY[offset++] = cast(short)temp; 2404 } else 2405 finalY[offset++] = 0; 2406 } 2407 } 2408 if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec 2409 step2_flag[0] = step2_flag[1] = 1; 2410 for (j=2; j < g.values; ++j) { 2411 int low, high, pred, highroom, lowroom, room, val; 2412 low = g.neighbors[j][0]; 2413 high = g.neighbors[j][1]; 2414 //neighbors(g.Xlist, j, &low, &high); 2415 pred = predict_point(g.Xlist[j], g.Xlist[low], g.Xlist[high], finalY[low], finalY[high]); 2416 val = finalY[j]; 2417 highroom = range - pred; 2418 lowroom = pred; 2419 if (highroom < lowroom) 2420 room = highroom * 2; 2421 else 2422 room = lowroom * 2; 2423 if (val) { 2424 step2_flag[low] = step2_flag[high] = 1; 2425 step2_flag[j] = 1; 2426 if (val >= room) 2427 if (highroom > lowroom) 2428 finalY[j] = cast(short)(val - lowroom + pred); 2429 else 2430 finalY[j] = cast(short)(pred - val + highroom - 1); 2431 else 2432 if (val & 1) 2433 finalY[j] = cast(short)(pred - ((val+1)>>1)); 2434 else 2435 finalY[j] = cast(short)(pred + (val>>1)); 2436 } else { 2437 step2_flag[j] = 0; 2438 finalY[j] = cast(short)pred; 2439 } 2440 } 2441 2442 // defer final floor computation until _after_ residue 2443 for (j=0; j < g.values; ++j) { 2444 if (!step2_flag[j]) 2445 finalY[j] = -1; 2446 } 2447 } else { 2448 error: 2449 zero_channel[i] = TRUE; 2450 } 2451 // So we just defer everything else to later 2452 2453 // at this point we've decoded the floor into buffer 2454 } 2455 } 2456 2457 // at this point we've decoded all floors 2458 2459 if (f.alloc.alloc_buffer) 2460 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2461 2462 // re-enable coupled channels if necessary 2463 memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof * f.channels); 2464 for (i=0; i < map.coupling_steps; ++i) 2465 if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) { 2466 zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = FALSE; 2467 } 2468 2469 // RESIDUE DECODE 2470 for (i=0; i < map.submaps; ++i) { 2471 float *[STB_VORBIS_MAX_CHANNELS] residue_buffers; 2472 int r; 2473 uint8[256] do_not_decode; 2474 int ch = 0; 2475 for (j=0; j < f.channels; ++j) { 2476 if (map.chan[j].mux == i) { 2477 if (zero_channel[j]) { 2478 do_not_decode[ch] = TRUE; 2479 residue_buffers[ch] = null; 2480 } else { 2481 do_not_decode[ch] = FALSE; 2482 residue_buffers[ch] = f.channel_buffers[j]; 2483 } 2484 ++ch; 2485 } 2486 } 2487 r = map.submap_residue[i]; 2488 decode_residue(f, residue_buffers.ptr, ch, n2, r, do_not_decode.ptr); 2489 } 2490 2491 if (f.alloc.alloc_buffer) 2492 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2493 2494 // INVERSE COUPLING 2495 for (i = map.coupling_steps-1; i >= 0; --i) { 2496 int n3 = n >> 1; 2497 float *m_ = f.channel_buffers[map.chan[i].magnitude]; 2498 float *a = f.channel_buffers[map.chan[i].angle ]; 2499 for (j=0; j < n3; ++j) { 2500 float a2,m2; 2501 if (m_[j] > 0) 2502 if (a[j] > 0) 2503 m2 = m_[j], a2 = m_[j] - a[j]; 2504 else 2505 a2 = m_[j], m2 = m_[j] + a[j]; 2506 else 2507 if (a[j] > 0) 2508 m2 = m_[j], a2 = m_[j] + a[j]; 2509 else 2510 a2 = m_[j], m2 = m_[j] - a[j]; 2511 m_[j] = m2; 2512 a[j] = a2; 2513 } 2514 } 2515 2516 // finish decoding the floors 2517 for (i=0; i < f.channels; ++i) { 2518 if (really_zero_channel[i]) { 2519 memset(f.channel_buffers[i], 0, (*f.channel_buffers[i]).sizeof * n2); 2520 } else { 2521 do_floor(f, map, i, n, f.channel_buffers[i], f.finalY[i], null); 2522 } 2523 } 2524 2525 // INVERSE MDCT 2526 for (i=0; i < f.channels; ++i) 2527 inverse_mdct(f.channel_buffers[i], n, f, m.blockflag); 2528 2529 // this shouldn't be necessary, unless we exited on an error 2530 // and want to flush to get to the next packet 2531 flush_packet(f); 2532 2533 if (f.first_decode) { 2534 // assume we start so first non-discarded sample is sample 0 2535 // this isn't to spec, but spec would require us to read ahead 2536 // and decode the size of all current frames--could be done, 2537 // but presumably it's not a commonly used feature 2538 f.current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around) 2539 // we might have to discard samples "from" the next frame too, 2540 // if we're lapping a large block then a small at the start? 2541 f.discard_samples_deferred = n - right_end; 2542 f.current_loc_valid = TRUE; 2543 f.first_decode = FALSE; 2544 } else if (f.discard_samples_deferred) { 2545 if (f.discard_samples_deferred >= right_start - left_start) { 2546 f.discard_samples_deferred -= (right_start - left_start); 2547 left_start = right_start; 2548 *p_left = left_start; 2549 } else { 2550 left_start += f.discard_samples_deferred; 2551 *p_left = left_start; 2552 f.discard_samples_deferred = 0; 2553 } 2554 } else if (f.previous_length == 0 && f.current_loc_valid) { 2555 // we're recovering from a seek... that means we're going to discard 2556 // the samples from this packet even though we know our position from 2557 // the last page header, so we need to update the position based on 2558 // the discarded samples here 2559 // but wait, the code below is going to add this in itself even 2560 // on a discard, so we don't need to do it here... 2561 } 2562 2563 // check if we have ogg information about the sample # for this packet 2564 if (f.last_seg_which == f.end_seg_with_known_loc) { 2565 // if we have a valid current loc, and this is final: 2566 if (f.current_loc_valid && (f.page_flag & PAGEFLAG_last_page)) { 2567 uint32 current_end = f.known_loc_for_packet; 2568 // then let's infer the size of the (probably) short final frame 2569 if (current_end < f.current_loc + (right_end-left_start)) { 2570 if (current_end < f.current_loc) { 2571 // negative truncation, that's impossible! 2572 *len = 0; 2573 } else { 2574 *len = current_end - f.current_loc; 2575 } 2576 *len += left_start; // this doesn't seem right, but has no ill effect on my test files 2577 if (*len > right_end) *len = right_end; // this should never happen 2578 f.current_loc += *len; 2579 return TRUE; 2580 } 2581 } 2582 // otherwise, just set our sample loc 2583 // guess that the ogg granule pos refers to the _middle_ of the 2584 // last frame? 2585 // set f.current_loc to the position of left_start 2586 f.current_loc = f.known_loc_for_packet - (n2-left_start); 2587 f.current_loc_valid = TRUE; 2588 } 2589 if (f.current_loc_valid) 2590 f.current_loc += (right_start - left_start); 2591 2592 if (f.alloc.alloc_buffer) 2593 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2594 *len = right_end; // ignore samples after the window goes to 0 2595 2596 return TRUE; 2597 } 2598 2599 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right) 2600 { 2601 int mode, left_end, right_end; 2602 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0; 2603 return vorbis_decode_packet_rest(f, len, f.mode_config.ptr + mode, *p_left, left_end, *p_right, right_end, p_left); 2604 } 2605 2606 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right) 2607 { 2608 int prev,i,j; 2609 // we use right&left (the start of the right- and left-window sin()-regions) 2610 // to determine how much to return, rather than inferring from the rules 2611 // (same result, clearer code); 'left' indicates where our sin() window 2612 // starts, therefore where the previous window's right edge starts, and 2613 // therefore where to start mixing from the previous buffer. 'right' 2614 // indicates where our sin() ending-window starts, therefore that's where 2615 // we start saving, and where our returned-data ends. 2616 2617 // mixin from previous window 2618 if (f.previous_length) { 2619 int i2, j2, n = f.previous_length; 2620 float *w = get_window(f, n); 2621 if (w == NULL) return 0; 2622 for (i2=0; i2 < f.channels; ++i2) { 2623 for (j2=0; j2 < n; ++j2) 2624 f.channel_buffers[i2][left+j2] = 2625 f.channel_buffers[i2][left+j2]*w[ j2] + 2626 f.previous_window[i2][ j2]*w[n-1-j2]; 2627 } 2628 } 2629 2630 prev = f.previous_length; 2631 2632 // last half of this data becomes previous window 2633 f.previous_length = len - right; 2634 2635 // @OPTIMIZE: could avoid this copy by double-buffering the 2636 // output (flipping previous_window with channel_buffers), but 2637 // then previous_window would have to be 2x as large, and 2638 // channel_buffers couldn't be temp mem (although they're NOT 2639 // currently temp mem, they could be (unless we want to level 2640 // performance by spreading out the computation)) 2641 for (i=0; i < f.channels; ++i) 2642 for (j=0; right+j < len; ++j) 2643 f.previous_window[i][j] = f.channel_buffers[i][right+j]; 2644 2645 if (!prev) 2646 // there was no previous packet, so this data isn't valid... 2647 // this isn't entirely true, only the would-have-overlapped data 2648 // isn't valid, but this seems to be what the spec requires 2649 return 0; 2650 2651 // truncate a short frame 2652 if (len < right) right = len; 2653 2654 f.samples_output += right-left; 2655 2656 return right - left; 2657 } 2658 2659 static int vorbis_pump_first_frame(stb_vorbis *f) 2660 { 2661 int len, right, left, res; 2662 res = vorbis_decode_packet(f, &len, &left, &right); 2663 if (res) 2664 vorbis_finish_frame(f, len, left, right); 2665 return res; 2666 } 2667 2668 2669 static int start_decoder(vorb *f) 2670 { 2671 uint8[6] header; 2672 uint8 x,y; 2673 int len,i,j,k, max_submaps = 0; 2674 int longest_floorlist=0; 2675 2676 // first page, first packet 2677 f.first_decode = TRUE; 2678 2679 if (!start_page(f)) return FALSE; 2680 // validate page flag 2681 if (!(f.page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page); 2682 if (f.page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page); 2683 if (f.page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page); 2684 // check for expected packet length 2685 if (f.segment_count != 1) return error(f, VORBIS_invalid_first_page); 2686 if (f.segments[0] != 30) { 2687 // check for the Ogg skeleton fishead identifying header to refine our error 2688 if (f.segments[0] == 64 && 2689 getn(f, header.ptr, 6) && 2690 header[0] == 'f' && 2691 header[1] == 'i' && 2692 header[2] == 's' && 2693 header[3] == 'h' && 2694 header[4] == 'e' && 2695 header[5] == 'a' && 2696 get8(f) == 'd' && 2697 get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported); 2698 else 2699 return error(f, VORBIS_invalid_first_page); 2700 } 2701 2702 // read packet 2703 // check packet header 2704 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page); 2705 if (!getn(f, header.ptr, 6)) return error(f, VORBIS_unexpected_eof); 2706 if (!vorbis_validate(header.ptr)) return error(f, VORBIS_invalid_first_page); 2707 // vorbis_version 2708 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page); 2709 f.channels = get8(f); if (!f.channels) return error(f, VORBIS_invalid_first_page); 2710 if (f.channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels); 2711 f.sample_rate = get32(f); if (!f.sample_rate) return error(f, VORBIS_invalid_first_page); 2712 get32(f); // bitrate_maximum 2713 get32(f); // bitrate_nominal 2714 get32(f); // bitrate_minimum 2715 x = get8(f); 2716 { 2717 int log0,log1; 2718 log0 = x & 15; 2719 log1 = x >> 4; 2720 f.blocksize_0 = 1 << log0; 2721 f.blocksize_1 = 1 << log1; 2722 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup); 2723 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup); 2724 if (log0 > log1) return error(f, VORBIS_invalid_setup); 2725 } 2726 2727 // framing_flag 2728 x = get8(f); 2729 if (!(x & 1)) return error(f, VORBIS_invalid_first_page); 2730 2731 // second packet! 2732 if (!start_page(f)) return FALSE; 2733 2734 if (!start_packet(f)) return FALSE; 2735 2736 if (!next_segment(f)) return FALSE; 2737 2738 if (get8_packet(f) != VORBIS_packet_comment) return error(f, VORBIS_invalid_setup); 2739 for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f); 2740 if (!vorbis_validate(header.ptr)) return error(f, VORBIS_invalid_setup); 2741 //file vendor 2742 len = get32_packet(f); 2743 f.vendor = cast(char*)setup_malloc(f, len+1); 2744 if (f.vendor == NULL) return error(f, VORBIS_outofmem); 2745 for(i=0; i < len; ++i) { 2746 f.vendor[i] = cast(char) get8_packet(f); 2747 } 2748 f.vendor[len] = cast(char)'\0'; 2749 //user comments 2750 f.comment_list_length = get32_packet(f); 2751 f.comment_list = null; 2752 if (f.comment_list_length > 0) 2753 { 2754 f.comment_list = cast(char**) setup_malloc(f, cast(int)( (char*).sizeof * (f.comment_list_length) )); 2755 if (f.comment_list == NULL) return error(f, VORBIS_outofmem); 2756 } 2757 2758 for(i=0; i < f.comment_list_length; ++i) { 2759 len = get32_packet(f); 2760 f.comment_list[i] = cast(char*)setup_malloc(f, len+1); 2761 if (f.comment_list[i] == null) return error(f, VORBIS_outofmem); 2762 2763 for(j=0; j < len; ++j) { 2764 f.comment_list[i][j] = cast(char) get8_packet(f); 2765 } 2766 f.comment_list[i][len] = cast(char)'\0'; 2767 } 2768 2769 // framing_flag 2770 x = cast(ubyte) get8_packet(f); 2771 if (!(x & 1)) 2772 return error(f, VORBIS_invalid_setup); 2773 2774 bool err; 2775 skip(f, f.bytes_in_seg, &err); 2776 if (err) 2777 return error(f, VORBIS_invalid_setup); 2778 f.bytes_in_seg = 0; 2779 2780 do { 2781 len = next_segment(f); 2782 skip(f, len, &err); 2783 if (err) 2784 return error(f, VORBIS_invalid_setup); 2785 f.bytes_in_seg = 0; 2786 } while (len); 2787 2788 // third packet! 2789 if (!start_packet(f)) return FALSE; 2790 2791 crc32_init(f); 2792 2793 if (get8_packet(f) != VORBIS_packet_setup) 2794 return error(f, VORBIS_invalid_setup); 2795 for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f); 2796 if (!vorbis_validate(header.ptr)) return error(f, VORBIS_invalid_setup); 2797 2798 // codebooks 2799 2800 f.codebook_count = get_bits(f,8) + 1; 2801 f.codebooks = cast(Codebook *) setup_malloc(f, cast(int)( (*f.codebooks).sizeof * f.codebook_count) ); 2802 if (f.codebooks == null) return error(f, VORBIS_outofmem); 2803 memset(f.codebooks, 0, (*f.codebooks).sizeof * f.codebook_count); 2804 for (i=0; i < f.codebook_count; ++i) { 2805 uint32 *values; 2806 int ordered, sorted_count; 2807 int total=0; 2808 uint8 *lengths; 2809 Codebook *c = f.codebooks+i; 2810 x = cast(ubyte) get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup); 2811 x = cast(ubyte) get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup); 2812 x = cast(ubyte) get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup); 2813 x = cast(ubyte) get_bits(f, 8); 2814 c.dimensions = (get_bits(f, 8)<<8) + x; 2815 x = cast(ubyte) get_bits(f, 8); 2816 y = cast(ubyte) get_bits(f, 8); 2817 c.entries = (get_bits(f, 8)<<16) + (y<<8) + x; 2818 ordered = get_bits(f,1); 2819 c.sparse = ordered ? 0 : cast(ubyte) get_bits(f,1); 2820 2821 if (c.dimensions == 0 && c.entries != 0) return error(f, VORBIS_invalid_setup); 2822 2823 if (c.sparse) 2824 lengths = cast(uint8 *) setup_temp_malloc(f, c.entries); 2825 else 2826 lengths = c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries); 2827 2828 if (!lengths) return error(f, VORBIS_outofmem); 2829 2830 if (ordered) { 2831 int current_entry = 0; 2832 int current_length = get_bits(f,5) + 1; 2833 while (current_entry < c.entries) { 2834 int limit = c.entries - current_entry; 2835 int n = get_bits(f, ilog(limit)); 2836 if (current_length >= 32) return error(f, VORBIS_invalid_setup); 2837 if (current_entry + n > cast(int) c.entries) { return error(f, VORBIS_invalid_setup); } 2838 memset(lengths + current_entry, current_length, n); 2839 current_entry += n; 2840 ++current_length; 2841 } 2842 } else { 2843 for (j=0; j < c.entries; ++j) { 2844 int present = c.sparse ? get_bits(f,1) : 1; 2845 if (present) { 2846 lengths[j] = cast(ubyte) (get_bits(f, 5) + 1); 2847 ++total; 2848 if (lengths[j] == 32) 2849 return error(f, VORBIS_invalid_setup); 2850 } else { 2851 lengths[j] = NO_CODE; 2852 } 2853 } 2854 } 2855 2856 if (c.sparse && total >= c.entries >> 2) { 2857 // convert sparse items to non-sparse! 2858 if (c.entries > cast(int) f.setup_temp_memory_required) 2859 f.setup_temp_memory_required = c.entries; 2860 2861 c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries); 2862 if (c.codeword_lengths == null) return error(f, VORBIS_outofmem); 2863 memcpy(c.codeword_lengths, lengths, c.entries); 2864 setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs! 2865 lengths = c.codeword_lengths; 2866 c.sparse = 0; 2867 } 2868 2869 // compute the size of the sorted tables 2870 if (c.sparse) { 2871 sorted_count = total; 2872 } else { 2873 sorted_count = 0; 2874 for (j=0; j < c.entries; ++j) 2875 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) 2876 ++sorted_count; 2877 } 2878 2879 c.sorted_entries = sorted_count; 2880 values = null; 2881 2882 if (!c.sparse) { 2883 c.codewords = cast(uint32 *) setup_malloc(f, cast(int)( (c.codewords[0]).sizeof * c.entries ) ); 2884 if (!c.codewords) return error(f, VORBIS_outofmem); 2885 } else { 2886 uint size; 2887 if (c.sorted_entries) { 2888 c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.sorted_entries); 2889 if (!c.codeword_lengths) return error(f, VORBIS_outofmem); 2890 c.codewords = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*c.codewords).sizeof * c.sorted_entries )); 2891 if (!c.codewords) return error(f, VORBIS_outofmem); 2892 values = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*values).sizeof * c.sorted_entries ) ); 2893 if (!values) return error(f, VORBIS_outofmem); 2894 } 2895 size = cast(uint)( c.entries + ((*c.codewords).sizeof + (*values).sizeof) * c.sorted_entries ); 2896 if (size > f.setup_temp_memory_required) 2897 f.setup_temp_memory_required = size; 2898 } 2899 2900 if (!compute_codewords(c, lengths, c.entries, values)) { 2901 if (c.sparse) setup_temp_free(f, values, 0); 2902 return error(f, VORBIS_invalid_setup); 2903 } 2904 2905 if (c.sorted_entries) { 2906 // allocate an extra slot for sentinels 2907 c.sorted_codewords = cast(uint32 *) setup_malloc(f, cast(int)( (*c.sorted_codewords).sizeof * (c.sorted_entries+1) )); 2908 if (c.sorted_codewords == null) return error(f, VORBIS_outofmem); 2909 // allocate an extra slot at the front so that c.sorted_values[-1] is defined 2910 // so that we can catch that case without an extra if 2911 c.sorted_values = cast( int *) setup_malloc(f, cast(int)( (*c.sorted_values ).sizeof * (c.sorted_entries+1) ) ); 2912 if (c.sorted_values == null) return error(f, VORBIS_outofmem); 2913 ++c.sorted_values; 2914 c.sorted_values[-1] = -1; 2915 compute_sorted_huffman(c, lengths, values); 2916 } 2917 2918 if (c.sparse) { 2919 setup_temp_free(f, values, (*values).sizeof*c.sorted_entries); 2920 setup_temp_free(f, c.codewords, (*c.codewords).sizeof*c.sorted_entries); 2921 setup_temp_free(f, lengths, c.entries); 2922 c.codewords = null; 2923 } 2924 2925 compute_accelerated_huffman(c); 2926 2927 c.lookup_type = cast(ubyte) get_bits(f, 4); 2928 if (c.lookup_type > 2) return error(f, VORBIS_invalid_setup); 2929 if (c.lookup_type > 0) { 2930 uint16 *mults; 2931 c.minimum_value = float32_unpack(get_bits(f, 32)); 2932 c.delta_value = float32_unpack(get_bits(f, 32)); 2933 c.value_bits = cast(ubyte) (get_bits(f, 4)+1); 2934 c.sequence_p = cast(ubyte) get_bits(f,1); 2935 if (c.lookup_type == 1) { 2936 int values2 = lookup1_values(c.entries, c.dimensions); 2937 if (values2 < 0) return error(f, VORBIS_invalid_setup); 2938 c.lookup_values = cast(uint32) values2; 2939 } else { 2940 c.lookup_values = c.entries * c.dimensions; 2941 } 2942 if (c.lookup_values == 0) return error(f, VORBIS_invalid_setup); 2943 mults = cast(uint16 *) setup_temp_malloc(f, cast(int)( (mults[0]).sizeof * c.lookup_values)); 2944 if (mults == null) return error(f, VORBIS_outofmem); 2945 for (j=0; j < cast(int) c.lookup_values; ++j) { 2946 int q = get_bits(f, c.value_bits); 2947 if (q == EOP) 2948 { 2949 setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); 2950 return error(f, VORBIS_invalid_setup); 2951 } 2952 mults[j] = cast(ushort)q; 2953 } 2954 2955 if (c.lookup_type == 1) { 2956 int len2, sparse = c.sparse; 2957 float last=0; 2958 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop 2959 if (sparse) { 2960 if (c.sorted_entries == 0) goto skip; 2961 c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.sorted_entries * c.dimensions); 2962 } else 2963 c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.entries * c.dimensions); 2964 if (c.multiplicands == null) { setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); } 2965 len2 = sparse ? c.sorted_entries : c.entries; 2966 for (j=0; j < len2; ++j) { 2967 uint z = sparse ? c.sorted_values[j] : j; 2968 uint div=1; 2969 for (k=0; k < c.dimensions; ++k) { 2970 int off = (z / div) % c.lookup_values; 2971 float val = mults[off]*c.delta_value + c.minimum_value + last; 2972 c.multiplicands[j*c.dimensions + k] = val; 2973 if (c.sequence_p) 2974 last = val; 2975 if (k+1 < c.dimensions) { 2976 if (div > uint.max / cast(uint) c.lookup_values) { 2977 setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values); 2978 return error(f, VORBIS_invalid_setup); 2979 } 2980 div *= c.lookup_values; 2981 } 2982 } 2983 } 2984 c.lookup_type = 2; 2985 } 2986 else 2987 { 2988 float last=0; 2989 c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.lookup_values); 2990 if (c.multiplicands == null) { setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); } 2991 for (j=0; j < cast(int) c.lookup_values; ++j) { 2992 float val = mults[j] * c.delta_value + c.minimum_value + last; 2993 c.multiplicands[j] = val; 2994 if (c.sequence_p) 2995 last = val; 2996 } 2997 } 2998 skip:; 2999 setup_temp_free(f, mults, (mults[0]).sizeof * c.lookup_values); 3000 3001 } 3002 } 3003 3004 // time domain transfers (notused) 3005 3006 x = cast(ubyte)(get_bits(f, 6) + 1); 3007 for (i=0; i < x; ++i) { 3008 uint32 z = get_bits(f, 16); 3009 if (z != 0) return error(f, VORBIS_invalid_setup); 3010 } 3011 3012 // Floors 3013 f.floor_count = get_bits(f, 6)+1; 3014 f.floor_config = cast(Floor *) setup_malloc(f, f.floor_count * (*f.floor_config).sizeof); 3015 if (f.floor_config == null) return error(f, VORBIS_outofmem); 3016 for (i=0; i < f.floor_count; ++i) { 3017 f.floor_types[i] = cast(ushort) get_bits(f, 16); 3018 if (f.floor_types[i] > 1) return error(f, VORBIS_invalid_setup); 3019 if (f.floor_types[i] == 0) { 3020 Floor0 *g = &f.floor_config[i].floor0; 3021 g.order = cast(ubyte) get_bits(f,8); 3022 g.rate = cast(ushort) get_bits(f,16); 3023 g.bark_map_size = cast(ushort) get_bits(f,16); 3024 g.amplitude_bits = cast(ubyte) get_bits(f,6); 3025 g.amplitude_offset = cast(ubyte) get_bits(f,8); 3026 g.number_of_books = cast(ubyte)(get_bits(f,4) + 1); 3027 for (j=0; j < g.number_of_books; ++j) 3028 g.book_list[j] = cast(ubyte) get_bits(f,8); 3029 return error(f, VORBIS_feature_not_supported); 3030 } else { 3031 stbv__floor_ordering[31*8+2] p; 3032 Floor1 *g = &f.floor_config[i].floor1; 3033 int max_class = -1; 3034 g.partitions = cast(ubyte) get_bits(f, 5); 3035 for (j=0; j < g.partitions; ++j) { 3036 g.partition_class_list[j] = cast(ubyte) get_bits(f, 4); 3037 if (g.partition_class_list[j] > max_class) 3038 max_class = g.partition_class_list[j]; 3039 } 3040 for (j=0; j <= max_class; ++j) { 3041 g.class_dimensions[j] = cast(ubyte)(get_bits(f, 3)+1); 3042 g.class_subclasses[j] = cast(ubyte)get_bits(f, 2); 3043 if (g.class_subclasses[j]) { 3044 g.class_masterbooks[j] = cast(ubyte) get_bits(f, 8); 3045 if (g.class_masterbooks[j] >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3046 } 3047 for (k=0; k < 1 << g.class_subclasses[j]; ++k) { 3048 g.subclass_books[j][k] = cast(int16) (get_bits(f,8)-1); 3049 if (g.subclass_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3050 } 3051 } 3052 g.floor1_multiplier = cast(ubyte)(get_bits(f,2)+1); 3053 g.rangebits = cast(ubyte) get_bits(f,4); 3054 g.Xlist[0] = 0; 3055 g.Xlist[1] = cast(ushort)(1 << g.rangebits); 3056 g.values = 2; 3057 for (j=0; j < g.partitions; ++j) { 3058 int c = g.partition_class_list[j]; 3059 for (k=0; k < g.class_dimensions[c]; ++k) { 3060 g.Xlist[g.values] = cast(ushort) get_bits(f, g.rangebits); 3061 ++g.values; 3062 } 3063 } 3064 // precompute the sorting 3065 for (j=0; j < g.values; ++j) { 3066 p[j].x = g.Xlist[j]; 3067 p[j].id = cast(ushort) j; 3068 } 3069 qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare_2); 3070 for (j=0; j < g.values-1; ++j) 3071 if (p[j].x == p[j+1].x) 3072 return error(f, VORBIS_invalid_setup); 3073 for (j=0; j < g.values; ++j) 3074 g.sorted_order[j] = cast(uint8) p[j].id; 3075 // precompute the neighbors 3076 for (j=2; j < g.values; ++j) { 3077 int low = 0,hi = 0; 3078 neighbors(g.Xlist.ptr, j, &low,&hi); 3079 g.neighbors[j][0] = cast(ubyte)low; 3080 g.neighbors[j][1] = cast(ubyte)hi; 3081 } 3082 3083 if (g.values > longest_floorlist) 3084 longest_floorlist = g.values; 3085 } 3086 } 3087 3088 // Residue 3089 f.residue_count = get_bits(f, 6)+1; 3090 f.residue_config = cast(Residue *) setup_malloc(f, f.residue_count * (f.residue_config[0]).sizeof); 3091 if (f.residue_config == null) return error(f, VORBIS_outofmem); 3092 memset(f.residue_config, 0, f.residue_count * (f.residue_config[0]).sizeof); 3093 for (i=0; i < f.residue_count; ++i) { 3094 uint8[64] residue_cascade; 3095 Residue *r = f.residue_config+i; 3096 f.residue_types[i] = cast(ushort) get_bits(f, 16); 3097 if (f.residue_types[i] > 2) return error(f, VORBIS_invalid_setup); 3098 r.begin = get_bits(f, 24); 3099 r.end = get_bits(f, 24); 3100 if (r.end < r.begin) return error(f, VORBIS_invalid_setup); 3101 r.part_size = get_bits(f,24)+1; 3102 r.classifications = cast(ubyte)(get_bits(f,6)+1); 3103 r.classbook = cast(ubyte) get_bits(f,8); 3104 if (r.classbook >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3105 for (j=0; j < r.classifications; ++j) { 3106 uint8 high_bits=0; 3107 uint8 low_bits = cast(ubyte) get_bits(f,3); 3108 if (get_bits(f,1)) 3109 high_bits = cast(ubyte) get_bits(f,5); 3110 residue_cascade[j] = cast(ubyte)(high_bits*8 + low_bits); 3111 } 3112 r.residue_books = cast(int16[8]*) setup_malloc(f, (r.residue_books[0]).sizeof * r.classifications); 3113 if (r.residue_books == null) return error(f, VORBIS_outofmem); 3114 for (j=0; j < r.classifications; ++j) { 3115 for (k=0; k < 8; ++k) { 3116 if (residue_cascade[j] & (1 << k)) { 3117 r.residue_books[j][k] = cast(short) get_bits(f, 8); 3118 if (r.residue_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3119 } else { 3120 r.residue_books[j][k] = -1; 3121 } 3122 } 3123 } 3124 // precompute the classifications[] array to avoid inner-loop mod/divide 3125 // call it 'classdata' since we already have r.classifications 3126 r.classdata = cast(uint8 **) setup_malloc(f, (*r.classdata).sizeof * f.codebooks[r.classbook].entries); 3127 if (!r.classdata) return error(f, VORBIS_outofmem); 3128 memset(r.classdata, 0, (*r.classdata).sizeof * f.codebooks[r.classbook].entries); 3129 for (j=0; j < f.codebooks[r.classbook].entries; ++j) { 3130 int classwords = f.codebooks[r.classbook].dimensions; 3131 int temp = j; 3132 r.classdata[j] = cast(uint8 *) setup_malloc(f, (r.classdata[j][0]).sizeof * classwords); 3133 if (r.classdata[j] == null) return error(f, VORBIS_outofmem); 3134 for (k=classwords-1; k >= 0; --k) { 3135 r.classdata[j][k] = cast(ubyte)(temp % r.classifications); 3136 temp /= r.classifications; 3137 } 3138 } 3139 } 3140 3141 f.mapping_count = get_bits(f,6)+1; 3142 f.mapping = cast(Mapping *) setup_malloc(f, f.mapping_count * (*f.mapping).sizeof); 3143 if (f.mapping == null) return error(f, VORBIS_outofmem); 3144 memset(f.mapping, 0, f.mapping_count * (*f.mapping).sizeof); 3145 for (i=0; i < f.mapping_count; ++i) { 3146 Mapping *m = f.mapping + i; 3147 int mapping_type = get_bits(f,16); 3148 if (mapping_type != 0) return error(f, VORBIS_invalid_setup); 3149 m.chan = cast(MappingChannel *) setup_malloc(f, f.channels * (*m.chan).sizeof); 3150 if (m.chan == null) return error(f, VORBIS_outofmem); 3151 if (get_bits(f,1)) 3152 m.submaps = cast(ubyte)(get_bits(f,4)+1); 3153 else 3154 m.submaps = 1; 3155 if (m.submaps > max_submaps) 3156 max_submaps = m.submaps; 3157 if (get_bits(f,1)) { 3158 m.coupling_steps = cast(ushort)(get_bits(f,8)+1); 3159 if (m.coupling_steps > f.channels) return error(f, VORBIS_invalid_setup); 3160 for (k=0; k < m.coupling_steps; ++k) { 3161 m.chan[k].magnitude = cast(ubyte) get_bits(f, ilog(f.channels-1)); 3162 m.chan[k].angle = cast(ubyte) get_bits(f, ilog(f.channels-1)); 3163 if (m.chan[k].magnitude >= f.channels) return error(f, VORBIS_invalid_setup); 3164 if (m.chan[k].angle >= f.channels) return error(f, VORBIS_invalid_setup); 3165 if (m.chan[k].magnitude == m.chan[k].angle) return error(f, VORBIS_invalid_setup); 3166 } 3167 } else 3168 m.coupling_steps = 0; 3169 3170 // reserved field 3171 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup); 3172 if (m.submaps > 1) { 3173 for (j=0; j < f.channels; ++j) { 3174 m.chan[j].mux = cast(ubyte) get_bits(f, 4); 3175 if (m.chan[j].mux >= m.submaps) return error(f, VORBIS_invalid_setup); 3176 } 3177 } else 3178 // @SPECIFICATION: this case is missing from the spec 3179 for (j=0; j < f.channels; ++j) 3180 m.chan[j].mux = 0; 3181 3182 for (j=0; j < m.submaps; ++j) { 3183 get_bits(f,8); // discard 3184 m.submap_floor[j] = cast(ubyte) get_bits(f,8); 3185 m.submap_residue[j] = cast(ubyte) get_bits(f,8); 3186 if (m.submap_floor[j] >= f.floor_count) return error(f, VORBIS_invalid_setup); 3187 if (m.submap_residue[j] >= f.residue_count) return error(f, VORBIS_invalid_setup); 3188 } 3189 } 3190 3191 // Modes 3192 f.mode_count = get_bits(f, 6)+1; 3193 for (i=0; i < f.mode_count; ++i) { 3194 Mode *m = f.mode_config.ptr+i; 3195 m.blockflag = cast(ubyte) get_bits(f,1); 3196 m.windowtype = cast(ushort) get_bits(f,16); 3197 m.transformtype = cast(ushort) get_bits(f,16); 3198 m.mapping = cast(ubyte) get_bits(f,8); 3199 if (m.windowtype != 0) return error(f, VORBIS_invalid_setup); 3200 if (m.transformtype != 0) return error(f, VORBIS_invalid_setup); 3201 if (m.mapping >= f.mapping_count) return error(f, VORBIS_invalid_setup); 3202 } 3203 3204 flush_packet(f); 3205 3206 f.previous_length = 0; 3207 3208 for (i=0; i < f.channels; ++i) { 3209 f.channel_buffers[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1); 3210 f.previous_window[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1/2); 3211 f.finalY[i] = cast(int16 *) setup_malloc(f, (int16).sizeof * longest_floorlist); 3212 if (f.channel_buffers[i] == null || f.previous_window[i] == null || f.finalY[i] == null) return error(f, VORBIS_outofmem); 3213 memset(f.channel_buffers[i], 0, (float).sizeof * f.blocksize_1); 3214 } 3215 3216 if (!init_blocksize(f, 0, f.blocksize_0)) return FALSE; 3217 if (!init_blocksize(f, 1, f.blocksize_1)) return FALSE; 3218 f.blocksize[0] = f.blocksize_0; 3219 f.blocksize[1] = f.blocksize_1; 3220 3221 // compute how much temporary memory is needed 3222 3223 // 1. 3224 { 3225 uint32 imdct_mem = cast(uint)( (f.blocksize_1 * (float.sizeof) >> 1)); 3226 uint32 classify_mem; 3227 int i2,max_part_read=0; 3228 for (i2=0; i2 < f.residue_count; ++i2) { 3229 Residue *r = f.residue_config + i2; 3230 uint actual_size = f.blocksize_1 / 2; 3231 uint limit_r_begin = r.begin < actual_size ? r.begin : actual_size; 3232 uint limit_r_end = r.end < actual_size ? r.end : actual_size; 3233 int n_read = limit_r_end - limit_r_begin; 3234 int part_read = n_read / r.part_size; 3235 if (part_read > max_part_read) 3236 max_part_read = part_read; 3237 } 3238 classify_mem = cast(uint)( f.channels * ((void*).sizeof + max_part_read * (uint8 *).sizeof)); 3239 // maximum reasonable partition size is f.blocksize_1 3240 3241 f.temp_memory_required = classify_mem; 3242 if (imdct_mem > f.temp_memory_required) 3243 f.temp_memory_required = imdct_mem; 3244 } 3245 3246 3247 if (f.alloc.alloc_buffer) { 3248 assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes); 3249 // check if there's enough temp memory so we don't error later 3250 if (f.setup_offset + (*f).sizeof + f.temp_memory_required > cast(uint) f.temp_offset) 3251 return error(f, VORBIS_outofmem); 3252 } 3253 3254 // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page 3255 // without PAGEFLAG_continued_packet, so this either points to the first page, or 3256 // the page after the end of the headers. It might be cleaner to point to a page 3257 // in the middle of the headers, when that's the page where the first audio packet 3258 // starts, but we'd have to also correctly skip the end of any continued packet in 3259 // stb_vorbis_seek_start. 3260 if (f.next_seg == -1) { 3261 f.first_audio_page_offset = stb_vorbis_get_file_offset(f); 3262 } else { 3263 f.first_audio_page_offset = 0; 3264 } 3265 3266 return TRUE; 3267 } 3268 3269 static void vorbis_deinit(stb_vorbis *p) 3270 { 3271 int i,j; 3272 3273 setup_free(p, p.vendor); 3274 for (i=0; i < p.comment_list_length; ++i) { 3275 setup_free(p, p.comment_list[i]); 3276 } 3277 setup_free(p, p.comment_list); 3278 3279 if (p.residue_config) { 3280 for (i=0; i < p.residue_count; ++i) { 3281 Residue *r = p.residue_config+i; 3282 if (r.classdata) { 3283 for (j=0; j < p.codebooks[r.classbook].entries; ++j) 3284 setup_free(p, r.classdata[j]); 3285 setup_free(p, r.classdata); 3286 } 3287 setup_free(p, r.residue_books); 3288 } 3289 } 3290 3291 if (p.codebooks) { 3292 for (i=0; i < p.codebook_count; ++i) { 3293 Codebook *c = p.codebooks + i; 3294 setup_free(p, c.codeword_lengths); 3295 setup_free(p, c.multiplicands); 3296 setup_free(p, c.codewords); 3297 setup_free(p, c.sorted_codewords); 3298 // c.sorted_values[-1] is the first entry in the array 3299 setup_free(p, c.sorted_values ? c.sorted_values-1 : null); 3300 } 3301 setup_free(p, p.codebooks); 3302 } 3303 setup_free(p, p.floor_config); 3304 setup_free(p, p.residue_config); 3305 if (p.mapping) { 3306 for (i=0; i < p.mapping_count; ++i) 3307 setup_free(p, p.mapping[i].chan); 3308 setup_free(p, p.mapping); 3309 } 3310 for (i=0; i < p.channels && i < STB_VORBIS_MAX_CHANNELS; ++i) { 3311 setup_free(p, p.channel_buffers[i]); 3312 setup_free(p, p.previous_window[i]); 3313 setup_free(p, p.finalY[i]); 3314 } 3315 for (i=0; i < 2; ++i) { 3316 setup_free(p, p.A[i]); 3317 setup_free(p, p.B[i]); 3318 setup_free(p, p.C[i]); 3319 setup_free(p, p.window[i]); 3320 setup_free(p, p.bit_reverse[i]); 3321 } 3322 } 3323 3324 void stb_vorbis_close(stb_vorbis *p) 3325 { 3326 vorbis_deinit(p); 3327 setup_free(p,p); 3328 } 3329 3330 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z) 3331 { 3332 memset(p, 0, (*p).sizeof); // NULL out all malloc'd pointers to start 3333 if (z) { 3334 p.alloc = *z; 3335 p.alloc.alloc_buffer_length_in_bytes &= ~7; 3336 p.temp_offset = p.alloc.alloc_buffer_length_in_bytes; 3337 } 3338 p.eof = 0; 3339 p.error = VORBIS__no_error; 3340 p.codebooks = null; 3341 } 3342 3343 long stb_vorbis_get_sample_offset(stb_vorbis *f) 3344 { 3345 // Note: this is rewritten vs original stb_vorbis, who wasn't returning something good. 3346 if (f.current_loc_valid) 3347 { 3348 return f.current_sample_loc; 3349 } 3350 else 3351 return -1; 3352 } 3353 3354 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f) 3355 { 3356 stb_vorbis_info d; 3357 d.channels = f.channels; 3358 d.sample_rate = f.sample_rate; 3359 d.setup_memory_required = f.setup_memory_required; 3360 d.setup_temp_memory_required = f.setup_temp_memory_required; 3361 d.temp_memory_required = f.temp_memory_required; 3362 d.max_frame_size = f.blocksize_1 >> 1; 3363 return d; 3364 } 3365 3366 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f) 3367 { 3368 stb_vorbis_comment d; 3369 d.vendor = f.vendor; 3370 d.comment_list_length = f.comment_list_length; 3371 d.comment_list = f.comment_list; 3372 return d; 3373 } 3374 3375 int stb_vorbis_get_error(stb_vorbis *f) 3376 { 3377 int e = f.error; 3378 f.error = VORBIS__no_error; 3379 return e; 3380 } 3381 3382 static stb_vorbis * vorbis_alloc(stb_vorbis *f) 3383 { 3384 stb_vorbis *p = cast(stb_vorbis *) setup_malloc(f, stb_vorbis.sizeof); 3385 return p; 3386 } 3387 3388 uint stb_vorbis_get_file_offset(stb_vorbis *f) 3389 { 3390 return cast(uint) f._io.tell(f._userData); 3391 } 3392 3393 // 3394 // DATA-PULLING API 3395 // 3396 3397 uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last) 3398 { 3399 for(;;) { 3400 int n; 3401 if (f.eof) return 0; 3402 n = get8(f); 3403 if (n == 0x4f) { // page header candidate 3404 uint retry_loc = stb_vorbis_get_file_offset(f); 3405 int i; 3406 // check if we're off the end of a file_section stream 3407 if (retry_loc - 25 > f.stream_len) 3408 return 0; 3409 // check the rest of the header 3410 for (i=1; i < 4; ++i) 3411 if (get8(f) != ogg_page_header[i]) 3412 break; 3413 if (f.eof) return 0; 3414 if (i == 4) { 3415 uint8[27] header; 3416 uint32 i2, crc, goal, len; 3417 for (i2=0; i2 < 4; ++i2) 3418 header[i2] = ogg_page_header[i2]; 3419 for (; i2 < 27; ++i2) 3420 header[i2] = get8(f); 3421 if (f.eof) return 0; 3422 if (header[4] != 0) goto invalid; 3423 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (cast(uint32)header[25]<<24); 3424 for (i2=22; i2 < 26; ++i2) 3425 header[i2] = 0; 3426 crc = 0; 3427 for (i2=0; i2 < 27; ++i2) 3428 crc = crc32_update(f, crc, header[i2]); 3429 len = 0; 3430 for (i2=0; i2 < header[26]; ++i2) { 3431 int s = get8(f); 3432 crc = crc32_update(f, crc,cast(ubyte)s); 3433 len += s; 3434 } 3435 if (len && f.eof) return 0; 3436 for (i2=0; i2 < len; ++i2) 3437 crc = crc32_update(f, crc, get8(f)); 3438 // finished parsing probable page 3439 if (crc == goal) { 3440 // we could now check that it's either got the last 3441 // page flag set, OR it's followed by the capture 3442 // pattern, but I guess TECHNICALLY you could have 3443 // a file with garbage between each ogg page and recover 3444 // from it automatically? So even though that paranoia 3445 // might decrease the chance of an invalid decode by 3446 // another 2^32, not worth it since it would hose those 3447 // invalid-but-useful files? 3448 if (end) 3449 *end = stb_vorbis_get_file_offset(f); 3450 if (last) { 3451 if (header[5] & 0x04) 3452 *last = 1; 3453 else 3454 *last = 0; 3455 } 3456 set_file_offset(f, retry_loc-1); 3457 return 1; 3458 } 3459 } 3460 invalid: 3461 // not a valid page, so rewind and look for next one 3462 set_file_offset(f, retry_loc); 3463 } 3464 } 3465 assert(false); 3466 } 3467 3468 3469 enum SAMPLE_unknown = 0xffffffff; 3470 3471 // seeking is implemented with a binary search, which narrows down the range to 3472 // 64K, before using a linear search (because finding the synchronization 3473 // pattern can be expensive, and the chance we'd find the end page again is 3474 // relatively high for small ranges) 3475 // 3476 // two initial interpolation-style probes are used at the start of the search 3477 // to try to bound either side of the binary search sensibly, while still 3478 // working in O(log n) time if they fail. 3479 3480 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z) 3481 { 3482 uint8[27] header; 3483 uint8[255] lacing; 3484 int i,len; 3485 3486 // record where the page starts 3487 z.page_start = stb_vorbis_get_file_offset(f); 3488 3489 // parse the header 3490 getn(f, header.ptr, 27); 3491 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S') 3492 return 0; 3493 getn(f, lacing.ptr, header[26]); 3494 3495 // determine the length of the payload 3496 len = 0; 3497 for (i=0; i < header[26]; ++i) 3498 len += lacing[i]; 3499 3500 // this implies where the page ends 3501 z.page_end = z.page_start + 27 + header[26] + len; 3502 3503 // read the last-decoded sample out of the data 3504 z.last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24); 3505 3506 // restore file state to where we were 3507 set_file_offset(f, z.page_start); 3508 return 1; 3509 } 3510 3511 // rarely used function to seek back to the preceding page while finding the 3512 // start of a packet 3513 static int go_to_page_before(stb_vorbis *f, uint limit_offset) 3514 { 3515 uint previous_safe, end; 3516 3517 // now we want to seek back 64K from the limit 3518 if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset) 3519 previous_safe = limit_offset - 65536; 3520 else 3521 previous_safe = f.first_audio_page_offset; 3522 3523 set_file_offset(f, previous_safe); 3524 3525 while (vorbis_find_page(f, &end, null)) { 3526 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset) 3527 return 1; 3528 set_file_offset(f, end); 3529 } 3530 3531 return 0; 3532 } 3533 3534 // implements the search logic for finding a page and starting decoding. if 3535 // the function succeeds, current_loc_valid will be true and current_loc will 3536 // be less than or equal to the provided sample number (the closer the 3537 // better). 3538 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) 3539 { 3540 ProbedPage left, right, mid; 3541 int i, start_seg_with_known_loc, end_pos, page_start; 3542 uint32 delta, stream_length, padding, last_sample_limit; 3543 double offset = 0.0, bytes_per_sample = 0.0; 3544 int probe = 0; 3545 3546 // find the last page and validate the target sample 3547 stream_length = stb_vorbis_stream_length_in_samples(f); 3548 if (stream_length == 0) return error(f, VORBIS_seek_without_length); 3549 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid); 3550 3551 // this is the maximum difference between the window-center (which is the 3552 // actual granule position value), and the right-start (which the spec 3553 // indicates should be the granule position (give or take one)). 3554 padding = ((f.blocksize_1 - f.blocksize_0) >> 2); 3555 if (sample_number < padding) 3556 last_sample_limit = 0; 3557 else 3558 last_sample_limit = sample_number - padding; 3559 3560 left = f.p_first; 3561 while (left.last_decoded_sample == ~0U) { 3562 // (untested) the first page does not have a 'last_decoded_sample' 3563 set_file_offset(f, left.page_end); 3564 if (!get_seek_page_info(f, &left)) goto error; 3565 } 3566 3567 right = f.p_last; 3568 assert(right.last_decoded_sample != ~0U); 3569 3570 // starting from the start is handled differently 3571 if (last_sample_limit <= left.last_decoded_sample) { 3572 if (stb_vorbis_seek_start(f)) { 3573 if (f.current_loc > sample_number) 3574 return error(f, VORBIS_seek_failed); 3575 return 1; 3576 } 3577 return 0; 3578 } 3579 3580 while (left.page_end != right.page_start) { 3581 assert(left.page_end < right.page_start); 3582 // search range in bytes 3583 delta = right.page_start - left.page_end; 3584 if (delta <= 65536) { 3585 // there's only 64K left to search - handle it linearly 3586 set_file_offset(f, left.page_end); 3587 } else { 3588 if (probe < 2) { 3589 if (probe == 0) { 3590 // first probe (interpolate) 3591 double data_bytes = right.page_end - left.page_start; 3592 bytes_per_sample = data_bytes / right.last_decoded_sample; 3593 offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample); 3594 } else { 3595 // second probe (try to bound the other side) 3596 double error = (cast(double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample; 3597 if (error >= 0 && error < 8000) error = 8000; 3598 if (error < 0 && error > -8000) error = -8000; 3599 offset += error * 2; 3600 } 3601 3602 // ensure the offset is valid 3603 if (offset < left.page_end) 3604 offset = left.page_end; 3605 if (offset > right.page_start - 65536) 3606 offset = right.page_start - 65536; 3607 3608 set_file_offset(f, cast(uint) offset); 3609 } else { 3610 // binary search for large ranges (offset by 32K to ensure 3611 // we don't hit the right page) 3612 set_file_offset(f, left.page_end + (delta / 2) - 32768); 3613 } 3614 3615 if (!vorbis_find_page(f, null, null)) goto error; 3616 } 3617 3618 for (;;) { 3619 if (!get_seek_page_info(f, &mid)) goto error; 3620 if (mid.last_decoded_sample != ~0U) break; 3621 // (untested) no frames end on this page 3622 set_file_offset(f, mid.page_end); 3623 assert(mid.page_start < right.page_start); 3624 } 3625 3626 // if we've just found the last page again then we're in a tricky file, 3627 // and we're close enough (if it wasn't an interpolation probe). 3628 if (mid.page_start == right.page_start) { 3629 if (probe >= 2 || delta <= 65536) 3630 break; 3631 } else { 3632 if (last_sample_limit < mid.last_decoded_sample) 3633 right = mid; 3634 else 3635 left = mid; 3636 } 3637 3638 ++probe; 3639 } 3640 3641 // seek back to start of the last packet 3642 page_start = left.page_start; 3643 set_file_offset(f, page_start); 3644 if (!start_page(f)) return error(f, VORBIS_seek_failed); 3645 end_pos = f.end_seg_with_known_loc; 3646 assert(end_pos >= 0); 3647 3648 for (;;) { 3649 for (i = end_pos; i > 0; --i) 3650 if (f.segments[i-1] != 255) 3651 break; 3652 3653 start_seg_with_known_loc = i; 3654 3655 if (start_seg_with_known_loc > 0 || !(f.page_flag & PAGEFLAG_continued_packet)) 3656 break; 3657 3658 // (untested) the final packet begins on an earlier page 3659 if (!go_to_page_before(f, page_start)) 3660 goto error; 3661 3662 page_start = stb_vorbis_get_file_offset(f); 3663 if (!start_page(f)) goto error; 3664 end_pos = f.segment_count - 1; 3665 } 3666 3667 // prepare to start decoding 3668 f.current_loc_valid = FALSE; 3669 f.last_seg = FALSE; 3670 f.valid_bits = 0; 3671 f.packet_bytes = 0; 3672 f.bytes_in_seg = 0; 3673 f.previous_length = 0; 3674 f.next_seg = start_seg_with_known_loc; 3675 3676 for (i = 0; i < start_seg_with_known_loc; i++) 3677 { 3678 bool err; 3679 skip(f, f.segments[i], &err); 3680 if (err) 3681 return error(f, VORBIS_seek_failed); 3682 } 3683 3684 // start decoding (optimizable - this frame is generally discarded) 3685 if (!vorbis_pump_first_frame(f)) 3686 return 0; 3687 if (f.current_loc > sample_number) 3688 return error(f, VORBIS_seek_failed); 3689 return 1; 3690 3691 error: 3692 // try to restore the file to a valid state 3693 stb_vorbis_seek_start(f); 3694 return error(f, VORBIS_seek_failed); 3695 } 3696 3697 // the same as vorbis_decode_initial, but without advancing 3698 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) 3699 { 3700 int bits_read, bytes_read; 3701 3702 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) 3703 return 0; 3704 3705 // either 1 or 2 bytes were read, figure out which so we can rewind 3706 bits_read = 1 + ilog(f.mode_count-1); 3707 if (f.mode_config[*mode].blockflag) 3708 bits_read += 2; 3709 bytes_read = (bits_read + 7) / 8; 3710 3711 f.bytes_in_seg += bytes_read; 3712 f.packet_bytes -= bytes_read; 3713 bool err; 3714 skip(f, -bytes_read, &err); 3715 if (err) 3716 return 0; 3717 3718 if (f.next_seg == -1) 3719 f.next_seg = f.segment_count - 1; 3720 else 3721 f.next_seg--; 3722 f.valid_bits = 0; 3723 3724 return 1; 3725 } 3726 3727 int stb_vorbis_seek_frame(stb_vorbis *f, uint sample_number) 3728 { 3729 uint32 max_frame_samples; 3730 3731 // fast page-level search 3732 if (!seek_to_sample_coarse(f, sample_number)) 3733 return 0; 3734 3735 assert(f.current_loc_valid); 3736 assert(f.current_loc <= sample_number); 3737 3738 // linear search for the relevant packet 3739 max_frame_samples = (f.blocksize_1*3 - f.blocksize_0) >> 2; 3740 while (f.current_loc < sample_number) { 3741 int left_start, left_end, right_start, right_end, mode, frame_samples; 3742 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode)) 3743 return error(f, VORBIS_seek_failed); 3744 // calculate the number of samples returned by the next frame 3745 frame_samples = right_start - left_start; 3746 if (f.current_loc + frame_samples > sample_number) { 3747 return 1; // the next frame will contain the sample 3748 } else if (f.current_loc + frame_samples + max_frame_samples > sample_number) { 3749 // there's a chance the frame after this could contain the sample 3750 vorbis_pump_first_frame(f); 3751 } else { 3752 // this frame is too early to be relevant 3753 f.current_loc += frame_samples; 3754 f.previous_length = 0; 3755 maybe_start_packet(f); 3756 flush_packet(f); 3757 } 3758 } 3759 // the next frame should start with the sample 3760 if (f.current_loc != sample_number) return error(f, VORBIS_seek_failed); 3761 return 1; 3762 } 3763 3764 int stb_vorbis_seek(stb_vorbis *f, long sample_number) 3765 { 3766 if (sample_number < 0) 3767 return 0; 3768 3769 if (sample_number > uint.max) 3770 return 0; 3771 3772 if (!stb_vorbis_seek_frame(f, cast(uint)sample_number)) 3773 return 0; 3774 3775 if (sample_number != f.current_loc) { 3776 int n; 3777 uint32 frame_start = f.current_loc; 3778 stb_vorbis_get_frame_float(f, &n, null); 3779 assert(sample_number > frame_start); 3780 assert(f.channel_buffer_start + cast(int) (sample_number-frame_start) <= f.channel_buffer_end); 3781 f.channel_buffer_start += (sample_number - frame_start); 3782 } 3783 f.current_sample_loc = sample_number; 3784 3785 return 1; 3786 } 3787 3788 int stb_vorbis_seek_start(stb_vorbis *f) 3789 { 3790 set_file_offset(f, f.first_audio_page_offset); 3791 f.previous_length = 0; 3792 f.first_decode = TRUE; 3793 f.next_seg = -1; 3794 return vorbis_pump_first_frame(f); 3795 } 3796 3797 uint stb_vorbis_stream_length_in_samples(stb_vorbis *f) 3798 { 3799 uint restore_offset, previous_safe; 3800 uint end, last_page_loc; 3801 3802 if (!f.total_samples) { 3803 uint last; 3804 uint32 lo,hi; 3805 char[6] header; 3806 3807 // first, store the current decode position so we can restore it 3808 restore_offset = stb_vorbis_get_file_offset(f); 3809 3810 // now we want to seek back 64K from the end (the last page must 3811 // be at most a little less than 64K, but let's allow a little slop) 3812 if (f.stream_len >= 65536 && f.stream_len-65536 >= f.first_audio_page_offset) 3813 previous_safe = f.stream_len - 65536; 3814 else 3815 previous_safe = f.first_audio_page_offset; 3816 3817 set_file_offset(f, previous_safe); 3818 // previous_safe is now our candidate 'earliest known place that seeking 3819 // to will lead to the final page' 3820 3821 if (!vorbis_find_page(f, &end, &last)) { 3822 // if we can't find a page, we're hosed! 3823 f.error = VORBIS_cant_find_last_page; 3824 f.total_samples = 0xffffffff; 3825 goto done; 3826 } 3827 3828 // check if there are more pages 3829 last_page_loc = stb_vorbis_get_file_offset(f); 3830 3831 // stop when the last_page flag is set, not when we reach eof; 3832 // this allows us to stop short of a 'file_section' end without 3833 // explicitly checking the length of the section 3834 while (!last) { 3835 set_file_offset(f, end); 3836 if (!vorbis_find_page(f, &end, &last)) { 3837 // the last page we found didn't have the 'last page' flag 3838 // set. whoops! 3839 break; 3840 } 3841 //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging 3842 last_page_loc = stb_vorbis_get_file_offset(f); 3843 } 3844 3845 set_file_offset(f, last_page_loc); 3846 3847 // parse the header 3848 getn(f, cast(ubyte*)header.ptr, 6); 3849 // extract the absolute granule position 3850 lo = get32(f); 3851 hi = get32(f); 3852 if (lo == 0xffffffff && hi == 0xffffffff) { 3853 f.error = VORBIS_cant_find_last_page; 3854 f.total_samples = SAMPLE_unknown; 3855 goto done; 3856 } 3857 if (hi) 3858 lo = 0xfffffffe; // saturate 3859 f.total_samples = lo; 3860 3861 f.p_last.page_start = last_page_loc; 3862 f.p_last.page_end = end; 3863 f.p_last.last_decoded_sample = lo; 3864 3865 done: 3866 set_file_offset(f, restore_offset); 3867 } 3868 return f.total_samples == SAMPLE_unknown ? 0 : f.total_samples; 3869 } 3870 3871 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f) 3872 { 3873 return stb_vorbis_stream_length_in_samples(f) / cast(float) f.sample_rate; 3874 } 3875 3876 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output) 3877 { 3878 int len, right,left,i; 3879 3880 if (!vorbis_decode_packet(f, &len, &left, &right)) { 3881 f.channel_buffer_start = f.channel_buffer_end = 0; 3882 return 0; 3883 } 3884 3885 len = vorbis_finish_frame(f, len, left, right); 3886 for (i=0; i < f.channels; ++i) 3887 f.outputs[i] = f.channel_buffers[i] + left; 3888 3889 f.channel_buffer_start = left; 3890 f.channel_buffer_end = left+len; 3891 3892 if (channels) *channels = f.channels; 3893 if (output) *output = f.outputs.ptr; 3894 return len; 3895 } 3896 3897 stb_vorbis* stb_vorbis_open_file_section(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc, uint length) 3898 { 3899 stb_vorbis *f; 3900 stb_vorbis p; 3901 vorbis_init(&p, alloc); 3902 p._io = io; 3903 p._userData = userData; 3904 p.stream_len = length; 3905 if (start_decoder(&p)) 3906 { 3907 f = vorbis_alloc(&p); 3908 if (f) 3909 { 3910 *f = p; 3911 vorbis_pump_first_frame(f); 3912 return f; 3913 } 3914 } 3915 if (error) *error = p.error; 3916 vorbis_deinit(&p); 3917 return null; 3918 } 3919 3920 stb_vorbis* stb_vorbis_open_file(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc) 3921 { 3922 uint len = cast(uint) io.getFileLength(userData); 3923 return stb_vorbis_open_file_section(io, userData, error, alloc, len); 3924 } 3925 3926 3927 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats) 3928 { 3929 float **outputs; 3930 int len = num_floats / channels; 3931 int n=0; 3932 int z = f.channels; 3933 if (z > channels) z = channels; 3934 while (n < len) { 3935 int i,j; 3936 int k = f.channel_buffer_end - f.channel_buffer_start; 3937 if (n+k >= len) k = len - n; 3938 for (j=0; j < k; ++j) { 3939 for (i=0; i < z; ++i) 3940 *buffer++ = f.channel_buffers[i][f.channel_buffer_start+j]; 3941 for ( ; i < channels; ++i) 3942 *buffer++ = 0; 3943 } 3944 n += k; 3945 f.channel_buffer_start += k; 3946 if (n == len) 3947 break; 3948 if (!stb_vorbis_get_frame_float(f, null, &outputs)) 3949 break; 3950 } 3951 return n; 3952 } 3953 3954 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples) 3955 { 3956 float **outputs; 3957 int n=0; 3958 int z = f.channels; 3959 if (z > channels) z = channels; 3960 while (n < num_samples) { 3961 int i; 3962 int k = f.channel_buffer_end - f.channel_buffer_start; 3963 if (n+k >= num_samples) k = num_samples - n; 3964 if (k) { 3965 for (i=0; i < z; ++i) 3966 memcpy(buffer[i]+n, f.channel_buffers[i]+f.channel_buffer_start, (float.sizeof)*k); 3967 for ( ; i < channels; ++i) 3968 memset(buffer[i]+n, 0, (float.sizeof) * k); 3969 } 3970 n += k; 3971 f.channel_buffer_start += k; 3972 if (n == num_samples) 3973 break; 3974 if (!stb_vorbis_get_frame_float(f, null, &outputs)) 3975 break; 3976 } 3977 return n; 3978 } 3979 3980 /* Version history 3981 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223 3982 found with Mayhem by ForAllSecure 3983 1.16 - 2019-03-04 - fix warnings 3984 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found 3985 1.14 - 2018-02-11 - delete bogus dealloca usage 3986 1.13 - 2018-01-29 - fix truncation of last frame (hopefully) 3987 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files 3988 1.11 - 2017-07-23 - fix MinGW compilation 3989 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory 3990 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version 3991 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks; 3992 avoid discarding last frame of audio data 3993 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API 3994 some more crash fixes when out of memory or with corrupt files 3995 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson) 3996 some crash fixes when out of memory or with corrupt files 3997 1.05 - 2015-04-19 - don't define __forceinline if it's redundant 3998 1.04 - 2014-08-27 - fix missing const-correct case in API 3999 1.03 - 2014-08-07 - Warning fixes 4000 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows 4001 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float 4002 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel 4003 (API change) report sample rate for decode-full-file funcs 4004 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila 4005 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem 4006 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence 4007 0.99993 - remove assert that fired on legal files with empty tables 4008 0.99992 - rewind-to-start 4009 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo 4010 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++ 4011 0.9998 - add a full-decode function with a memory source 4012 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition 4013 0.9996 - query length of vorbis stream in samples/seconds 4014 0.9995 - bugfix to another optimization that only happened in certain files 4015 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors 4016 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation 4017 0.9992 - performance improvement of IMDCT; now performs close to reference implementation 4018 0.9991 - performance improvement of IMDCT 4019 0.999 - (should have been 0.9990) performance improvement of IMDCT 4020 0.998 - no-CRT support from Casey Muratori 4021 0.997 - bugfixes for bugs found by Terje Mathisen 4022 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen 4023 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen 4024 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen 4025 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen 4026 0.992 - fixes for MinGW warning 4027 0.991 - turn fast-float-conversion on by default 4028 0.990 - fix push-mode seek recovery if you seek into the headers 4029 0.98b - fix to bad release of 0.98 4030 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode 4031 0.97 - builds under c++ (typecasting, don't use 'class' keyword) 4032 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code 4033 0.95 - clamping code for 16-bit functions 4034 0.94 - not publically released 4035 0.93 - fixed all-zero-floor case (was decoding garbage) 4036 0.92 - fixed a memory leak 4037 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION 4038 0.90 - first public release 4039 */ 4040 4041 /* 4042 ------------------------------------------------------------------------------ 4043 This software is available under 2 licenses -- choose whichever you prefer. 4044 ------------------------------------------------------------------------------ 4045 ALTERNATIVE A - MIT License 4046 Copyright (c) 2017 Sean Barrett 4047 Permission is hereby granted, free of charge, to any person obtaining a copy of 4048 this software and associated documentation files (the "Software"), to deal in 4049 the Software without restriction, including without limitation the rights to 4050 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 4051 of the Software, and to permit persons to whom the Software is furnished to do 4052 so, subject to the following conditions: 4053 The above copyright notice and this permission notice shall be included in all 4054 copies or substantial portions of the Software. 4055 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 4056 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 4057 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 4058 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 4059 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 4060 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 4061 SOFTWARE. 4062 ------------------------------------------------------------------------------ 4063 ALTERNATIVE B - Public Domain (www.unlicense.org) 4064 This is free and unencumbered software released into the public domain. 4065 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 4066 software, either in source code form or as a compiled binary, for any purpose, 4067 commercial or non-commercial, and by any means. 4068 In jurisdictions that recognize copyright laws, the author or authors of this 4069 software dedicate any and all copyright interest in the software to the public 4070 domain. We make this dedication for the benefit of the public at large and to 4071 the detriment of our heirs and successors. We intend this dedication to be an 4072 overt act of relinquishment in perpetuity of all present and future rights to 4073 this software under copyright law. 4074 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 4075 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 4076 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 4077 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 4078 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 4079 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 4080 ------------------------------------------------------------------------------ 4081 */