The OpenD Programming Language

1 module audioformats.stb_vorbis2;
2 
3 // Ogg Vorbis audio decoder - v1.22 - public domain
4 // http://nothings.org/stb_vorbis/
5 //
6 // Original version written by Sean Barrett in 2007.
7 //
8 // Originally sponsored by RAD Game Tools. Seeking implementation
9 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
10 // Elias Software, Aras Pranckevicius, and Sean Barrett.
11 //
12 // Translated to D by Guillaume Piolat.
13 //
14 // LICENSE
15 //
16 //   See end of file for license information.
17 //
18 // Limitations:
19 //
20 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
21 //   - lossless sample-truncation at beginning ignored
22 //   - cannot concatenate multiple vorbis streams
23 //   - sample positions are 32-bit, limiting seekable 192Khz
24 //       files to around 6 hours (Ogg supports 64-bit)
25 //
26 // Feature contributors:
27 //    Dougall Johnson (sample-exact seeking)
28 //
29 // Bugfix/warning contributors:
30 //    Terje Mathisen     Niklas Frykholm     Andy Hill
31 //    Casey Muratori     John Bolton         Gargaj
32 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
33 //    Bernhard Wodo      Evan Balster        github:alxprd
34 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
35 //    Phillip Bennefall  Rohit               Thiago Goulart
36 //    github:manxorist   Saga Musix          github:infatum
37 //    Timur Gagiev       Maxwell Koo         Peter Waller
38 //    github:audinowho   Dougall Johnson     David Reid
39 //    github:Clownacy    Pedro J. Estebanez  Remi Verschelde
40 //    AnthoFoxo          github:morlat       Gabriel Ravier
41 //
42 // Partial history:
43 //    1.22d   - 2021-10-31 - translated to D to update the one in audio-formats, and perhaps get seeking.
44 //    1.22    - 2021-07-11 - various small fixes
45 //    1.21    - 2021-07-02 - fix bug for files with no comments
46 //    1.20    - 2020-07-11 - several small fixes
47 //    1.19    - 2020-02-05 - warnings
48 //    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
49 //    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
50 //    1.16    - 2019-03-04 - fix warnings
51 //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
52 //    1.14    - 2018-02-11 - delete bogus dealloca usage
53 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
54 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
55 //    1.11    - 2017-07-23 - fix MinGW compilation
56 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
57 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
58 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
59 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
60 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
61 //                           some crash fixes when out of memory or with corrupt files
62 //                           fix some inappropriately signed shifts
63 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
64 //    1.04    - 2014-08-27 - fix missing const-correct case in API
65 //    1.03    - 2014-08-07 - warning fixes
66 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
67 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
68 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
69 //                           (API change) report sample rate for decode-full-file funcs
70 //
71 // See end of file for full version history.
72 //
73 // Notes about the D translation: removed push data API.
74 
75 
76 ///////////   THREAD SAFETY
77 
78 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
79 // them from multiple threads at the same time. However, you can have multiple
80 // stb_vorbis* handles and decode from them independently in multiple thrads.
81 
82 
83 ///////////   MEMORY ALLOCATION
84 
85 // normally stb_vorbis uses malloc() to allocate memory at startup,
86 // and alloca() to allocate temporary memory during a frame on the
87 // stack. (Memory consumption will depend on the amount of setup
88 // data in the file and how you set the compile flags for speed
89 // vs. size. In my test files the maximal-size usage is ~150KB.)
90 //
91 // You can modify the wrapper functions in the source (setup_malloc,
92 // setup_temp_malloc, temp_malloc) to change this behavior, or you
93 // can use a simpler allocation model: you pass in a buffer from
94 // which stb_vorbis will allocate _all_ its memory (including the
95 // temp memory). "open" may fail with a VORBIS_outofmem if you
96 // do not pass in enough data; there is no way to determine how
97 // much you do need except to succeed (at which point you can
98 // query get_info to find the exact amount required. yes I know
99 // this is lame).
100 //
101 // If you pass in a non-NULL buffer of the type below, allocation
102 // will occur from it as described above. Otherwise just pass NULL
103 // to use malloc()/alloca()
104 
105 import core.stdc.stdlib: malloc, free, qsort, abs;
106 import core.stdc.string: memset, memcmp, memcpy;
107 import std.math: ldexp, sin, cos, floor, exp, log, pow;
108 import audioformats.io;
109 
110 nothrow:
111 @nogc:
112 
113 struct stb_vorbis_alloc
114 {
115    ubyte* alloc_buffer;
116    int   alloc_buffer_length_in_bytes;
117 }
118 
119 
120 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
121 
122 struct stb_vorbis_info
123 {
124    uint sample_rate;
125    int channels;
126 
127    uint setup_memory_required;
128    uint setup_temp_memory_required;
129    uint temp_memory_required;
130 
131    int max_frame_size;
132 }
133 
134 struct stb_vorbis_comment
135 {
136    char *vendor;
137 
138    int comment_list_length;
139    char **comment_list;
140 }
141 
142 ////////   ERROR CODES
143 
144 alias STBVorbisError = int;
145 enum : STBVorbisError
146 {
147    VORBIS__no_error,
148 
149    VORBIS_need_more_data=1,             // not a real error
150 
151    VORBIS_invalid_api_mixing,           // can't mix API modes
152    VORBIS_outofmem,                     // not enough memory
153    VORBIS_feature_not_supported,        // uses floor 0
154    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
155    VORBIS_file_open_failure,            // fopen() failed
156    VORBIS_seek_without_length,          // can't seek in unknown-length file
157 
158    VORBIS_unexpected_eof=10,            // file is truncated?
159    VORBIS_seek_invalid,                 // seek past EOF
160 
161    // decoding errors (corrupt/invalid stream) -- you probably
162    // don't care about the exact details of these
163 
164    // vorbis errors:
165    VORBIS_invalid_setup=20,
166    VORBIS_invalid_stream,
167 
168    // ogg errors:
169    VORBIS_missing_capture_pattern=30,
170    VORBIS_invalid_stream_structure_version,
171    VORBIS_continued_packet_flag_invalid,
172    VORBIS_incorrect_stream_serial_number,
173    VORBIS_invalid_first_page,
174    VORBIS_bad_packet_type,
175    VORBIS_cant_find_last_page,
176    VORBIS_seek_failed,
177    VORBIS_ogg_skeleton_not_supported
178 }
179 
180 //
181 //  HEADER ENDS HERE
182 //
183 //////////////////////////////////////////////////////////////////////////////
184 
185 // global configuration settings (e.g. set these in the project/makefile),
186 // or just set them in this file at the top (although ideally the first few
187 // should be visible when the header file is compiled too, although it's not
188 // crucial)
189 
190 // STB_VORBIS_NO_STDIO
191 //     does not compile the code for the APIs that use FILE *s internally
192 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
193 // #define STB_VORBIS_NO_STDIO
194 
195 // STB_VORBIS_NO_INTEGER_CONVERSION
196 //     does not compile the code for converting audio sample data from
197 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
198 // #define STB_VORBIS_NO_INTEGER_CONVERSION
199 
200 // STB_VORBIS_NO_FAST_SCALED_FLOAT
201 //      does not use a fast float-to-int trick to accelerate float-to-int on
202 //      most platforms which requires endianness be defined correctly.
203 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
204 
205 
206 // STB_VORBIS_MAX_CHANNELS [number]
207 //     globally define this to the maximum number of channels you need.
208 //     The spec does not put a restriction on channels except that
209 //     the count is stored in a byte, so 255 is the hard limit.
210 //     Reducing this saves about 16 bytes per value, so using 16 saves
211 //     (255-16)*16 or around 4KB. Plus anything other memory usage
212 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
213 //     6 (5.1 audio), or 2 (stereo only).
214 enum STB_VORBIS_MAX_CHANNELS = 16;
215 
216 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
217 //     sets the log size of the huffman-acceleration table.  Maximum
218 //     supported value is 24. with larger numbers, more decodings are O(1),
219 //     but the table size is larger so worse cache missing, so you'll have
220 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
221 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
222 
223 // STB_VORBIS_FAST_BINARY_LENGTH [number]
224 //     sets the log size of the binary-search acceleration table. this
225 //     is used in similar fashion to the fast-huffman size to set initial
226 //     parameters for the binary search
227 
228 // STB_VORBIS_FAST_HUFFMAN_INT
229 //     The fast huffman tables are much more efficient if they can be
230 //     stored as 16-bit results instead of 32-bit results. This restricts
231 //     the codebooks to having only 65535 possible outcomes, though.
232 //     (At least, accelerated by the huffman table.)
233 enum STB_VORBIS_FAST_HUFFMAN_SHORT = true;
234 
235 enum STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH = false;
236 
237 enum STB_VORBIS_DIVIDES_IN_CODEBOOK = false; // cannot be true, support for this not translated
238 enum STB_VORBIS_DIVIDES_IN_RESIDUE = false;
239 
240 enum STB_VORBIS_DIVIDE_TABLE = false;
241 enum STB_VORBIS_NO_DEFER_FLOOR = false; // STB_VORBIS_NO_DEFER_FLOOR not defined
242 enum STB_VORBIS_NO_CRT = false;
243 enum STB_VORBIS_NO_STDIO = false;
244 enum STB_VORBIS_NO_INTEGER_CONVERSION = true;
245 enum STB_VORBIS_NO_FAST_SCALED_FLOAT = true;
246 
247 static assert(STB_VORBIS_MAX_CHANNELS <= 256);
248 static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24);
249 
250 enum MAX_BLOCKSIZE_LOG = 13; // from specification
251 enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG);
252 
253 alias uint8 = ubyte;
254 alias int8 = byte;
255 alias uint16 = ushort;
256 alias int16 = short;
257 alias uint32 = uint;
258 alias int32 = int;
259 
260 enum TRUE = 1;
261 enum FALSE = 0;
262 enum void* NULL = null;
263 
264 alias codetype = float;
265 
266 // @NOTE
267 //
268 // Some arrays below are tagged "//varies", which means it's actually
269 // a variable-sized piece of data, but rather than malloc I assume it's
270 // small enough it's better to just allocate it all together with the
271 // main thing
272 //
273 // Most of the variables are specified with the smallest size I could pack
274 // them into. It might give better performance to make them all full-sized
275 // integers. It should be safe to freely rearrange the structures or change
276 // the sizes larger--nothing relies on silently truncating etc., nor the
277 // order of variables.
278 
279 enum FAST_HUFFMAN_TABLE_SIZE = (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH);
280 enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE - 1);
281 
282 struct Codebook
283 {
284    int dimensions, entries;
285    uint8 *codeword_lengths;
286    float  minimum_value;
287    float  delta_value;
288    uint8  value_bits;
289    uint8  lookup_type;
290    uint8  sequence_p;
291    uint8  sparse;
292    uint32 lookup_values;
293    codetype *multiplicands;
294    uint32 *codewords;
295    int16[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
296    uint32 *sorted_codewords;
297    int    *sorted_values;
298    int     sorted_entries;
299 } ;
300 
301 struct Floor0
302 {
303    uint8 order;
304    uint16 rate;
305    uint16 bark_map_size;
306    uint8 amplitude_bits;
307    uint8 amplitude_offset;
308    uint8 number_of_books;
309    uint8[16] book_list; // varies
310 }
311 
312 struct Floor1
313 {
314    uint8 partitions;
315    uint8[32] partition_class_list; // varies
316    uint8[16] class_dimensions; // varies
317    uint8[16] class_subclasses; // varies
318    uint8[16] class_masterbooks; // varies
319    int16[8][16] subclass_books; // varies
320    uint16[31*8+2] Xlist; // varies
321    uint8[31*8+2] sorted_order;
322    uint8[2][31*8+2] neighbors;
323    uint8 floor1_multiplier;
324    uint8 rangebits;
325    int values;
326 }
327 
328 union Floor
329 {
330    Floor0 floor0;
331    Floor1 floor1;
332 }
333 
334 struct Residue
335 {
336    uint32 begin, end;
337    uint32 part_size;
338    uint8 classifications;
339    uint8 classbook;
340    uint8 **classdata;
341    int16[8]*residue_books;
342 } ;
343 
344 struct MappingChannel
345 {
346    uint8 magnitude;
347    uint8 angle;
348    uint8 mux;
349 } ;
350 
351 struct Mapping
352 {
353    uint16 coupling_steps;
354    MappingChannel *chan;
355    uint8  submaps;
356    uint8[15]  submap_floor; // varies
357    uint8[15]  submap_residue; // varies
358 } ;
359 
360 struct Mode
361 {
362    uint8 blockflag;
363    uint8 mapping;
364    uint16 windowtype;
365    uint16 transformtype;
366 }
367 
368 struct CRCscan
369 {
370    uint32  goal_crc;    // expected crc if match
371    int     bytes_left;  // bytes left in packet
372    uint32  crc_so_far;  // running crc
373    int     bytes_done;  // bytes processed in _current_ chunk
374    uint32  sample_loc;  // granule pos encoded in page
375 }
376 
377 struct ProbedPage
378 {
379    uint32 page_start, page_end;
380    uint32 last_decoded_sample;
381 }
382 
383 struct stb_vorbis
384 {
385   // user-accessible info
386    uint sample_rate;
387    int channels;
388 
389    uint setup_memory_required;
390    uint temp_memory_required;
391    uint setup_temp_memory_required;
392 
393    char *vendor;
394    int comment_list_length;
395    char **comment_list;
396 
397   // input config
398    IOCallbacks* _io;
399    void* _userData;
400 
401    /*uint8 *stream;
402    uint8 *stream_start;
403    uint8 *stream_end;*/
404 
405    uint32 stream_len;
406 
407    // the page to seek to when seeking to start, may be zero
408    uint32 first_audio_page_offset;
409 
410    // p_first is the page on which the first audio packet ends
411    // (but not necessarily the page on which it starts)
412    ProbedPage p_first, p_last;
413 
414   // memory management
415    stb_vorbis_alloc alloc;
416    int setup_offset;
417    int temp_offset;
418 
419   // run-time results
420    int eof;
421    STBVorbisError error;
422 
423   // user-useful data
424 
425   // header info
426    int[2] blocksize;
427    int blocksize_0, blocksize_1;
428    int codebook_count;
429    Codebook *codebooks;
430    int floor_count;
431    uint16[64] floor_types; // varies
432    Floor *floor_config;
433    int residue_count;
434    uint16[64] residue_types; // varies
435    Residue *residue_config;
436    int mapping_count;
437    Mapping *mapping;
438    int mode_count;
439    Mode[64] mode_config;  // varies
440 
441    uint32 total_samples;
442 
443   // decode buffer
444    float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
445    float*[STB_VORBIS_MAX_CHANNELS] outputs        ;
446 
447    float*[STB_VORBIS_MAX_CHANNELS] previous_window;
448    int previous_length;
449 
450    int16*[STB_VORBIS_MAX_CHANNELS] finalY;
451 
452    long current_sample_loc; // sample location of next sample to decode
453 
454    
455    uint32 current_loc; // sample location of next frame to decode
456    int    current_loc_valid;
457 
458   // per-blocksize precomputed data
459 
460    // twiddle factors
461    float*[2] A, B, C;
462    float*[2] window;
463    uint16*[2] bit_reverse;
464 
465   // current page/packet/segment streaming info
466    uint32 serial; // stream serial number for verification
467    int last_page;
468    int segment_count;
469    uint8[255] segments;
470    uint8 page_flag;
471    uint8 bytes_in_seg;
472    uint8 first_decode;
473    int next_seg;
474    int last_seg;  // flag that we're on the last segment
475    int last_seg_which; // what was the segment number of the last seg?
476    uint32 acc;
477    int valid_bits;
478    int packet_bytes;
479    int end_seg_with_known_loc;
480    uint32 known_loc_for_packet;
481    int discard_samples_deferred;
482    uint32 samples_output;
483 
484   // sample-access
485    int channel_buffer_start;
486    int channel_buffer_end;
487 
488    uint32[256] crc_table; // was a global, moved here
489 };
490 
491 alias vorb = stb_vorbis;
492 
493 static int error(vorb *f, STBVorbisError e)
494 {
495    f.error = e;
496    if (!f.eof && e != VORBIS_need_more_data) {
497       f.error=e; // breakpoint for debugging
498    }
499    return 0;
500 }
501 
502 
503 // these functions are used for allocating temporary memory
504 // while decoding. if you can afford the stack space, use
505 // alloca(); otherwise, provide a temp buffer and it will
506 // allocate out of those.
507 
508 size_t array_size_required(size_t count, size_t size)
509 {
510     return (count * ((void *).sizeof + size));
511 }
512 
513 void* temp_alloc(vorb *f, size_t size)
514 {
515     assert(f.alloc.alloc_buffer);
516     return setup_temp_malloc(f, cast(int)size); // note: never use alloca
517 }
518 
519 void temp_free(vorb *f, void* p)
520 {
521 }
522 
523 int temp_alloc_save(vorb *f)
524 {
525     return f.temp_offset;
526 }
527 
528 void temp_alloc_restore(vorb *f, int p)
529 {
530     f.temp_offset = p;
531 }
532 
533 void* temp_block_array(vorb *f, size_t count, size_t size)
534 {
535     return make_block_array(temp_alloc(f, cast(int) array_size_required(count,size)), cast(int) count, cast(int)size);
536 }
537 
538 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
539 static void *make_block_array(void *mem, int count, int size)
540 {
541    int i;
542    void ** p = cast(void **) mem;
543    char *q = cast(char *) (p + count);
544    for (i=0; i < count; ++i) {
545       p[i] = q;
546       q += size;
547    }
548    return p;
549 }
550 
551 void *setup_malloc(vorb *f, size_t sz)
552 {
553     return setup_malloc(f, cast(int)sz);
554 }
555 
556 void *setup_malloc(vorb *f, int sz)
557 {
558    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
559    f.setup_memory_required += sz;
560    if (f.alloc.alloc_buffer) {
561       void *p = cast(char *) f.alloc.alloc_buffer + f.setup_offset;
562       if (f.setup_offset + sz > f.temp_offset) return NULL;
563       f.setup_offset += sz;
564       return p;
565    }
566    return sz ? malloc(sz) : NULL;
567 }
568 
569 static void setup_free(vorb *f, void *p)
570 {
571    if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack
572    free(p);
573 }
574 
575 void *setup_temp_malloc(vorb *f, int sz)
576 {
577    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
578    if (f.alloc.alloc_buffer) {
579       if (f.temp_offset - sz < f.setup_offset) 
580           return NULL;
581       f.temp_offset -= sz;
582       return cast(char *) f.alloc.alloc_buffer + f.temp_offset;
583    }
584    return malloc(sz);
585 }
586 
587 void setup_temp_free(vorb *f, void *p, size_t sz)
588 {
589    if (f.alloc.alloc_buffer) 
590    {
591       f.temp_offset += (sz+7)&~7;
592       return;
593    }
594    free(p);
595 }
596 
597 enum uint CRC32_POLY = 0x04c11db7;   // from spec
598 
599 void crc32_init(vorb* f)
600 {
601    int i,j;
602    uint32 s;
603    for(i=0; i < 256; i++) {
604       for (s=cast(uint32) i << 24, j=0; j < 8; ++j)
605          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
606       f.crc_table[i] = s;
607    }
608 }
609 
610 uint32 crc32_update(vorb* f, uint32 crc, uint8 byte_)
611 {
612    return (crc << 8) ^ f.crc_table[byte_ ^ (crc >> 24)];
613 }
614 
615 
616 // used in setup, and for huffman that doesn't go fast path
617 static uint bit_reverse(uint n)
618 {
619   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
620   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
621   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
622   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
623   return (n >> 16) | (n << 16);
624 }
625 
626 static float square(float x)
627 {
628    return x*x;
629 }
630 
631 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
632 // as required by the specification. fast(?) implementation from stb.h
633 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
634 static int ilog(int32 n)
635 {
636    static immutable byte[16] log2_4 = [ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 ];
637 
638    if (n < 0) return 0; // signed n returns 0
639 
640    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
641    if (n < (1 << 14))
642         if (n < (1 <<  4))            return  0 + log2_4[n      ];
643         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
644              else                     return 10 + log2_4[n >> 10];
645    else if (n < (1 << 24))
646              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
647              else                     return 20 + log2_4[n >> 20];
648         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
649              else                     return 30 + log2_4[n >> 30];
650 }
651 
652 enum M_PI = 3.14159265358979323846264f;  // from CRC
653 
654 // code length assigned to a value with no huffman encoding
655 enum NO_CODE = 255;
656 
657 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
658 //
659 // these functions are only called at setup, and only a few times
660 // per file
661 
662 static float float32_unpack(uint32 x)
663 {
664    // from the specification
665    uint32 mantissa = x & 0x1fffff;
666    uint32 sign = x & 0x80000000;
667    uint32 exp = (x & 0x7fe00000) >> 21;
668    double res = sign ? -cast(double)mantissa : cast(double)mantissa;
669    return cast(float) ldexp(cast(float)res, cast(int)exp-788);
670 }
671 
672 
673 // zlib & jpeg huffman tables assume that the output symbols
674 // can either be arbitrarily arranged, or have monotonically
675 // increasing frequencies--they rely on the lengths being sorted;
676 // this makes for a very simple generation algorithm.
677 // vorbis allows a huffman table with non-sorted lengths. This
678 // requires a more sophisticated construction, since symbols in
679 // order do not map to huffman codes "in order".
680 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
681 {
682    if (!c.sparse) {
683       c.codewords      [symbol] = huff_code;
684    } else {
685       c.codewords       [count] = huff_code;
686       c.codeword_lengths[count] = cast(ubyte)len;
687       values             [count] = symbol;
688    }
689 }
690 
691 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
692 {
693    int i,k,m=0;
694    uint32[32] available;
695 
696    memset(available.ptr, 0, 32);
697    // find the first entry
698    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
699    if (k == n) { assert(c.sorted_entries == 0); return TRUE; }
700    assert(len[k] < 32); // no error return required, code reading lens checks this
701    // add to the list
702    add_entry(c, 0, k, m++, len[k], values);
703    // add all available leaves
704    for (i=1; i <= len[k]; ++i)
705       available[i] = 1U << (32-i);
706    // note that the above code treats the first case specially,
707    // but it's really the same as the following code, so they
708    // could probably be combined (except the initial code is 0,
709    // and I use 0 in available[] to mean 'empty')
710    for (i=k+1; i < n; ++i) {
711       uint32 res;
712       int z = len[i], y;
713       if (z == NO_CODE) continue;
714       assert(z < 32); // no error return required, code reading lens checks this
715       // find lowest available leaf (should always be earliest,
716       // which is what the specification calls for)
717       // note that this property, and the fact we can never have
718       // more than one free leaf at a given level, isn't totally
719       // trivial to prove, but it seems true and the assert never
720       // fires, so!
721       while (z > 0 && !available[z]) --z;
722       if (z == 0) { return FALSE; }
723       res = available[z];
724       available[z] = 0;
725       add_entry(c, bit_reverse(res), i, m++, len[i], values);
726       // propagate availability up the tree
727       if (z != len[i]) {
728          for (y=len[i]; y > z; --y) {
729             assert(available[y] == 0);
730             available[y] = res + (1 << (32-y));
731          }
732       }
733    }
734    return TRUE;
735 }
736 
737 // accelerated huffman table allows fast O(1) match of all symbols
738 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
739 static void compute_accelerated_huffman(Codebook *c)
740 {
741    int i, len;
742    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
743       c.fast_huffman[i] = -1;
744 
745    len = c.sparse ? c.sorted_entries : c.entries;
746    if (len > 32767) len = 32767; // largest possible value we can encode!
747    for (i=0; i < len; ++i) {
748       if (c.codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
749          uint32 z = c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i];
750          // set table entries for all bit combinations in the higher bits
751          while (z < FAST_HUFFMAN_TABLE_SIZE) {
752              c.fast_huffman[z] = cast(short)i;
753              z += 1 << c.codeword_lengths[i];
754          }
755       }
756    }
757 }
758 
759 extern(C) int uint32_compare_2(const void *p, const void *q)
760 {
761    uint32 x = *cast(uint32 *) p;
762    uint32 y = *cast(uint32 *) q;
763    return x < y ? -1 : x > y;
764 }
765 
766 static int include_in_sort(Codebook *c, uint8 len)
767 {
768    if (c.sparse) { assert(len != NO_CODE); return TRUE; }
769    if (len == NO_CODE) return FALSE;
770    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
771    return FALSE;
772 }
773 
774 // if the fast table above doesn't work, we want to binary
775 // search them... need to reverse the bits
776 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
777 {
778    int i, len;
779    // build a list of all the entries
780    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
781    // this is kind of a frivolous optimization--I don't see any performance improvement,
782    // but it's like 4 extra lines of code, so.
783    if (!c.sparse) {
784       int k = 0;
785       for (i=0; i < c.entries; ++i)
786          if (include_in_sort(c, lengths[i]))
787             c.sorted_codewords[k++] = bit_reverse(c.codewords[i]);
788       assert(k == c.sorted_entries);
789    } else {
790       for (i=0; i < c.sorted_entries; ++i)
791          c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
792    }
793 
794    qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare_2);
795    c.sorted_codewords[c.sorted_entries] = 0xffffffff;
796 
797    len = c.sparse ? c.sorted_entries : c.entries;
798    // now we need to indicate how they correspond; we could either
799    //   #1: sort a different data structure that says who they correspond to
800    //   #2: for each sorted entry, search the original list to find who corresponds
801    //   #3: for each original entry, find the sorted entry
802    // #1 requires extra storage, #2 is slow, #3 can use binary search!
803    for (i=0; i < len; ++i) {
804       ubyte huff_len = c.sparse ? lengths[values[i]] : lengths[i];
805       if (include_in_sort(c, huff_len)) {
806          uint32 code = bit_reverse(c.codewords[i]);
807          int x=0, n=c.sorted_entries;
808          while (n > 1) {
809             // invariant: sc[x] <= code < sc[x+n]
810             int m = x + (n >> 1);
811             if (c.sorted_codewords[m] <= code) {
812                x = m;
813                n -= (n>>1);
814             } else {
815                n >>= 1;
816             }
817          }
818          assert(c.sorted_codewords[x] == code);
819          if (c.sparse) {
820             c.sorted_values[x] = values[i];
821             c.codeword_lengths[x] = huff_len;
822          } else {
823             c.sorted_values[x] = i;
824          }
825       }
826    }
827 }
828 
829 // only run while parsing the header (3 times)
830 static int vorbis_validate(uint8 *data)
831 {
832    static immutable char[6] vorbis = "vorbis";
833    return memcmp(data, vorbis.ptr, 6) == 0;
834 }
835 
836 // called from setup only, once per code book
837 // (formula implied by specification)
838 static int lookup1_values(int entries, int dim)
839 {
840    int r = cast(int) floor(exp(cast(float) log(cast(float) entries) / dim));
841    if (cast(int) floor(pow(cast(float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
842       ++r;                                              // floor() to avoid _ftol() when non-CRT
843    if (pow(cast(float) r+1, dim) <= entries)
844       return -1;
845    if (cast(int) floor(pow(cast(float) r, dim)) > entries)
846       return -1;
847    return r;
848 }
849 
850 // called twice per file
851 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
852 {
853    int n4 = n >> 2, n8 = n >> 3;
854    int k,k2;
855 
856    for (k=k2=0; k < n4; ++k,k2+=2) {
857       A[k2  ] = cast(float)  cos(4*k*M_PI/n);
858       A[k2+1] = cast(float) -sin(4*k*M_PI/n);
859       B[k2  ] = cast(float)  cos((k2+1)*M_PI/n/2) * 0.5f;
860       B[k2+1] = cast(float)  sin((k2+1)*M_PI/n/2) * 0.5f;
861    }
862    for (k=k2=0; k < n8; ++k,k2+=2) {
863       C[k2  ] = cast(float)  cos(2*(k2+1)*M_PI/n);
864       C[k2+1] = cast(float) -sin(2*(k2+1)*M_PI/n);
865    }
866 }
867 
868 static void compute_window(int n, float *window)
869 {
870    int n2 = n >> 1, i;
871    for (i=0; i < n2; ++i)
872       window[i] = cast(float) sin(0.5 * M_PI * square(cast(float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
873 }
874 
875 static void compute_bitreverse(int n, uint16 *rev)
876 {
877    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
878    int i, n8 = n >> 3;
879    for (i=0; i < n8; ++i)
880       rev[i] = cast(ushort) ( (bit_reverse(i) >> (32-ld+3)) << 2 );
881 }
882 
883 static int init_blocksize(vorb *f, int b, int n)
884 {
885    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
886    f.A[b] = cast(float *) setup_malloc(f, 4 * n2);
887    f.B[b] = cast(float *) setup_malloc(f, 4 * n2);
888    f.C[b] = cast(float *) setup_malloc(f, 4 * n4);
889    if (!f.A[b] || !f.B[b] || !f.C[b]) return error(f, VORBIS_outofmem);
890    compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);
891    f.window[b] = cast(float *) setup_malloc(f, 4 * n2);
892    if (!f.window[b]) return error(f, VORBIS_outofmem);
893    compute_window(n, f.window[b]);
894    f.bit_reverse[b] = cast(uint16 *) setup_malloc(f, 2 * n8);
895    if (!f.bit_reverse[b]) return error(f, VORBIS_outofmem);
896    compute_bitreverse(n, f.bit_reverse[b]);
897    return TRUE;
898 }
899 
900 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
901 {
902    int low = -1;
903    int high = 65536;
904    int i;
905    for (i=0; i < n; ++i) {
906       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
907       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
908    }
909 }
910 
911 // this has been repurposed so y is now the original index instead of y
912 struct stbv__floor_ordering
913 {
914    uint16 x,id;
915 }
916 
917 extern(C) int point_compare_2(const void *p, const void *q)
918 {
919    stbv__floor_ordering *a = cast(stbv__floor_ordering *) p;
920    stbv__floor_ordering *b = cast(stbv__floor_ordering *) q;
921    return a.x < b.x ? -1 : a.x > b.x;
922 }
923 
924 //
925 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
926 
927 uint8 get8(vorb *z)
928 {
929     ubyte r;
930     if (z._io.read(&r, 1, z._userData) == 1)
931         return r;
932     else
933     {
934         z.eof = TRUE; 
935         return 0;
936     }
937 }
938 
939 static uint32 get32(vorb *f)
940 {
941    uint32 x;
942    x = get8(f);
943    x += get8(f) << 8;
944    x += get8(f) << 16;
945    x += cast(uint32) get8(f) << 24;
946    return x;
947 }
948 
949 static int getn(vorb *z, uint8 *data, int n)
950 {
951     ubyte r;
952     if (z._io.read(data, n, z._userData) == n)
953         return 1;
954     else
955     {
956         z.eof = 1; 
957         return 0;
958     }
959 }
960 
961 void skip(vorb *z, int n, bool* err)
962 {
963     bool success = z._io.skip(n, z._userData);
964     *err = !success;
965 }
966 
967 static int set_file_offset(stb_vorbis *f, uint loc)
968 {
969     if (f._io.seek( loc, false, f._userData))
970     {
971         return 1;
972     }
973     else
974     {
975         f.eof = 1;
976         f._io.seek(f.stream_len, false, f._userData); // stb_vorbis set to end of the file, for some reason.
977         return 0;
978     }
979 }
980 
981 
982 static immutable uint8[4] ogg_page_header = [ 0x4f, 0x67, 0x67, 0x53 ];
983 
984 static int capture_pattern(vorb *f)
985 {
986    if (0x4f != get8(f)) return FALSE;
987    if (0x67 != get8(f)) return FALSE;
988    if (0x67 != get8(f)) return FALSE;
989    if (0x53 != get8(f)) return FALSE;
990    return TRUE;
991 }
992 
993 enum PAGEFLAG_continued_packet  = 1;
994 enum PAGEFLAG_first_page        = 2;
995 enum PAGEFLAG_last_page         = 4;
996 
997 static int start_page_no_capturepattern(vorb *f)
998 {
999    uint32 loc0,loc1,n;
1000    if (f.first_decode) {
1001       f.p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1002    }
1003    // stream structure version
1004    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1005    // header flag
1006    f.page_flag = get8(f);
1007    // absolute granule position
1008    loc0 = get32(f);
1009    loc1 = get32(f);
1010    // @TODO: validate loc0,loc1 as valid positions?
1011    // stream serial number -- vorbis doesn't interleave, so discard
1012    get32(f);
1013    //if (f.serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1014    // page sequence number
1015    n = get32(f);
1016    f.last_page = n;
1017    // CRC32
1018    get32(f);
1019    // page_segments
1020    f.segment_count = get8(f);
1021    if (!getn(f, f.segments.ptr, f.segment_count))
1022       return error(f, VORBIS_unexpected_eof);
1023    // assume we _don't_ know any the sample position of any segments
1024    f.end_seg_with_known_loc = -2;
1025    if (loc0 != ~0U || loc1 != ~0U) {
1026       int i;
1027       // determine which packet is the last one that will complete
1028       for (i=f.segment_count-1; i >= 0; --i)
1029          if (f.segments[i] < 255)
1030             break;
1031       // 'i' is now the index of the _last_ segment of a packet that ends
1032       if (i >= 0) {
1033          f.end_seg_with_known_loc = i;
1034          f.known_loc_for_packet   = loc0;
1035       }
1036    }
1037    if (f.first_decode) {
1038       int i,len;
1039       len = 0;
1040       for (i=0; i < f.segment_count; ++i)
1041          len += f.segments[i];
1042       len += 27 + f.segment_count;
1043       f.p_first.page_end = f.p_first.page_start + len;
1044       f.p_first.last_decoded_sample = loc0;
1045    }
1046    f.next_seg = 0;
1047    return TRUE;
1048 }
1049 
1050 static int start_page(vorb *f)
1051 {
1052    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1053    return start_page_no_capturepattern(f);
1054 }
1055 
1056 static int start_packet(vorb *f)
1057 {
1058    while (f.next_seg == -1) {
1059       if (!start_page(f)) return FALSE;
1060       if (f.page_flag & PAGEFLAG_continued_packet)
1061          return error(f, VORBIS_continued_packet_flag_invalid);
1062    }
1063    f.last_seg = FALSE;
1064    f.valid_bits = 0;
1065    f.packet_bytes = 0;
1066    f.bytes_in_seg = 0;
1067    // f.next_seg is now valid
1068    return TRUE;
1069 }
1070 
1071 int maybe_start_packet(vorb *f)
1072 {
1073    if (f.next_seg == -1) {
1074       int x = get8(f);
1075       if (f.eof) return FALSE; // EOF at page boundary is not an error!
1076       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1077       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1078       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1079       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1080       if (!start_page_no_capturepattern(f)) return FALSE;
1081       if (f.page_flag & PAGEFLAG_continued_packet) {
1082          // set up enough state that we can read this packet if we want,
1083          // e.g. during recovery
1084          f.last_seg = FALSE;
1085          f.bytes_in_seg = 0;
1086          return error(f, VORBIS_continued_packet_flag_invalid);
1087       }
1088    }
1089    return start_packet(f);
1090 }
1091 
1092 static int next_segment(vorb *f)
1093 {
1094    int len;
1095    if (f.last_seg) return 0;
1096    if (f.next_seg == -1) {
1097       f.last_seg_which = f.segment_count-1; // in case start_page fails
1098       if (!start_page(f)) { f.last_seg = 1; return 0; }
1099       if (!(f.page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1100    }
1101    len = f.segments[f.next_seg++];
1102    if (len < 255) {
1103       f.last_seg = TRUE;
1104       f.last_seg_which = f.next_seg-1;
1105    }
1106    if (f.next_seg >= f.segment_count)
1107       f.next_seg = -1;
1108    assert(f.bytes_in_seg == 0);
1109    f.bytes_in_seg = cast(ubyte)len;
1110    return len;
1111 }
1112 
1113 enum EOP    = (-1);
1114 enum INVALID_BITS = (-1);
1115 
1116 static int get8_packet_raw(vorb *f)
1117 {
1118    if (!f.bytes_in_seg) {  // CLANG!
1119       if (f.last_seg) return EOP;
1120       else if (!next_segment(f)) return EOP;
1121    }
1122    assert(f.bytes_in_seg > 0);
1123    --f.bytes_in_seg;
1124    ++f.packet_bytes;
1125    return get8(f);
1126 }
1127 
1128 static int get8_packet(vorb *f)
1129 {
1130    int x = get8_packet_raw(f);
1131    f.valid_bits = 0;
1132    return x;
1133 }
1134 
1135 static int get32_packet(vorb *f)
1136 {
1137    uint32 x;
1138    x = get8_packet(f);
1139    x += get8_packet(f) << 8;
1140    x += get8_packet(f) << 16;
1141    x += cast(uint32) get8_packet(f) << 24;
1142    return x;
1143 }
1144 
1145 static void flush_packet(vorb *f)
1146 {
1147    while (get8_packet_raw(f) != EOP)
1148    {
1149    }
1150 }
1151 
1152 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1153 // as the huffman decoder?
1154 static uint32 get_bits(vorb *f, int n)
1155 {
1156    uint32 z;
1157 
1158    if (f.valid_bits < 0) return 0;
1159    if (f.valid_bits < n) {
1160       if (n > 24) {
1161          // the accumulator technique below would not work correctly in this case
1162          z = get_bits(f, 24);
1163          z += get_bits(f, n-24) << 24;
1164          return z;
1165       }
1166       if (f.valid_bits == 0) f.acc = 0;
1167       while (f.valid_bits < n) {
1168          int z2 = get8_packet_raw(f);
1169          if (z2 == EOP) {
1170             f.valid_bits = INVALID_BITS;
1171             return 0;
1172          }
1173          f.acc += z2 << f.valid_bits;
1174          f.valid_bits += 8;
1175       }
1176    }
1177 
1178    assert(f.valid_bits >= n);
1179    z = f.acc & ((1 << n)-1);
1180    f.acc >>= n;
1181    f.valid_bits -= n;
1182    return z;
1183 }
1184 
1185 // @OPTIMIZE: primary accumulator for huffman
1186 // expand the buffer to as many bits as possible without reading off end of packet
1187 // it might be nice to allow f.valid_bits and f.acc to be stored in registers,
1188 // e.g. cache them locally and decode locally
1189 static void prep_huffman(vorb *f)
1190 {
1191    if (f.valid_bits <= 24) {
1192       if (f.valid_bits == 0) f.acc = 0;
1193       do {
1194          int z;
1195          if (f.last_seg && !f.bytes_in_seg) return;
1196          z = get8_packet_raw(f);
1197          if (z == EOP) return;
1198          f.acc += cast(uint) z << f.valid_bits;
1199          f.valid_bits += 8;
1200       } while (f.valid_bits <= 24);
1201    }
1202 }
1203 
1204 enum
1205 {
1206    VORBIS_packet_id = 1,
1207    VORBIS_packet_comment = 3,
1208    VORBIS_packet_setup = 5
1209 };
1210 
1211 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1212 {
1213    int i;
1214    prep_huffman(f);
1215 
1216    if (c.codewords == NULL && c.sorted_codewords == NULL)
1217       return -1;
1218 
1219    // cases to use binary search: sorted_codewords && !c.codewords
1220    //                             sorted_codewords && c.entries > 8
1221    if (c.entries > 8 ? c.sorted_codewords!=NULL : !c.codewords) {
1222       // binary search
1223       uint32 code = bit_reverse(f.acc);
1224       int x=0, n=c.sorted_entries, len;
1225 
1226       while (n > 1) {
1227          // invariant: sc[x] <= code < sc[x+n]
1228          int m = x + (n >> 1);
1229          if (c.sorted_codewords[m] <= code) {
1230             x = m;
1231             n -= (n>>1);
1232          } else {
1233             n >>= 1;
1234          }
1235       }
1236       // x is now the sorted index
1237       if (!c.sparse) x = c.sorted_values[x];
1238       // x is now sorted index if sparse, or symbol otherwise
1239       len = c.codeword_lengths[x];
1240       if (f.valid_bits >= len) {
1241          f.acc >>= len;
1242          f.valid_bits -= len;
1243          return x;
1244       }
1245 
1246       f.valid_bits = 0;
1247       return -1;
1248    }
1249 
1250    // if small, linear search
1251    assert(!c.sparse);
1252    for (i=0; i < c.entries; ++i) {
1253       if (c.codeword_lengths[i] == NO_CODE) continue;
1254       if (c.codewords[i] == (f.acc & ((1 << c.codeword_lengths[i])-1))) {
1255          if (f.valid_bits >= c.codeword_lengths[i]) {
1256             f.acc >>= c.codeword_lengths[i];
1257             f.valid_bits -= c.codeword_lengths[i];
1258             return i;
1259          }
1260          f.valid_bits = 0;
1261          return -1;
1262       }
1263    }
1264 
1265    error(f, VORBIS_invalid_stream);
1266    f.valid_bits = 0;
1267    return -1;
1268 }
1269 
1270 
1271 static int codebook_decode_scalar(vorb *f, Codebook *c)
1272 {
1273    int i;
1274    if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1275       prep_huffman(f);
1276    // fast huffman table lookup
1277    i = f.acc & FAST_HUFFMAN_TABLE_MASK;
1278    i = c.fast_huffman[i];
1279    if (i >= 0) {
1280       f.acc >>= c.codeword_lengths[i];
1281       f.valid_bits -= c.codeword_lengths[i];
1282       if (f.valid_bits < 0) { f.valid_bits = 0; return -1; }
1283       return i;
1284    }
1285    return codebook_decode_scalar_raw(f,c);
1286 }
1287 
1288 void DECODE_RAW(ref int var, vorb* f, Codebook* c)
1289 {
1290     var = codebook_decode_scalar(f,c);
1291 }
1292 
1293 void DECODE(ref int var, vorb* f, Codebook* c)
1294 {
1295     var = codebook_decode_scalar(f,c);
1296     if (c.sparse) 
1297         var = c.sorted_values[var];
1298 }
1299 
1300 void DECODE_VQ(ref int var, vorb* f, Codebook* c)
1301 {
1302     DECODE_RAW(var, f, c);
1303 }
1304 
1305 
1306 
1307 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1308 // where we avoid one addition
1309 codetype CODEBOOK_ELEMENT(Codebook* c, int off)
1310 {
1311     return (c.multiplicands[off]);
1312 }
1313 
1314 codetype CODEBOOK_ELEMENT_FAST(Codebook* c, int off)
1315 {
1316     return (c.multiplicands[off]);
1317 }
1318 
1319 codetype CODEBOOK_ELEMENT_BASE(Codebook* c)
1320 {
1321     return 0;
1322 }
1323 
1324 static int codebook_decode_start(vorb *f, Codebook *c)
1325 {
1326    int z = -1;
1327 
1328    // type 0 is only legal in a scalar context
1329    if (c.lookup_type == 0)
1330       error(f, VORBIS_invalid_stream);
1331    else {
1332       DECODE_VQ(z,f,c);
1333       if (c.sparse) assert(z < c.sorted_entries);
1334       if (z < 0) {  // check for EOP
1335          if (!f.bytes_in_seg)
1336             if (f.last_seg)
1337                return z;
1338          error(f, VORBIS_invalid_stream);
1339       }
1340    }
1341    return z;
1342 }
1343 
1344 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1345 {
1346     int i,z = codebook_decode_start(f,c);
1347     if (z < 0) return FALSE;
1348     if (len > c.dimensions) len = c.dimensions;
1349 
1350     z *= c.dimensions;
1351     if (c.sequence_p) 
1352     {
1353         float last = CODEBOOK_ELEMENT_BASE(c);
1354         for (i=0; i < len; ++i) 
1355         {
1356             float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1357             output[i] += val;
1358             last = val + c.minimum_value;
1359         }
1360     } 
1361     else 
1362     {
1363         float last = CODEBOOK_ELEMENT_BASE(c);
1364         for (i=0; i < len; ++i) 
1365         {
1366             output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1367         }
1368     }
1369     return TRUE;
1370 }
1371 
1372 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1373 {
1374     int i,z = codebook_decode_start(f,c);
1375     float last = CODEBOOK_ELEMENT_BASE(c);
1376     if (z < 0) return FALSE;
1377     if (len > c.dimensions) len = c.dimensions;
1378 
1379    z *= c.dimensions;
1380    for (i=0; i < len; ++i) {
1381       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1382       output[i*step] += val;
1383       if (c.sequence_p) last = val;
1384    }
1385 
1386    return TRUE;
1387 }
1388 
1389 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1390 {
1391    int c_inter = *c_inter_p;
1392    int p_inter = *p_inter_p;
1393    int i,z, effective = c.dimensions;
1394 
1395    // type 0 is only legal in a scalar context
1396    if (c.lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1397 
1398    while (total_decode > 0) {
1399       float last = CODEBOOK_ELEMENT_BASE(c);
1400       DECODE_VQ(z,f,c);
1401       static if (!STB_VORBIS_DIVIDES_IN_CODEBOOK)
1402       {
1403         assert(!c.sparse || z < c.sorted_entries);
1404       }
1405       if (z < 0) {
1406          if (!f.bytes_in_seg)
1407             if (f.last_seg) return FALSE;
1408          return error(f, VORBIS_invalid_stream);
1409       }
1410 
1411       // if this will take us off the end of the buffers, stop short!
1412       // we check by computing the length of the virtual interleaved
1413       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1414       // and the length we'll be using (effective)
1415       if (c_inter + p_inter*ch + effective > len * ch) {
1416          effective = len*ch - (p_inter*ch - c_inter);
1417       }
1418 
1419       {
1420          z *= c.dimensions;
1421          if (c.sequence_p) {
1422             for (i=0; i < effective; ++i) {
1423                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1424                if (outputs[c_inter])
1425                   outputs[c_inter][p_inter] += val;
1426                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1427                last = val;
1428             }
1429          } else {
1430             for (i=0; i < effective; ++i) {
1431                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1432                if (outputs[c_inter])
1433                   outputs[c_inter][p_inter] += val;
1434                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1435             }
1436          }
1437       }
1438 
1439       total_decode -= effective;
1440    }
1441    *c_inter_p = c_inter;
1442    *p_inter_p = p_inter;
1443    return TRUE;
1444 }
1445 
1446 static int predict_point(int x, int x0, int x1, int y0, int y1)
1447 {
1448    int dy = y1 - y0;
1449    int adx = x1 - x0;
1450    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1451    int err = abs(dy) * (x - x0);
1452    int off = err / adx;
1453    return dy < 0 ? y0 - off : y0 + off;
1454 }
1455 
1456 // the following table is block-copied from the specification
1457 static immutable float[256] inverse_db_table =
1458 [
1459   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1460   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1461   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1462   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1463   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1464   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1465   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1466   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1467   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1468   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1469   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1470   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1471   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1472   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1473   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1474   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1475   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1476   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1477   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1478   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1479   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1480   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1481   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1482   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1483   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1484   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1485   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1486   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1487   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1488   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1489   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1490   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1491   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1492   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1493   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1494   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1495   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1496   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1497   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1498   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1499   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1500   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1501   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1502   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1503   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1504   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1505   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1506   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1507   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1508   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1509   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1510   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1511   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1512   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1513   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1514   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1515   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1516   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1517   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1518   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1519   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1520   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1521   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1522   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1523  ];
1524 
1525 
1526 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1527 // note that you must produce bit-identical output to decode correctly;
1528 // this specific sequence of operations is specified in the spec (it's
1529 // drawing integer-quantized frequency-space lines that the encoder
1530 // expects to be exactly the same)
1531 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1532 // have to divide in the setup? sigh.
1533 
1534 static void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1535 {
1536    int dy = y1 - y0;
1537    int adx = x1 - x0;
1538    int ady = abs(dy);
1539    int base;
1540    int x=x0,y=y0;
1541    int err = 0;
1542    int sy;
1543 
1544    base = dy / adx;
1545    if (dy < 0)
1546       sy = base - 1;
1547    else
1548       sy = base+1;
1549    ady -= abs(base) * adx;
1550    if (x1 > n) x1 = n;
1551    if (x < x1) {
1552       output[x] *= inverse_db_table[y&255];
1553       for (++x; x < x1; ++x) {
1554          err += ady;
1555          if (err >= adx) {
1556             err -= adx;
1557             y += sy;
1558          } else
1559             y += base;
1560          output[x] *= inverse_db_table[y&255];
1561       }
1562    }
1563 }
1564 
1565 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
1566 {
1567    int k;
1568    if (rtype == 0) {
1569       int step = n / book.dimensions;
1570       for (k=0; k < step; ++k)
1571          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
1572             return FALSE;
1573    } else {
1574       for (k=0; k < n; ) {
1575          if (!codebook_decode(f, book, target+offset, n-k))
1576             return FALSE;
1577          k += book.dimensions;
1578          offset += book.dimensions;
1579       }
1580    }
1581    return TRUE;
1582 }
1583 
1584 // n is 1/2 of the blocksize --
1585 // specification: "Correct per-vector decode length is [n]/2"
1586 static void decode_residue(vorb *f, float **residue_buffers, int ch, int n, int rn, uint8 *do_not_decode)
1587 {
1588    int i,j,pass;
1589    Residue *r = f.residue_config + rn;
1590    int rtype = f.residue_types[rn];
1591    int c = r.classbook;
1592    int classwords = f.codebooks[c].dimensions;
1593    uint actual_size = rtype == 2 ? n*2 : n;
1594    uint limit_r_begin = (r.begin < actual_size ? r.begin : actual_size);
1595    uint limit_r_end   = (r.end   < actual_size ? r.end   : actual_size);
1596    int n_read = limit_r_end - limit_r_begin;
1597    int part_read = n_read / r.part_size;
1598    int temp_alloc_point = temp_alloc_save(f);
1599    uint8 ***part_classdata = cast(uint8 ***) temp_block_array(f,f.channels, part_read * (uint8*).sizeof);
1600   
1601    for (i=0; i < ch; ++i)
1602       if (!do_not_decode[i])
1603          memset(residue_buffers[i], 0, float.sizeof * n);
1604 
1605    if (rtype == 2 && ch != 1) {
1606       for (j=0; j < ch; ++j)
1607          if (!do_not_decode[j])
1608             break;
1609       if (j == ch)
1610          goto done;
1611 
1612       for (pass=0; pass < 8; ++pass) {
1613          int pcount = 0, class_set = 0;
1614          if (ch == 2) {
1615             while (pcount < part_read) {
1616                int z = r.begin + pcount*r.part_size;
1617                int c_inter = (z & 1), p_inter = z>>1;
1618                if (pass == 0) {
1619                   Codebook *c2 = f.codebooks+r.classbook;
1620                   int q;
1621                   DECODE(q,f,c2);
1622                   if (q == EOP) goto done;
1623                   part_classdata[0][class_set] = r.classdata[q];
1624                }
1625                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) 
1626                {
1627                   int z2 = r.begin + pcount*r.part_size;
1628                   int c2 = part_classdata[0][class_set][i];
1629                   int b = r.residue_books[c2][pass];
1630                   if (b >= 0) {
1631                      Codebook *book = f.codebooks + b;
1632             
1633                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size))
1634                         goto done;
1635             
1636                   } 
1637                   else 
1638                   {
1639                      z2 += r.part_size;
1640                      c_inter = z2 & 1;
1641                      p_inter = z2 >> 1;
1642                   }
1643                }
1644                ++class_set;
1645             }
1646          } else if (ch > 2) {
1647             while (pcount < part_read) {
1648                int z = r.begin + pcount*r.part_size;
1649                int c_inter = z % ch, p_inter = z/ch;
1650                if (pass == 0) {
1651                   Codebook *c3 = f.codebooks+r.classbook;
1652                   int q;
1653                   DECODE(q,f,c3);
1654                   if (q == EOP) goto done;
1655                   part_classdata[0][class_set] = r.classdata[q];
1656                }
1657                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1658                   int z3 = r.begin + pcount*r.part_size;
1659                   int c3 = part_classdata[0][class_set][i];
1660                   int b = r.residue_books[c3][pass];
1661                   if (b >= 0) {
1662                      Codebook *book = f.codebooks + b;
1663                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size))
1664                         goto done;
1665                   } else {
1666                      z3 += r.part_size;
1667                      c_inter = z3 % ch;
1668                      p_inter = z3 / ch;
1669                   }
1670                }
1671                ++class_set;
1672             }
1673          }
1674       }
1675       goto done;
1676    }
1677 
1678    for (pass=0; pass < 8; ++pass) {
1679       int pcount = 0, class_set=0;
1680       while (pcount < part_read) {
1681          if (pass == 0) {
1682             for (j=0; j < ch; ++j) {
1683                if (!do_not_decode[j]) {
1684                   Codebook *c4 = f.codebooks+r.classbook;
1685                   int temp;
1686                   DECODE(temp,f,c4);
1687                   if (temp == EOP) goto done;
1688                   part_classdata[j][class_set] = r.classdata[temp];
1689                }
1690             }
1691          }
1692          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1693             for (j=0; j < ch; ++j) {
1694                if (!do_not_decode[j]) {
1695                   int c5 = part_classdata[j][class_set][i];
1696                   int b = r.residue_books[c5][pass];
1697                   if (b >= 0) {
1698                      float *target = residue_buffers[j];
1699                      int offset = r.begin + pcount * r.part_size;
1700                      int n2 = r.part_size;
1701                      Codebook *book = f.codebooks + b;
1702                      if (!residue_decode(f, book, target, offset, n2, rtype))
1703                         goto done;
1704                   }
1705                }
1706             }
1707          }
1708          ++class_set;
1709       }
1710    }
1711   done:
1712 
1713    temp_free(f,part_classdata);
1714    temp_alloc_restore(f,temp_alloc_point);
1715 }
1716 
1717 // the following were split out into separate functions while optimizing;
1718 // they could be pushed back up but eh. __forceinline showed no change;
1719 // they're probably already being inlined.
1720 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
1721 {
1722    float *ee0 = e + i_off;
1723    float *ee2 = ee0 + k_off;
1724    int i;
1725 
1726    assert((n & 3) == 0);
1727    for (i=(n>>2); i > 0; --i) {
1728       float k00_20, k01_21;
1729       k00_20  = ee0[ 0] - ee2[ 0];
1730       k01_21  = ee0[-1] - ee2[-1];
1731       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
1732       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
1733       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
1734       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
1735       A += 8;
1736 
1737       k00_20  = ee0[-2] - ee2[-2];
1738       k01_21  = ee0[-3] - ee2[-3];
1739       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
1740       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
1741       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
1742       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
1743       A += 8;
1744 
1745       k00_20  = ee0[-4] - ee2[-4];
1746       k01_21  = ee0[-5] - ee2[-5];
1747       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
1748       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
1749       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
1750       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
1751       A += 8;
1752 
1753       k00_20  = ee0[-6] - ee2[-6];
1754       k01_21  = ee0[-7] - ee2[-7];
1755       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
1756       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
1757       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
1758       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
1759       A += 8;
1760       ee0 -= 8;
1761       ee2 -= 8;
1762    }
1763 }
1764 
1765 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
1766 {
1767    int i;
1768    float k00_20, k01_21;
1769 
1770    float *e0 = e + d0;
1771    float *e2 = e0 + k_off;
1772 
1773    for (i=lim >> 2; i > 0; --i) {
1774       k00_20 = e0[-0] - e2[-0];
1775       k01_21 = e0[-1] - e2[-1];
1776       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
1777       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
1778       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
1779       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
1780 
1781       A += k1;
1782 
1783       k00_20 = e0[-2] - e2[-2];
1784       k01_21 = e0[-3] - e2[-3];
1785       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
1786       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
1787       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
1788       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
1789 
1790       A += k1;
1791 
1792       k00_20 = e0[-4] - e2[-4];
1793       k01_21 = e0[-5] - e2[-5];
1794       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
1795       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
1796       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
1797       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
1798 
1799       A += k1;
1800 
1801       k00_20 = e0[-6] - e2[-6];
1802       k01_21 = e0[-7] - e2[-7];
1803       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
1804       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
1805       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
1806       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
1807 
1808       e0 -= 8;
1809       e2 -= 8;
1810 
1811       A += k1;
1812    }
1813 }
1814 
1815 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
1816 {
1817    int i;
1818    float A0 = A[0];
1819    float A1 = A[0+1];
1820    float A2 = A[0+a_off];
1821    float A3 = A[0+a_off+1];
1822    float A4 = A[0+a_off*2+0];
1823    float A5 = A[0+a_off*2+1];
1824    float A6 = A[0+a_off*3+0];
1825    float A7 = A[0+a_off*3+1];
1826 
1827    float k00,k11;
1828 
1829    float *ee0 = e  +i_off;
1830    float *ee2 = ee0+k_off;
1831 
1832    for (i=n; i > 0; --i) {
1833       k00     = ee0[ 0] - ee2[ 0];
1834       k11     = ee0[-1] - ee2[-1];
1835       ee0[ 0] =  ee0[ 0] + ee2[ 0];
1836       ee0[-1] =  ee0[-1] + ee2[-1];
1837       ee2[ 0] = (k00) * A0 - (k11) * A1;
1838       ee2[-1] = (k11) * A0 + (k00) * A1;
1839 
1840       k00     = ee0[-2] - ee2[-2];
1841       k11     = ee0[-3] - ee2[-3];
1842       ee0[-2] =  ee0[-2] + ee2[-2];
1843       ee0[-3] =  ee0[-3] + ee2[-3];
1844       ee2[-2] = (k00) * A2 - (k11) * A3;
1845       ee2[-3] = (k11) * A2 + (k00) * A3;
1846 
1847       k00     = ee0[-4] - ee2[-4];
1848       k11     = ee0[-5] - ee2[-5];
1849       ee0[-4] =  ee0[-4] + ee2[-4];
1850       ee0[-5] =  ee0[-5] + ee2[-5];
1851       ee2[-4] = (k00) * A4 - (k11) * A5;
1852       ee2[-5] = (k11) * A4 + (k00) * A5;
1853 
1854       k00     = ee0[-6] - ee2[-6];
1855       k11     = ee0[-7] - ee2[-7];
1856       ee0[-6] =  ee0[-6] + ee2[-6];
1857       ee0[-7] =  ee0[-7] + ee2[-7];
1858       ee2[-6] = (k00) * A6 - (k11) * A7;
1859       ee2[-7] = (k11) * A6 + (k00) * A7;
1860 
1861       ee0 -= k0;
1862       ee2 -= k0;
1863    }
1864 }
1865 
1866 static void iter_54(float *z)
1867 {
1868    float k00,k11,k22,k33;
1869    float y0,y1,y2,y3;
1870 
1871    k00  = z[ 0] - z[-4];
1872    y0   = z[ 0] + z[-4];
1873    y2   = z[-2] + z[-6];
1874    k22  = z[-2] - z[-6];
1875 
1876    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
1877    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
1878 
1879    // done with y0,y2
1880 
1881    k33  = z[-3] - z[-7];
1882 
1883    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
1884    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
1885 
1886    // done with k33
1887 
1888    k11  = z[-1] - z[-5];
1889    y1   = z[-1] + z[-5];
1890    y3   = z[-3] + z[-7];
1891 
1892    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
1893    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
1894    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
1895    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
1896 }
1897 
1898 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
1899 {
1900    int a_off = base_n >> 3;
1901    float A2 = A[0+a_off];
1902    float *z = e + i_off;
1903    float *base = z - 16 * n;
1904 
1905    while (z > base) {
1906       float k00,k11;
1907       float l00,l11;
1908 
1909       k00    = z[-0] - z[ -8];
1910       k11    = z[-1] - z[ -9];
1911       l00    = z[-2] - z[-10];
1912       l11    = z[-3] - z[-11];
1913       z[ -0] = z[-0] + z[ -8];
1914       z[ -1] = z[-1] + z[ -9];
1915       z[ -2] = z[-2] + z[-10];
1916       z[ -3] = z[-3] + z[-11];
1917       z[ -8] = k00;
1918       z[ -9] = k11;
1919       z[-10] = (l00+l11) * A2;
1920       z[-11] = (l11-l00) * A2;
1921 
1922       k00    = z[ -4] - z[-12];
1923       k11    = z[ -5] - z[-13];
1924       l00    = z[ -6] - z[-14];
1925       l11    = z[ -7] - z[-15];
1926       z[ -4] = z[ -4] + z[-12];
1927       z[ -5] = z[ -5] + z[-13];
1928       z[ -6] = z[ -6] + z[-14];
1929       z[ -7] = z[ -7] + z[-15];
1930       z[-12] = k11;
1931       z[-13] = -k00;
1932       z[-14] = (l11-l00) * A2;
1933       z[-15] = (l00+l11) * -A2;
1934 
1935       iter_54(z);
1936       iter_54(z-8);
1937       z -= 16;
1938    }
1939 }
1940 
1941 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
1942 {
1943    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
1944    int ld;
1945    // @OPTIMIZE: reduce register pressure by using fewer variables?
1946    int save_point = temp_alloc_save(f);
1947    float *buf2 = cast(float *) temp_alloc(f, n2 * float.sizeof);
1948    float* u = null;
1949    float* v = null;
1950    // twiddle factors
1951    float *A = f.A[blocktype];
1952 
1953    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
1954    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
1955 
1956    // kernel from paper
1957 
1958 
1959    // merged:
1960    //   copy and reflect spectral data
1961    //   step 0
1962 
1963    // note that it turns out that the items added together during
1964    // this step are, in fact, being added to themselves (as reflected
1965    // by step 0). inexplicable inefficiency! this became obvious
1966    // once I combined the passes.
1967 
1968    // so there's a missing 'times 2' here (for adding X to itself).
1969    // this propagates through linearly to the end, where the numbers
1970    // are 1/2 too small, and need to be compensated for.
1971 
1972    {
1973       float *d, e, AA, e_stop;
1974       d = &buf2[n2-2];
1975       AA = A;
1976       e = &buffer[0];
1977       e_stop = &buffer[n2];
1978       while (e != e_stop) {
1979          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
1980          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
1981          d -= 2;
1982          AA += 2;
1983          e += 4;
1984       }
1985 
1986       e = &buffer[n2-3];
1987       while (d >= buf2) {
1988          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
1989          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
1990          d -= 2;
1991          AA += 2;
1992          e -= 4;
1993       }
1994    }
1995 
1996    // now we use symbolic names for these, so that we can
1997    // possibly swap their meaning as we change which operations
1998    // are in place
1999 
2000    u = buffer;
2001    v = buf2;
2002 
2003    // step 2    (paper output is w, now u)
2004    // this could be in place, but the data ends up in the wrong
2005    // place... _somebody_'s got to swap it, so this is nominated
2006    {
2007       float *AA = &A[n2-8];
2008       float *d0, d1,  e0, e1;
2009 
2010       e0 = &v[n4];
2011       e1 = &v[0];
2012 
2013       d0 = &u[n4];
2014       d1 = &u[0];
2015 
2016       while (AA >= A) {
2017          float v40_20, v41_21;
2018 
2019          v41_21 = e0[1] - e1[1];
2020          v40_20 = e0[0] - e1[0];
2021          d0[1]  = e0[1] + e1[1];
2022          d0[0]  = e0[0] + e1[0];
2023          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2024          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2025 
2026          v41_21 = e0[3] - e1[3];
2027          v40_20 = e0[2] - e1[2];
2028          d0[3]  = e0[3] + e1[3];
2029          d0[2]  = e0[2] + e1[2];
2030          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2031          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2032 
2033          AA -= 8;
2034 
2035          d0 += 4;
2036          d1 += 4;
2037          e0 += 4;
2038          e1 += 4;
2039       }
2040    }
2041 
2042    // step 3
2043    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2044 
2045    // optimized step 3:
2046 
2047    // the original step3 loop can be nested r inside s or s inside r;
2048    // it's written originally as s inside r, but this is dumb when r
2049    // iterates many times, and s few. So I have two copies of it and
2050    // switch between them halfway.
2051 
2052    // this is iteration 0 of step 3
2053    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2054    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2055 
2056    // this is iteration 1 of step 3
2057    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2058    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2059    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2060    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2061 
2062    l=2;
2063    for (; l < (ld-3)>>1; ++l) {
2064       int k0 = n >> (l+2), k0_2 = k0>>1;
2065       int lim = 1 << (l+1);
2066       int i;
2067       for (i=0; i < lim; ++i)
2068          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2069    }
2070 
2071    for (; l < ld-6; ++l) {
2072       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2073       int rlim = n >> (l+6), r;
2074       int lim = 1 << (l+1);
2075       int i_off;
2076       float *A0 = A;
2077       i_off = n2-1;
2078       for (r=rlim; r > 0; --r) {
2079          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2080          A0 += k1*4;
2081          i_off -= 8;
2082       }
2083    }
2084 
2085    // iterations with count:
2086    //   ld-6,-5,-4 all interleaved together
2087    //       the big win comes from getting rid of needless flops
2088    //         due to the constants on pass 5 & 4 being all 1 and 0;
2089    //       combining them to be simultaneous to improve cache made little difference
2090    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2091 
2092    // output is u
2093 
2094    // step 4, 5, and 6
2095    // cannot be in-place because of step 5
2096    {
2097       uint16 *bitrev = f.bit_reverse[blocktype];
2098       // weirdly, I'd have thought reading sequentially and writing
2099       // erratically would have been better than vice-versa, but in
2100       // fact that's not what my testing showed. (That is, with
2101       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2102 
2103       float *d0 = &v[n4-4];
2104       float *d1 = &v[n2-4];
2105       while (d0 >= v) {
2106          int k4;
2107 
2108          k4 = bitrev[0];
2109          d1[3] = u[k4+0];
2110          d1[2] = u[k4+1];
2111          d0[3] = u[k4+2];
2112          d0[2] = u[k4+3];
2113 
2114          k4 = bitrev[1];
2115          d1[1] = u[k4+0];
2116          d1[0] = u[k4+1];
2117          d0[1] = u[k4+2];
2118          d0[0] = u[k4+3];
2119 
2120          d0 -= 4;
2121          d1 -= 4;
2122          bitrev += 2;
2123       }
2124    }
2125    // (paper output is u, now v)
2126 
2127 
2128    // data must be in buf2
2129    assert(v == buf2);
2130 
2131    // step 7   (paper output is v, now v)
2132    // this is now in place
2133    {
2134       float *C = f.C[blocktype];
2135       float *d, e;
2136 
2137       d = v;
2138       e = v + n2 - 4;
2139 
2140       while (d < e) {
2141          float a02,a11,b0,b1,b2,b3;
2142 
2143          a02 = d[0] - e[2];
2144          a11 = d[1] + e[3];
2145 
2146          b0 = C[1]*a02 + C[0]*a11;
2147          b1 = C[1]*a11 - C[0]*a02;
2148 
2149          b2 = d[0] + e[ 2];
2150          b3 = d[1] - e[ 3];
2151 
2152          d[0] = b2 + b0;
2153          d[1] = b3 + b1;
2154          e[2] = b2 - b0;
2155          e[3] = b1 - b3;
2156 
2157          a02 = d[2] - e[0];
2158          a11 = d[3] + e[1];
2159 
2160          b0 = C[3]*a02 + C[2]*a11;
2161          b1 = C[3]*a11 - C[2]*a02;
2162 
2163          b2 = d[2] + e[ 0];
2164          b3 = d[3] - e[ 1];
2165 
2166          d[2] = b2 + b0;
2167          d[3] = b3 + b1;
2168          e[0] = b2 - b0;
2169          e[1] = b1 - b3;
2170 
2171          C += 4;
2172          d += 4;
2173          e -= 4;
2174       }
2175    }
2176 
2177    // data must be in buf2
2178 
2179 
2180    // step 8+decode   (paper output is X, now buffer)
2181    // this generates pairs of data a la 8 and pushes them directly through
2182    // the decode kernel (pushing rather than pulling) to avoid having
2183    // to make another pass later
2184 
2185    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2186 
2187    {
2188       float *d0, d1, d2, d3;
2189 
2190       float *B = f.B[blocktype] + n2 - 8;
2191       float *e = buf2 + n2 - 8;
2192       d0 = &buffer[0];
2193       d1 = &buffer[n2-4];
2194       d2 = &buffer[n2];
2195       d3 = &buffer[n-4];
2196       while (e >= v) {
2197          float p0,p1,p2,p3;
2198 
2199          p3 =  e[6]*B[7] - e[7]*B[6];
2200          p2 = -e[6]*B[6] - e[7]*B[7];
2201 
2202          d0[0] =   p3;
2203          d1[3] = - p3;
2204          d2[0] =   p2;
2205          d3[3] =   p2;
2206 
2207          p1 =  e[4]*B[5] - e[5]*B[4];
2208          p0 = -e[4]*B[4] - e[5]*B[5];
2209 
2210          d0[1] =   p1;
2211          d1[2] = - p1;
2212          d2[1] =   p0;
2213          d3[2] =   p0;
2214 
2215          p3 =  e[2]*B[3] - e[3]*B[2];
2216          p2 = -e[2]*B[2] - e[3]*B[3];
2217 
2218          d0[2] =   p3;
2219          d1[1] = - p3;
2220          d2[2] =   p2;
2221          d3[1] =   p2;
2222 
2223          p1 =  e[0]*B[1] - e[1]*B[0];
2224          p0 = -e[0]*B[0] - e[1]*B[1];
2225 
2226          d0[3] =   p1;
2227          d1[0] = - p1;
2228          d2[3] =   p0;
2229          d3[0] =   p0;
2230 
2231          B -= 8;
2232          e -= 8;
2233          d0 += 4;
2234          d2 += 4;
2235          d1 -= 4;
2236          d3 -= 4;
2237       }
2238    }
2239 
2240    temp_free(f,buf2);
2241    temp_alloc_restore(f,save_point);
2242 }
2243 
2244 
2245 static float *get_window(vorb *f, int len)
2246 {
2247    len <<= 1;
2248    if (len == f.blocksize_0) return f.window[0];
2249    if (len == f.blocksize_1) return f.window[1];
2250    return null;
2251 }
2252 
2253 alias YTYPE = int16;
2254 
2255 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
2256 {
2257    int n2 = n >> 1;
2258    int s = map.chan[i].mux, floor;
2259    floor = map.submap_floor[s];
2260    if (f.floor_types[floor] == 0) {
2261       return error(f, VORBIS_invalid_stream);
2262    } else {
2263       Floor1 *g = &f.floor_config[floor].floor1;
2264       int j,q;
2265       int lx = 0, ly = finalY[0] * g.floor1_multiplier;
2266       for (q=1; q < g.values; ++q) {
2267          j = g.sorted_order[q];
2268          if (finalY[j] >= 0)
2269          {
2270             int hy = finalY[j] * g.floor1_multiplier;
2271             int hx = g.Xlist[j];
2272             if (lx != hx)
2273                draw_line(target, lx,ly, hx,hy, n2);
2274             lx = hx, ly = hy;
2275          }
2276       }
2277       if (lx < n2) {
2278          // optimization of: draw_line(target, lx,ly, n,ly, n2);
2279          for (j=lx; j < n2; ++j)
2280             target[j] *= inverse_db_table[ly];
2281       }
2282    }
2283    return TRUE;
2284 }
2285 
2286 // The meaning of "left" and "right"
2287 //
2288 // For a given frame:
2289 //     we compute samples from 0..n
2290 //     window_center is n/2
2291 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2292 //     all of the samples from left_end to right_start can be output without mixing; however,
2293 //        this interval is 0-length except when transitioning between short and long frames
2294 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2295 //        which we don't have, so those get saved in a buffer
2296 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2297 //        has to be the same as frame N+1's left_end-left_start (which they are by
2298 //        construction)
2299 
2300 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
2301 {
2302    Mode *m;
2303    int i, n, prev, next, window_center;
2304    f.channel_buffer_start = f.channel_buffer_end = 0;
2305 
2306   retry:
2307    if (f.eof) return FALSE;
2308    if (!maybe_start_packet(f))
2309       return FALSE;
2310    // check packet type
2311    if (get_bits(f,1) != 0) {      
2312       while (EOP != get8_packet(f)) {}
2313       goto retry;
2314    }
2315 
2316    if (f.alloc.alloc_buffer)
2317       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2318 
2319    i = get_bits(f, ilog(f.mode_count-1));
2320    if (i == EOP) return FALSE;
2321    if (i >= f.mode_count) return FALSE;
2322    *mode = i;
2323    m = f.mode_config.ptr + i;
2324    if (m.blockflag) {
2325       n = f.blocksize_1;
2326       prev = get_bits(f,1);
2327       next = get_bits(f,1);
2328    } else {
2329       prev = next = 0;
2330       n = f.blocksize_0;
2331    }
2332 
2333 // WINDOWING
2334 
2335    window_center = n >> 1;
2336    if (m.blockflag && !prev) {
2337       *p_left_start = (n - f.blocksize_0) >> 2;
2338       *p_left_end   = (n + f.blocksize_0) >> 2;
2339    } else {
2340       *p_left_start = 0;
2341       *p_left_end   = window_center;
2342    }
2343    if (m.blockflag && !next) {
2344       *p_right_start = (n*3 - f.blocksize_0) >> 2;
2345       *p_right_end   = (n*3 + f.blocksize_0) >> 2;
2346    } else {
2347       *p_right_start = window_center;
2348       *p_right_end   = n;
2349    }
2350 
2351    return TRUE;
2352 }
2353 
2354 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
2355 {
2356    Mapping *map;
2357    int i,j,k,n,n2;
2358    int[256] zero_channel;
2359    int[256] really_zero_channel;
2360 
2361 // WINDOWING
2362 
2363    n = f.blocksize[m.blockflag];
2364    map = &f.mapping[m.mapping];
2365 
2366 // FLOORS
2367    n2 = n >> 1;
2368 
2369    for (i=0; i < f.channels; ++i) {
2370       int s = map.chan[i].mux, floor;
2371       zero_channel[i] = FALSE;
2372       floor = map.submap_floor[s];
2373       if (f.floor_types[floor] == 0) {
2374          return error(f, VORBIS_invalid_stream);
2375       } else {
2376          Floor1 *g = &f.floor_config[floor].floor1;
2377          if (get_bits(f, 1)) {
2378             short *finalY;
2379             uint8[256] step2_flag;
2380             static immutable int[4] range_list = [ 256, 128, 86, 64 ];
2381             int range = range_list[g.floor1_multiplier-1];
2382             int offset = 2;
2383             finalY = f.finalY[i];
2384             finalY[0] = cast(short) get_bits(f, ilog(range)-1);
2385             finalY[1] = cast(short) get_bits(f, ilog(range)-1);
2386             for (j=0; j < g.partitions; ++j) {
2387                int pclass = g.partition_class_list[j];
2388                int cdim = g.class_dimensions[pclass];
2389                int cbits = g.class_subclasses[pclass];
2390                int csub = (1 << cbits)-1;
2391                int cval = 0;
2392                if (cbits) {
2393                   Codebook *c = f.codebooks + g.class_masterbooks[pclass];
2394                   DECODE(cval,f,c);
2395                }
2396                for (k=0; k < cdim; ++k) {
2397                   int book = g.subclass_books[pclass][cval & csub];
2398                   cval = cval >> cbits;
2399                   if (book >= 0) {
2400                      int temp;
2401                      Codebook *c = f.codebooks + book;
2402                      DECODE(temp,f,c);
2403                      finalY[offset++] = cast(short)temp;
2404                   } else
2405                      finalY[offset++] = 0;
2406                }
2407             }
2408             if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
2409             step2_flag[0] = step2_flag[1] = 1;
2410             for (j=2; j < g.values; ++j) {
2411                int low, high, pred, highroom, lowroom, room, val;
2412                low = g.neighbors[j][0];
2413                high = g.neighbors[j][1];
2414                //neighbors(g.Xlist, j, &low, &high);
2415                pred = predict_point(g.Xlist[j], g.Xlist[low], g.Xlist[high], finalY[low], finalY[high]);
2416                val = finalY[j];
2417                highroom = range - pred;
2418                lowroom = pred;
2419                if (highroom < lowroom)
2420                   room = highroom * 2;
2421                else
2422                   room = lowroom * 2;
2423                if (val) {
2424                   step2_flag[low] = step2_flag[high] = 1;
2425                   step2_flag[j] = 1;
2426                   if (val >= room)
2427                      if (highroom > lowroom)
2428                         finalY[j] = cast(short)(val - lowroom + pred);
2429                      else
2430                         finalY[j] = cast(short)(pred - val + highroom - 1);
2431                   else
2432                      if (val & 1)
2433                         finalY[j] = cast(short)(pred - ((val+1)>>1));
2434                      else
2435                         finalY[j] = cast(short)(pred + (val>>1));
2436                } else {
2437                   step2_flag[j] = 0;
2438                   finalY[j] = cast(short)pred;
2439                }
2440             }
2441 
2442             // defer final floor computation until _after_ residue
2443             for (j=0; j < g.values; ++j) {
2444                if (!step2_flag[j])
2445                   finalY[j] = -1;
2446             }
2447          } else {
2448            error:
2449             zero_channel[i] = TRUE;
2450          }
2451          // So we just defer everything else to later
2452 
2453          // at this point we've decoded the floor into buffer
2454       }
2455    }
2456 
2457    // at this point we've decoded all floors
2458 
2459    if (f.alloc.alloc_buffer)
2460       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2461 
2462    // re-enable coupled channels if necessary
2463    memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof * f.channels);
2464    for (i=0; i < map.coupling_steps; ++i)
2465       if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
2466          zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = FALSE;
2467       }
2468 
2469 // RESIDUE DECODE
2470    for (i=0; i < map.submaps; ++i) {
2471       float *[STB_VORBIS_MAX_CHANNELS] residue_buffers;
2472       int r;
2473       uint8[256] do_not_decode;
2474       int ch = 0;
2475       for (j=0; j < f.channels; ++j) {
2476          if (map.chan[j].mux == i) {
2477             if (zero_channel[j]) {
2478                do_not_decode[ch] = TRUE;
2479                residue_buffers[ch] = null;
2480             } else {
2481                do_not_decode[ch] = FALSE;
2482                residue_buffers[ch] = f.channel_buffers[j];
2483             }
2484             ++ch;
2485          }
2486       }
2487       r = map.submap_residue[i];
2488       decode_residue(f, residue_buffers.ptr, ch, n2, r, do_not_decode.ptr);
2489    }
2490 
2491    if (f.alloc.alloc_buffer)
2492       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2493 
2494 // INVERSE COUPLING
2495    for (i = map.coupling_steps-1; i >= 0; --i) {
2496       int n3 = n >> 1;
2497       float *m_ = f.channel_buffers[map.chan[i].magnitude];
2498       float *a = f.channel_buffers[map.chan[i].angle    ];
2499       for (j=0; j < n3; ++j) {
2500          float a2,m2;
2501          if (m_[j] > 0)
2502             if (a[j] > 0)
2503                m2 = m_[j], a2 = m_[j] - a[j];
2504             else
2505                a2 = m_[j], m2 = m_[j] + a[j];
2506          else
2507             if (a[j] > 0)
2508                m2 = m_[j], a2 = m_[j] + a[j];
2509             else
2510                a2 = m_[j], m2 = m_[j] - a[j];
2511          m_[j] = m2;
2512          a[j] = a2;
2513       }
2514    }
2515 
2516    // finish decoding the floors
2517    for (i=0; i < f.channels; ++i) {
2518       if (really_zero_channel[i]) {
2519          memset(f.channel_buffers[i], 0, (*f.channel_buffers[i]).sizeof * n2);
2520       } else {
2521          do_floor(f, map, i, n, f.channel_buffers[i], f.finalY[i], null);
2522       }
2523    }
2524 
2525    // INVERSE MDCT
2526    for (i=0; i < f.channels; ++i)
2527       inverse_mdct(f.channel_buffers[i], n, f, m.blockflag);
2528 
2529    // this shouldn't be necessary, unless we exited on an error
2530    // and want to flush to get to the next packet
2531    flush_packet(f);
2532 
2533    if (f.first_decode) {
2534       // assume we start so first non-discarded sample is sample 0
2535       // this isn't to spec, but spec would require us to read ahead
2536       // and decode the size of all current frames--could be done,
2537       // but presumably it's not a commonly used feature
2538       f.current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around)
2539       // we might have to discard samples "from" the next frame too,
2540       // if we're lapping a large block then a small at the start?
2541       f.discard_samples_deferred = n - right_end;
2542       f.current_loc_valid = TRUE;
2543       f.first_decode = FALSE;
2544    } else if (f.discard_samples_deferred) {
2545       if (f.discard_samples_deferred >= right_start - left_start) {
2546          f.discard_samples_deferred -= (right_start - left_start);
2547          left_start = right_start;
2548          *p_left = left_start;
2549       } else {
2550          left_start += f.discard_samples_deferred;
2551          *p_left = left_start;
2552          f.discard_samples_deferred = 0;
2553       }
2554    } else if (f.previous_length == 0 && f.current_loc_valid) {
2555       // we're recovering from a seek... that means we're going to discard
2556       // the samples from this packet even though we know our position from
2557       // the last page header, so we need to update the position based on
2558       // the discarded samples here
2559       // but wait, the code below is going to add this in itself even
2560       // on a discard, so we don't need to do it here...
2561    }
2562 
2563    // check if we have ogg information about the sample # for this packet
2564    if (f.last_seg_which == f.end_seg_with_known_loc) {
2565       // if we have a valid current loc, and this is final:
2566       if (f.current_loc_valid && (f.page_flag & PAGEFLAG_last_page)) {
2567          uint32 current_end = f.known_loc_for_packet;
2568          // then let's infer the size of the (probably) short final frame
2569          if (current_end < f.current_loc + (right_end-left_start)) {
2570             if (current_end < f.current_loc) {
2571                // negative truncation, that's impossible!
2572                *len = 0;
2573             } else {
2574                *len = current_end - f.current_loc;
2575             }
2576             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
2577             if (*len > right_end) *len = right_end; // this should never happen
2578             f.current_loc += *len;
2579             return TRUE;
2580          }
2581       }
2582       // otherwise, just set our sample loc
2583       // guess that the ogg granule pos refers to the _middle_ of the
2584       // last frame?
2585       // set f.current_loc to the position of left_start
2586       f.current_loc = f.known_loc_for_packet - (n2-left_start);
2587       f.current_loc_valid = TRUE;
2588    }
2589    if (f.current_loc_valid)
2590       f.current_loc += (right_start - left_start);
2591 
2592    if (f.alloc.alloc_buffer)
2593       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2594    *len = right_end;  // ignore samples after the window goes to 0
2595 
2596    return TRUE;
2597 }
2598 
2599 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
2600 {
2601    int mode, left_end, right_end;
2602    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
2603    return vorbis_decode_packet_rest(f, len, f.mode_config.ptr + mode, *p_left, left_end, *p_right, right_end, p_left);
2604 }
2605 
2606 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
2607 {
2608    int prev,i,j;
2609    // we use right&left (the start of the right- and left-window sin()-regions)
2610    // to determine how much to return, rather than inferring from the rules
2611    // (same result, clearer code); 'left' indicates where our sin() window
2612    // starts, therefore where the previous window's right edge starts, and
2613    // therefore where to start mixing from the previous buffer. 'right'
2614    // indicates where our sin() ending-window starts, therefore that's where
2615    // we start saving, and where our returned-data ends.
2616 
2617    // mixin from previous window
2618    if (f.previous_length) {
2619       int i2, j2, n = f.previous_length;
2620       float *w = get_window(f, n);
2621       if (w == NULL) return 0;
2622       for (i2=0; i2 < f.channels; ++i2) {
2623          for (j2=0; j2 < n; ++j2)
2624             f.channel_buffers[i2][left+j2] =
2625                f.channel_buffers[i2][left+j2]*w[    j2] +
2626                f.previous_window[i2][     j2]*w[n-1-j2];
2627       }
2628    }
2629 
2630    prev = f.previous_length;
2631 
2632    // last half of this data becomes previous window
2633    f.previous_length = len - right;
2634 
2635    // @OPTIMIZE: could avoid this copy by double-buffering the
2636    // output (flipping previous_window with channel_buffers), but
2637    // then previous_window would have to be 2x as large, and
2638    // channel_buffers couldn't be temp mem (although they're NOT
2639    // currently temp mem, they could be (unless we want to level
2640    // performance by spreading out the computation))
2641    for (i=0; i < f.channels; ++i)
2642       for (j=0; right+j < len; ++j)
2643          f.previous_window[i][j] = f.channel_buffers[i][right+j];
2644 
2645    if (!prev)
2646       // there was no previous packet, so this data isn't valid...
2647       // this isn't entirely true, only the would-have-overlapped data
2648       // isn't valid, but this seems to be what the spec requires
2649       return 0;
2650 
2651    // truncate a short frame
2652    if (len < right) right = len;
2653 
2654    f.samples_output += right-left;
2655 
2656    return right - left;
2657 }
2658 
2659 static int vorbis_pump_first_frame(stb_vorbis *f)
2660 {
2661    int len, right, left, res;
2662    res = vorbis_decode_packet(f, &len, &left, &right);
2663    if (res)
2664       vorbis_finish_frame(f, len, left, right);
2665    return res;
2666 }
2667 
2668 
2669 static int start_decoder(vorb *f)
2670 {
2671    uint8[6] header;
2672    uint8 x,y;
2673    int len,i,j,k, max_submaps = 0;
2674    int longest_floorlist=0;
2675 
2676    // first page, first packet
2677    f.first_decode = TRUE;
2678 
2679    if (!start_page(f))                              return FALSE;
2680    // validate page flag
2681    if (!(f.page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
2682    if (f.page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
2683    if (f.page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
2684    // check for expected packet length
2685    if (f.segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
2686    if (f.segments[0] != 30) {
2687       // check for the Ogg skeleton fishead identifying header to refine our error
2688       if (f.segments[0] == 64 &&
2689           getn(f, header.ptr, 6) &&
2690           header[0] == 'f' &&
2691           header[1] == 'i' &&
2692           header[2] == 's' &&
2693           header[3] == 'h' &&
2694           header[4] == 'e' &&
2695           header[5] == 'a' &&
2696           get8(f)   == 'd' &&
2697           get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
2698       else
2699                                                     return error(f, VORBIS_invalid_first_page);
2700    }
2701 
2702    // read packet
2703    // check packet header
2704    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
2705    if (!getn(f, header.ptr, 6))                         return error(f, VORBIS_unexpected_eof);
2706    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_first_page);
2707    // vorbis_version
2708    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
2709    f.channels = get8(f); if (!f.channels)         return error(f, VORBIS_invalid_first_page);
2710    if (f.channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
2711    f.sample_rate = get32(f); if (!f.sample_rate)  return error(f, VORBIS_invalid_first_page);
2712    get32(f); // bitrate_maximum
2713    get32(f); // bitrate_nominal
2714    get32(f); // bitrate_minimum
2715    x = get8(f);
2716    {
2717       int log0,log1;
2718       log0 = x & 15;
2719       log1 = x >> 4;
2720       f.blocksize_0 = 1 << log0;
2721       f.blocksize_1 = 1 << log1;
2722       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
2723       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
2724       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
2725    }
2726 
2727    // framing_flag
2728    x = get8(f);
2729    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
2730 
2731    // second packet!
2732    if (!start_page(f))                              return FALSE;
2733 
2734    if (!start_packet(f))                            return FALSE;
2735 
2736    if (!next_segment(f))                            return FALSE;
2737 
2738    if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
2739    for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f);
2740    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_setup);
2741    //file vendor
2742    len = get32_packet(f);
2743    f.vendor = cast(char*)setup_malloc(f, len+1);
2744    if (f.vendor == NULL)                           return error(f, VORBIS_outofmem);
2745    for(i=0; i < len; ++i) {
2746       f.vendor[i] = cast(char) get8_packet(f);
2747    }
2748    f.vendor[len] = cast(char)'\0';
2749    //user comments
2750    f.comment_list_length = get32_packet(f);
2751    f.comment_list = null;
2752    if (f.comment_list_length > 0)
2753    {
2754       f.comment_list = cast(char**) setup_malloc(f, cast(int)( (char*).sizeof * (f.comment_list_length) ));
2755       if (f.comment_list == NULL)                  return error(f, VORBIS_outofmem);
2756    }
2757 
2758    for(i=0; i < f.comment_list_length; ++i) {
2759       len = get32_packet(f);
2760       f.comment_list[i] = cast(char*)setup_malloc(f, len+1);
2761       if (f.comment_list[i] == null)               return error(f, VORBIS_outofmem);
2762 
2763       for(j=0; j < len; ++j) {
2764          f.comment_list[i][j] = cast(char) get8_packet(f);
2765       }
2766       f.comment_list[i][len] = cast(char)'\0';
2767    }
2768 
2769    // framing_flag
2770    x = cast(ubyte) get8_packet(f);
2771    if (!(x & 1))
2772        return error(f, VORBIS_invalid_setup);
2773 
2774    bool err;
2775    skip(f, f.bytes_in_seg, &err);
2776    if (err)
2777        return error(f, VORBIS_invalid_setup);
2778    f.bytes_in_seg = 0;
2779 
2780    do {
2781       len = next_segment(f);
2782       skip(f, len, &err);
2783       if (err) 
2784           return error(f, VORBIS_invalid_setup);
2785       f.bytes_in_seg = 0;
2786    } while (len);
2787 
2788    // third packet!
2789    if (!start_packet(f))                            return FALSE;
2790 
2791    crc32_init(f);
2792 
2793    if (get8_packet(f) != VORBIS_packet_setup)
2794        return error(f, VORBIS_invalid_setup);
2795    for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f);
2796    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_setup);
2797 
2798    // codebooks
2799 
2800    f.codebook_count = get_bits(f,8) + 1;
2801    f.codebooks = cast(Codebook *) setup_malloc(f, cast(int)( (*f.codebooks).sizeof * f.codebook_count) );
2802    if (f.codebooks == null)                        return error(f, VORBIS_outofmem);
2803    memset(f.codebooks, 0, (*f.codebooks).sizeof * f.codebook_count);
2804    for (i=0; i < f.codebook_count; ++i) {
2805       uint32 *values;
2806       int ordered, sorted_count;
2807       int total=0;
2808       uint8 *lengths;
2809       Codebook *c = f.codebooks+i;
2810       x = cast(ubyte) get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
2811       x = cast(ubyte) get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
2812       x = cast(ubyte) get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
2813       x = cast(ubyte) get_bits(f, 8);
2814       c.dimensions = (get_bits(f, 8)<<8) + x;
2815       x = cast(ubyte) get_bits(f, 8);
2816       y = cast(ubyte) get_bits(f, 8);
2817       c.entries = (get_bits(f, 8)<<16) + (y<<8) + x;
2818       ordered = get_bits(f,1);
2819       c.sparse = ordered ? 0 : cast(ubyte) get_bits(f,1);
2820 
2821       if (c.dimensions == 0 && c.entries != 0)    return error(f, VORBIS_invalid_setup);
2822 
2823       if (c.sparse)
2824          lengths = cast(uint8 *) setup_temp_malloc(f, c.entries);
2825       else
2826          lengths = c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries);
2827 
2828       if (!lengths) return error(f, VORBIS_outofmem);
2829 
2830       if (ordered) {
2831          int current_entry = 0;
2832          int current_length = get_bits(f,5) + 1;
2833          while (current_entry < c.entries) {
2834             int limit = c.entries - current_entry;
2835             int n = get_bits(f, ilog(limit));
2836             if (current_length >= 32) return error(f, VORBIS_invalid_setup);
2837             if (current_entry + n > cast(int) c.entries) { return error(f, VORBIS_invalid_setup); }
2838             memset(lengths + current_entry, current_length, n);
2839             current_entry += n;
2840             ++current_length;
2841          }
2842       } else {
2843          for (j=0; j < c.entries; ++j) {
2844             int present = c.sparse ? get_bits(f,1) : 1;
2845             if (present) {
2846                lengths[j] = cast(ubyte) (get_bits(f, 5) + 1);
2847                ++total;
2848                if (lengths[j] == 32)
2849                   return error(f, VORBIS_invalid_setup);
2850             } else {
2851                lengths[j] = NO_CODE;
2852             }
2853          }
2854       }
2855 
2856       if (c.sparse && total >= c.entries >> 2) {
2857          // convert sparse items to non-sparse!
2858          if (c.entries > cast(int) f.setup_temp_memory_required)
2859             f.setup_temp_memory_required = c.entries;
2860 
2861          c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries);
2862          if (c.codeword_lengths == null) return error(f, VORBIS_outofmem);
2863          memcpy(c.codeword_lengths, lengths, c.entries);
2864          setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
2865          lengths = c.codeword_lengths;
2866          c.sparse = 0;
2867       }
2868 
2869       // compute the size of the sorted tables
2870       if (c.sparse) {
2871          sorted_count = total;
2872       } else {
2873          sorted_count = 0;
2874          for (j=0; j < c.entries; ++j)
2875             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
2876                ++sorted_count;
2877       }
2878 
2879       c.sorted_entries = sorted_count;
2880       values = null;
2881 
2882       if (!c.sparse) {
2883          c.codewords = cast(uint32 *) setup_malloc(f, cast(int)( (c.codewords[0]).sizeof * c.entries ) );
2884          if (!c.codewords)                  return error(f, VORBIS_outofmem);
2885       } else {
2886          uint size;
2887          if (c.sorted_entries) {
2888             c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.sorted_entries);
2889             if (!c.codeword_lengths)           return error(f, VORBIS_outofmem);
2890             c.codewords = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*c.codewords).sizeof * c.sorted_entries ));
2891             if (!c.codewords)                  return error(f, VORBIS_outofmem);
2892             values = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*values).sizeof * c.sorted_entries ) );
2893             if (!values)                        return error(f, VORBIS_outofmem);
2894          }
2895          size = cast(uint)( c.entries + ((*c.codewords).sizeof + (*values).sizeof) * c.sorted_entries );
2896          if (size > f.setup_temp_memory_required)
2897             f.setup_temp_memory_required = size;
2898       }
2899 
2900       if (!compute_codewords(c, lengths, c.entries, values)) {
2901          if (c.sparse) setup_temp_free(f, values, 0);
2902          return error(f, VORBIS_invalid_setup);
2903       }
2904 
2905       if (c.sorted_entries) {
2906          // allocate an extra slot for sentinels
2907          c.sorted_codewords = cast(uint32 *) setup_malloc(f, cast(int)( (*c.sorted_codewords).sizeof * (c.sorted_entries+1) ));
2908          if (c.sorted_codewords == null) return error(f, VORBIS_outofmem);
2909          // allocate an extra slot at the front so that c.sorted_values[-1] is defined
2910          // so that we can catch that case without an extra if
2911          c.sorted_values    = cast( int   *) setup_malloc(f, cast(int)( (*c.sorted_values   ).sizeof * (c.sorted_entries+1) ) );
2912          if (c.sorted_values == null) return error(f, VORBIS_outofmem);
2913          ++c.sorted_values;
2914          c.sorted_values[-1] = -1;
2915          compute_sorted_huffman(c, lengths, values);
2916       }
2917 
2918       if (c.sparse) {
2919          setup_temp_free(f, values, (*values).sizeof*c.sorted_entries);
2920          setup_temp_free(f, c.codewords, (*c.codewords).sizeof*c.sorted_entries);
2921          setup_temp_free(f, lengths, c.entries);
2922          c.codewords = null;
2923       }
2924 
2925       compute_accelerated_huffman(c);
2926 
2927       c.lookup_type = cast(ubyte) get_bits(f, 4);
2928       if (c.lookup_type > 2) return error(f, VORBIS_invalid_setup);
2929       if (c.lookup_type > 0) {
2930          uint16 *mults;
2931          c.minimum_value = float32_unpack(get_bits(f, 32));
2932          c.delta_value = float32_unpack(get_bits(f, 32));
2933          c.value_bits = cast(ubyte) (get_bits(f, 4)+1);
2934          c.sequence_p = cast(ubyte) get_bits(f,1);
2935          if (c.lookup_type == 1) {
2936             int values2 = lookup1_values(c.entries, c.dimensions);
2937             if (values2 < 0) return error(f, VORBIS_invalid_setup);
2938             c.lookup_values = cast(uint32) values2;
2939          } else {
2940             c.lookup_values = c.entries * c.dimensions;
2941          }
2942          if (c.lookup_values == 0) return error(f, VORBIS_invalid_setup);
2943          mults = cast(uint16 *) setup_temp_malloc(f, cast(int)( (mults[0]).sizeof * c.lookup_values));
2944          if (mults == null) return error(f, VORBIS_outofmem);
2945          for (j=0; j < cast(int) c.lookup_values; ++j) {
2946             int q = get_bits(f, c.value_bits);
2947             if (q == EOP) 
2948             { 
2949                 setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); 
2950                 return error(f, VORBIS_invalid_setup); 
2951             }
2952             mults[j] = cast(ushort)q;
2953          }
2954 
2955          if (c.lookup_type == 1) {
2956             int len2, sparse = c.sparse;
2957             float last=0;
2958             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
2959             if (sparse) {
2960                if (c.sorted_entries == 0) goto skip;
2961                c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.sorted_entries * c.dimensions);
2962             } else
2963                c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.entries        * c.dimensions);
2964             if (c.multiplicands == null) { setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); }
2965             len2 = sparse ? c.sorted_entries : c.entries;
2966             for (j=0; j < len2; ++j) {
2967                uint z = sparse ? c.sorted_values[j] : j;
2968                uint div=1;
2969                for (k=0; k < c.dimensions; ++k) {
2970                   int off = (z / div) % c.lookup_values;
2971                   float val = mults[off]*c.delta_value + c.minimum_value + last;
2972                   c.multiplicands[j*c.dimensions + k] = val;
2973                   if (c.sequence_p)
2974                      last = val;
2975                   if (k+1 < c.dimensions) {
2976                      if (div > uint.max / cast(uint) c.lookup_values) {
2977                         setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values);
2978                         return error(f, VORBIS_invalid_setup);
2979                      }
2980                      div *= c.lookup_values;
2981                   }
2982                }
2983             }
2984             c.lookup_type = 2;
2985          }
2986          else
2987          {
2988             float last=0;
2989             c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.lookup_values);
2990             if (c.multiplicands == null) { setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); }
2991             for (j=0; j < cast(int) c.lookup_values; ++j) {
2992                float val = mults[j] * c.delta_value + c.minimum_value + last;
2993                c.multiplicands[j] = val;
2994                if (c.sequence_p)
2995                   last = val;
2996             }
2997          }
2998         skip:;
2999          setup_temp_free(f, mults, (mults[0]).sizeof * c.lookup_values);
3000 
3001       }
3002    }
3003 
3004    // time domain transfers (notused)
3005 
3006    x = cast(ubyte)(get_bits(f, 6) + 1);
3007    for (i=0; i < x; ++i) {
3008       uint32 z = get_bits(f, 16);
3009       if (z != 0) return error(f, VORBIS_invalid_setup);
3010    }
3011 
3012    // Floors
3013    f.floor_count = get_bits(f, 6)+1;
3014    f.floor_config = cast(Floor *)  setup_malloc(f, f.floor_count * (*f.floor_config).sizeof);
3015    if (f.floor_config == null) return error(f, VORBIS_outofmem);
3016    for (i=0; i < f.floor_count; ++i) {
3017       f.floor_types[i] = cast(ushort) get_bits(f, 16);
3018       if (f.floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3019       if (f.floor_types[i] == 0) {
3020          Floor0 *g = &f.floor_config[i].floor0;
3021          g.order = cast(ubyte) get_bits(f,8);
3022          g.rate = cast(ushort) get_bits(f,16);
3023          g.bark_map_size = cast(ushort) get_bits(f,16);
3024          g.amplitude_bits = cast(ubyte) get_bits(f,6);
3025          g.amplitude_offset = cast(ubyte) get_bits(f,8);
3026          g.number_of_books = cast(ubyte)(get_bits(f,4) + 1);
3027          for (j=0; j < g.number_of_books; ++j)
3028             g.book_list[j] = cast(ubyte) get_bits(f,8);
3029          return error(f, VORBIS_feature_not_supported);
3030       } else {
3031          stbv__floor_ordering[31*8+2] p;
3032          Floor1 *g = &f.floor_config[i].floor1;
3033          int max_class = -1;
3034          g.partitions = cast(ubyte) get_bits(f, 5);
3035          for (j=0; j < g.partitions; ++j) {
3036             g.partition_class_list[j] = cast(ubyte) get_bits(f, 4);
3037             if (g.partition_class_list[j] > max_class)
3038                max_class = g.partition_class_list[j];
3039          }
3040          for (j=0; j <= max_class; ++j) {
3041             g.class_dimensions[j] = cast(ubyte)(get_bits(f, 3)+1);
3042             g.class_subclasses[j] = cast(ubyte)get_bits(f, 2);
3043             if (g.class_subclasses[j]) {
3044                g.class_masterbooks[j] = cast(ubyte) get_bits(f, 8);
3045                if (g.class_masterbooks[j] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3046             }
3047             for (k=0; k < 1 << g.class_subclasses[j]; ++k) {
3048                g.subclass_books[j][k] = cast(int16) (get_bits(f,8)-1);
3049                if (g.subclass_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3050             }
3051          }
3052          g.floor1_multiplier = cast(ubyte)(get_bits(f,2)+1);
3053          g.rangebits = cast(ubyte) get_bits(f,4);
3054          g.Xlist[0] = 0;
3055          g.Xlist[1] = cast(ushort)(1 << g.rangebits);
3056          g.values = 2;
3057          for (j=0; j < g.partitions; ++j) {
3058             int c = g.partition_class_list[j];
3059             for (k=0; k < g.class_dimensions[c]; ++k) {
3060                g.Xlist[g.values] = cast(ushort) get_bits(f, g.rangebits);
3061                ++g.values;
3062             }
3063          }
3064          // precompute the sorting
3065          for (j=0; j < g.values; ++j) {
3066             p[j].x = g.Xlist[j];
3067             p[j].id = cast(ushort) j;
3068          }
3069          qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare_2);
3070          for (j=0; j < g.values-1; ++j)
3071             if (p[j].x == p[j+1].x)
3072                return error(f, VORBIS_invalid_setup);
3073          for (j=0; j < g.values; ++j)
3074             g.sorted_order[j] = cast(uint8) p[j].id;
3075          // precompute the neighbors
3076          for (j=2; j < g.values; ++j) {
3077             int low = 0,hi = 0;
3078             neighbors(g.Xlist.ptr, j, &low,&hi);
3079             g.neighbors[j][0] = cast(ubyte)low;
3080             g.neighbors[j][1] = cast(ubyte)hi;
3081          }
3082 
3083          if (g.values > longest_floorlist)
3084             longest_floorlist = g.values;
3085       }
3086    }
3087 
3088    // Residue
3089    f.residue_count = get_bits(f, 6)+1;
3090    f.residue_config = cast(Residue *) setup_malloc(f, f.residue_count * (f.residue_config[0]).sizeof);
3091    if (f.residue_config == null) return error(f, VORBIS_outofmem);
3092    memset(f.residue_config, 0, f.residue_count * (f.residue_config[0]).sizeof);
3093    for (i=0; i < f.residue_count; ++i) {
3094       uint8[64] residue_cascade;
3095       Residue *r = f.residue_config+i;
3096       f.residue_types[i] = cast(ushort) get_bits(f, 16);
3097       if (f.residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3098       r.begin = get_bits(f, 24);
3099       r.end = get_bits(f, 24);
3100       if (r.end < r.begin) return error(f, VORBIS_invalid_setup);
3101       r.part_size = get_bits(f,24)+1;
3102       r.classifications = cast(ubyte)(get_bits(f,6)+1);
3103       r.classbook = cast(ubyte) get_bits(f,8);
3104       if (r.classbook >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3105       for (j=0; j < r.classifications; ++j) {
3106          uint8 high_bits=0;
3107          uint8 low_bits = cast(ubyte) get_bits(f,3);
3108          if (get_bits(f,1))
3109             high_bits = cast(ubyte) get_bits(f,5);
3110          residue_cascade[j] = cast(ubyte)(high_bits*8 + low_bits);
3111       }
3112       r.residue_books = cast(int16[8]*) setup_malloc(f, (r.residue_books[0]).sizeof * r.classifications);
3113       if (r.residue_books == null) return error(f, VORBIS_outofmem);
3114       for (j=0; j < r.classifications; ++j) {
3115          for (k=0; k < 8; ++k) {
3116             if (residue_cascade[j] & (1 << k)) {
3117                r.residue_books[j][k] = cast(short) get_bits(f, 8);
3118                if (r.residue_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3119             } else {
3120                r.residue_books[j][k] = -1;
3121             }
3122          }
3123       }
3124       // precompute the classifications[] array to avoid inner-loop mod/divide
3125       // call it 'classdata' since we already have r.classifications
3126       r.classdata = cast(uint8 **) setup_malloc(f, (*r.classdata).sizeof * f.codebooks[r.classbook].entries);
3127       if (!r.classdata) return error(f, VORBIS_outofmem);
3128       memset(r.classdata, 0, (*r.classdata).sizeof * f.codebooks[r.classbook].entries);
3129       for (j=0; j < f.codebooks[r.classbook].entries; ++j) {
3130          int classwords = f.codebooks[r.classbook].dimensions;
3131          int temp = j;
3132          r.classdata[j] = cast(uint8 *) setup_malloc(f, (r.classdata[j][0]).sizeof * classwords);
3133          if (r.classdata[j] == null) return error(f, VORBIS_outofmem);
3134          for (k=classwords-1; k >= 0; --k) {
3135             r.classdata[j][k] = cast(ubyte)(temp % r.classifications);
3136             temp /= r.classifications;
3137          }
3138       }
3139    }
3140 
3141    f.mapping_count = get_bits(f,6)+1;
3142    f.mapping = cast(Mapping *) setup_malloc(f, f.mapping_count * (*f.mapping).sizeof);
3143    if (f.mapping == null) return error(f, VORBIS_outofmem);
3144    memset(f.mapping, 0, f.mapping_count * (*f.mapping).sizeof);
3145    for (i=0; i < f.mapping_count; ++i) {
3146       Mapping *m = f.mapping + i;
3147       int mapping_type = get_bits(f,16);
3148       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3149       m.chan = cast(MappingChannel *) setup_malloc(f, f.channels * (*m.chan).sizeof);
3150       if (m.chan == null) return error(f, VORBIS_outofmem);
3151       if (get_bits(f,1))
3152          m.submaps = cast(ubyte)(get_bits(f,4)+1);
3153       else
3154          m.submaps = 1;
3155       if (m.submaps > max_submaps)
3156          max_submaps = m.submaps;
3157       if (get_bits(f,1)) {
3158          m.coupling_steps = cast(ushort)(get_bits(f,8)+1);
3159          if (m.coupling_steps > f.channels) return error(f, VORBIS_invalid_setup);
3160          for (k=0; k < m.coupling_steps; ++k) {
3161             m.chan[k].magnitude = cast(ubyte) get_bits(f, ilog(f.channels-1));
3162             m.chan[k].angle = cast(ubyte) get_bits(f, ilog(f.channels-1));
3163             if (m.chan[k].magnitude >= f.channels)        return error(f, VORBIS_invalid_setup);
3164             if (m.chan[k].angle     >= f.channels)        return error(f, VORBIS_invalid_setup);
3165             if (m.chan[k].magnitude == m.chan[k].angle)   return error(f, VORBIS_invalid_setup);
3166          }
3167       } else
3168          m.coupling_steps = 0;
3169 
3170       // reserved field
3171       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
3172       if (m.submaps > 1) {
3173          for (j=0; j < f.channels; ++j) {
3174             m.chan[j].mux = cast(ubyte) get_bits(f, 4);
3175             if (m.chan[j].mux >= m.submaps)                return error(f, VORBIS_invalid_setup);
3176          }
3177       } else
3178          // @SPECIFICATION: this case is missing from the spec
3179          for (j=0; j < f.channels; ++j)
3180             m.chan[j].mux = 0;
3181 
3182       for (j=0; j < m.submaps; ++j) {
3183          get_bits(f,8); // discard
3184          m.submap_floor[j] = cast(ubyte) get_bits(f,8);
3185          m.submap_residue[j] = cast(ubyte) get_bits(f,8);
3186          if (m.submap_floor[j] >= f.floor_count)      return error(f, VORBIS_invalid_setup);
3187          if (m.submap_residue[j] >= f.residue_count)  return error(f, VORBIS_invalid_setup);
3188       }
3189    }
3190 
3191    // Modes
3192    f.mode_count = get_bits(f, 6)+1;
3193    for (i=0; i < f.mode_count; ++i) {
3194       Mode *m = f.mode_config.ptr+i;
3195       m.blockflag = cast(ubyte) get_bits(f,1);
3196       m.windowtype = cast(ushort) get_bits(f,16);
3197       m.transformtype = cast(ushort) get_bits(f,16);
3198       m.mapping = cast(ubyte) get_bits(f,8);
3199       if (m.windowtype != 0)                 return error(f, VORBIS_invalid_setup);
3200       if (m.transformtype != 0)              return error(f, VORBIS_invalid_setup);
3201       if (m.mapping >= f.mapping_count)     return error(f, VORBIS_invalid_setup);
3202    }
3203 
3204    flush_packet(f);
3205 
3206    f.previous_length = 0;
3207 
3208    for (i=0; i < f.channels; ++i) {
3209       f.channel_buffers[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1);
3210       f.previous_window[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1/2);
3211       f.finalY[i]          = cast(int16 *) setup_malloc(f, (int16).sizeof * longest_floorlist);
3212       if (f.channel_buffers[i] == null || f.previous_window[i] == null || f.finalY[i] == null) return error(f, VORBIS_outofmem);
3213       memset(f.channel_buffers[i], 0, (float).sizeof * f.blocksize_1);      
3214    }
3215 
3216    if (!init_blocksize(f, 0, f.blocksize_0)) return FALSE;
3217    if (!init_blocksize(f, 1, f.blocksize_1)) return FALSE;
3218    f.blocksize[0] = f.blocksize_0;
3219    f.blocksize[1] = f.blocksize_1;
3220 
3221    // compute how much temporary memory is needed
3222 
3223    // 1.
3224    {
3225       uint32 imdct_mem = cast(uint)( (f.blocksize_1 * (float.sizeof) >> 1));
3226       uint32 classify_mem;
3227       int i2,max_part_read=0;
3228       for (i2=0; i2 < f.residue_count; ++i2) {
3229          Residue *r = f.residue_config + i2;
3230          uint actual_size = f.blocksize_1 / 2;
3231          uint limit_r_begin = r.begin < actual_size ? r.begin : actual_size;
3232          uint limit_r_end   = r.end   < actual_size ? r.end   : actual_size;
3233          int n_read = limit_r_end - limit_r_begin;
3234          int part_read = n_read / r.part_size;
3235          if (part_read > max_part_read)
3236             max_part_read = part_read;
3237       }
3238       classify_mem = cast(uint)( f.channels * ((void*).sizeof + max_part_read * (uint8 *).sizeof));
3239       // maximum reasonable partition size is f.blocksize_1
3240 
3241       f.temp_memory_required = classify_mem;
3242       if (imdct_mem > f.temp_memory_required)
3243          f.temp_memory_required = imdct_mem;
3244    }
3245 
3246 
3247    if (f.alloc.alloc_buffer) {
3248       assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3249       // check if there's enough temp memory so we don't error later
3250       if (f.setup_offset + (*f).sizeof + f.temp_memory_required > cast(uint) f.temp_offset)
3251          return error(f, VORBIS_outofmem);
3252    }
3253 
3254    // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
3255    // without PAGEFLAG_continued_packet, so this either points to the first page, or
3256    // the page after the end of the headers. It might be cleaner to point to a page
3257    // in the middle of the headers, when that's the page where the first audio packet
3258    // starts, but we'd have to also correctly skip the end of any continued packet in
3259    // stb_vorbis_seek_start.
3260    if (f.next_seg == -1) {
3261       f.first_audio_page_offset = stb_vorbis_get_file_offset(f);
3262    } else {
3263       f.first_audio_page_offset = 0;
3264    }
3265 
3266    return TRUE;
3267 }
3268 
3269 static void vorbis_deinit(stb_vorbis *p)
3270 {
3271    int i,j;
3272 
3273    setup_free(p, p.vendor);
3274    for (i=0; i < p.comment_list_length; ++i) {
3275       setup_free(p, p.comment_list[i]);
3276    }
3277    setup_free(p, p.comment_list);
3278 
3279    if (p.residue_config) {
3280       for (i=0; i < p.residue_count; ++i) {
3281          Residue *r = p.residue_config+i;
3282          if (r.classdata) {
3283             for (j=0; j < p.codebooks[r.classbook].entries; ++j)
3284                setup_free(p, r.classdata[j]);
3285             setup_free(p, r.classdata);
3286          }
3287          setup_free(p, r.residue_books);
3288       }
3289    }
3290 
3291    if (p.codebooks) {
3292       for (i=0; i < p.codebook_count; ++i) {
3293          Codebook *c = p.codebooks + i;
3294          setup_free(p, c.codeword_lengths);
3295          setup_free(p, c.multiplicands);
3296          setup_free(p, c.codewords);
3297          setup_free(p, c.sorted_codewords);
3298          // c.sorted_values[-1] is the first entry in the array
3299          setup_free(p, c.sorted_values ? c.sorted_values-1 : null);
3300       }
3301       setup_free(p, p.codebooks);
3302    }
3303    setup_free(p, p.floor_config);
3304    setup_free(p, p.residue_config);
3305    if (p.mapping) {
3306       for (i=0; i < p.mapping_count; ++i)
3307          setup_free(p, p.mapping[i].chan);
3308       setup_free(p, p.mapping);
3309    }
3310    for (i=0; i < p.channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
3311       setup_free(p, p.channel_buffers[i]);
3312       setup_free(p, p.previous_window[i]);      
3313       setup_free(p, p.finalY[i]);
3314    }
3315    for (i=0; i < 2; ++i) {
3316       setup_free(p, p.A[i]);
3317       setup_free(p, p.B[i]);
3318       setup_free(p, p.C[i]);
3319       setup_free(p, p.window[i]);
3320       setup_free(p, p.bit_reverse[i]);
3321    }
3322 }
3323 
3324 void stb_vorbis_close(stb_vorbis *p)
3325 {
3326    vorbis_deinit(p);
3327    setup_free(p,p);
3328 }
3329 
3330 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z)
3331 {
3332    memset(p, 0, (*p).sizeof); // NULL out all malloc'd pointers to start
3333    if (z) {
3334       p.alloc = *z;
3335       p.alloc.alloc_buffer_length_in_bytes &= ~7;
3336       p.temp_offset = p.alloc.alloc_buffer_length_in_bytes;
3337    }
3338    p.eof = 0;
3339    p.error = VORBIS__no_error;
3340    p.codebooks = null;
3341 }
3342 
3343 long stb_vorbis_get_sample_offset(stb_vorbis *f)
3344 {
3345    // Note: this is rewritten vs original stb_vorbis, who wasn't returning something good.    
3346    if (f.current_loc_valid)
3347    {
3348         return f.current_sample_loc;
3349    }
3350    else
3351       return -1;
3352 }
3353 
3354 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
3355 {
3356    stb_vorbis_info d;
3357    d.channels = f.channels;
3358    d.sample_rate = f.sample_rate;
3359    d.setup_memory_required = f.setup_memory_required;
3360    d.setup_temp_memory_required = f.setup_temp_memory_required;
3361    d.temp_memory_required = f.temp_memory_required;
3362    d.max_frame_size = f.blocksize_1 >> 1;
3363    return d;
3364 }
3365 
3366 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
3367 {
3368    stb_vorbis_comment d;
3369    d.vendor = f.vendor;
3370    d.comment_list_length = f.comment_list_length;
3371    d.comment_list = f.comment_list;
3372    return d;
3373 }
3374 
3375 int stb_vorbis_get_error(stb_vorbis *f)
3376 {
3377    int e = f.error;
3378    f.error = VORBIS__no_error;
3379    return e;
3380 }
3381 
3382 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
3383 {
3384    stb_vorbis *p = cast(stb_vorbis *) setup_malloc(f, stb_vorbis.sizeof);
3385    return p;
3386 }
3387 
3388 uint stb_vorbis_get_file_offset(stb_vorbis *f)
3389 {
3390     return cast(uint) f._io.tell(f._userData);   
3391 }
3392 
3393 //
3394 // DATA-PULLING API
3395 //
3396 
3397 uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
3398 {
3399    for(;;) {
3400       int n;
3401       if (f.eof) return 0;
3402       n = get8(f);
3403       if (n == 0x4f) { // page header candidate
3404          uint retry_loc = stb_vorbis_get_file_offset(f);
3405          int i;
3406          // check if we're off the end of a file_section stream
3407          if (retry_loc - 25 > f.stream_len)
3408             return 0;
3409          // check the rest of the header
3410          for (i=1; i < 4; ++i)
3411             if (get8(f) != ogg_page_header[i])
3412                break;
3413          if (f.eof) return 0;
3414          if (i == 4) {
3415             uint8[27] header;
3416             uint32 i2, crc, goal, len;
3417             for (i2=0; i2 < 4; ++i2)
3418                header[i2] = ogg_page_header[i2];
3419             for (; i2 < 27; ++i2)
3420                header[i2] = get8(f);
3421             if (f.eof) return 0;
3422             if (header[4] != 0) goto invalid;
3423             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (cast(uint32)header[25]<<24);
3424             for (i2=22; i2 < 26; ++i2)
3425                header[i2] = 0;
3426             crc = 0;
3427             for (i2=0; i2 < 27; ++i2)
3428                crc = crc32_update(f, crc, header[i2]);
3429             len = 0;
3430             for (i2=0; i2 < header[26]; ++i2) {
3431                int s = get8(f);
3432                crc = crc32_update(f, crc,cast(ubyte)s);
3433                len += s;
3434             }
3435             if (len && f.eof) return 0;
3436             for (i2=0; i2 < len; ++i2)
3437                crc = crc32_update(f, crc, get8(f));
3438             // finished parsing probable page
3439             if (crc == goal) {
3440                // we could now check that it's either got the last
3441                // page flag set, OR it's followed by the capture
3442                // pattern, but I guess TECHNICALLY you could have
3443                // a file with garbage between each ogg page and recover
3444                // from it automatically? So even though that paranoia
3445                // might decrease the chance of an invalid decode by
3446                // another 2^32, not worth it since it would hose those
3447                // invalid-but-useful files?
3448                if (end)
3449                   *end = stb_vorbis_get_file_offset(f);
3450                if (last) {
3451                   if (header[5] & 0x04)
3452                      *last = 1;
3453                   else
3454                      *last = 0;
3455                }
3456                set_file_offset(f, retry_loc-1);
3457                return 1;
3458             }
3459          }
3460         invalid:
3461          // not a valid page, so rewind and look for next one
3462          set_file_offset(f, retry_loc);
3463       }
3464    }
3465    assert(false);
3466 }
3467 
3468 
3469 enum SAMPLE_unknown = 0xffffffff;
3470 
3471 // seeking is implemented with a binary search, which narrows down the range to
3472 // 64K, before using a linear search (because finding the synchronization
3473 // pattern can be expensive, and the chance we'd find the end page again is
3474 // relatively high for small ranges)
3475 //
3476 // two initial interpolation-style probes are used at the start of the search
3477 // to try to bound either side of the binary search sensibly, while still
3478 // working in O(log n) time if they fail.
3479 
3480 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
3481 {
3482    uint8[27] header;
3483    uint8[255] lacing;
3484    int i,len;
3485 
3486    // record where the page starts
3487    z.page_start = stb_vorbis_get_file_offset(f);
3488 
3489    // parse the header
3490    getn(f, header.ptr, 27);
3491    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
3492       return 0;
3493    getn(f, lacing.ptr, header[26]);
3494 
3495    // determine the length of the payload
3496    len = 0;
3497    for (i=0; i < header[26]; ++i)
3498       len += lacing[i];
3499 
3500    // this implies where the page ends
3501    z.page_end = z.page_start + 27 + header[26] + len;
3502 
3503    // read the last-decoded sample out of the data
3504    z.last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
3505 
3506    // restore file state to where we were
3507    set_file_offset(f, z.page_start);
3508    return 1;
3509 }
3510 
3511 // rarely used function to seek back to the preceding page while finding the
3512 // start of a packet
3513 static int go_to_page_before(stb_vorbis *f, uint limit_offset)
3514 {
3515    uint previous_safe, end;
3516 
3517    // now we want to seek back 64K from the limit
3518    if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset)
3519       previous_safe = limit_offset - 65536;
3520    else
3521       previous_safe = f.first_audio_page_offset;
3522 
3523    set_file_offset(f, previous_safe);
3524 
3525    while (vorbis_find_page(f, &end, null)) {
3526       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
3527          return 1;
3528       set_file_offset(f, end);
3529    }
3530 
3531    return 0;
3532 }
3533 
3534 // implements the search logic for finding a page and starting decoding. if
3535 // the function succeeds, current_loc_valid will be true and current_loc will
3536 // be less than or equal to the provided sample number (the closer the
3537 // better).
3538 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
3539 {
3540    ProbedPage left, right, mid;
3541    int i, start_seg_with_known_loc, end_pos, page_start;
3542    uint32 delta, stream_length, padding, last_sample_limit;
3543    double offset = 0.0, bytes_per_sample = 0.0;
3544    int probe = 0;
3545 
3546    // find the last page and validate the target sample
3547    stream_length = stb_vorbis_stream_length_in_samples(f);
3548    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
3549    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
3550 
3551    // this is the maximum difference between the window-center (which is the
3552    // actual granule position value), and the right-start (which the spec
3553    // indicates should be the granule position (give or take one)).
3554    padding = ((f.blocksize_1 - f.blocksize_0) >> 2);
3555    if (sample_number < padding)
3556       last_sample_limit = 0;
3557    else
3558       last_sample_limit = sample_number - padding;
3559 
3560    left = f.p_first;
3561    while (left.last_decoded_sample == ~0U) {
3562       // (untested) the first page does not have a 'last_decoded_sample'
3563       set_file_offset(f, left.page_end);
3564       if (!get_seek_page_info(f, &left)) goto error;
3565    }
3566 
3567    right = f.p_last;
3568    assert(right.last_decoded_sample != ~0U);
3569 
3570    // starting from the start is handled differently
3571    if (last_sample_limit <= left.last_decoded_sample) {
3572       if (stb_vorbis_seek_start(f)) {
3573          if (f.current_loc > sample_number)
3574             return error(f, VORBIS_seek_failed);
3575          return 1;
3576       }
3577       return 0;
3578    }
3579 
3580    while (left.page_end != right.page_start) {
3581       assert(left.page_end < right.page_start);
3582       // search range in bytes
3583       delta = right.page_start - left.page_end;
3584       if (delta <= 65536) {
3585          // there's only 64K left to search - handle it linearly
3586          set_file_offset(f, left.page_end);
3587       } else {
3588          if (probe < 2) {
3589             if (probe == 0) {
3590                // first probe (interpolate)
3591                double data_bytes = right.page_end - left.page_start;
3592                bytes_per_sample = data_bytes / right.last_decoded_sample;
3593                offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
3594             } else {
3595                // second probe (try to bound the other side)
3596                double error = (cast(double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
3597                if (error >= 0 && error <  8000) error =  8000;
3598                if (error <  0 && error > -8000) error = -8000;
3599                offset += error * 2;
3600             }
3601 
3602             // ensure the offset is valid
3603             if (offset < left.page_end)
3604                offset = left.page_end;
3605             if (offset > right.page_start - 65536)
3606                offset = right.page_start - 65536;
3607 
3608             set_file_offset(f, cast(uint) offset);
3609          } else {
3610             // binary search for large ranges (offset by 32K to ensure
3611             // we don't hit the right page)
3612             set_file_offset(f, left.page_end + (delta / 2) - 32768);
3613          }
3614 
3615          if (!vorbis_find_page(f, null, null)) goto error;
3616       }
3617 
3618       for (;;) {
3619          if (!get_seek_page_info(f, &mid)) goto error;
3620          if (mid.last_decoded_sample != ~0U) break;
3621          // (untested) no frames end on this page
3622          set_file_offset(f, mid.page_end);
3623          assert(mid.page_start < right.page_start);
3624       }
3625 
3626       // if we've just found the last page again then we're in a tricky file,
3627       // and we're close enough (if it wasn't an interpolation probe).
3628       if (mid.page_start == right.page_start) {
3629          if (probe >= 2 || delta <= 65536)
3630             break;
3631       } else {
3632          if (last_sample_limit < mid.last_decoded_sample)
3633             right = mid;
3634          else
3635             left = mid;
3636       }
3637 
3638       ++probe;
3639    }
3640 
3641    // seek back to start of the last packet
3642    page_start = left.page_start;
3643    set_file_offset(f, page_start);
3644    if (!start_page(f)) return error(f, VORBIS_seek_failed);
3645    end_pos = f.end_seg_with_known_loc;
3646    assert(end_pos >= 0);
3647 
3648    for (;;) {
3649       for (i = end_pos; i > 0; --i)
3650          if (f.segments[i-1] != 255)
3651             break;
3652 
3653       start_seg_with_known_loc = i;
3654 
3655       if (start_seg_with_known_loc > 0 || !(f.page_flag & PAGEFLAG_continued_packet))
3656          break;
3657 
3658       // (untested) the final packet begins on an earlier page
3659       if (!go_to_page_before(f, page_start))
3660          goto error;
3661 
3662       page_start = stb_vorbis_get_file_offset(f);
3663       if (!start_page(f)) goto error;
3664       end_pos = f.segment_count - 1;
3665    }
3666 
3667    // prepare to start decoding
3668    f.current_loc_valid = FALSE;
3669    f.last_seg = FALSE;
3670    f.valid_bits = 0;
3671    f.packet_bytes = 0;
3672    f.bytes_in_seg = 0;
3673    f.previous_length = 0;
3674    f.next_seg = start_seg_with_known_loc;
3675 
3676    for (i = 0; i < start_seg_with_known_loc; i++)
3677    {
3678         bool err;
3679         skip(f, f.segments[i], &err);
3680         if (err)
3681             return error(f, VORBIS_seek_failed);
3682    }
3683 
3684    // start decoding (optimizable - this frame is generally discarded)
3685    if (!vorbis_pump_first_frame(f))
3686       return 0;
3687    if (f.current_loc > sample_number)
3688       return error(f, VORBIS_seek_failed);
3689    return 1;
3690 
3691 error:
3692    // try to restore the file to a valid state
3693    stb_vorbis_seek_start(f);
3694    return error(f, VORBIS_seek_failed);
3695 }
3696 
3697 // the same as vorbis_decode_initial, but without advancing
3698 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3699 {
3700    int bits_read, bytes_read;
3701 
3702    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
3703       return 0;
3704 
3705    // either 1 or 2 bytes were read, figure out which so we can rewind
3706    bits_read = 1 + ilog(f.mode_count-1);
3707    if (f.mode_config[*mode].blockflag)
3708       bits_read += 2;
3709    bytes_read = (bits_read + 7) / 8;
3710 
3711    f.bytes_in_seg += bytes_read;
3712    f.packet_bytes -= bytes_read;
3713    bool err;
3714    skip(f, -bytes_read, &err);
3715    if (err)
3716        return 0;
3717 
3718    if (f.next_seg == -1)
3719       f.next_seg = f.segment_count - 1;
3720    else
3721       f.next_seg--;
3722    f.valid_bits = 0;
3723 
3724    return 1;
3725 }
3726 
3727 int stb_vorbis_seek_frame(stb_vorbis *f, uint sample_number)
3728 {
3729    uint32 max_frame_samples;
3730 
3731    // fast page-level search
3732    if (!seek_to_sample_coarse(f, sample_number))
3733       return 0;
3734 
3735    assert(f.current_loc_valid);
3736    assert(f.current_loc <= sample_number);
3737 
3738    // linear search for the relevant packet
3739    max_frame_samples = (f.blocksize_1*3 - f.blocksize_0) >> 2;
3740    while (f.current_loc < sample_number) {
3741       int left_start, left_end, right_start, right_end, mode, frame_samples;
3742       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
3743          return error(f, VORBIS_seek_failed);
3744       // calculate the number of samples returned by the next frame
3745       frame_samples = right_start - left_start;
3746       if (f.current_loc + frame_samples > sample_number) {
3747          return 1; // the next frame will contain the sample
3748       } else if (f.current_loc + frame_samples + max_frame_samples > sample_number) {
3749          // there's a chance the frame after this could contain the sample
3750          vorbis_pump_first_frame(f);
3751       } else {
3752          // this frame is too early to be relevant
3753          f.current_loc += frame_samples;
3754          f.previous_length = 0;
3755          maybe_start_packet(f);
3756          flush_packet(f);
3757       }
3758    }
3759    // the next frame should start with the sample
3760    if (f.current_loc != sample_number) return error(f, VORBIS_seek_failed);
3761    return 1;
3762 }
3763 
3764 int stb_vorbis_seek(stb_vorbis *f, long sample_number)
3765 {
3766     if (sample_number < 0)
3767         return 0;
3768 
3769     if (sample_number > uint.max)
3770         return 0;
3771 
3772    if (!stb_vorbis_seek_frame(f, cast(uint)sample_number))
3773       return 0;
3774 
3775    if (sample_number != f.current_loc) {
3776       int n;
3777       uint32 frame_start = f.current_loc;
3778       stb_vorbis_get_frame_float(f, &n, null);
3779       assert(sample_number > frame_start);
3780       assert(f.channel_buffer_start + cast(int) (sample_number-frame_start) <= f.channel_buffer_end);
3781       f.channel_buffer_start += (sample_number - frame_start);
3782    }
3783    f.current_sample_loc = sample_number;
3784 
3785    return 1;
3786 }
3787 
3788 int stb_vorbis_seek_start(stb_vorbis *f)
3789 {
3790    set_file_offset(f, f.first_audio_page_offset);
3791    f.previous_length = 0;
3792    f.first_decode = TRUE;
3793    f.next_seg = -1;
3794    return vorbis_pump_first_frame(f);
3795 }
3796 
3797 uint stb_vorbis_stream_length_in_samples(stb_vorbis *f)
3798 {
3799    uint restore_offset, previous_safe;
3800    uint end, last_page_loc;
3801 
3802    if (!f.total_samples) {
3803       uint last;
3804       uint32 lo,hi;
3805       char[6] header;
3806 
3807       // first, store the current decode position so we can restore it
3808       restore_offset = stb_vorbis_get_file_offset(f);
3809 
3810       // now we want to seek back 64K from the end (the last page must
3811       // be at most a little less than 64K, but let's allow a little slop)
3812       if (f.stream_len >= 65536 && f.stream_len-65536 >= f.first_audio_page_offset)
3813          previous_safe = f.stream_len - 65536;
3814       else
3815          previous_safe = f.first_audio_page_offset;
3816 
3817       set_file_offset(f, previous_safe);
3818       // previous_safe is now our candidate 'earliest known place that seeking
3819       // to will lead to the final page'
3820 
3821       if (!vorbis_find_page(f, &end, &last)) {
3822          // if we can't find a page, we're hosed!
3823          f.error = VORBIS_cant_find_last_page;
3824          f.total_samples = 0xffffffff;
3825          goto done;
3826       }
3827 
3828       // check if there are more pages
3829       last_page_loc = stb_vorbis_get_file_offset(f);
3830 
3831       // stop when the last_page flag is set, not when we reach eof;
3832       // this allows us to stop short of a 'file_section' end without
3833       // explicitly checking the length of the section
3834       while (!last) {
3835          set_file_offset(f, end);
3836          if (!vorbis_find_page(f, &end, &last)) {
3837             // the last page we found didn't have the 'last page' flag
3838             // set. whoops!
3839             break;
3840          }
3841          //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging
3842          last_page_loc = stb_vorbis_get_file_offset(f);
3843       }
3844 
3845       set_file_offset(f, last_page_loc);
3846 
3847       // parse the header
3848       getn(f, cast(ubyte*)header.ptr, 6);
3849       // extract the absolute granule position
3850       lo = get32(f);
3851       hi = get32(f);
3852       if (lo == 0xffffffff && hi == 0xffffffff) {
3853          f.error = VORBIS_cant_find_last_page;
3854          f.total_samples = SAMPLE_unknown;
3855          goto done;
3856       }
3857       if (hi)
3858          lo = 0xfffffffe; // saturate
3859       f.total_samples = lo;
3860 
3861       f.p_last.page_start = last_page_loc;
3862       f.p_last.page_end   = end;
3863       f.p_last.last_decoded_sample = lo;
3864 
3865      done:
3866       set_file_offset(f, restore_offset);
3867    }
3868    return f.total_samples == SAMPLE_unknown ? 0 : f.total_samples;
3869 }
3870 
3871 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
3872 {
3873    return stb_vorbis_stream_length_in_samples(f) / cast(float) f.sample_rate;
3874 }
3875 
3876 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
3877 {
3878    int len, right,left,i;
3879 
3880    if (!vorbis_decode_packet(f, &len, &left, &right)) {
3881       f.channel_buffer_start = f.channel_buffer_end = 0;
3882       return 0;
3883    }
3884 
3885    len = vorbis_finish_frame(f, len, left, right);
3886    for (i=0; i < f.channels; ++i)
3887       f.outputs[i] = f.channel_buffers[i] + left;
3888 
3889    f.channel_buffer_start = left;
3890    f.channel_buffer_end   = left+len;
3891 
3892    if (channels) *channels = f.channels;
3893    if (output)   *output = f.outputs.ptr;
3894    return len;
3895 }
3896 
3897 stb_vorbis* stb_vorbis_open_file_section(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc, uint length)
3898 {
3899     stb_vorbis *f;
3900     stb_vorbis p;
3901     vorbis_init(&p, alloc);
3902     p._io = io;
3903     p._userData = userData;
3904     p.stream_len = length;
3905     if (start_decoder(&p)) 
3906     {
3907         f = vorbis_alloc(&p);
3908         if (f) 
3909         {
3910             *f = p;
3911             vorbis_pump_first_frame(f);
3912             return f;
3913         }
3914     }
3915     if (error) *error = p.error;
3916     vorbis_deinit(&p);
3917     return null;
3918 }
3919 
3920 stb_vorbis* stb_vorbis_open_file(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc)
3921 {
3922     uint len = cast(uint) io.getFileLength(userData);
3923     return stb_vorbis_open_file_section(io, userData, error, alloc, len);
3924 }
3925 
3926 
3927 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
3928 {
3929    float **outputs;
3930    int len = num_floats / channels;
3931    int n=0;
3932    int z = f.channels;
3933    if (z > channels) z = channels;
3934    while (n < len) {
3935       int i,j;
3936       int k = f.channel_buffer_end - f.channel_buffer_start;
3937       if (n+k >= len) k = len - n;
3938       for (j=0; j < k; ++j) {
3939          for (i=0; i < z; ++i)
3940             *buffer++ = f.channel_buffers[i][f.channel_buffer_start+j];
3941          for (   ; i < channels; ++i)
3942             *buffer++ = 0;
3943       }
3944       n += k;
3945       f.channel_buffer_start += k;
3946       if (n == len)
3947          break;
3948       if (!stb_vorbis_get_frame_float(f, null, &outputs))
3949          break;
3950    }
3951    return n;
3952 }
3953 
3954 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
3955 {
3956    float **outputs;
3957    int n=0;
3958    int z = f.channels;
3959    if (z > channels) z = channels;
3960    while (n < num_samples) {
3961       int i;
3962       int k = f.channel_buffer_end - f.channel_buffer_start;
3963       if (n+k >= num_samples) k = num_samples - n;
3964       if (k) {
3965          for (i=0; i < z; ++i)
3966             memcpy(buffer[i]+n, f.channel_buffers[i]+f.channel_buffer_start, (float.sizeof)*k);
3967          for (   ; i < channels; ++i)
3968             memset(buffer[i]+n, 0, (float.sizeof) * k);
3969       }
3970       n += k;
3971       f.channel_buffer_start += k;
3972       if (n == num_samples)
3973          break;
3974       if (!stb_vorbis_get_frame_float(f, null, &outputs))
3975          break;
3976    }
3977    return n;
3978 }
3979 
3980 /* Version history
3981     1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
3982                            found with Mayhem by ForAllSecure
3983     1.16    - 2019-03-04 - fix warnings
3984     1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
3985     1.14    - 2018-02-11 - delete bogus dealloca usage
3986     1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
3987     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
3988     1.11    - 2017-07-23 - fix MinGW compilation
3989     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
3990     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
3991     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
3992                            avoid discarding last frame of audio data
3993     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
3994                            some more crash fixes when out of memory or with corrupt files
3995     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
3996                            some crash fixes when out of memory or with corrupt files
3997     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
3998     1.04    - 2014-08-27 - fix missing const-correct case in API
3999     1.03    - 2014-08-07 - Warning fixes
4000     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
4001     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
4002     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
4003                            (API change) report sample rate for decode-full-file funcs
4004     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
4005     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
4006     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
4007     0.99993 - remove assert that fired on legal files with empty tables
4008     0.99992 - rewind-to-start
4009     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
4010     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
4011     0.9998 - add a full-decode function with a memory source
4012     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
4013     0.9996 - query length of vorbis stream in samples/seconds
4014     0.9995 - bugfix to another optimization that only happened in certain files
4015     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
4016     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
4017     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
4018     0.9991 - performance improvement of IMDCT
4019     0.999 - (should have been 0.9990) performance improvement of IMDCT
4020     0.998 - no-CRT support from Casey Muratori
4021     0.997 - bugfixes for bugs found by Terje Mathisen
4022     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
4023     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
4024     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
4025     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
4026     0.992 - fixes for MinGW warning
4027     0.991 - turn fast-float-conversion on by default
4028     0.990 - fix push-mode seek recovery if you seek into the headers
4029     0.98b - fix to bad release of 0.98
4030     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
4031     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
4032     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
4033     0.95 - clamping code for 16-bit functions
4034     0.94 - not publically released
4035     0.93 - fixed all-zero-floor case (was decoding garbage)
4036     0.92 - fixed a memory leak
4037     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
4038     0.90 - first public release
4039 */
4040 
4041 /*
4042 ------------------------------------------------------------------------------
4043 This software is available under 2 licenses -- choose whichever you prefer.
4044 ------------------------------------------------------------------------------
4045 ALTERNATIVE A - MIT License
4046 Copyright (c) 2017 Sean Barrett
4047 Permission is hereby granted, free of charge, to any person obtaining a copy of
4048 this software and associated documentation files (the "Software"), to deal in
4049 the Software without restriction, including without limitation the rights to
4050 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
4051 of the Software, and to permit persons to whom the Software is furnished to do
4052 so, subject to the following conditions:
4053 The above copyright notice and this permission notice shall be included in all
4054 copies or substantial portions of the Software.
4055 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4056 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4057 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4058 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4059 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
4060 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4061 SOFTWARE.
4062 ------------------------------------------------------------------------------
4063 ALTERNATIVE B - Public Domain (www.unlicense.org)
4064 This is free and unencumbered software released into the public domain.
4065 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
4066 software, either in source code form or as a compiled binary, for any purpose,
4067 commercial or non-commercial, and by any means.
4068 In jurisdictions that recognize copyright laws, the author or authors of this
4069 software dedicate any and all copyright interest in the software to the public
4070 domain. We make this dedication for the benefit of the public at large and to
4071 the detriment of our heirs and successors. We intend this dedication to be an
4072 overt act of relinquishment in perpetuity of all present and future rights to
4073 this software under copyright law.
4074 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4075 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4076 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4077 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
4078 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
4079 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4080 ------------------------------------------------------------------------------
4081 */