1 module audioformats.stb_vorbis2;
2 
3 // Ogg Vorbis audio decoder - v1.22 - public domain
4 // http://nothings.org/stb_vorbis/
5 //
6 // Original version written by Sean Barrett in 2007.
7 //
8 // Originally sponsored by RAD Game Tools. Seeking implementation
9 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
10 // Elias Software, Aras Pranckevicius, and Sean Barrett.
11 //
12 // Translated to D by Guillaume Piolat.
13 //
14 // LICENSE
15 //
16 //   See end of file for license information.
17 //
18 // Limitations:
19 //
20 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
21 //   - lossless sample-truncation at beginning ignored
22 //   - cannot concatenate multiple vorbis streams
23 //   - sample positions are 32-bit, limiting seekable 192Khz
24 //       files to around 6 hours (Ogg supports 64-bit)
25 //
26 // Feature contributors:
27 //    Dougall Johnson (sample-exact seeking)
28 //
29 // Bugfix/warning contributors:
30 //    Terje Mathisen     Niklas Frykholm     Andy Hill
31 //    Casey Muratori     John Bolton         Gargaj
32 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
33 //    Bernhard Wodo      Evan Balster        github:alxprd
34 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
35 //    Phillip Bennefall  Rohit               Thiago Goulart
36 //    github:manxorist   Saga Musix          github:infatum
37 //    Timur Gagiev       Maxwell Koo         Peter Waller
38 //    github:audinowho   Dougall Johnson     David Reid
39 //    github:Clownacy    Pedro J. Estebanez  Remi Verschelde
40 //    AnthoFoxo          github:morlat       Gabriel Ravier
41 //
42 // Partial history:
43 //    1.22d   - 2021-10-31 - translated to D to update the one in audio-formats, and perhaps get seeking.
44 //    1.22    - 2021-07-11 - various small fixes
45 //    1.21    - 2021-07-02 - fix bug for files with no comments
46 //    1.20    - 2020-07-11 - several small fixes
47 //    1.19    - 2020-02-05 - warnings
48 //    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
49 //    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
50 //    1.16    - 2019-03-04 - fix warnings
51 //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
52 //    1.14    - 2018-02-11 - delete bogus dealloca usage
53 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
54 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
55 //    1.11    - 2017-07-23 - fix MinGW compilation
56 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
57 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
58 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
59 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
60 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
61 //                           some crash fixes when out of memory or with corrupt files
62 //                           fix some inappropriately signed shifts
63 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
64 //    1.04    - 2014-08-27 - fix missing const-correct case in API
65 //    1.03    - 2014-08-07 - warning fixes
66 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
67 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
68 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
69 //                           (API change) report sample rate for decode-full-file funcs
70 //
71 // See end of file for full version history.
72 //
73 // Notes about the D translation: removed push data API.
74 
75 
76 ///////////   THREAD SAFETY
77 
78 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
79 // them from multiple threads at the same time. However, you can have multiple
80 // stb_vorbis* handles and decode from them independently in multiple thrads.
81 
82 
83 ///////////   MEMORY ALLOCATION
84 
85 // normally stb_vorbis uses malloc() to allocate memory at startup,
86 // and alloca() to allocate temporary memory during a frame on the
87 // stack. (Memory consumption will depend on the amount of setup
88 // data in the file and how you set the compile flags for speed
89 // vs. size. In my test files the maximal-size usage is ~150KB.)
90 //
91 // You can modify the wrapper functions in the source (setup_malloc,
92 // setup_temp_malloc, temp_malloc) to change this behavior, or you
93 // can use a simpler allocation model: you pass in a buffer from
94 // which stb_vorbis will allocate _all_ its memory (including the
95 // temp memory). "open" may fail with a VORBIS_outofmem if you
96 // do not pass in enough data; there is no way to determine how
97 // much you do need except to succeed (at which point you can
98 // query get_info to find the exact amount required. yes I know
99 // this is lame).
100 //
101 // If you pass in a non-NULL buffer of the type below, allocation
102 // will occur from it as described above. Otherwise just pass NULL
103 // to use malloc()/alloca()
104 
105 import core.stdc.stdlib: malloc, free, qsort, abs;
106 import core.stdc.string: memset, memcmp, memcpy;
107 import std.math: ldexp, sin, cos, floor, exp, log, pow;
108 import audioformats.io;
109 
110 nothrow:
111 @nogc:
112 
113 struct stb_vorbis_alloc
114 {
115    ubyte* alloc_buffer;
116    int   alloc_buffer_length_in_bytes;
117 }
118 
119 
120 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
121 
122 struct stb_vorbis_info
123 {
124    uint sample_rate;
125    int channels;
126 
127    uint setup_memory_required;
128    uint setup_temp_memory_required;
129    uint temp_memory_required;
130 
131    int max_frame_size;
132 }
133 
134 struct stb_vorbis_comment
135 {
136    char *vendor;
137 
138    int comment_list_length;
139    char **comment_list;
140 }
141 
142 ////////   ERROR CODES
143 
144 alias STBVorbisError = int;
145 enum : STBVorbisError
146 {
147    VORBIS__no_error,
148 
149    VORBIS_need_more_data=1,             // not a real error
150 
151    VORBIS_invalid_api_mixing,           // can't mix API modes
152    VORBIS_outofmem,                     // not enough memory
153    VORBIS_feature_not_supported,        // uses floor 0
154    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
155    VORBIS_file_open_failure,            // fopen() failed
156    VORBIS_seek_without_length,          // can't seek in unknown-length file
157 
158    VORBIS_unexpected_eof=10,            // file is truncated?
159    VORBIS_seek_invalid,                 // seek past EOF
160 
161    // decoding errors (corrupt/invalid stream) -- you probably
162    // don't care about the exact details of these
163 
164    // vorbis errors:
165    VORBIS_invalid_setup=20,
166    VORBIS_invalid_stream,
167 
168    // ogg errors:
169    VORBIS_missing_capture_pattern=30,
170    VORBIS_invalid_stream_structure_version,
171    VORBIS_continued_packet_flag_invalid,
172    VORBIS_incorrect_stream_serial_number,
173    VORBIS_invalid_first_page,
174    VORBIS_bad_packet_type,
175    VORBIS_cant_find_last_page,
176    VORBIS_seek_failed,
177    VORBIS_ogg_skeleton_not_supported
178 }
179 
180 //
181 //  HEADER ENDS HERE
182 //
183 //////////////////////////////////////////////////////////////////////////////
184 
185 // global configuration settings (e.g. set these in the project/makefile),
186 // or just set them in this file at the top (although ideally the first few
187 // should be visible when the header file is compiled too, although it's not
188 // crucial)
189 
190 // STB_VORBIS_NO_STDIO
191 //     does not compile the code for the APIs that use FILE *s internally
192 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
193 // #define STB_VORBIS_NO_STDIO
194 
195 // STB_VORBIS_NO_INTEGER_CONVERSION
196 //     does not compile the code for converting audio sample data from
197 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
198 // #define STB_VORBIS_NO_INTEGER_CONVERSION
199 
200 // STB_VORBIS_NO_FAST_SCALED_FLOAT
201 //      does not use a fast float-to-int trick to accelerate float-to-int on
202 //      most platforms which requires endianness be defined correctly.
203 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
204 
205 
206 // STB_VORBIS_MAX_CHANNELS [number]
207 //     globally define this to the maximum number of channels you need.
208 //     The spec does not put a restriction on channels except that
209 //     the count is stored in a byte, so 255 is the hard limit.
210 //     Reducing this saves about 16 bytes per value, so using 16 saves
211 //     (255-16)*16 or around 4KB. Plus anything other memory usage
212 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
213 //     6 (5.1 audio), or 2 (stereo only).
214 enum STB_VORBIS_MAX_CHANNELS = 16;
215 
216 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
217 //     sets the log size of the huffman-acceleration table.  Maximum
218 //     supported value is 24. with larger numbers, more decodings are O(1),
219 //     but the table size is larger so worse cache missing, so you'll have
220 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
221 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
222 
223 // STB_VORBIS_FAST_BINARY_LENGTH [number]
224 //     sets the log size of the binary-search acceleration table. this
225 //     is used in similar fashion to the fast-huffman size to set initial
226 //     parameters for the binary search
227 
228 // STB_VORBIS_FAST_HUFFMAN_INT
229 //     The fast huffman tables are much more efficient if they can be
230 //     stored as 16-bit results instead of 32-bit results. This restricts
231 //     the codebooks to having only 65535 possible outcomes, though.
232 //     (At least, accelerated by the huffman table.)
233 enum STB_VORBIS_FAST_HUFFMAN_SHORT = true;
234 
235 enum STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH = false;
236 
237 enum STB_VORBIS_DIVIDES_IN_CODEBOOK = false; // cannot be true, support for this not translated
238 enum STB_VORBIS_DIVIDES_IN_RESIDUE = false;
239 
240 enum STB_VORBIS_DIVIDE_TABLE = false;
241 enum STB_VORBIS_NO_DEFER_FLOOR = false; // STB_VORBIS_NO_DEFER_FLOOR not defined
242 enum STB_VORBIS_NO_CRT = false;
243 enum STB_VORBIS_NO_STDIO = false;
244 enum STB_VORBIS_NO_INTEGER_CONVERSION = true;
245 enum STB_VORBIS_NO_FAST_SCALED_FLOAT = true;
246 
247 static assert(STB_VORBIS_MAX_CHANNELS <= 256);
248 static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24);
249 
250 enum MAX_BLOCKSIZE_LOG = 13; // from specification
251 enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG);
252 
253 alias uint8 = ubyte;
254 alias int8 = byte;
255 alias uint16 = ushort;
256 alias int16 = short;
257 alias uint32 = uint;
258 alias int32 = int;
259 
260 enum TRUE = 1;
261 enum FALSE = 0;
262 enum void* NULL = null;
263 
264 alias codetype = float;
265 
266 // @NOTE
267 //
268 // Some arrays below are tagged "//varies", which means it's actually
269 // a variable-sized piece of data, but rather than malloc I assume it's
270 // small enough it's better to just allocate it all together with the
271 // main thing
272 //
273 // Most of the variables are specified with the smallest size I could pack
274 // them into. It might give better performance to make them all full-sized
275 // integers. It should be safe to freely rearrange the structures or change
276 // the sizes larger--nothing relies on silently truncating etc., nor the
277 // order of variables.
278 
279 enum FAST_HUFFMAN_TABLE_SIZE = (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH);
280 enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE - 1);
281 
282 struct Codebook
283 {
284    int dimensions, entries;
285    uint8 *codeword_lengths;
286    float  minimum_value;
287    float  delta_value;
288    uint8  value_bits;
289    uint8  lookup_type;
290    uint8  sequence_p;
291    uint8  sparse;
292    uint32 lookup_values;
293    codetype *multiplicands;
294    uint32 *codewords;
295    int16[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
296    uint32 *sorted_codewords;
297    int    *sorted_values;
298    int     sorted_entries;
299 } ;
300 
301 struct Floor0
302 {
303    uint8 order;
304    uint16 rate;
305    uint16 bark_map_size;
306    uint8 amplitude_bits;
307    uint8 amplitude_offset;
308    uint8 number_of_books;
309    uint8[16] book_list; // varies
310 }
311 
312 struct Floor1
313 {
314    uint8 partitions;
315    uint8[32] partition_class_list; // varies
316    uint8[16] class_dimensions; // varies
317    uint8[16] class_subclasses; // varies
318    uint8[16] class_masterbooks; // varies
319    int16[8][16] subclass_books; // varies
320    uint16[31*8+2] Xlist; // varies
321    uint8[31*8+2] sorted_order;
322    uint8[2][31*8+2] neighbors;
323    uint8 floor1_multiplier;
324    uint8 rangebits;
325    int values;
326 }
327 
328 union Floor
329 {
330    Floor0 floor0;
331    Floor1 floor1;
332 }
333 
334 struct Residue
335 {
336    uint32 begin, end;
337    uint32 part_size;
338    uint8 classifications;
339    uint8 classbook;
340    uint8 **classdata;
341    int16[8]*residue_books;
342 } ;
343 
344 struct MappingChannel
345 {
346    uint8 magnitude;
347    uint8 angle;
348    uint8 mux;
349 } ;
350 
351 struct Mapping
352 {
353    uint16 coupling_steps;
354    MappingChannel *chan;
355    uint8  submaps;
356    uint8[15]  submap_floor; // varies
357    uint8[15]  submap_residue; // varies
358 } ;
359 
360 struct Mode
361 {
362    uint8 blockflag;
363    uint8 mapping;
364    uint16 windowtype;
365    uint16 transformtype;
366 }
367 
368 struct CRCscan
369 {
370    uint32  goal_crc;    // expected crc if match
371    int     bytes_left;  // bytes left in packet
372    uint32  crc_so_far;  // running crc
373    int     bytes_done;  // bytes processed in _current_ chunk
374    uint32  sample_loc;  // granule pos encoded in page
375 }
376 
377 struct ProbedPage
378 {
379    uint32 page_start, page_end;
380    uint32 last_decoded_sample;
381 }
382 
383 struct stb_vorbis
384 {
385   // user-accessible info
386    uint sample_rate;
387    int channels;
388 
389    uint setup_memory_required;
390    uint temp_memory_required;
391    uint setup_temp_memory_required;
392 
393    char *vendor;
394    int comment_list_length;
395    char **comment_list;
396 
397   // input config
398    IOCallbacks* _io;
399    void* _userData;
400 
401    /*uint8 *stream;
402    uint8 *stream_start;
403    uint8 *stream_end;*/
404 
405    uint32 stream_len;
406 
407    // the page to seek to when seeking to start, may be zero
408    uint32 first_audio_page_offset;
409 
410    // p_first is the page on which the first audio packet ends
411    // (but not necessarily the page on which it starts)
412    ProbedPage p_first, p_last;
413 
414   // memory management
415    stb_vorbis_alloc alloc;
416    int setup_offset;
417    int temp_offset;
418 
419   // run-time results
420    int eof;
421    STBVorbisError error;
422 
423   // user-useful data
424 
425   // header info
426    int[2] blocksize;
427    int blocksize_0, blocksize_1;
428    int codebook_count;
429    Codebook *codebooks;
430    int floor_count;
431    uint16[64] floor_types; // varies
432    Floor *floor_config;
433    int residue_count;
434    uint16[64] residue_types; // varies
435    Residue *residue_config;
436    int mapping_count;
437    Mapping *mapping;
438    int mode_count;
439    Mode[64] mode_config;  // varies
440 
441    uint32 total_samples;
442 
443   // decode buffer
444    float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
445    float*[STB_VORBIS_MAX_CHANNELS] outputs        ;
446 
447    float*[STB_VORBIS_MAX_CHANNELS] previous_window;
448    int previous_length;
449 
450    int16*[STB_VORBIS_MAX_CHANNELS] finalY;
451 
452    long current_sample_loc; // sample location of next sample to decode
453 
454    
455    uint32 current_loc; // sample location of next frame to decode
456    int    current_loc_valid;
457 
458   // per-blocksize precomputed data
459 
460    // twiddle factors
461    float*[2] A, B, C;
462    float*[2] window;
463    uint16*[2] bit_reverse;
464 
465   // current page/packet/segment streaming info
466    uint32 serial; // stream serial number for verification
467    int last_page;
468    int segment_count;
469    uint8[255] segments;
470    uint8 page_flag;
471    uint8 bytes_in_seg;
472    uint8 first_decode;
473    int next_seg;
474    int last_seg;  // flag that we're on the last segment
475    int last_seg_which; // what was the segment number of the last seg?
476    uint32 acc;
477    int valid_bits;
478    int packet_bytes;
479    int end_seg_with_known_loc;
480    uint32 known_loc_for_packet;
481    int discard_samples_deferred;
482    uint32 samples_output;
483 
484   // sample-access
485    int channel_buffer_start;
486    int channel_buffer_end;
487 
488    uint32[256] crc_table; // was a global, moved here
489 };
490 
491 alias vorb = stb_vorbis;
492 
493 static int error(vorb *f, STBVorbisError e)
494 {
495    f.error = e;
496    if (!f.eof && e != VORBIS_need_more_data) {
497       f.error=e; // breakpoint for debugging
498    }
499    return 0;
500 }
501 
502 
503 // these functions are used for allocating temporary memory
504 // while decoding. if you can afford the stack space, use
505 // alloca(); otherwise, provide a temp buffer and it will
506 // allocate out of those.
507 
508 size_t array_size_required(size_t count, size_t size)
509 {
510     return (count * ((void *).sizeof + size));
511 }
512 
513 void* temp_alloc(vorb *f, size_t size)
514 {
515     assert(f.alloc.alloc_buffer);
516     return setup_temp_malloc(f, cast(int)size); // note: never use alloca
517 }
518 
519 void temp_free(vorb *f, void* p)
520 {
521 }
522 
523 int temp_alloc_save(vorb *f)
524 {
525     return f.temp_offset;
526 }
527 
528 void temp_alloc_restore(vorb *f, int p)
529 {
530     f.temp_offset = p;
531 }
532 
533 void* temp_block_array(vorb *f, size_t count, size_t size)
534 {
535     return make_block_array(temp_alloc(f, cast(int) array_size_required(count,size)), cast(int) count, cast(int)size);
536 }
537 
538 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
539 static void *make_block_array(void *mem, int count, int size)
540 {
541    int i;
542    void ** p = cast(void **) mem;
543    char *q = cast(char *) (p + count);
544    for (i=0; i < count; ++i) {
545       p[i] = q;
546       q += size;
547    }
548    return p;
549 }
550 
551 void *setup_malloc(vorb *f, size_t sz)
552 {
553     return setup_malloc(f, cast(int)sz);
554 }
555 
556 void *setup_malloc(vorb *f, int sz)
557 {
558    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
559    f.setup_memory_required += sz;
560    if (f.alloc.alloc_buffer) {
561       void *p = cast(char *) f.alloc.alloc_buffer + f.setup_offset;
562       if (f.setup_offset + sz > f.temp_offset) return NULL;
563       f.setup_offset += sz;
564       return p;
565    }
566    return sz ? malloc(sz) : NULL;
567 }
568 
569 static void setup_free(vorb *f, void *p)
570 {
571    if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack
572    free(p);
573 }
574 
575 void *setup_temp_malloc(vorb *f, int sz)
576 {
577    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
578    if (f.alloc.alloc_buffer) {
579       if (f.temp_offset - sz < f.setup_offset) 
580           return NULL;
581       f.temp_offset -= sz;
582       return cast(char *) f.alloc.alloc_buffer + f.temp_offset;
583    }
584    return malloc(sz);
585 }
586 
587 void setup_temp_free(vorb *f, void *p, size_t sz)
588 {
589    if (f.alloc.alloc_buffer) 
590    {
591       f.temp_offset += (sz+7)&~7;
592       return;
593    }
594    free(p);
595 }
596 
597 enum uint CRC32_POLY = 0x04c11db7;   // from spec
598 
599 void crc32_init(vorb* f)
600 {
601    int i,j;
602    uint32 s;
603    for(i=0; i < 256; i++) {
604       for (s=cast(uint32) i << 24, j=0; j < 8; ++j)
605          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
606       f.crc_table[i] = s;
607    }
608 }
609 
610 uint32 crc32_update(vorb* f, uint32 crc, uint8 byte_)
611 {
612    return (crc << 8) ^ f.crc_table[byte_ ^ (crc >> 24)];
613 }
614 
615 
616 // used in setup, and for huffman that doesn't go fast path
617 static uint bit_reverse(uint n)
618 {
619   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
620   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
621   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
622   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
623   return (n >> 16) | (n << 16);
624 }
625 
626 static float square(float x)
627 {
628    return x*x;
629 }
630 
631 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
632 // as required by the specification. fast(?) implementation from stb.h
633 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
634 static int ilog(int32 n)
635 {
636    static immutable byte[16] log2_4 = [ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 ];
637 
638    if (n < 0) return 0; // signed n returns 0
639 
640    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
641    if (n < (1 << 14))
642         if (n < (1 <<  4))            return  0 + log2_4[n      ];
643         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
644              else                     return 10 + log2_4[n >> 10];
645    else if (n < (1 << 24))
646              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
647              else                     return 20 + log2_4[n >> 20];
648         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
649              else                     return 30 + log2_4[n >> 30];
650 }
651 
652 enum M_PI = 3.14159265358979323846264f;  // from CRC
653 
654 // code length assigned to a value with no huffman encoding
655 enum NO_CODE = 255;
656 
657 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
658 //
659 // these functions are only called at setup, and only a few times
660 // per file
661 
662 static float float32_unpack(uint32 x)
663 {
664    // from the specification
665    uint32 mantissa = x & 0x1fffff;
666    uint32 sign = x & 0x80000000;
667    uint32 exp = (x & 0x7fe00000) >> 21;
668    double res = sign ? -cast(double)mantissa : cast(double)mantissa;
669    return cast(float) ldexp(cast(float)res, cast(int)exp-788);
670 }
671 
672 
673 // zlib & jpeg huffman tables assume that the output symbols
674 // can either be arbitrarily arranged, or have monotonically
675 // increasing frequencies--they rely on the lengths being sorted;
676 // this makes for a very simple generation algorithm.
677 // vorbis allows a huffman table with non-sorted lengths. This
678 // requires a more sophisticated construction, since symbols in
679 // order do not map to huffman codes "in order".
680 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
681 {
682    if (!c.sparse) {
683       c.codewords      [symbol] = huff_code;
684    } else {
685       c.codewords       [count] = huff_code;
686       c.codeword_lengths[count] = cast(ubyte)len;
687       values             [count] = symbol;
688    }
689 }
690 
691 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
692 {
693    int i,k,m=0;
694    uint32[32] available;
695 
696    memset(available.ptr, 0, 32);
697    // find the first entry
698    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
699    if (k == n) { assert(c.sorted_entries == 0); return TRUE; }
700    assert(len[k] < 32); // no error return required, code reading lens checks this
701    // add to the list
702    add_entry(c, 0, k, m++, len[k], values);
703    // add all available leaves
704    for (i=1; i <= len[k]; ++i)
705       available[i] = 1U << (32-i);
706    // note that the above code treats the first case specially,
707    // but it's really the same as the following code, so they
708    // could probably be combined (except the initial code is 0,
709    // and I use 0 in available[] to mean 'empty')
710    for (i=k+1; i < n; ++i) {
711       uint32 res;
712       int z = len[i], y;
713       if (z == NO_CODE) continue;
714       assert(z < 32); // no error return required, code reading lens checks this
715       // find lowest available leaf (should always be earliest,
716       // which is what the specification calls for)
717       // note that this property, and the fact we can never have
718       // more than one free leaf at a given level, isn't totally
719       // trivial to prove, but it seems true and the assert never
720       // fires, so!
721       while (z > 0 && !available[z]) --z;
722       if (z == 0) { return FALSE; }
723       res = available[z];
724       available[z] = 0;
725       add_entry(c, bit_reverse(res), i, m++, len[i], values);
726       // propagate availability up the tree
727       if (z != len[i]) {
728          for (y=len[i]; y > z; --y) {
729             assert(available[y] == 0);
730             available[y] = res + (1 << (32-y));
731          }
732       }
733    }
734    return TRUE;
735 }
736 
737 // accelerated huffman table allows fast O(1) match of all symbols
738 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
739 static void compute_accelerated_huffman(Codebook *c)
740 {
741    int i, len;
742    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
743       c.fast_huffman[i] = -1;
744 
745    len = c.sparse ? c.sorted_entries : c.entries;
746    if (len > 32767) len = 32767; // largest possible value we can encode!
747    for (i=0; i < len; ++i) {
748       if (c.codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
749          uint32 z = c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i];
750          // set table entries for all bit combinations in the higher bits
751          while (z < FAST_HUFFMAN_TABLE_SIZE) {
752              c.fast_huffman[z] = cast(short)i;
753              z += 1 << c.codeword_lengths[i];
754          }
755       }
756    }
757 }
758 
759 extern(C) int uint32_compare_2(const void *p, const void *q)
760 {
761    uint32 x = *cast(uint32 *) p;
762    uint32 y = *cast(uint32 *) q;
763    return x < y ? -1 : x > y;
764 }
765 
766 static int include_in_sort(Codebook *c, uint8 len)
767 {
768    if (c.sparse) { assert(len != NO_CODE); return TRUE; }
769    if (len == NO_CODE) return FALSE;
770    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
771    return FALSE;
772 }
773 
774 // if the fast table above doesn't work, we want to binary
775 // search them... need to reverse the bits
776 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
777 {
778    int i, len;
779    // build a list of all the entries
780    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
781    // this is kind of a frivolous optimization--I don't see any performance improvement,
782    // but it's like 4 extra lines of code, so.
783    if (!c.sparse) {
784       int k = 0;
785       for (i=0; i < c.entries; ++i)
786          if (include_in_sort(c, lengths[i]))
787             c.sorted_codewords[k++] = bit_reverse(c.codewords[i]);
788       assert(k == c.sorted_entries);
789    } else {
790       for (i=0; i < c.sorted_entries; ++i)
791          c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
792    }
793 
794    qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare_2);
795    c.sorted_codewords[c.sorted_entries] = 0xffffffff;
796 
797    len = c.sparse ? c.sorted_entries : c.entries;
798    // now we need to indicate how they correspond; we could either
799    //   #1: sort a different data structure that says who they correspond to
800    //   #2: for each sorted entry, search the original list to find who corresponds
801    //   #3: for each original entry, find the sorted entry
802    // #1 requires extra storage, #2 is slow, #3 can use binary search!
803    for (i=0; i < len; ++i) {
804       ubyte huff_len = c.sparse ? lengths[values[i]] : lengths[i];
805       if (include_in_sort(c, huff_len)) {
806          uint32 code = bit_reverse(c.codewords[i]);
807          int x=0, n=c.sorted_entries;
808          while (n > 1) {
809             // invariant: sc[x] <= code < sc[x+n]
810             int m = x + (n >> 1);
811             if (c.sorted_codewords[m] <= code) {
812                x = m;
813                n -= (n>>1);
814             } else {
815                n >>= 1;
816             }
817          }
818          assert(c.sorted_codewords[x] == code);
819          if (c.sparse) {
820             c.sorted_values[x] = values[i];
821             c.codeword_lengths[x] = huff_len;
822          } else {
823             c.sorted_values[x] = i;
824          }
825       }
826    }
827 }
828 
829 // only run while parsing the header (3 times)
830 static int vorbis_validate(uint8 *data)
831 {
832    static immutable char[6] vorbis = "vorbis";
833    return memcmp(data, vorbis.ptr, 6) == 0;
834 }
835 
836 // called from setup only, once per code book
837 // (formula implied by specification)
838 static int lookup1_values(int entries, int dim)
839 {
840    int r = cast(int) floor(exp(cast(float) log(cast(float) entries) / dim));
841    if (cast(int) floor(pow(cast(float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
842       ++r;                                              // floor() to avoid _ftol() when non-CRT
843    if (pow(cast(float) r+1, dim) <= entries)
844       return -1;
845    if (cast(int) floor(pow(cast(float) r, dim)) > entries)
846       return -1;
847    return r;
848 }
849 
850 // called twice per file
851 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
852 {
853    int n4 = n >> 2, n8 = n >> 3;
854    int k,k2;
855 
856    for (k=k2=0; k < n4; ++k,k2+=2) {
857       A[k2  ] = cast(float)  cos(4*k*M_PI/n);
858       A[k2+1] = cast(float) -sin(4*k*M_PI/n);
859       B[k2  ] = cast(float)  cos((k2+1)*M_PI/n/2) * 0.5f;
860       B[k2+1] = cast(float)  sin((k2+1)*M_PI/n/2) * 0.5f;
861    }
862    for (k=k2=0; k < n8; ++k,k2+=2) {
863       C[k2  ] = cast(float)  cos(2*(k2+1)*M_PI/n);
864       C[k2+1] = cast(float) -sin(2*(k2+1)*M_PI/n);
865    }
866 }
867 
868 static void compute_window(int n, float *window)
869 {
870    int n2 = n >> 1, i;
871    for (i=0; i < n2; ++i)
872       window[i] = cast(float) sin(0.5 * M_PI * square(cast(float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
873 }
874 
875 static void compute_bitreverse(int n, uint16 *rev)
876 {
877    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
878    int i, n8 = n >> 3;
879    for (i=0; i < n8; ++i)
880       rev[i] = cast(ushort) ( (bit_reverse(i) >> (32-ld+3)) << 2 );
881 }
882 
883 static int init_blocksize(vorb *f, int b, int n)
884 {
885    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
886    f.A[b] = cast(float *) setup_malloc(f, 4 * n2);
887    f.B[b] = cast(float *) setup_malloc(f, 4 * n2);
888    f.C[b] = cast(float *) setup_malloc(f, 4 * n4);
889    if (!f.A[b] || !f.B[b] || !f.C[b]) return error(f, VORBIS_outofmem);
890    compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);
891    f.window[b] = cast(float *) setup_malloc(f, 4 * n2);
892    if (!f.window[b]) return error(f, VORBIS_outofmem);
893    compute_window(n, f.window[b]);
894    f.bit_reverse[b] = cast(uint16 *) setup_malloc(f, 2 * n8);
895    if (!f.bit_reverse[b]) return error(f, VORBIS_outofmem);
896    compute_bitreverse(n, f.bit_reverse[b]);
897    return TRUE;
898 }
899 
900 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
901 {
902    int low = -1;
903    int high = 65536;
904    int i;
905    for (i=0; i < n; ++i) {
906       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
907       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
908    }
909 }
910 
911 // this has been repurposed so y is now the original index instead of y
912 struct stbv__floor_ordering
913 {
914    uint16 x,id;
915 }
916 
917 extern(C) int point_compare_2(const void *p, const void *q)
918 {
919    stbv__floor_ordering *a = cast(stbv__floor_ordering *) p;
920    stbv__floor_ordering *b = cast(stbv__floor_ordering *) q;
921    return a.x < b.x ? -1 : a.x > b.x;
922 }
923 
924 //
925 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
926 
927 uint8 get8(vorb *z)
928 {
929     ubyte r;
930     if (z._io.read(&r, 1, z._userData) == 1)
931         return r;
932     else
933     {
934         z.eof = TRUE; 
935         return 0;
936     }
937 }
938 
939 static uint32 get32(vorb *f)
940 {
941    uint32 x;
942    x = get8(f);
943    x += get8(f) << 8;
944    x += get8(f) << 16;
945    x += cast(uint32) get8(f) << 24;
946    return x;
947 }
948 
949 static int getn(vorb *z, uint8 *data, int n)
950 {
951     ubyte r;
952     if (z._io.read(data, n, z._userData) == n)
953         return 1;
954     else
955     {
956         z.eof = 1; 
957         return 0;
958     }
959 }
960 
961 static void skip(vorb *z, int n)
962 {
963     z._io.skip(n, z._userData);
964 }
965 
966 static int set_file_offset(stb_vorbis *f, uint loc)
967 {
968     if (f._io.seek( loc, false, f._userData))
969     {
970         return 1;
971     }
972     else
973     {
974         f.eof = 1;
975         f._io.seek(f.stream_len, false, f._userData); // stb_vorbis set to end of the file, for some reason.
976         return 0;
977     }
978 }
979 
980 
981 static immutable uint8[4] ogg_page_header = [ 0x4f, 0x67, 0x67, 0x53 ];
982 
983 static int capture_pattern(vorb *f)
984 {
985    if (0x4f != get8(f)) return FALSE;
986    if (0x67 != get8(f)) return FALSE;
987    if (0x67 != get8(f)) return FALSE;
988    if (0x53 != get8(f)) return FALSE;
989    return TRUE;
990 }
991 
992 enum PAGEFLAG_continued_packet  = 1;
993 enum PAGEFLAG_first_page        = 2;
994 enum PAGEFLAG_last_page         = 4;
995 
996 static int start_page_no_capturepattern(vorb *f)
997 {
998    uint32 loc0,loc1,n;
999    if (f.first_decode) {
1000       f.p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1001    }
1002    // stream structure version
1003    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1004    // header flag
1005    f.page_flag = get8(f);
1006    // absolute granule position
1007    loc0 = get32(f);
1008    loc1 = get32(f);
1009    // @TODO: validate loc0,loc1 as valid positions?
1010    // stream serial number -- vorbis doesn't interleave, so discard
1011    get32(f);
1012    //if (f.serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1013    // page sequence number
1014    n = get32(f);
1015    f.last_page = n;
1016    // CRC32
1017    get32(f);
1018    // page_segments
1019    f.segment_count = get8(f);
1020    if (!getn(f, f.segments.ptr, f.segment_count))
1021       return error(f, VORBIS_unexpected_eof);
1022    // assume we _don't_ know any the sample position of any segments
1023    f.end_seg_with_known_loc = -2;
1024    if (loc0 != ~0U || loc1 != ~0U) {
1025       int i;
1026       // determine which packet is the last one that will complete
1027       for (i=f.segment_count-1; i >= 0; --i)
1028          if (f.segments[i] < 255)
1029             break;
1030       // 'i' is now the index of the _last_ segment of a packet that ends
1031       if (i >= 0) {
1032          f.end_seg_with_known_loc = i;
1033          f.known_loc_for_packet   = loc0;
1034       }
1035    }
1036    if (f.first_decode) {
1037       int i,len;
1038       len = 0;
1039       for (i=0; i < f.segment_count; ++i)
1040          len += f.segments[i];
1041       len += 27 + f.segment_count;
1042       f.p_first.page_end = f.p_first.page_start + len;
1043       f.p_first.last_decoded_sample = loc0;
1044    }
1045    f.next_seg = 0;
1046    return TRUE;
1047 }
1048 
1049 static int start_page(vorb *f)
1050 {
1051    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1052    return start_page_no_capturepattern(f);
1053 }
1054 
1055 static int start_packet(vorb *f)
1056 {
1057    while (f.next_seg == -1) {
1058       if (!start_page(f)) return FALSE;
1059       if (f.page_flag & PAGEFLAG_continued_packet)
1060          return error(f, VORBIS_continued_packet_flag_invalid);
1061    }
1062    f.last_seg = FALSE;
1063    f.valid_bits = 0;
1064    f.packet_bytes = 0;
1065    f.bytes_in_seg = 0;
1066    // f.next_seg is now valid
1067    return TRUE;
1068 }
1069 
1070 int maybe_start_packet(vorb *f)
1071 {
1072    if (f.next_seg == -1) {
1073       int x = get8(f);
1074       if (f.eof) return FALSE; // EOF at page boundary is not an error!
1075       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1076       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1077       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1078       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1079       if (!start_page_no_capturepattern(f)) return FALSE;
1080       if (f.page_flag & PAGEFLAG_continued_packet) {
1081          // set up enough state that we can read this packet if we want,
1082          // e.g. during recovery
1083          f.last_seg = FALSE;
1084          f.bytes_in_seg = 0;
1085          return error(f, VORBIS_continued_packet_flag_invalid);
1086       }
1087    }
1088    return start_packet(f);
1089 }
1090 
1091 static int next_segment(vorb *f)
1092 {
1093    int len;
1094    if (f.last_seg) return 0;
1095    if (f.next_seg == -1) {
1096       f.last_seg_which = f.segment_count-1; // in case start_page fails
1097       if (!start_page(f)) { f.last_seg = 1; return 0; }
1098       if (!(f.page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1099    }
1100    len = f.segments[f.next_seg++];
1101    if (len < 255) {
1102       f.last_seg = TRUE;
1103       f.last_seg_which = f.next_seg-1;
1104    }
1105    if (f.next_seg >= f.segment_count)
1106       f.next_seg = -1;
1107    assert(f.bytes_in_seg == 0);
1108    f.bytes_in_seg = cast(ubyte)len;
1109    return len;
1110 }
1111 
1112 enum EOP    = (-1);
1113 enum INVALID_BITS = (-1);
1114 
1115 static int get8_packet_raw(vorb *f)
1116 {
1117    if (!f.bytes_in_seg) {  // CLANG!
1118       if (f.last_seg) return EOP;
1119       else if (!next_segment(f)) return EOP;
1120    }
1121    assert(f.bytes_in_seg > 0);
1122    --f.bytes_in_seg;
1123    ++f.packet_bytes;
1124    return get8(f);
1125 }
1126 
1127 static int get8_packet(vorb *f)
1128 {
1129    int x = get8_packet_raw(f);
1130    f.valid_bits = 0;
1131    return x;
1132 }
1133 
1134 static int get32_packet(vorb *f)
1135 {
1136    uint32 x;
1137    x = get8_packet(f);
1138    x += get8_packet(f) << 8;
1139    x += get8_packet(f) << 16;
1140    x += cast(uint32) get8_packet(f) << 24;
1141    return x;
1142 }
1143 
1144 static void flush_packet(vorb *f)
1145 {
1146    while (get8_packet_raw(f) != EOP)
1147    {
1148    }
1149 }
1150 
1151 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1152 // as the huffman decoder?
1153 static uint32 get_bits(vorb *f, int n)
1154 {
1155    uint32 z;
1156 
1157    if (f.valid_bits < 0) return 0;
1158    if (f.valid_bits < n) {
1159       if (n > 24) {
1160          // the accumulator technique below would not work correctly in this case
1161          z = get_bits(f, 24);
1162          z += get_bits(f, n-24) << 24;
1163          return z;
1164       }
1165       if (f.valid_bits == 0) f.acc = 0;
1166       while (f.valid_bits < n) {
1167          int z2 = get8_packet_raw(f);
1168          if (z2 == EOP) {
1169             f.valid_bits = INVALID_BITS;
1170             return 0;
1171          }
1172          f.acc += z2 << f.valid_bits;
1173          f.valid_bits += 8;
1174       }
1175    }
1176 
1177    assert(f.valid_bits >= n);
1178    z = f.acc & ((1 << n)-1);
1179    f.acc >>= n;
1180    f.valid_bits -= n;
1181    return z;
1182 }
1183 
1184 // @OPTIMIZE: primary accumulator for huffman
1185 // expand the buffer to as many bits as possible without reading off end of packet
1186 // it might be nice to allow f.valid_bits and f.acc to be stored in registers,
1187 // e.g. cache them locally and decode locally
1188 static void prep_huffman(vorb *f)
1189 {
1190    if (f.valid_bits <= 24) {
1191       if (f.valid_bits == 0) f.acc = 0;
1192       do {
1193          int z;
1194          if (f.last_seg && !f.bytes_in_seg) return;
1195          z = get8_packet_raw(f);
1196          if (z == EOP) return;
1197          f.acc += cast(uint) z << f.valid_bits;
1198          f.valid_bits += 8;
1199       } while (f.valid_bits <= 24);
1200    }
1201 }
1202 
1203 enum
1204 {
1205    VORBIS_packet_id = 1,
1206    VORBIS_packet_comment = 3,
1207    VORBIS_packet_setup = 5
1208 };
1209 
1210 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1211 {
1212    int i;
1213    prep_huffman(f);
1214 
1215    if (c.codewords == NULL && c.sorted_codewords == NULL)
1216       return -1;
1217 
1218    // cases to use binary search: sorted_codewords && !c.codewords
1219    //                             sorted_codewords && c.entries > 8
1220    if (c.entries > 8 ? c.sorted_codewords!=NULL : !c.codewords) {
1221       // binary search
1222       uint32 code = bit_reverse(f.acc);
1223       int x=0, n=c.sorted_entries, len;
1224 
1225       while (n > 1) {
1226          // invariant: sc[x] <= code < sc[x+n]
1227          int m = x + (n >> 1);
1228          if (c.sorted_codewords[m] <= code) {
1229             x = m;
1230             n -= (n>>1);
1231          } else {
1232             n >>= 1;
1233          }
1234       }
1235       // x is now the sorted index
1236       if (!c.sparse) x = c.sorted_values[x];
1237       // x is now sorted index if sparse, or symbol otherwise
1238       len = c.codeword_lengths[x];
1239       if (f.valid_bits >= len) {
1240          f.acc >>= len;
1241          f.valid_bits -= len;
1242          return x;
1243       }
1244 
1245       f.valid_bits = 0;
1246       return -1;
1247    }
1248 
1249    // if small, linear search
1250    assert(!c.sparse);
1251    for (i=0; i < c.entries; ++i) {
1252       if (c.codeword_lengths[i] == NO_CODE) continue;
1253       if (c.codewords[i] == (f.acc & ((1 << c.codeword_lengths[i])-1))) {
1254          if (f.valid_bits >= c.codeword_lengths[i]) {
1255             f.acc >>= c.codeword_lengths[i];
1256             f.valid_bits -= c.codeword_lengths[i];
1257             return i;
1258          }
1259          f.valid_bits = 0;
1260          return -1;
1261       }
1262    }
1263 
1264    error(f, VORBIS_invalid_stream);
1265    f.valid_bits = 0;
1266    return -1;
1267 }
1268 
1269 
1270 static int codebook_decode_scalar(vorb *f, Codebook *c)
1271 {
1272    int i;
1273    if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1274       prep_huffman(f);
1275    // fast huffman table lookup
1276    i = f.acc & FAST_HUFFMAN_TABLE_MASK;
1277    i = c.fast_huffman[i];
1278    if (i >= 0) {
1279       f.acc >>= c.codeword_lengths[i];
1280       f.valid_bits -= c.codeword_lengths[i];
1281       if (f.valid_bits < 0) { f.valid_bits = 0; return -1; }
1282       return i;
1283    }
1284    return codebook_decode_scalar_raw(f,c);
1285 }
1286 
1287 void DECODE_RAW(ref int var, vorb* f, Codebook* c)
1288 {
1289     var = codebook_decode_scalar(f,c);
1290 }
1291 
1292 void DECODE(ref int var, vorb* f, Codebook* c)
1293 {
1294     var = codebook_decode_scalar(f,c);
1295     if (c.sparse) 
1296         var = c.sorted_values[var];
1297 }
1298 
1299 void DECODE_VQ(ref int var, vorb* f, Codebook* c)
1300 {
1301     DECODE_RAW(var, f, c);
1302 }
1303 
1304 
1305 
1306 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1307 // where we avoid one addition
1308 codetype CODEBOOK_ELEMENT(Codebook* c, int off)
1309 {
1310     return (c.multiplicands[off]);
1311 }
1312 
1313 codetype CODEBOOK_ELEMENT_FAST(Codebook* c, int off)
1314 {
1315     return (c.multiplicands[off]);
1316 }
1317 
1318 codetype CODEBOOK_ELEMENT_BASE(Codebook* c)
1319 {
1320     return 0;
1321 }
1322 
1323 static int codebook_decode_start(vorb *f, Codebook *c)
1324 {
1325    int z = -1;
1326 
1327    // type 0 is only legal in a scalar context
1328    if (c.lookup_type == 0)
1329       error(f, VORBIS_invalid_stream);
1330    else {
1331       DECODE_VQ(z,f,c);
1332       if (c.sparse) assert(z < c.sorted_entries);
1333       if (z < 0) {  // check for EOP
1334          if (!f.bytes_in_seg)
1335             if (f.last_seg)
1336                return z;
1337          error(f, VORBIS_invalid_stream);
1338       }
1339    }
1340    return z;
1341 }
1342 
1343 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1344 {
1345     int i,z = codebook_decode_start(f,c);
1346     if (z < 0) return FALSE;
1347     if (len > c.dimensions) len = c.dimensions;
1348 
1349     z *= c.dimensions;
1350     if (c.sequence_p) 
1351     {
1352         float last = CODEBOOK_ELEMENT_BASE(c);
1353         for (i=0; i < len; ++i) 
1354         {
1355             float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1356             output[i] += val;
1357             last = val + c.minimum_value;
1358         }
1359     } 
1360     else 
1361     {
1362         float last = CODEBOOK_ELEMENT_BASE(c);
1363         for (i=0; i < len; ++i) 
1364         {
1365             output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1366         }
1367     }
1368     return TRUE;
1369 }
1370 
1371 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1372 {
1373     int i,z = codebook_decode_start(f,c);
1374     float last = CODEBOOK_ELEMENT_BASE(c);
1375     if (z < 0) return FALSE;
1376     if (len > c.dimensions) len = c.dimensions;
1377 
1378    z *= c.dimensions;
1379    for (i=0; i < len; ++i) {
1380       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1381       output[i*step] += val;
1382       if (c.sequence_p) last = val;
1383    }
1384 
1385    return TRUE;
1386 }
1387 
1388 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1389 {
1390    int c_inter = *c_inter_p;
1391    int p_inter = *p_inter_p;
1392    int i,z, effective = c.dimensions;
1393 
1394    // type 0 is only legal in a scalar context
1395    if (c.lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1396 
1397    while (total_decode > 0) {
1398       float last = CODEBOOK_ELEMENT_BASE(c);
1399       DECODE_VQ(z,f,c);
1400       static if (!STB_VORBIS_DIVIDES_IN_CODEBOOK)
1401       {
1402         assert(!c.sparse || z < c.sorted_entries);
1403       }
1404       if (z < 0) {
1405          if (!f.bytes_in_seg)
1406             if (f.last_seg) return FALSE;
1407          return error(f, VORBIS_invalid_stream);
1408       }
1409 
1410       // if this will take us off the end of the buffers, stop short!
1411       // we check by computing the length of the virtual interleaved
1412       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1413       // and the length we'll be using (effective)
1414       if (c_inter + p_inter*ch + effective > len * ch) {
1415          effective = len*ch - (p_inter*ch - c_inter);
1416       }
1417 
1418       {
1419          z *= c.dimensions;
1420          if (c.sequence_p) {
1421             for (i=0; i < effective; ++i) {
1422                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1423                if (outputs[c_inter])
1424                   outputs[c_inter][p_inter] += val;
1425                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1426                last = val;
1427             }
1428          } else {
1429             for (i=0; i < effective; ++i) {
1430                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1431                if (outputs[c_inter])
1432                   outputs[c_inter][p_inter] += val;
1433                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1434             }
1435          }
1436       }
1437 
1438       total_decode -= effective;
1439    }
1440    *c_inter_p = c_inter;
1441    *p_inter_p = p_inter;
1442    return TRUE;
1443 }
1444 
1445 static int predict_point(int x, int x0, int x1, int y0, int y1)
1446 {
1447    int dy = y1 - y0;
1448    int adx = x1 - x0;
1449    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1450    int err = abs(dy) * (x - x0);
1451    int off = err / adx;
1452    return dy < 0 ? y0 - off : y0 + off;
1453 }
1454 
1455 // the following table is block-copied from the specification
1456 static immutable float[256] inverse_db_table =
1457 [
1458   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1459   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1460   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1461   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1462   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1463   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1464   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1465   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1466   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1467   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1468   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1469   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1470   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1471   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1472   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1473   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1474   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1475   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1476   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1477   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1478   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1479   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1480   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1481   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1482   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1483   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1484   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1485   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1486   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1487   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1488   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1489   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1490   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1491   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1492   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1493   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1494   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1495   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1496   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1497   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1498   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1499   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1500   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1501   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1502   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1503   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1504   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1505   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1506   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1507   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1508   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1509   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1510   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1511   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1512   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1513   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1514   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1515   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1516   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1517   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1518   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1519   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1520   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1521   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1522  ];
1523 
1524 
1525 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1526 // note that you must produce bit-identical output to decode correctly;
1527 // this specific sequence of operations is specified in the spec (it's
1528 // drawing integer-quantized frequency-space lines that the encoder
1529 // expects to be exactly the same)
1530 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1531 // have to divide in the setup? sigh.
1532 
1533 static void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1534 {
1535    int dy = y1 - y0;
1536    int adx = x1 - x0;
1537    int ady = abs(dy);
1538    int base;
1539    int x=x0,y=y0;
1540    int err = 0;
1541    int sy;
1542 
1543    base = dy / adx;
1544    if (dy < 0)
1545       sy = base - 1;
1546    else
1547       sy = base+1;
1548    ady -= abs(base) * adx;
1549    if (x1 > n) x1 = n;
1550    if (x < x1) {
1551       output[x] *= inverse_db_table[y&255];
1552       for (++x; x < x1; ++x) {
1553          err += ady;
1554          if (err >= adx) {
1555             err -= adx;
1556             y += sy;
1557          } else
1558             y += base;
1559          output[x] *= inverse_db_table[y&255];
1560       }
1561    }
1562 }
1563 
1564 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
1565 {
1566    int k;
1567    if (rtype == 0) {
1568       int step = n / book.dimensions;
1569       for (k=0; k < step; ++k)
1570          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
1571             return FALSE;
1572    } else {
1573       for (k=0; k < n; ) {
1574          if (!codebook_decode(f, book, target+offset, n-k))
1575             return FALSE;
1576          k += book.dimensions;
1577          offset += book.dimensions;
1578       }
1579    }
1580    return TRUE;
1581 }
1582 
1583 // n is 1/2 of the blocksize --
1584 // specification: "Correct per-vector decode length is [n]/2"
1585 static void decode_residue(vorb *f, float **residue_buffers, int ch, int n, int rn, uint8 *do_not_decode)
1586 {
1587    int i,j,pass;
1588    Residue *r = f.residue_config + rn;
1589    int rtype = f.residue_types[rn];
1590    int c = r.classbook;
1591    int classwords = f.codebooks[c].dimensions;
1592    uint actual_size = rtype == 2 ? n*2 : n;
1593    uint limit_r_begin = (r.begin < actual_size ? r.begin : actual_size);
1594    uint limit_r_end   = (r.end   < actual_size ? r.end   : actual_size);
1595    int n_read = limit_r_end - limit_r_begin;
1596    int part_read = n_read / r.part_size;
1597    int temp_alloc_point = temp_alloc_save(f);
1598    uint8 ***part_classdata = cast(uint8 ***) temp_block_array(f,f.channels, part_read * (uint8*).sizeof);
1599   
1600    for (i=0; i < ch; ++i)
1601       if (!do_not_decode[i])
1602          memset(residue_buffers[i], 0, float.sizeof * n);
1603 
1604    if (rtype == 2 && ch != 1) {
1605       for (j=0; j < ch; ++j)
1606          if (!do_not_decode[j])
1607             break;
1608       if (j == ch)
1609          goto done;
1610 
1611       for (pass=0; pass < 8; ++pass) {
1612          int pcount = 0, class_set = 0;
1613          if (ch == 2) {
1614             while (pcount < part_read) {
1615                int z = r.begin + pcount*r.part_size;
1616                int c_inter = (z & 1), p_inter = z>>1;
1617                if (pass == 0) {
1618                   Codebook *c2 = f.codebooks+r.classbook;
1619                   int q;
1620                   DECODE(q,f,c2);
1621                   if (q == EOP) goto done;
1622                   part_classdata[0][class_set] = r.classdata[q];
1623                }
1624                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) 
1625                {
1626                   int z2 = r.begin + pcount*r.part_size;
1627                   int c2 = part_classdata[0][class_set][i];
1628                   int b = r.residue_books[c2][pass];
1629                   if (b >= 0) {
1630                      Codebook *book = f.codebooks + b;
1631             
1632                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size))
1633                         goto done;
1634             
1635                   } 
1636                   else 
1637                   {
1638                      z2 += r.part_size;
1639                      c_inter = z2 & 1;
1640                      p_inter = z2 >> 1;
1641                   }
1642                }
1643                ++class_set;
1644             }
1645          } else if (ch > 2) {
1646             while (pcount < part_read) {
1647                int z = r.begin + pcount*r.part_size;
1648                int c_inter = z % ch, p_inter = z/ch;
1649                if (pass == 0) {
1650                   Codebook *c3 = f.codebooks+r.classbook;
1651                   int q;
1652                   DECODE(q,f,c3);
1653                   if (q == EOP) goto done;
1654                   part_classdata[0][class_set] = r.classdata[q];
1655                }
1656                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1657                   int z3 = r.begin + pcount*r.part_size;
1658                   int c3 = part_classdata[0][class_set][i];
1659                   int b = r.residue_books[c3][pass];
1660                   if (b >= 0) {
1661                      Codebook *book = f.codebooks + b;
1662                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size))
1663                         goto done;
1664                   } else {
1665                      z3 += r.part_size;
1666                      c_inter = z3 % ch;
1667                      p_inter = z3 / ch;
1668                   }
1669                }
1670                ++class_set;
1671             }
1672          }
1673       }
1674       goto done;
1675    }
1676 
1677    for (pass=0; pass < 8; ++pass) {
1678       int pcount = 0, class_set=0;
1679       while (pcount < part_read) {
1680          if (pass == 0) {
1681             for (j=0; j < ch; ++j) {
1682                if (!do_not_decode[j]) {
1683                   Codebook *c4 = f.codebooks+r.classbook;
1684                   int temp;
1685                   DECODE(temp,f,c4);
1686                   if (temp == EOP) goto done;
1687                   part_classdata[j][class_set] = r.classdata[temp];
1688                }
1689             }
1690          }
1691          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1692             for (j=0; j < ch; ++j) {
1693                if (!do_not_decode[j]) {
1694                   int c5 = part_classdata[j][class_set][i];
1695                   int b = r.residue_books[c5][pass];
1696                   if (b >= 0) {
1697                      float *target = residue_buffers[j];
1698                      int offset = r.begin + pcount * r.part_size;
1699                      int n2 = r.part_size;
1700                      Codebook *book = f.codebooks + b;
1701                      if (!residue_decode(f, book, target, offset, n2, rtype))
1702                         goto done;
1703                   }
1704                }
1705             }
1706          }
1707          ++class_set;
1708       }
1709    }
1710   done:
1711 
1712    temp_free(f,part_classdata);
1713    temp_alloc_restore(f,temp_alloc_point);
1714 }
1715 
1716 // the following were split out into separate functions while optimizing;
1717 // they could be pushed back up but eh. __forceinline showed no change;
1718 // they're probably already being inlined.
1719 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
1720 {
1721    float *ee0 = e + i_off;
1722    float *ee2 = ee0 + k_off;
1723    int i;
1724 
1725    assert((n & 3) == 0);
1726    for (i=(n>>2); i > 0; --i) {
1727       float k00_20, k01_21;
1728       k00_20  = ee0[ 0] - ee2[ 0];
1729       k01_21  = ee0[-1] - ee2[-1];
1730       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
1731       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
1732       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
1733       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
1734       A += 8;
1735 
1736       k00_20  = ee0[-2] - ee2[-2];
1737       k01_21  = ee0[-3] - ee2[-3];
1738       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
1739       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
1740       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
1741       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
1742       A += 8;
1743 
1744       k00_20  = ee0[-4] - ee2[-4];
1745       k01_21  = ee0[-5] - ee2[-5];
1746       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
1747       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
1748       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
1749       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
1750       A += 8;
1751 
1752       k00_20  = ee0[-6] - ee2[-6];
1753       k01_21  = ee0[-7] - ee2[-7];
1754       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
1755       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
1756       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
1757       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
1758       A += 8;
1759       ee0 -= 8;
1760       ee2 -= 8;
1761    }
1762 }
1763 
1764 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
1765 {
1766    int i;
1767    float k00_20, k01_21;
1768 
1769    float *e0 = e + d0;
1770    float *e2 = e0 + k_off;
1771 
1772    for (i=lim >> 2; i > 0; --i) {
1773       k00_20 = e0[-0] - e2[-0];
1774       k01_21 = e0[-1] - e2[-1];
1775       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
1776       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
1777       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
1778       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
1779 
1780       A += k1;
1781 
1782       k00_20 = e0[-2] - e2[-2];
1783       k01_21 = e0[-3] - e2[-3];
1784       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
1785       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
1786       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
1787       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
1788 
1789       A += k1;
1790 
1791       k00_20 = e0[-4] - e2[-4];
1792       k01_21 = e0[-5] - e2[-5];
1793       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
1794       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
1795       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
1796       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
1797 
1798       A += k1;
1799 
1800       k00_20 = e0[-6] - e2[-6];
1801       k01_21 = e0[-7] - e2[-7];
1802       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
1803       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
1804       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
1805       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
1806 
1807       e0 -= 8;
1808       e2 -= 8;
1809 
1810       A += k1;
1811    }
1812 }
1813 
1814 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
1815 {
1816    int i;
1817    float A0 = A[0];
1818    float A1 = A[0+1];
1819    float A2 = A[0+a_off];
1820    float A3 = A[0+a_off+1];
1821    float A4 = A[0+a_off*2+0];
1822    float A5 = A[0+a_off*2+1];
1823    float A6 = A[0+a_off*3+0];
1824    float A7 = A[0+a_off*3+1];
1825 
1826    float k00,k11;
1827 
1828    float *ee0 = e  +i_off;
1829    float *ee2 = ee0+k_off;
1830 
1831    for (i=n; i > 0; --i) {
1832       k00     = ee0[ 0] - ee2[ 0];
1833       k11     = ee0[-1] - ee2[-1];
1834       ee0[ 0] =  ee0[ 0] + ee2[ 0];
1835       ee0[-1] =  ee0[-1] + ee2[-1];
1836       ee2[ 0] = (k00) * A0 - (k11) * A1;
1837       ee2[-1] = (k11) * A0 + (k00) * A1;
1838 
1839       k00     = ee0[-2] - ee2[-2];
1840       k11     = ee0[-3] - ee2[-3];
1841       ee0[-2] =  ee0[-2] + ee2[-2];
1842       ee0[-3] =  ee0[-3] + ee2[-3];
1843       ee2[-2] = (k00) * A2 - (k11) * A3;
1844       ee2[-3] = (k11) * A2 + (k00) * A3;
1845 
1846       k00     = ee0[-4] - ee2[-4];
1847       k11     = ee0[-5] - ee2[-5];
1848       ee0[-4] =  ee0[-4] + ee2[-4];
1849       ee0[-5] =  ee0[-5] + ee2[-5];
1850       ee2[-4] = (k00) * A4 - (k11) * A5;
1851       ee2[-5] = (k11) * A4 + (k00) * A5;
1852 
1853       k00     = ee0[-6] - ee2[-6];
1854       k11     = ee0[-7] - ee2[-7];
1855       ee0[-6] =  ee0[-6] + ee2[-6];
1856       ee0[-7] =  ee0[-7] + ee2[-7];
1857       ee2[-6] = (k00) * A6 - (k11) * A7;
1858       ee2[-7] = (k11) * A6 + (k00) * A7;
1859 
1860       ee0 -= k0;
1861       ee2 -= k0;
1862    }
1863 }
1864 
1865 static void iter_54(float *z)
1866 {
1867    float k00,k11,k22,k33;
1868    float y0,y1,y2,y3;
1869 
1870    k00  = z[ 0] - z[-4];
1871    y0   = z[ 0] + z[-4];
1872    y2   = z[-2] + z[-6];
1873    k22  = z[-2] - z[-6];
1874 
1875    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
1876    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
1877 
1878    // done with y0,y2
1879 
1880    k33  = z[-3] - z[-7];
1881 
1882    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
1883    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
1884 
1885    // done with k33
1886 
1887    k11  = z[-1] - z[-5];
1888    y1   = z[-1] + z[-5];
1889    y3   = z[-3] + z[-7];
1890 
1891    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
1892    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
1893    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
1894    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
1895 }
1896 
1897 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
1898 {
1899    int a_off = base_n >> 3;
1900    float A2 = A[0+a_off];
1901    float *z = e + i_off;
1902    float *base = z - 16 * n;
1903 
1904    while (z > base) {
1905       float k00,k11;
1906       float l00,l11;
1907 
1908       k00    = z[-0] - z[ -8];
1909       k11    = z[-1] - z[ -9];
1910       l00    = z[-2] - z[-10];
1911       l11    = z[-3] - z[-11];
1912       z[ -0] = z[-0] + z[ -8];
1913       z[ -1] = z[-1] + z[ -9];
1914       z[ -2] = z[-2] + z[-10];
1915       z[ -3] = z[-3] + z[-11];
1916       z[ -8] = k00;
1917       z[ -9] = k11;
1918       z[-10] = (l00+l11) * A2;
1919       z[-11] = (l11-l00) * A2;
1920 
1921       k00    = z[ -4] - z[-12];
1922       k11    = z[ -5] - z[-13];
1923       l00    = z[ -6] - z[-14];
1924       l11    = z[ -7] - z[-15];
1925       z[ -4] = z[ -4] + z[-12];
1926       z[ -5] = z[ -5] + z[-13];
1927       z[ -6] = z[ -6] + z[-14];
1928       z[ -7] = z[ -7] + z[-15];
1929       z[-12] = k11;
1930       z[-13] = -k00;
1931       z[-14] = (l11-l00) * A2;
1932       z[-15] = (l00+l11) * -A2;
1933 
1934       iter_54(z);
1935       iter_54(z-8);
1936       z -= 16;
1937    }
1938 }
1939 
1940 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
1941 {
1942    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
1943    int ld;
1944    // @OPTIMIZE: reduce register pressure by using fewer variables?
1945    int save_point = temp_alloc_save(f);
1946    float *buf2 = cast(float *) temp_alloc(f, n2 * float.sizeof);
1947    float* u = null;
1948    float* v = null;
1949    // twiddle factors
1950    float *A = f.A[blocktype];
1951 
1952    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
1953    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
1954 
1955    // kernel from paper
1956 
1957 
1958    // merged:
1959    //   copy and reflect spectral data
1960    //   step 0
1961 
1962    // note that it turns out that the items added together during
1963    // this step are, in fact, being added to themselves (as reflected
1964    // by step 0). inexplicable inefficiency! this became obvious
1965    // once I combined the passes.
1966 
1967    // so there's a missing 'times 2' here (for adding X to itself).
1968    // this propagates through linearly to the end, where the numbers
1969    // are 1/2 too small, and need to be compensated for.
1970 
1971    {
1972       float *d, e, AA, e_stop;
1973       d = &buf2[n2-2];
1974       AA = A;
1975       e = &buffer[0];
1976       e_stop = &buffer[n2];
1977       while (e != e_stop) {
1978          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
1979          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
1980          d -= 2;
1981          AA += 2;
1982          e += 4;
1983       }
1984 
1985       e = &buffer[n2-3];
1986       while (d >= buf2) {
1987          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
1988          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
1989          d -= 2;
1990          AA += 2;
1991          e -= 4;
1992       }
1993    }
1994 
1995    // now we use symbolic names for these, so that we can
1996    // possibly swap their meaning as we change which operations
1997    // are in place
1998 
1999    u = buffer;
2000    v = buf2;
2001 
2002    // step 2    (paper output is w, now u)
2003    // this could be in place, but the data ends up in the wrong
2004    // place... _somebody_'s got to swap it, so this is nominated
2005    {
2006       float *AA = &A[n2-8];
2007       float *d0, d1,  e0, e1;
2008 
2009       e0 = &v[n4];
2010       e1 = &v[0];
2011 
2012       d0 = &u[n4];
2013       d1 = &u[0];
2014 
2015       while (AA >= A) {
2016          float v40_20, v41_21;
2017 
2018          v41_21 = e0[1] - e1[1];
2019          v40_20 = e0[0] - e1[0];
2020          d0[1]  = e0[1] + e1[1];
2021          d0[0]  = e0[0] + e1[0];
2022          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2023          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2024 
2025          v41_21 = e0[3] - e1[3];
2026          v40_20 = e0[2] - e1[2];
2027          d0[3]  = e0[3] + e1[3];
2028          d0[2]  = e0[2] + e1[2];
2029          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2030          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2031 
2032          AA -= 8;
2033 
2034          d0 += 4;
2035          d1 += 4;
2036          e0 += 4;
2037          e1 += 4;
2038       }
2039    }
2040 
2041    // step 3
2042    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2043 
2044    // optimized step 3:
2045 
2046    // the original step3 loop can be nested r inside s or s inside r;
2047    // it's written originally as s inside r, but this is dumb when r
2048    // iterates many times, and s few. So I have two copies of it and
2049    // switch between them halfway.
2050 
2051    // this is iteration 0 of step 3
2052    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2053    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2054 
2055    // this is iteration 1 of step 3
2056    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2057    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2058    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2059    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2060 
2061    l=2;
2062    for (; l < (ld-3)>>1; ++l) {
2063       int k0 = n >> (l+2), k0_2 = k0>>1;
2064       int lim = 1 << (l+1);
2065       int i;
2066       for (i=0; i < lim; ++i)
2067          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2068    }
2069 
2070    for (; l < ld-6; ++l) {
2071       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2072       int rlim = n >> (l+6), r;
2073       int lim = 1 << (l+1);
2074       int i_off;
2075       float *A0 = A;
2076       i_off = n2-1;
2077       for (r=rlim; r > 0; --r) {
2078          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2079          A0 += k1*4;
2080          i_off -= 8;
2081       }
2082    }
2083 
2084    // iterations with count:
2085    //   ld-6,-5,-4 all interleaved together
2086    //       the big win comes from getting rid of needless flops
2087    //         due to the constants on pass 5 & 4 being all 1 and 0;
2088    //       combining them to be simultaneous to improve cache made little difference
2089    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2090 
2091    // output is u
2092 
2093    // step 4, 5, and 6
2094    // cannot be in-place because of step 5
2095    {
2096       uint16 *bitrev = f.bit_reverse[blocktype];
2097       // weirdly, I'd have thought reading sequentially and writing
2098       // erratically would have been better than vice-versa, but in
2099       // fact that's not what my testing showed. (That is, with
2100       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2101 
2102       float *d0 = &v[n4-4];
2103       float *d1 = &v[n2-4];
2104       while (d0 >= v) {
2105          int k4;
2106 
2107          k4 = bitrev[0];
2108          d1[3] = u[k4+0];
2109          d1[2] = u[k4+1];
2110          d0[3] = u[k4+2];
2111          d0[2] = u[k4+3];
2112 
2113          k4 = bitrev[1];
2114          d1[1] = u[k4+0];
2115          d1[0] = u[k4+1];
2116          d0[1] = u[k4+2];
2117          d0[0] = u[k4+3];
2118 
2119          d0 -= 4;
2120          d1 -= 4;
2121          bitrev += 2;
2122       }
2123    }
2124    // (paper output is u, now v)
2125 
2126 
2127    // data must be in buf2
2128    assert(v == buf2);
2129 
2130    // step 7   (paper output is v, now v)
2131    // this is now in place
2132    {
2133       float *C = f.C[blocktype];
2134       float *d, e;
2135 
2136       d = v;
2137       e = v + n2 - 4;
2138 
2139       while (d < e) {
2140          float a02,a11,b0,b1,b2,b3;
2141 
2142          a02 = d[0] - e[2];
2143          a11 = d[1] + e[3];
2144 
2145          b0 = C[1]*a02 + C[0]*a11;
2146          b1 = C[1]*a11 - C[0]*a02;
2147 
2148          b2 = d[0] + e[ 2];
2149          b3 = d[1] - e[ 3];
2150 
2151          d[0] = b2 + b0;
2152          d[1] = b3 + b1;
2153          e[2] = b2 - b0;
2154          e[3] = b1 - b3;
2155 
2156          a02 = d[2] - e[0];
2157          a11 = d[3] + e[1];
2158 
2159          b0 = C[3]*a02 + C[2]*a11;
2160          b1 = C[3]*a11 - C[2]*a02;
2161 
2162          b2 = d[2] + e[ 0];
2163          b3 = d[3] - e[ 1];
2164 
2165          d[2] = b2 + b0;
2166          d[3] = b3 + b1;
2167          e[0] = b2 - b0;
2168          e[1] = b1 - b3;
2169 
2170          C += 4;
2171          d += 4;
2172          e -= 4;
2173       }
2174    }
2175 
2176    // data must be in buf2
2177 
2178 
2179    // step 8+decode   (paper output is X, now buffer)
2180    // this generates pairs of data a la 8 and pushes them directly through
2181    // the decode kernel (pushing rather than pulling) to avoid having
2182    // to make another pass later
2183 
2184    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2185 
2186    {
2187       float *d0, d1, d2, d3;
2188 
2189       float *B = f.B[blocktype] + n2 - 8;
2190       float *e = buf2 + n2 - 8;
2191       d0 = &buffer[0];
2192       d1 = &buffer[n2-4];
2193       d2 = &buffer[n2];
2194       d3 = &buffer[n-4];
2195       while (e >= v) {
2196          float p0,p1,p2,p3;
2197 
2198          p3 =  e[6]*B[7] - e[7]*B[6];
2199          p2 = -e[6]*B[6] - e[7]*B[7];
2200 
2201          d0[0] =   p3;
2202          d1[3] = - p3;
2203          d2[0] =   p2;
2204          d3[3] =   p2;
2205 
2206          p1 =  e[4]*B[5] - e[5]*B[4];
2207          p0 = -e[4]*B[4] - e[5]*B[5];
2208 
2209          d0[1] =   p1;
2210          d1[2] = - p1;
2211          d2[1] =   p0;
2212          d3[2] =   p0;
2213 
2214          p3 =  e[2]*B[3] - e[3]*B[2];
2215          p2 = -e[2]*B[2] - e[3]*B[3];
2216 
2217          d0[2] =   p3;
2218          d1[1] = - p3;
2219          d2[2] =   p2;
2220          d3[1] =   p2;
2221 
2222          p1 =  e[0]*B[1] - e[1]*B[0];
2223          p0 = -e[0]*B[0] - e[1]*B[1];
2224 
2225          d0[3] =   p1;
2226          d1[0] = - p1;
2227          d2[3] =   p0;
2228          d3[0] =   p0;
2229 
2230          B -= 8;
2231          e -= 8;
2232          d0 += 4;
2233          d2 += 4;
2234          d1 -= 4;
2235          d3 -= 4;
2236       }
2237    }
2238 
2239    temp_free(f,buf2);
2240    temp_alloc_restore(f,save_point);
2241 }
2242 
2243 
2244 static float *get_window(vorb *f, int len)
2245 {
2246    len <<= 1;
2247    if (len == f.blocksize_0) return f.window[0];
2248    if (len == f.blocksize_1) return f.window[1];
2249    return null;
2250 }
2251 
2252 alias YTYPE = int16;
2253 
2254 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
2255 {
2256    int n2 = n >> 1;
2257    int s = map.chan[i].mux, floor;
2258    floor = map.submap_floor[s];
2259    if (f.floor_types[floor] == 0) {
2260       return error(f, VORBIS_invalid_stream);
2261    } else {
2262       Floor1 *g = &f.floor_config[floor].floor1;
2263       int j,q;
2264       int lx = 0, ly = finalY[0] * g.floor1_multiplier;
2265       for (q=1; q < g.values; ++q) {
2266          j = g.sorted_order[q];
2267          if (finalY[j] >= 0)
2268          {
2269             int hy = finalY[j] * g.floor1_multiplier;
2270             int hx = g.Xlist[j];
2271             if (lx != hx)
2272                draw_line(target, lx,ly, hx,hy, n2);
2273             lx = hx, ly = hy;
2274          }
2275       }
2276       if (lx < n2) {
2277          // optimization of: draw_line(target, lx,ly, n,ly, n2);
2278          for (j=lx; j < n2; ++j)
2279             target[j] *= inverse_db_table[ly];
2280       }
2281    }
2282    return TRUE;
2283 }
2284 
2285 // The meaning of "left" and "right"
2286 //
2287 // For a given frame:
2288 //     we compute samples from 0..n
2289 //     window_center is n/2
2290 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2291 //     all of the samples from left_end to right_start can be output without mixing; however,
2292 //        this interval is 0-length except when transitioning between short and long frames
2293 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2294 //        which we don't have, so those get saved in a buffer
2295 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2296 //        has to be the same as frame N+1's left_end-left_start (which they are by
2297 //        construction)
2298 
2299 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
2300 {
2301    Mode *m;
2302    int i, n, prev, next, window_center;
2303    f.channel_buffer_start = f.channel_buffer_end = 0;
2304 
2305   retry:
2306    if (f.eof) return FALSE;
2307    if (!maybe_start_packet(f))
2308       return FALSE;
2309    // check packet type
2310    if (get_bits(f,1) != 0) {      
2311       while (EOP != get8_packet(f)) {}
2312       goto retry;
2313    }
2314 
2315    if (f.alloc.alloc_buffer)
2316       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2317 
2318    i = get_bits(f, ilog(f.mode_count-1));
2319    if (i == EOP) return FALSE;
2320    if (i >= f.mode_count) return FALSE;
2321    *mode = i;
2322    m = f.mode_config.ptr + i;
2323    if (m.blockflag) {
2324       n = f.blocksize_1;
2325       prev = get_bits(f,1);
2326       next = get_bits(f,1);
2327    } else {
2328       prev = next = 0;
2329       n = f.blocksize_0;
2330    }
2331 
2332 // WINDOWING
2333 
2334    window_center = n >> 1;
2335    if (m.blockflag && !prev) {
2336       *p_left_start = (n - f.blocksize_0) >> 2;
2337       *p_left_end   = (n + f.blocksize_0) >> 2;
2338    } else {
2339       *p_left_start = 0;
2340       *p_left_end   = window_center;
2341    }
2342    if (m.blockflag && !next) {
2343       *p_right_start = (n*3 - f.blocksize_0) >> 2;
2344       *p_right_end   = (n*3 + f.blocksize_0) >> 2;
2345    } else {
2346       *p_right_start = window_center;
2347       *p_right_end   = n;
2348    }
2349 
2350    return TRUE;
2351 }
2352 
2353 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
2354 {
2355    Mapping *map;
2356    int i,j,k,n,n2;
2357    int[256] zero_channel;
2358    int[256] really_zero_channel;
2359 
2360 // WINDOWING
2361 
2362    n = f.blocksize[m.blockflag];
2363    map = &f.mapping[m.mapping];
2364 
2365 // FLOORS
2366    n2 = n >> 1;
2367 
2368    for (i=0; i < f.channels; ++i) {
2369       int s = map.chan[i].mux, floor;
2370       zero_channel[i] = FALSE;
2371       floor = map.submap_floor[s];
2372       if (f.floor_types[floor] == 0) {
2373          return error(f, VORBIS_invalid_stream);
2374       } else {
2375          Floor1 *g = &f.floor_config[floor].floor1;
2376          if (get_bits(f, 1)) {
2377             short *finalY;
2378             uint8[256] step2_flag;
2379             static immutable int[4] range_list = [ 256, 128, 86, 64 ];
2380             int range = range_list[g.floor1_multiplier-1];
2381             int offset = 2;
2382             finalY = f.finalY[i];
2383             finalY[0] = cast(short) get_bits(f, ilog(range)-1);
2384             finalY[1] = cast(short) get_bits(f, ilog(range)-1);
2385             for (j=0; j < g.partitions; ++j) {
2386                int pclass = g.partition_class_list[j];
2387                int cdim = g.class_dimensions[pclass];
2388                int cbits = g.class_subclasses[pclass];
2389                int csub = (1 << cbits)-1;
2390                int cval = 0;
2391                if (cbits) {
2392                   Codebook *c = f.codebooks + g.class_masterbooks[pclass];
2393                   DECODE(cval,f,c);
2394                }
2395                for (k=0; k < cdim; ++k) {
2396                   int book = g.subclass_books[pclass][cval & csub];
2397                   cval = cval >> cbits;
2398                   if (book >= 0) {
2399                      int temp;
2400                      Codebook *c = f.codebooks + book;
2401                      DECODE(temp,f,c);
2402                      finalY[offset++] = cast(short)temp;
2403                   } else
2404                      finalY[offset++] = 0;
2405                }
2406             }
2407             if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
2408             step2_flag[0] = step2_flag[1] = 1;
2409             for (j=2; j < g.values; ++j) {
2410                int low, high, pred, highroom, lowroom, room, val;
2411                low = g.neighbors[j][0];
2412                high = g.neighbors[j][1];
2413                //neighbors(g.Xlist, j, &low, &high);
2414                pred = predict_point(g.Xlist[j], g.Xlist[low], g.Xlist[high], finalY[low], finalY[high]);
2415                val = finalY[j];
2416                highroom = range - pred;
2417                lowroom = pred;
2418                if (highroom < lowroom)
2419                   room = highroom * 2;
2420                else
2421                   room = lowroom * 2;
2422                if (val) {
2423                   step2_flag[low] = step2_flag[high] = 1;
2424                   step2_flag[j] = 1;
2425                   if (val >= room)
2426                      if (highroom > lowroom)
2427                         finalY[j] = cast(short)(val - lowroom + pred);
2428                      else
2429                         finalY[j] = cast(short)(pred - val + highroom - 1);
2430                   else
2431                      if (val & 1)
2432                         finalY[j] = cast(short)(pred - ((val+1)>>1));
2433                      else
2434                         finalY[j] = cast(short)(pred + (val>>1));
2435                } else {
2436                   step2_flag[j] = 0;
2437                   finalY[j] = cast(short)pred;
2438                }
2439             }
2440 
2441             // defer final floor computation until _after_ residue
2442             for (j=0; j < g.values; ++j) {
2443                if (!step2_flag[j])
2444                   finalY[j] = -1;
2445             }
2446          } else {
2447            error:
2448             zero_channel[i] = TRUE;
2449          }
2450          // So we just defer everything else to later
2451 
2452          // at this point we've decoded the floor into buffer
2453       }
2454    }
2455 
2456    // at this point we've decoded all floors
2457 
2458    if (f.alloc.alloc_buffer)
2459       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2460 
2461    // re-enable coupled channels if necessary
2462    memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof * f.channels);
2463    for (i=0; i < map.coupling_steps; ++i)
2464       if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
2465          zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = FALSE;
2466       }
2467 
2468 // RESIDUE DECODE
2469    for (i=0; i < map.submaps; ++i) {
2470       float *[STB_VORBIS_MAX_CHANNELS] residue_buffers;
2471       int r;
2472       uint8[256] do_not_decode;
2473       int ch = 0;
2474       for (j=0; j < f.channels; ++j) {
2475          if (map.chan[j].mux == i) {
2476             if (zero_channel[j]) {
2477                do_not_decode[ch] = TRUE;
2478                residue_buffers[ch] = null;
2479             } else {
2480                do_not_decode[ch] = FALSE;
2481                residue_buffers[ch] = f.channel_buffers[j];
2482             }
2483             ++ch;
2484          }
2485       }
2486       r = map.submap_residue[i];
2487       decode_residue(f, residue_buffers.ptr, ch, n2, r, do_not_decode.ptr);
2488    }
2489 
2490    if (f.alloc.alloc_buffer)
2491       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2492 
2493 // INVERSE COUPLING
2494    for (i = map.coupling_steps-1; i >= 0; --i) {
2495       int n3 = n >> 1;
2496       float *m_ = f.channel_buffers[map.chan[i].magnitude];
2497       float *a = f.channel_buffers[map.chan[i].angle    ];
2498       for (j=0; j < n3; ++j) {
2499          float a2,m2;
2500          if (m_[j] > 0)
2501             if (a[j] > 0)
2502                m2 = m_[j], a2 = m_[j] - a[j];
2503             else
2504                a2 = m_[j], m2 = m_[j] + a[j];
2505          else
2506             if (a[j] > 0)
2507                m2 = m_[j], a2 = m_[j] + a[j];
2508             else
2509                a2 = m_[j], m2 = m_[j] - a[j];
2510          m_[j] = m2;
2511          a[j] = a2;
2512       }
2513    }
2514 
2515    // finish decoding the floors
2516    for (i=0; i < f.channels; ++i) {
2517       if (really_zero_channel[i]) {
2518          memset(f.channel_buffers[i], 0, (*f.channel_buffers[i]).sizeof * n2);
2519       } else {
2520          do_floor(f, map, i, n, f.channel_buffers[i], f.finalY[i], null);
2521       }
2522    }
2523 
2524    // INVERSE MDCT
2525    for (i=0; i < f.channels; ++i)
2526       inverse_mdct(f.channel_buffers[i], n, f, m.blockflag);
2527 
2528    // this shouldn't be necessary, unless we exited on an error
2529    // and want to flush to get to the next packet
2530    flush_packet(f);
2531 
2532    if (f.first_decode) {
2533       // assume we start so first non-discarded sample is sample 0
2534       // this isn't to spec, but spec would require us to read ahead
2535       // and decode the size of all current frames--could be done,
2536       // but presumably it's not a commonly used feature
2537       f.current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around)
2538       // we might have to discard samples "from" the next frame too,
2539       // if we're lapping a large block then a small at the start?
2540       f.discard_samples_deferred = n - right_end;
2541       f.current_loc_valid = TRUE;
2542       f.first_decode = FALSE;
2543    } else if (f.discard_samples_deferred) {
2544       if (f.discard_samples_deferred >= right_start - left_start) {
2545          f.discard_samples_deferred -= (right_start - left_start);
2546          left_start = right_start;
2547          *p_left = left_start;
2548       } else {
2549          left_start += f.discard_samples_deferred;
2550          *p_left = left_start;
2551          f.discard_samples_deferred = 0;
2552       }
2553    } else if (f.previous_length == 0 && f.current_loc_valid) {
2554       // we're recovering from a seek... that means we're going to discard
2555       // the samples from this packet even though we know our position from
2556       // the last page header, so we need to update the position based on
2557       // the discarded samples here
2558       // but wait, the code below is going to add this in itself even
2559       // on a discard, so we don't need to do it here...
2560    }
2561 
2562    // check if we have ogg information about the sample # for this packet
2563    if (f.last_seg_which == f.end_seg_with_known_loc) {
2564       // if we have a valid current loc, and this is final:
2565       if (f.current_loc_valid && (f.page_flag & PAGEFLAG_last_page)) {
2566          uint32 current_end = f.known_loc_for_packet;
2567          // then let's infer the size of the (probably) short final frame
2568          if (current_end < f.current_loc + (right_end-left_start)) {
2569             if (current_end < f.current_loc) {
2570                // negative truncation, that's impossible!
2571                *len = 0;
2572             } else {
2573                *len = current_end - f.current_loc;
2574             }
2575             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
2576             if (*len > right_end) *len = right_end; // this should never happen
2577             f.current_loc += *len;
2578             return TRUE;
2579          }
2580       }
2581       // otherwise, just set our sample loc
2582       // guess that the ogg granule pos refers to the _middle_ of the
2583       // last frame?
2584       // set f.current_loc to the position of left_start
2585       f.current_loc = f.known_loc_for_packet - (n2-left_start);
2586       f.current_loc_valid = TRUE;
2587    }
2588    if (f.current_loc_valid)
2589       f.current_loc += (right_start - left_start);
2590 
2591    if (f.alloc.alloc_buffer)
2592       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2593    *len = right_end;  // ignore samples after the window goes to 0
2594 
2595    return TRUE;
2596 }
2597 
2598 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
2599 {
2600    int mode, left_end, right_end;
2601    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
2602    return vorbis_decode_packet_rest(f, len, f.mode_config.ptr + mode, *p_left, left_end, *p_right, right_end, p_left);
2603 }
2604 
2605 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
2606 {
2607    int prev,i,j;
2608    // we use right&left (the start of the right- and left-window sin()-regions)
2609    // to determine how much to return, rather than inferring from the rules
2610    // (same result, clearer code); 'left' indicates where our sin() window
2611    // starts, therefore where the previous window's right edge starts, and
2612    // therefore where to start mixing from the previous buffer. 'right'
2613    // indicates where our sin() ending-window starts, therefore that's where
2614    // we start saving, and where our returned-data ends.
2615 
2616    // mixin from previous window
2617    if (f.previous_length) {
2618       int i2, j2, n = f.previous_length;
2619       float *w = get_window(f, n);
2620       if (w == NULL) return 0;
2621       for (i2=0; i2 < f.channels; ++i2) {
2622          for (j2=0; j2 < n; ++j2)
2623             f.channel_buffers[i2][left+j2] =
2624                f.channel_buffers[i2][left+j2]*w[    j2] +
2625                f.previous_window[i2][     j2]*w[n-1-j2];
2626       }
2627    }
2628 
2629    prev = f.previous_length;
2630 
2631    // last half of this data becomes previous window
2632    f.previous_length = len - right;
2633 
2634    // @OPTIMIZE: could avoid this copy by double-buffering the
2635    // output (flipping previous_window with channel_buffers), but
2636    // then previous_window would have to be 2x as large, and
2637    // channel_buffers couldn't be temp mem (although they're NOT
2638    // currently temp mem, they could be (unless we want to level
2639    // performance by spreading out the computation))
2640    for (i=0; i < f.channels; ++i)
2641       for (j=0; right+j < len; ++j)
2642          f.previous_window[i][j] = f.channel_buffers[i][right+j];
2643 
2644    if (!prev)
2645       // there was no previous packet, so this data isn't valid...
2646       // this isn't entirely true, only the would-have-overlapped data
2647       // isn't valid, but this seems to be what the spec requires
2648       return 0;
2649 
2650    // truncate a short frame
2651    if (len < right) right = len;
2652 
2653    f.samples_output += right-left;
2654 
2655    return right - left;
2656 }
2657 
2658 static int vorbis_pump_first_frame(stb_vorbis *f)
2659 {
2660    int len, right, left, res;
2661    res = vorbis_decode_packet(f, &len, &left, &right);
2662    if (res)
2663       vorbis_finish_frame(f, len, left, right);
2664    return res;
2665 }
2666 
2667 
2668 static int start_decoder(vorb *f)
2669 {
2670    uint8[6] header;
2671    uint8 x,y;
2672    int len,i,j,k, max_submaps = 0;
2673    int longest_floorlist=0;
2674 
2675    // first page, first packet
2676    f.first_decode = TRUE;
2677 
2678    if (!start_page(f))                              return FALSE;
2679    // validate page flag
2680    if (!(f.page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
2681    if (f.page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
2682    if (f.page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
2683    // check for expected packet length
2684    if (f.segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
2685    if (f.segments[0] != 30) {
2686       // check for the Ogg skeleton fishead identifying header to refine our error
2687       if (f.segments[0] == 64 &&
2688           getn(f, header.ptr, 6) &&
2689           header[0] == 'f' &&
2690           header[1] == 'i' &&
2691           header[2] == 's' &&
2692           header[3] == 'h' &&
2693           header[4] == 'e' &&
2694           header[5] == 'a' &&
2695           get8(f)   == 'd' &&
2696           get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
2697       else
2698                                                     return error(f, VORBIS_invalid_first_page);
2699    }
2700 
2701    // read packet
2702    // check packet header
2703    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
2704    if (!getn(f, header.ptr, 6))                         return error(f, VORBIS_unexpected_eof);
2705    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_first_page);
2706    // vorbis_version
2707    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
2708    f.channels = get8(f); if (!f.channels)         return error(f, VORBIS_invalid_first_page);
2709    if (f.channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
2710    f.sample_rate = get32(f); if (!f.sample_rate)  return error(f, VORBIS_invalid_first_page);
2711    get32(f); // bitrate_maximum
2712    get32(f); // bitrate_nominal
2713    get32(f); // bitrate_minimum
2714    x = get8(f);
2715    {
2716       int log0,log1;
2717       log0 = x & 15;
2718       log1 = x >> 4;
2719       f.blocksize_0 = 1 << log0;
2720       f.blocksize_1 = 1 << log1;
2721       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
2722       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
2723       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
2724    }
2725 
2726    // framing_flag
2727    x = get8(f);
2728    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
2729 
2730    // second packet!
2731    if (!start_page(f))                              return FALSE;
2732 
2733    if (!start_packet(f))                            return FALSE;
2734 
2735    if (!next_segment(f))                            return FALSE;
2736 
2737    if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
2738    for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f);
2739    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_setup);
2740    //file vendor
2741    len = get32_packet(f);
2742    f.vendor = cast(char*)setup_malloc(f, len+1);
2743    if (f.vendor == NULL)                           return error(f, VORBIS_outofmem);
2744    for(i=0; i < len; ++i) {
2745       f.vendor[i] = cast(char) get8_packet(f);
2746    }
2747    f.vendor[len] = cast(char)'\0';
2748    //user comments
2749    f.comment_list_length = get32_packet(f);
2750    f.comment_list = null;
2751    if (f.comment_list_length > 0)
2752    {
2753       f.comment_list = cast(char**) setup_malloc(f, cast(int)( (char*).sizeof * (f.comment_list_length) ));
2754       if (f.comment_list == NULL)                  return error(f, VORBIS_outofmem);
2755    }
2756 
2757    for(i=0; i < f.comment_list_length; ++i) {
2758       len = get32_packet(f);
2759       f.comment_list[i] = cast(char*)setup_malloc(f, len+1);
2760       if (f.comment_list[i] == null)               return error(f, VORBIS_outofmem);
2761 
2762       for(j=0; j < len; ++j) {
2763          f.comment_list[i][j] = cast(char) get8_packet(f);
2764       }
2765       f.comment_list[i][len] = cast(char)'\0';
2766    }
2767 
2768    // framing_flag
2769    x = cast(ubyte) get8_packet(f);
2770    if (!(x & 1))                                    return error(f, VORBIS_invalid_setup);
2771 
2772 
2773    skip(f, f.bytes_in_seg);
2774    f.bytes_in_seg = 0;
2775 
2776    do {
2777       len = next_segment(f);
2778       skip(f, len);
2779       f.bytes_in_seg = 0;
2780    } while (len);
2781 
2782    // third packet!
2783    if (!start_packet(f))                            return FALSE;
2784 
2785    crc32_init(f);
2786 
2787    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
2788    for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f);
2789    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_setup);
2790 
2791    // codebooks
2792 
2793    f.codebook_count = get_bits(f,8) + 1;
2794    f.codebooks = cast(Codebook *) setup_malloc(f, cast(int)( (*f.codebooks).sizeof * f.codebook_count) );
2795    if (f.codebooks == null)                        return error(f, VORBIS_outofmem);
2796    memset(f.codebooks, 0, (*f.codebooks).sizeof * f.codebook_count);
2797    for (i=0; i < f.codebook_count; ++i) {
2798       uint32 *values;
2799       int ordered, sorted_count;
2800       int total=0;
2801       uint8 *lengths;
2802       Codebook *c = f.codebooks+i;
2803       x = cast(ubyte) get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
2804       x = cast(ubyte) get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
2805       x = cast(ubyte) get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
2806       x = cast(ubyte) get_bits(f, 8);
2807       c.dimensions = (get_bits(f, 8)<<8) + x;
2808       x = cast(ubyte) get_bits(f, 8);
2809       y = cast(ubyte) get_bits(f, 8);
2810       c.entries = (get_bits(f, 8)<<16) + (y<<8) + x;
2811       ordered = get_bits(f,1);
2812       c.sparse = ordered ? 0 : cast(ubyte) get_bits(f,1);
2813 
2814       if (c.dimensions == 0 && c.entries != 0)    return error(f, VORBIS_invalid_setup);
2815 
2816       if (c.sparse)
2817          lengths = cast(uint8 *) setup_temp_malloc(f, c.entries);
2818       else
2819          lengths = c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries);
2820 
2821       if (!lengths) return error(f, VORBIS_outofmem);
2822 
2823       if (ordered) {
2824          int current_entry = 0;
2825          int current_length = get_bits(f,5) + 1;
2826          while (current_entry < c.entries) {
2827             int limit = c.entries - current_entry;
2828             int n = get_bits(f, ilog(limit));
2829             if (current_length >= 32) return error(f, VORBIS_invalid_setup);
2830             if (current_entry + n > cast(int) c.entries) { return error(f, VORBIS_invalid_setup); }
2831             memset(lengths + current_entry, current_length, n);
2832             current_entry += n;
2833             ++current_length;
2834          }
2835       } else {
2836          for (j=0; j < c.entries; ++j) {
2837             int present = c.sparse ? get_bits(f,1) : 1;
2838             if (present) {
2839                lengths[j] = cast(ubyte) (get_bits(f, 5) + 1);
2840                ++total;
2841                if (lengths[j] == 32)
2842                   return error(f, VORBIS_invalid_setup);
2843             } else {
2844                lengths[j] = NO_CODE;
2845             }
2846          }
2847       }
2848 
2849       if (c.sparse && total >= c.entries >> 2) {
2850          // convert sparse items to non-sparse!
2851          if (c.entries > cast(int) f.setup_temp_memory_required)
2852             f.setup_temp_memory_required = c.entries;
2853 
2854          c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries);
2855          if (c.codeword_lengths == null) return error(f, VORBIS_outofmem);
2856          memcpy(c.codeword_lengths, lengths, c.entries);
2857          setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
2858          lengths = c.codeword_lengths;
2859          c.sparse = 0;
2860       }
2861 
2862       // compute the size of the sorted tables
2863       if (c.sparse) {
2864          sorted_count = total;
2865       } else {
2866          sorted_count = 0;
2867          for (j=0; j < c.entries; ++j)
2868             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
2869                ++sorted_count;
2870       }
2871 
2872       c.sorted_entries = sorted_count;
2873       values = null;
2874 
2875       if (!c.sparse) {
2876          c.codewords = cast(uint32 *) setup_malloc(f, cast(int)( (c.codewords[0]).sizeof * c.entries ) );
2877          if (!c.codewords)                  return error(f, VORBIS_outofmem);
2878       } else {
2879          uint size;
2880          if (c.sorted_entries) {
2881             c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.sorted_entries);
2882             if (!c.codeword_lengths)           return error(f, VORBIS_outofmem);
2883             c.codewords = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*c.codewords).sizeof * c.sorted_entries ));
2884             if (!c.codewords)                  return error(f, VORBIS_outofmem);
2885             values = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*values).sizeof * c.sorted_entries ) );
2886             if (!values)                        return error(f, VORBIS_outofmem);
2887          }
2888          size = cast(uint)( c.entries + ((*c.codewords).sizeof + (*values).sizeof) * c.sorted_entries );
2889          if (size > f.setup_temp_memory_required)
2890             f.setup_temp_memory_required = size;
2891       }
2892 
2893       if (!compute_codewords(c, lengths, c.entries, values)) {
2894          if (c.sparse) setup_temp_free(f, values, 0);
2895          return error(f, VORBIS_invalid_setup);
2896       }
2897 
2898       if (c.sorted_entries) {
2899          // allocate an extra slot for sentinels
2900          c.sorted_codewords = cast(uint32 *) setup_malloc(f, cast(int)( (*c.sorted_codewords).sizeof * (c.sorted_entries+1) ));
2901          if (c.sorted_codewords == null) return error(f, VORBIS_outofmem);
2902          // allocate an extra slot at the front so that c.sorted_values[-1] is defined
2903          // so that we can catch that case without an extra if
2904          c.sorted_values    = cast( int   *) setup_malloc(f, cast(int)( (*c.sorted_values   ).sizeof * (c.sorted_entries+1) ) );
2905          if (c.sorted_values == null) return error(f, VORBIS_outofmem);
2906          ++c.sorted_values;
2907          c.sorted_values[-1] = -1;
2908          compute_sorted_huffman(c, lengths, values);
2909       }
2910 
2911       if (c.sparse) {
2912          setup_temp_free(f, values, (*values).sizeof*c.sorted_entries);
2913          setup_temp_free(f, c.codewords, (*c.codewords).sizeof*c.sorted_entries);
2914          setup_temp_free(f, lengths, c.entries);
2915          c.codewords = null;
2916       }
2917 
2918       compute_accelerated_huffman(c);
2919 
2920       c.lookup_type = cast(ubyte) get_bits(f, 4);
2921       if (c.lookup_type > 2) return error(f, VORBIS_invalid_setup);
2922       if (c.lookup_type > 0) {
2923          uint16 *mults;
2924          c.minimum_value = float32_unpack(get_bits(f, 32));
2925          c.delta_value = float32_unpack(get_bits(f, 32));
2926          c.value_bits = cast(ubyte) (get_bits(f, 4)+1);
2927          c.sequence_p = cast(ubyte) get_bits(f,1);
2928          if (c.lookup_type == 1) {
2929             int values2 = lookup1_values(c.entries, c.dimensions);
2930             if (values2 < 0) return error(f, VORBIS_invalid_setup);
2931             c.lookup_values = cast(uint32) values2;
2932          } else {
2933             c.lookup_values = c.entries * c.dimensions;
2934          }
2935          if (c.lookup_values == 0) return error(f, VORBIS_invalid_setup);
2936          mults = cast(uint16 *) setup_temp_malloc(f, cast(int)( (mults[0]).sizeof * c.lookup_values));
2937          if (mults == null) return error(f, VORBIS_outofmem);
2938          for (j=0; j < cast(int) c.lookup_values; ++j) {
2939             int q = get_bits(f, c.value_bits);
2940             if (q == EOP) 
2941             { 
2942                 setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); 
2943                 return error(f, VORBIS_invalid_setup); 
2944             }
2945             mults[j] = cast(ushort)q;
2946          }
2947 
2948          if (c.lookup_type == 1) {
2949             int len2, sparse = c.sparse;
2950             float last=0;
2951             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
2952             if (sparse) {
2953                if (c.sorted_entries == 0) goto skip;
2954                c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.sorted_entries * c.dimensions);
2955             } else
2956                c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.entries        * c.dimensions);
2957             if (c.multiplicands == null) { setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); }
2958             len2 = sparse ? c.sorted_entries : c.entries;
2959             for (j=0; j < len2; ++j) {
2960                uint z = sparse ? c.sorted_values[j] : j;
2961                uint div=1;
2962                for (k=0; k < c.dimensions; ++k) {
2963                   int off = (z / div) % c.lookup_values;
2964                   float val = mults[off]*c.delta_value + c.minimum_value + last;
2965                   c.multiplicands[j*c.dimensions + k] = val;
2966                   if (c.sequence_p)
2967                      last = val;
2968                   if (k+1 < c.dimensions) {
2969                      if (div > uint.max / cast(uint) c.lookup_values) {
2970                         setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values);
2971                         return error(f, VORBIS_invalid_setup);
2972                      }
2973                      div *= c.lookup_values;
2974                   }
2975                }
2976             }
2977             c.lookup_type = 2;
2978          }
2979          else
2980          {
2981             float last=0;
2982             c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.lookup_values);
2983             if (c.multiplicands == null) { setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); }
2984             for (j=0; j < cast(int) c.lookup_values; ++j) {
2985                float val = mults[j] * c.delta_value + c.minimum_value + last;
2986                c.multiplicands[j] = val;
2987                if (c.sequence_p)
2988                   last = val;
2989             }
2990          }
2991         skip:;
2992          setup_temp_free(f, mults, (mults[0]).sizeof * c.lookup_values);
2993 
2994       }
2995    }
2996 
2997    // time domain transfers (notused)
2998 
2999    x = cast(ubyte)(get_bits(f, 6) + 1);
3000    for (i=0; i < x; ++i) {
3001       uint32 z = get_bits(f, 16);
3002       if (z != 0) return error(f, VORBIS_invalid_setup);
3003    }
3004 
3005    // Floors
3006    f.floor_count = get_bits(f, 6)+1;
3007    f.floor_config = cast(Floor *)  setup_malloc(f, f.floor_count * (*f.floor_config).sizeof);
3008    if (f.floor_config == null) return error(f, VORBIS_outofmem);
3009    for (i=0; i < f.floor_count; ++i) {
3010       f.floor_types[i] = cast(ushort) get_bits(f, 16);
3011       if (f.floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3012       if (f.floor_types[i] == 0) {
3013          Floor0 *g = &f.floor_config[i].floor0;
3014          g.order = cast(ubyte) get_bits(f,8);
3015          g.rate = cast(ushort) get_bits(f,16);
3016          g.bark_map_size = cast(ushort) get_bits(f,16);
3017          g.amplitude_bits = cast(ubyte) get_bits(f,6);
3018          g.amplitude_offset = cast(ubyte) get_bits(f,8);
3019          g.number_of_books = cast(ubyte)(get_bits(f,4) + 1);
3020          for (j=0; j < g.number_of_books; ++j)
3021             g.book_list[j] = cast(ubyte) get_bits(f,8);
3022          return error(f, VORBIS_feature_not_supported);
3023       } else {
3024          stbv__floor_ordering[31*8+2] p;
3025          Floor1 *g = &f.floor_config[i].floor1;
3026          int max_class = -1;
3027          g.partitions = cast(ubyte) get_bits(f, 5);
3028          for (j=0; j < g.partitions; ++j) {
3029             g.partition_class_list[j] = cast(ubyte) get_bits(f, 4);
3030             if (g.partition_class_list[j] > max_class)
3031                max_class = g.partition_class_list[j];
3032          }
3033          for (j=0; j <= max_class; ++j) {
3034             g.class_dimensions[j] = cast(ubyte)(get_bits(f, 3)+1);
3035             g.class_subclasses[j] = cast(ubyte)get_bits(f, 2);
3036             if (g.class_subclasses[j]) {
3037                g.class_masterbooks[j] = cast(ubyte) get_bits(f, 8);
3038                if (g.class_masterbooks[j] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3039             }
3040             for (k=0; k < 1 << g.class_subclasses[j]; ++k) {
3041                g.subclass_books[j][k] = cast(int16) (get_bits(f,8)-1);
3042                if (g.subclass_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3043             }
3044          }
3045          g.floor1_multiplier = cast(ubyte)(get_bits(f,2)+1);
3046          g.rangebits = cast(ubyte) get_bits(f,4);
3047          g.Xlist[0] = 0;
3048          g.Xlist[1] = cast(ushort)(1 << g.rangebits);
3049          g.values = 2;
3050          for (j=0; j < g.partitions; ++j) {
3051             int c = g.partition_class_list[j];
3052             for (k=0; k < g.class_dimensions[c]; ++k) {
3053                g.Xlist[g.values] = cast(ushort) get_bits(f, g.rangebits);
3054                ++g.values;
3055             }
3056          }
3057          // precompute the sorting
3058          for (j=0; j < g.values; ++j) {
3059             p[j].x = g.Xlist[j];
3060             p[j].id = cast(ushort) j;
3061          }
3062          qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare_2);
3063          for (j=0; j < g.values-1; ++j)
3064             if (p[j].x == p[j+1].x)
3065                return error(f, VORBIS_invalid_setup);
3066          for (j=0; j < g.values; ++j)
3067             g.sorted_order[j] = cast(uint8) p[j].id;
3068          // precompute the neighbors
3069          for (j=2; j < g.values; ++j) {
3070             int low = 0,hi = 0;
3071             neighbors(g.Xlist.ptr, j, &low,&hi);
3072             g.neighbors[j][0] = cast(ubyte)low;
3073             g.neighbors[j][1] = cast(ubyte)hi;
3074          }
3075 
3076          if (g.values > longest_floorlist)
3077             longest_floorlist = g.values;
3078       }
3079    }
3080 
3081    // Residue
3082    f.residue_count = get_bits(f, 6)+1;
3083    f.residue_config = cast(Residue *) setup_malloc(f, f.residue_count * (f.residue_config[0]).sizeof);
3084    if (f.residue_config == null) return error(f, VORBIS_outofmem);
3085    memset(f.residue_config, 0, f.residue_count * (f.residue_config[0]).sizeof);
3086    for (i=0; i < f.residue_count; ++i) {
3087       uint8[64] residue_cascade;
3088       Residue *r = f.residue_config+i;
3089       f.residue_types[i] = cast(ushort) get_bits(f, 16);
3090       if (f.residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3091       r.begin = get_bits(f, 24);
3092       r.end = get_bits(f, 24);
3093       if (r.end < r.begin) return error(f, VORBIS_invalid_setup);
3094       r.part_size = get_bits(f,24)+1;
3095       r.classifications = cast(ubyte)(get_bits(f,6)+1);
3096       r.classbook = cast(ubyte) get_bits(f,8);
3097       if (r.classbook >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3098       for (j=0; j < r.classifications; ++j) {
3099          uint8 high_bits=0;
3100          uint8 low_bits = cast(ubyte) get_bits(f,3);
3101          if (get_bits(f,1))
3102             high_bits = cast(ubyte) get_bits(f,5);
3103          residue_cascade[j] = cast(ubyte)(high_bits*8 + low_bits);
3104       }
3105       r.residue_books = cast(int16[8]*) setup_malloc(f, (r.residue_books[0]).sizeof * r.classifications);
3106       if (r.residue_books == null) return error(f, VORBIS_outofmem);
3107       for (j=0; j < r.classifications; ++j) {
3108          for (k=0; k < 8; ++k) {
3109             if (residue_cascade[j] & (1 << k)) {
3110                r.residue_books[j][k] = cast(short) get_bits(f, 8);
3111                if (r.residue_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3112             } else {
3113                r.residue_books[j][k] = -1;
3114             }
3115          }
3116       }
3117       // precompute the classifications[] array to avoid inner-loop mod/divide
3118       // call it 'classdata' since we already have r.classifications
3119       r.classdata = cast(uint8 **) setup_malloc(f, (*r.classdata).sizeof * f.codebooks[r.classbook].entries);
3120       if (!r.classdata) return error(f, VORBIS_outofmem);
3121       memset(r.classdata, 0, (*r.classdata).sizeof * f.codebooks[r.classbook].entries);
3122       for (j=0; j < f.codebooks[r.classbook].entries; ++j) {
3123          int classwords = f.codebooks[r.classbook].dimensions;
3124          int temp = j;
3125          r.classdata[j] = cast(uint8 *) setup_malloc(f, (r.classdata[j][0]).sizeof * classwords);
3126          if (r.classdata[j] == null) return error(f, VORBIS_outofmem);
3127          for (k=classwords-1; k >= 0; --k) {
3128             r.classdata[j][k] = cast(ubyte)(temp % r.classifications);
3129             temp /= r.classifications;
3130          }
3131       }
3132    }
3133 
3134    f.mapping_count = get_bits(f,6)+1;
3135    f.mapping = cast(Mapping *) setup_malloc(f, f.mapping_count * (*f.mapping).sizeof);
3136    if (f.mapping == null) return error(f, VORBIS_outofmem);
3137    memset(f.mapping, 0, f.mapping_count * (*f.mapping).sizeof);
3138    for (i=0; i < f.mapping_count; ++i) {
3139       Mapping *m = f.mapping + i;
3140       int mapping_type = get_bits(f,16);
3141       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3142       m.chan = cast(MappingChannel *) setup_malloc(f, f.channels * (*m.chan).sizeof);
3143       if (m.chan == null) return error(f, VORBIS_outofmem);
3144       if (get_bits(f,1))
3145          m.submaps = cast(ubyte)(get_bits(f,4)+1);
3146       else
3147          m.submaps = 1;
3148       if (m.submaps > max_submaps)
3149          max_submaps = m.submaps;
3150       if (get_bits(f,1)) {
3151          m.coupling_steps = cast(ushort)(get_bits(f,8)+1);
3152          if (m.coupling_steps > f.channels) return error(f, VORBIS_invalid_setup);
3153          for (k=0; k < m.coupling_steps; ++k) {
3154             m.chan[k].magnitude = cast(ubyte) get_bits(f, ilog(f.channels-1));
3155             m.chan[k].angle = cast(ubyte) get_bits(f, ilog(f.channels-1));
3156             if (m.chan[k].magnitude >= f.channels)        return error(f, VORBIS_invalid_setup);
3157             if (m.chan[k].angle     >= f.channels)        return error(f, VORBIS_invalid_setup);
3158             if (m.chan[k].magnitude == m.chan[k].angle)   return error(f, VORBIS_invalid_setup);
3159          }
3160       } else
3161          m.coupling_steps = 0;
3162 
3163       // reserved field
3164       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
3165       if (m.submaps > 1) {
3166          for (j=0; j < f.channels; ++j) {
3167             m.chan[j].mux = cast(ubyte) get_bits(f, 4);
3168             if (m.chan[j].mux >= m.submaps)                return error(f, VORBIS_invalid_setup);
3169          }
3170       } else
3171          // @SPECIFICATION: this case is missing from the spec
3172          for (j=0; j < f.channels; ++j)
3173             m.chan[j].mux = 0;
3174 
3175       for (j=0; j < m.submaps; ++j) {
3176          get_bits(f,8); // discard
3177          m.submap_floor[j] = cast(ubyte) get_bits(f,8);
3178          m.submap_residue[j] = cast(ubyte) get_bits(f,8);
3179          if (m.submap_floor[j] >= f.floor_count)      return error(f, VORBIS_invalid_setup);
3180          if (m.submap_residue[j] >= f.residue_count)  return error(f, VORBIS_invalid_setup);
3181       }
3182    }
3183 
3184    // Modes
3185    f.mode_count = get_bits(f, 6)+1;
3186    for (i=0; i < f.mode_count; ++i) {
3187       Mode *m = f.mode_config.ptr+i;
3188       m.blockflag = cast(ubyte) get_bits(f,1);
3189       m.windowtype = cast(ushort) get_bits(f,16);
3190       m.transformtype = cast(ushort) get_bits(f,16);
3191       m.mapping = cast(ubyte) get_bits(f,8);
3192       if (m.windowtype != 0)                 return error(f, VORBIS_invalid_setup);
3193       if (m.transformtype != 0)              return error(f, VORBIS_invalid_setup);
3194       if (m.mapping >= f.mapping_count)     return error(f, VORBIS_invalid_setup);
3195    }
3196 
3197    flush_packet(f);
3198 
3199    f.previous_length = 0;
3200 
3201    for (i=0; i < f.channels; ++i) {
3202       f.channel_buffers[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1);
3203       f.previous_window[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1/2);
3204       f.finalY[i]          = cast(int16 *) setup_malloc(f, (int16).sizeof * longest_floorlist);
3205       if (f.channel_buffers[i] == null || f.previous_window[i] == null || f.finalY[i] == null) return error(f, VORBIS_outofmem);
3206       memset(f.channel_buffers[i], 0, (float).sizeof * f.blocksize_1);      
3207    }
3208 
3209    if (!init_blocksize(f, 0, f.blocksize_0)) return FALSE;
3210    if (!init_blocksize(f, 1, f.blocksize_1)) return FALSE;
3211    f.blocksize[0] = f.blocksize_0;
3212    f.blocksize[1] = f.blocksize_1;
3213 
3214    // compute how much temporary memory is needed
3215 
3216    // 1.
3217    {
3218       uint32 imdct_mem = cast(uint)( (f.blocksize_1 * (float.sizeof) >> 1));
3219       uint32 classify_mem;
3220       int i2,max_part_read=0;
3221       for (i2=0; i2 < f.residue_count; ++i2) {
3222          Residue *r = f.residue_config + i2;
3223          uint actual_size = f.blocksize_1 / 2;
3224          uint limit_r_begin = r.begin < actual_size ? r.begin : actual_size;
3225          uint limit_r_end   = r.end   < actual_size ? r.end   : actual_size;
3226          int n_read = limit_r_end - limit_r_begin;
3227          int part_read = n_read / r.part_size;
3228          if (part_read > max_part_read)
3229             max_part_read = part_read;
3230       }
3231       classify_mem = cast(uint)( f.channels * ((void*).sizeof + max_part_read * (uint8 *).sizeof));
3232       // maximum reasonable partition size is f.blocksize_1
3233 
3234       f.temp_memory_required = classify_mem;
3235       if (imdct_mem > f.temp_memory_required)
3236          f.temp_memory_required = imdct_mem;
3237    }
3238 
3239 
3240    if (f.alloc.alloc_buffer) {
3241       assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3242       // check if there's enough temp memory so we don't error later
3243       if (f.setup_offset + (*f).sizeof + f.temp_memory_required > cast(uint) f.temp_offset)
3244          return error(f, VORBIS_outofmem);
3245    }
3246 
3247    // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
3248    // without PAGEFLAG_continued_packet, so this either points to the first page, or
3249    // the page after the end of the headers. It might be cleaner to point to a page
3250    // in the middle of the headers, when that's the page where the first audio packet
3251    // starts, but we'd have to also correctly skip the end of any continued packet in
3252    // stb_vorbis_seek_start.
3253    if (f.next_seg == -1) {
3254       f.first_audio_page_offset = stb_vorbis_get_file_offset(f);
3255    } else {
3256       f.first_audio_page_offset = 0;
3257    }
3258 
3259    return TRUE;
3260 }
3261 
3262 static void vorbis_deinit(stb_vorbis *p)
3263 {
3264    int i,j;
3265 
3266    setup_free(p, p.vendor);
3267    for (i=0; i < p.comment_list_length; ++i) {
3268       setup_free(p, p.comment_list[i]);
3269    }
3270    setup_free(p, p.comment_list);
3271 
3272    if (p.residue_config) {
3273       for (i=0; i < p.residue_count; ++i) {
3274          Residue *r = p.residue_config+i;
3275          if (r.classdata) {
3276             for (j=0; j < p.codebooks[r.classbook].entries; ++j)
3277                setup_free(p, r.classdata[j]);
3278             setup_free(p, r.classdata);
3279          }
3280          setup_free(p, r.residue_books);
3281       }
3282    }
3283 
3284    if (p.codebooks) {
3285       for (i=0; i < p.codebook_count; ++i) {
3286          Codebook *c = p.codebooks + i;
3287          setup_free(p, c.codeword_lengths);
3288          setup_free(p, c.multiplicands);
3289          setup_free(p, c.codewords);
3290          setup_free(p, c.sorted_codewords);
3291          // c.sorted_values[-1] is the first entry in the array
3292          setup_free(p, c.sorted_values ? c.sorted_values-1 : null);
3293       }
3294       setup_free(p, p.codebooks);
3295    }
3296    setup_free(p, p.floor_config);
3297    setup_free(p, p.residue_config);
3298    if (p.mapping) {
3299       for (i=0; i < p.mapping_count; ++i)
3300          setup_free(p, p.mapping[i].chan);
3301       setup_free(p, p.mapping);
3302    }
3303    for (i=0; i < p.channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
3304       setup_free(p, p.channel_buffers[i]);
3305       setup_free(p, p.previous_window[i]);      
3306       setup_free(p, p.finalY[i]);
3307    }
3308    for (i=0; i < 2; ++i) {
3309       setup_free(p, p.A[i]);
3310       setup_free(p, p.B[i]);
3311       setup_free(p, p.C[i]);
3312       setup_free(p, p.window[i]);
3313       setup_free(p, p.bit_reverse[i]);
3314    }
3315 }
3316 
3317 void stb_vorbis_close(stb_vorbis *p)
3318 {
3319    vorbis_deinit(p);
3320    setup_free(p,p);
3321 }
3322 
3323 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z)
3324 {
3325    memset(p, 0, (*p).sizeof); // NULL out all malloc'd pointers to start
3326    if (z) {
3327       p.alloc = *z;
3328       p.alloc.alloc_buffer_length_in_bytes &= ~7;
3329       p.temp_offset = p.alloc.alloc_buffer_length_in_bytes;
3330    }
3331    p.eof = 0;
3332    p.error = VORBIS__no_error;
3333    p.codebooks = null;
3334 }
3335 
3336 long stb_vorbis_get_sample_offset(stb_vorbis *f)
3337 {
3338    // Note: this is rewritten vs original stb_vorbis, who wasn't returning something good.    
3339    if (f.current_loc_valid)
3340    {
3341         return f.current_sample_loc;
3342    }
3343    else
3344       return -1;
3345 }
3346 
3347 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
3348 {
3349    stb_vorbis_info d;
3350    d.channels = f.channels;
3351    d.sample_rate = f.sample_rate;
3352    d.setup_memory_required = f.setup_memory_required;
3353    d.setup_temp_memory_required = f.setup_temp_memory_required;
3354    d.temp_memory_required = f.temp_memory_required;
3355    d.max_frame_size = f.blocksize_1 >> 1;
3356    return d;
3357 }
3358 
3359 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
3360 {
3361    stb_vorbis_comment d;
3362    d.vendor = f.vendor;
3363    d.comment_list_length = f.comment_list_length;
3364    d.comment_list = f.comment_list;
3365    return d;
3366 }
3367 
3368 int stb_vorbis_get_error(stb_vorbis *f)
3369 {
3370    int e = f.error;
3371    f.error = VORBIS__no_error;
3372    return e;
3373 }
3374 
3375 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
3376 {
3377    stb_vorbis *p = cast(stb_vorbis *) setup_malloc(f, stb_vorbis.sizeof);
3378    return p;
3379 }
3380 
3381 uint stb_vorbis_get_file_offset(stb_vorbis *f)
3382 {
3383     return cast(uint) f._io.tell(f._userData);   
3384 }
3385 
3386 //
3387 // DATA-PULLING API
3388 //
3389 
3390 uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
3391 {
3392    for(;;) {
3393       int n;
3394       if (f.eof) return 0;
3395       n = get8(f);
3396       if (n == 0x4f) { // page header candidate
3397          uint retry_loc = stb_vorbis_get_file_offset(f);
3398          int i;
3399          // check if we're off the end of a file_section stream
3400          if (retry_loc - 25 > f.stream_len)
3401             return 0;
3402          // check the rest of the header
3403          for (i=1; i < 4; ++i)
3404             if (get8(f) != ogg_page_header[i])
3405                break;
3406          if (f.eof) return 0;
3407          if (i == 4) {
3408             uint8[27] header;
3409             uint32 i2, crc, goal, len;
3410             for (i2=0; i2 < 4; ++i2)
3411                header[i2] = ogg_page_header[i2];
3412             for (; i2 < 27; ++i2)
3413                header[i2] = get8(f);
3414             if (f.eof) return 0;
3415             if (header[4] != 0) goto invalid;
3416             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (cast(uint32)header[25]<<24);
3417             for (i2=22; i2 < 26; ++i2)
3418                header[i2] = 0;
3419             crc = 0;
3420             for (i2=0; i2 < 27; ++i2)
3421                crc = crc32_update(f, crc, header[i2]);
3422             len = 0;
3423             for (i2=0; i2 < header[26]; ++i2) {
3424                int s = get8(f);
3425                crc = crc32_update(f, crc,cast(ubyte)s);
3426                len += s;
3427             }
3428             if (len && f.eof) return 0;
3429             for (i2=0; i2 < len; ++i2)
3430                crc = crc32_update(f, crc, get8(f));
3431             // finished parsing probable page
3432             if (crc == goal) {
3433                // we could now check that it's either got the last
3434                // page flag set, OR it's followed by the capture
3435                // pattern, but I guess TECHNICALLY you could have
3436                // a file with garbage between each ogg page and recover
3437                // from it automatically? So even though that paranoia
3438                // might decrease the chance of an invalid decode by
3439                // another 2^32, not worth it since it would hose those
3440                // invalid-but-useful files?
3441                if (end)
3442                   *end = stb_vorbis_get_file_offset(f);
3443                if (last) {
3444                   if (header[5] & 0x04)
3445                      *last = 1;
3446                   else
3447                      *last = 0;
3448                }
3449                set_file_offset(f, retry_loc-1);
3450                return 1;
3451             }
3452          }
3453         invalid:
3454          // not a valid page, so rewind and look for next one
3455          set_file_offset(f, retry_loc);
3456       }
3457    }
3458    assert(false);
3459 }
3460 
3461 
3462 enum SAMPLE_unknown = 0xffffffff;
3463 
3464 // seeking is implemented with a binary search, which narrows down the range to
3465 // 64K, before using a linear search (because finding the synchronization
3466 // pattern can be expensive, and the chance we'd find the end page again is
3467 // relatively high for small ranges)
3468 //
3469 // two initial interpolation-style probes are used at the start of the search
3470 // to try to bound either side of the binary search sensibly, while still
3471 // working in O(log n) time if they fail.
3472 
3473 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
3474 {
3475    uint8[27] header;
3476    uint8[255] lacing;
3477    int i,len;
3478 
3479    // record where the page starts
3480    z.page_start = stb_vorbis_get_file_offset(f);
3481 
3482    // parse the header
3483    getn(f, header.ptr, 27);
3484    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
3485       return 0;
3486    getn(f, lacing.ptr, header[26]);
3487 
3488    // determine the length of the payload
3489    len = 0;
3490    for (i=0; i < header[26]; ++i)
3491       len += lacing[i];
3492 
3493    // this implies where the page ends
3494    z.page_end = z.page_start + 27 + header[26] + len;
3495 
3496    // read the last-decoded sample out of the data
3497    z.last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
3498 
3499    // restore file state to where we were
3500    set_file_offset(f, z.page_start);
3501    return 1;
3502 }
3503 
3504 // rarely used function to seek back to the preceding page while finding the
3505 // start of a packet
3506 static int go_to_page_before(stb_vorbis *f, uint limit_offset)
3507 {
3508    uint previous_safe, end;
3509 
3510    // now we want to seek back 64K from the limit
3511    if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset)
3512       previous_safe = limit_offset - 65536;
3513    else
3514       previous_safe = f.first_audio_page_offset;
3515 
3516    set_file_offset(f, previous_safe);
3517 
3518    while (vorbis_find_page(f, &end, null)) {
3519       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
3520          return 1;
3521       set_file_offset(f, end);
3522    }
3523 
3524    return 0;
3525 }
3526 
3527 // implements the search logic for finding a page and starting decoding. if
3528 // the function succeeds, current_loc_valid will be true and current_loc will
3529 // be less than or equal to the provided sample number (the closer the
3530 // better).
3531 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
3532 {
3533    ProbedPage left, right, mid;
3534    int i, start_seg_with_known_loc, end_pos, page_start;
3535    uint32 delta, stream_length, padding, last_sample_limit;
3536    double offset = 0.0, bytes_per_sample = 0.0;
3537    int probe = 0;
3538 
3539    // find the last page and validate the target sample
3540    stream_length = stb_vorbis_stream_length_in_samples(f);
3541    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
3542    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
3543 
3544    // this is the maximum difference between the window-center (which is the
3545    // actual granule position value), and the right-start (which the spec
3546    // indicates should be the granule position (give or take one)).
3547    padding = ((f.blocksize_1 - f.blocksize_0) >> 2);
3548    if (sample_number < padding)
3549       last_sample_limit = 0;
3550    else
3551       last_sample_limit = sample_number - padding;
3552 
3553    left = f.p_first;
3554    while (left.last_decoded_sample == ~0U) {
3555       // (untested) the first page does not have a 'last_decoded_sample'
3556       set_file_offset(f, left.page_end);
3557       if (!get_seek_page_info(f, &left)) goto error;
3558    }
3559 
3560    right = f.p_last;
3561    assert(right.last_decoded_sample != ~0U);
3562 
3563    // starting from the start is handled differently
3564    if (last_sample_limit <= left.last_decoded_sample) {
3565       if (stb_vorbis_seek_start(f)) {
3566          if (f.current_loc > sample_number)
3567             return error(f, VORBIS_seek_failed);
3568          return 1;
3569       }
3570       return 0;
3571    }
3572 
3573    while (left.page_end != right.page_start) {
3574       assert(left.page_end < right.page_start);
3575       // search range in bytes
3576       delta = right.page_start - left.page_end;
3577       if (delta <= 65536) {
3578          // there's only 64K left to search - handle it linearly
3579          set_file_offset(f, left.page_end);
3580       } else {
3581          if (probe < 2) {
3582             if (probe == 0) {
3583                // first probe (interpolate)
3584                double data_bytes = right.page_end - left.page_start;
3585                bytes_per_sample = data_bytes / right.last_decoded_sample;
3586                offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
3587             } else {
3588                // second probe (try to bound the other side)
3589                double error = (cast(double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
3590                if (error >= 0 && error <  8000) error =  8000;
3591                if (error <  0 && error > -8000) error = -8000;
3592                offset += error * 2;
3593             }
3594 
3595             // ensure the offset is valid
3596             if (offset < left.page_end)
3597                offset = left.page_end;
3598             if (offset > right.page_start - 65536)
3599                offset = right.page_start - 65536;
3600 
3601             set_file_offset(f, cast(uint) offset);
3602          } else {
3603             // binary search for large ranges (offset by 32K to ensure
3604             // we don't hit the right page)
3605             set_file_offset(f, left.page_end + (delta / 2) - 32768);
3606          }
3607 
3608          if (!vorbis_find_page(f, null, null)) goto error;
3609       }
3610 
3611       for (;;) {
3612          if (!get_seek_page_info(f, &mid)) goto error;
3613          if (mid.last_decoded_sample != ~0U) break;
3614          // (untested) no frames end on this page
3615          set_file_offset(f, mid.page_end);
3616          assert(mid.page_start < right.page_start);
3617       }
3618 
3619       // if we've just found the last page again then we're in a tricky file,
3620       // and we're close enough (if it wasn't an interpolation probe).
3621       if (mid.page_start == right.page_start) {
3622          if (probe >= 2 || delta <= 65536)
3623             break;
3624       } else {
3625          if (last_sample_limit < mid.last_decoded_sample)
3626             right = mid;
3627          else
3628             left = mid;
3629       }
3630 
3631       ++probe;
3632    }
3633 
3634    // seek back to start of the last packet
3635    page_start = left.page_start;
3636    set_file_offset(f, page_start);
3637    if (!start_page(f)) return error(f, VORBIS_seek_failed);
3638    end_pos = f.end_seg_with_known_loc;
3639    assert(end_pos >= 0);
3640 
3641    for (;;) {
3642       for (i = end_pos; i > 0; --i)
3643          if (f.segments[i-1] != 255)
3644             break;
3645 
3646       start_seg_with_known_loc = i;
3647 
3648       if (start_seg_with_known_loc > 0 || !(f.page_flag & PAGEFLAG_continued_packet))
3649          break;
3650 
3651       // (untested) the final packet begins on an earlier page
3652       if (!go_to_page_before(f, page_start))
3653          goto error;
3654 
3655       page_start = stb_vorbis_get_file_offset(f);
3656       if (!start_page(f)) goto error;
3657       end_pos = f.segment_count - 1;
3658    }
3659 
3660    // prepare to start decoding
3661    f.current_loc_valid = FALSE;
3662    f.last_seg = FALSE;
3663    f.valid_bits = 0;
3664    f.packet_bytes = 0;
3665    f.bytes_in_seg = 0;
3666    f.previous_length = 0;
3667    f.next_seg = start_seg_with_known_loc;
3668 
3669    for (i = 0; i < start_seg_with_known_loc; i++)
3670       skip(f, f.segments[i]);
3671 
3672    // start decoding (optimizable - this frame is generally discarded)
3673    if (!vorbis_pump_first_frame(f))
3674       return 0;
3675    if (f.current_loc > sample_number)
3676       return error(f, VORBIS_seek_failed);
3677    return 1;
3678 
3679 error:
3680    // try to restore the file to a valid state
3681    stb_vorbis_seek_start(f);
3682    return error(f, VORBIS_seek_failed);
3683 }
3684 
3685 // the same as vorbis_decode_initial, but without advancing
3686 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3687 {
3688    int bits_read, bytes_read;
3689 
3690    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
3691       return 0;
3692 
3693    // either 1 or 2 bytes were read, figure out which so we can rewind
3694    bits_read = 1 + ilog(f.mode_count-1);
3695    if (f.mode_config[*mode].blockflag)
3696       bits_read += 2;
3697    bytes_read = (bits_read + 7) / 8;
3698 
3699    f.bytes_in_seg += bytes_read;
3700    f.packet_bytes -= bytes_read;
3701    skip(f, -bytes_read);
3702    if (f.next_seg == -1)
3703       f.next_seg = f.segment_count - 1;
3704    else
3705       f.next_seg--;
3706    f.valid_bits = 0;
3707 
3708    return 1;
3709 }
3710 
3711 int stb_vorbis_seek_frame(stb_vorbis *f, uint sample_number)
3712 {
3713    uint32 max_frame_samples;
3714 
3715    // fast page-level search
3716    if (!seek_to_sample_coarse(f, sample_number))
3717       return 0;
3718 
3719    assert(f.current_loc_valid);
3720    assert(f.current_loc <= sample_number);
3721 
3722    // linear search for the relevant packet
3723    max_frame_samples = (f.blocksize_1*3 - f.blocksize_0) >> 2;
3724    while (f.current_loc < sample_number) {
3725       int left_start, left_end, right_start, right_end, mode, frame_samples;
3726       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
3727          return error(f, VORBIS_seek_failed);
3728       // calculate the number of samples returned by the next frame
3729       frame_samples = right_start - left_start;
3730       if (f.current_loc + frame_samples > sample_number) {
3731          return 1; // the next frame will contain the sample
3732       } else if (f.current_loc + frame_samples + max_frame_samples > sample_number) {
3733          // there's a chance the frame after this could contain the sample
3734          vorbis_pump_first_frame(f);
3735       } else {
3736          // this frame is too early to be relevant
3737          f.current_loc += frame_samples;
3738          f.previous_length = 0;
3739          maybe_start_packet(f);
3740          flush_packet(f);
3741       }
3742    }
3743    // the next frame should start with the sample
3744    if (f.current_loc != sample_number) return error(f, VORBIS_seek_failed);
3745    return 1;
3746 }
3747 
3748 int stb_vorbis_seek(stb_vorbis *f, long sample_number)
3749 {
3750     if (sample_number < 0)
3751         return 0;
3752 
3753     if (sample_number > uint.max)
3754         return 0;
3755 
3756    if (!stb_vorbis_seek_frame(f, cast(uint)sample_number))
3757       return 0;
3758 
3759    if (sample_number != f.current_loc) {
3760       int n;
3761       uint32 frame_start = f.current_loc;
3762       stb_vorbis_get_frame_float(f, &n, null);
3763       assert(sample_number > frame_start);
3764       assert(f.channel_buffer_start + cast(int) (sample_number-frame_start) <= f.channel_buffer_end);
3765       f.channel_buffer_start += (sample_number - frame_start);
3766    }
3767    f.current_sample_loc = sample_number;
3768 
3769    return 1;
3770 }
3771 
3772 int stb_vorbis_seek_start(stb_vorbis *f)
3773 {
3774    set_file_offset(f, f.first_audio_page_offset);
3775    f.previous_length = 0;
3776    f.first_decode = TRUE;
3777    f.next_seg = -1;
3778    return vorbis_pump_first_frame(f);
3779 }
3780 
3781 uint stb_vorbis_stream_length_in_samples(stb_vorbis *f)
3782 {
3783    uint restore_offset, previous_safe;
3784    uint end, last_page_loc;
3785 
3786    if (!f.total_samples) {
3787       uint last;
3788       uint32 lo,hi;
3789       char[6] header;
3790 
3791       // first, store the current decode position so we can restore it
3792       restore_offset = stb_vorbis_get_file_offset(f);
3793 
3794       // now we want to seek back 64K from the end (the last page must
3795       // be at most a little less than 64K, but let's allow a little slop)
3796       if (f.stream_len >= 65536 && f.stream_len-65536 >= f.first_audio_page_offset)
3797          previous_safe = f.stream_len - 65536;
3798       else
3799          previous_safe = f.first_audio_page_offset;
3800 
3801       set_file_offset(f, previous_safe);
3802       // previous_safe is now our candidate 'earliest known place that seeking
3803       // to will lead to the final page'
3804 
3805       if (!vorbis_find_page(f, &end, &last)) {
3806          // if we can't find a page, we're hosed!
3807          f.error = VORBIS_cant_find_last_page;
3808          f.total_samples = 0xffffffff;
3809          goto done;
3810       }
3811 
3812       // check if there are more pages
3813       last_page_loc = stb_vorbis_get_file_offset(f);
3814 
3815       // stop when the last_page flag is set, not when we reach eof;
3816       // this allows us to stop short of a 'file_section' end without
3817       // explicitly checking the length of the section
3818       while (!last) {
3819          set_file_offset(f, end);
3820          if (!vorbis_find_page(f, &end, &last)) {
3821             // the last page we found didn't have the 'last page' flag
3822             // set. whoops!
3823             break;
3824          }
3825          //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging
3826          last_page_loc = stb_vorbis_get_file_offset(f);
3827       }
3828 
3829       set_file_offset(f, last_page_loc);
3830 
3831       // parse the header
3832       getn(f, cast(ubyte*)header.ptr, 6);
3833       // extract the absolute granule position
3834       lo = get32(f);
3835       hi = get32(f);
3836       if (lo == 0xffffffff && hi == 0xffffffff) {
3837          f.error = VORBIS_cant_find_last_page;
3838          f.total_samples = SAMPLE_unknown;
3839          goto done;
3840       }
3841       if (hi)
3842          lo = 0xfffffffe; // saturate
3843       f.total_samples = lo;
3844 
3845       f.p_last.page_start = last_page_loc;
3846       f.p_last.page_end   = end;
3847       f.p_last.last_decoded_sample = lo;
3848 
3849      done:
3850       set_file_offset(f, restore_offset);
3851    }
3852    return f.total_samples == SAMPLE_unknown ? 0 : f.total_samples;
3853 }
3854 
3855 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
3856 {
3857    return stb_vorbis_stream_length_in_samples(f) / cast(float) f.sample_rate;
3858 }
3859 
3860 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
3861 {
3862    int len, right,left,i;
3863 
3864    if (!vorbis_decode_packet(f, &len, &left, &right)) {
3865       f.channel_buffer_start = f.channel_buffer_end = 0;
3866       return 0;
3867    }
3868 
3869    len = vorbis_finish_frame(f, len, left, right);
3870    for (i=0; i < f.channels; ++i)
3871       f.outputs[i] = f.channel_buffers[i] + left;
3872 
3873    f.channel_buffer_start = left;
3874    f.channel_buffer_end   = left+len;
3875 
3876    if (channels) *channels = f.channels;
3877    if (output)   *output = f.outputs.ptr;
3878    return len;
3879 }
3880 
3881 stb_vorbis* stb_vorbis_open_file_section(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc, uint length)
3882 {
3883     stb_vorbis *f;
3884     stb_vorbis p;
3885     vorbis_init(&p, alloc);
3886     p._io = io;
3887     p._userData = userData;
3888     p.stream_len = length;
3889     if (start_decoder(&p)) 
3890     {
3891         f = vorbis_alloc(&p);
3892         if (f) 
3893         {
3894             *f = p;
3895             vorbis_pump_first_frame(f);
3896             return f;
3897         }
3898     }
3899     if (error) *error = p.error;
3900     vorbis_deinit(&p);
3901     return null;
3902 }
3903 
3904 stb_vorbis* stb_vorbis_open_file(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc)
3905 {
3906     uint len = cast(uint) io.getFileLength(userData);
3907     return stb_vorbis_open_file_section(io, userData, error, alloc, len);
3908 }
3909 
3910 
3911 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
3912 {
3913    float **outputs;
3914    int len = num_floats / channels;
3915    int n=0;
3916    int z = f.channels;
3917    if (z > channels) z = channels;
3918    while (n < len) {
3919       int i,j;
3920       int k = f.channel_buffer_end - f.channel_buffer_start;
3921       if (n+k >= len) k = len - n;
3922       for (j=0; j < k; ++j) {
3923          for (i=0; i < z; ++i)
3924             *buffer++ = f.channel_buffers[i][f.channel_buffer_start+j];
3925          for (   ; i < channels; ++i)
3926             *buffer++ = 0;
3927       }
3928       n += k;
3929       f.channel_buffer_start += k;
3930       if (n == len)
3931          break;
3932       if (!stb_vorbis_get_frame_float(f, null, &outputs))
3933          break;
3934    }
3935    return n;
3936 }
3937 
3938 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
3939 {
3940    float **outputs;
3941    int n=0;
3942    int z = f.channels;
3943    if (z > channels) z = channels;
3944    while (n < num_samples) {
3945       int i;
3946       int k = f.channel_buffer_end - f.channel_buffer_start;
3947       if (n+k >= num_samples) k = num_samples - n;
3948       if (k) {
3949          for (i=0; i < z; ++i)
3950             memcpy(buffer[i]+n, f.channel_buffers[i]+f.channel_buffer_start, (float.sizeof)*k);
3951          for (   ; i < channels; ++i)
3952             memset(buffer[i]+n, 0, (float.sizeof) * k);
3953       }
3954       n += k;
3955       f.channel_buffer_start += k;
3956       if (n == num_samples)
3957          break;
3958       if (!stb_vorbis_get_frame_float(f, null, &outputs))
3959          break;
3960    }
3961    return n;
3962 }
3963 
3964 /* Version history
3965     1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
3966                            found with Mayhem by ForAllSecure
3967     1.16    - 2019-03-04 - fix warnings
3968     1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
3969     1.14    - 2018-02-11 - delete bogus dealloca usage
3970     1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
3971     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
3972     1.11    - 2017-07-23 - fix MinGW compilation
3973     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
3974     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
3975     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
3976                            avoid discarding last frame of audio data
3977     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
3978                            some more crash fixes when out of memory or with corrupt files
3979     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
3980                            some crash fixes when out of memory or with corrupt files
3981     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
3982     1.04    - 2014-08-27 - fix missing const-correct case in API
3983     1.03    - 2014-08-07 - Warning fixes
3984     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
3985     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
3986     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
3987                            (API change) report sample rate for decode-full-file funcs
3988     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
3989     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
3990     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
3991     0.99993 - remove assert that fired on legal files with empty tables
3992     0.99992 - rewind-to-start
3993     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
3994     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
3995     0.9998 - add a full-decode function with a memory source
3996     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
3997     0.9996 - query length of vorbis stream in samples/seconds
3998     0.9995 - bugfix to another optimization that only happened in certain files
3999     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
4000     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
4001     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
4002     0.9991 - performance improvement of IMDCT
4003     0.999 - (should have been 0.9990) performance improvement of IMDCT
4004     0.998 - no-CRT support from Casey Muratori
4005     0.997 - bugfixes for bugs found by Terje Mathisen
4006     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
4007     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
4008     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
4009     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
4010     0.992 - fixes for MinGW warning
4011     0.991 - turn fast-float-conversion on by default
4012     0.990 - fix push-mode seek recovery if you seek into the headers
4013     0.98b - fix to bad release of 0.98
4014     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
4015     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
4016     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
4017     0.95 - clamping code for 16-bit functions
4018     0.94 - not publically released
4019     0.93 - fixed all-zero-floor case (was decoding garbage)
4020     0.92 - fixed a memory leak
4021     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
4022     0.90 - first public release
4023 */
4024 
4025 /*
4026 ------------------------------------------------------------------------------
4027 This software is available under 2 licenses -- choose whichever you prefer.
4028 ------------------------------------------------------------------------------
4029 ALTERNATIVE A - MIT License
4030 Copyright (c) 2017 Sean Barrett
4031 Permission is hereby granted, free of charge, to any person obtaining a copy of
4032 this software and associated documentation files (the "Software"), to deal in
4033 the Software without restriction, including without limitation the rights to
4034 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
4035 of the Software, and to permit persons to whom the Software is furnished to do
4036 so, subject to the following conditions:
4037 The above copyright notice and this permission notice shall be included in all
4038 copies or substantial portions of the Software.
4039 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4040 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4041 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4042 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4043 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
4044 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4045 SOFTWARE.
4046 ------------------------------------------------------------------------------
4047 ALTERNATIVE B - Public Domain (www.unlicense.org)
4048 This is free and unencumbered software released into the public domain.
4049 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
4050 software, either in source code form or as a compiled binary, for any purpose,
4051 commercial or non-commercial, and by any means.
4052 In jurisdictions that recognize copyright laws, the author or authors of this
4053 software dedicate any and all copyright interest in the software to the public
4054 domain. We make this dedication for the benefit of the public at large and to
4055 the detriment of our heirs and successors. We intend this dedication to be an
4056 overt act of relinquishment in perpetuity of all present and future rights to
4057 this software under copyright law.
4058 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4059 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4060 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4061 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
4062 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
4063 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4064 ------------------------------------------------------------------------------
4065 */