1 // Ogg Vorbis audio decoder - v1.19 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        github:alxprd
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    github:manxorist   saga musix          github:infatum
33 //    Timur Gagiev       Maxwell Koo         Peter Waller
34 //    github:audinowho   Dougall Johnson
35 //
36 // Partial history:
37 //    1.19    - 2020-02-05 - warnings
38 //    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
39 //    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
40 //    1.16    - 2019-03-04 - fix warnings
41 //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
42 //    1.14    - 2018-02-11 - delete bogus dealloca usage
43 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
44 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
45 //    1.11    - 2017-07-23 - fix MinGW compilation
46 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
47 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
48 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
49 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
50 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
51 //                           some crash fixes when out of memory or with corrupt files
52 //                           fix some inappropriately signed shifts
53 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
54 //    1.04    - 2014-08-27 - fix missing const-correct case in API
55 //    1.03    - 2014-08-07 - warning fixes
56 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
57 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
58 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
59 //                           (API change) report sample rate for decode-full-file funcs
60 //
61 // See end of file for full version history.
62 module audioformats.vorbis;
63 
64 import core.stdc.stdlib;
65 import core.stdc.string;
66 
67 import std.math;
68 
69 
70 import audioformats.io;
71 
72 nothrow:
73 @nogc:
74 
75 ///////////   THREAD SAFETY
76 
77 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
78 // them from multiple threads at the same time. However, you can have multiple
79 // stb_vorbis* handles and decode from them independently in multiple thrads.
80 
81 
82 ///////////   MEMORY ALLOCATION
83 
84 // normally stb_vorbis uses malloc() to allocate memory at startup,
85 // and alloca() to allocate temporary memory during a frame on the
86 // stack. (Memory consumption will depend on the amount of setup
87 // data in the file and how you set the compile flags for speed
88 // vs. size. In my test files the maximal-size usage is ~150KB.)
89 //
90 // You can modify the wrapper functions in the source (setup_malloc,
91 // setup_temp_malloc, temp_malloc) to change this behavior, or you
92 // can use a simpler allocation model: you pass in a buffer from
93 // which stb_vorbis will allocate _all_ its memory (including the
94 // temp memory). "open" may fail with a VORBIS_outofmem if you
95 // do not pass in enough data; there is no way to determine how
96 // much you do need except to succeed (at which point you can
97 // query get_info to find the exact amount required. yes I know
98 // this is lame).
99 //
100 // If you pass in a non-null buffer of the type below, allocation
101 // will occur from it as described above. Otherwise just pass null
102 // to use malloc()/alloca()
103 
104 struct stb_vorbis_alloc
105 {
106    char *alloc_buffer;
107    int   alloc_buffer_length_in_bytes;
108 }
109 
110 
111 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
112 
113 
114 struct stb_vorbis_info
115 {
116    uint sample_rate;
117    int channels;
118 
119    uint setup_memory_required;
120    uint setup_temp_memory_required;
121    uint temp_memory_required;
122 
123    int max_frame_size;
124 }
125 
126 struct stb_vorbis_comment
127 {
128    char *vendor;
129 
130    int comment_list_length;
131    char **comment_list;
132 }
133 
134 
135 //////////   PULLING INPUT API
136 
137 // This API assumes stb_vorbis is allowed to pull data from a source--
138 // either a block of memory containing the _entire_ vorbis stream, or a
139 // FILE * that you or it create, or possibly some other reading mechanism
140 // if you go modify the source to replace the FILE * case with some kind
141 // of callback to your code. (But if you don't support seeking, you may
142 // just want to go ahead and use pushdata.)
143 
144 //extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
145 //extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
146 
147 // decode an entire file and output the data interleaved into a malloc()ed
148 // buffer stored in *output. The return value is the number of samples
149 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
150 // When you're done with it, just free() the pointer returned in *output.
151 
152 //extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
153 //                                  int *error, const stb_vorbis_alloc *alloc_buffer);
154 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
155 // this must be the entire stream!). on failure, returns null and sets *error
156 
157 
158 //extern int stb_vorbis_seek_frame(stb_vorbis *f, uint sample_number);
159 //extern int stb_vorbis_seek(stb_vorbis *f, uint sample_number);
160 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
161 // after calling seek_frame(), the next call to get_frame_*() will include
162 // the specified sample. after calling stb_vorbis_seek(), the next call to
163 // stb_vorbis_get_samples_* will start with the specified sample. If you
164 // do not need to seek to EXACTLY the target sample when using get_samples_*,
165 // you can also use seek_frame().
166 
167 //extern int stb_vorbis_seek_start(stb_vorbis *f);
168 // this function is equivalent to stb_vorbis_seek(f,0)
169 
170 //extern uint stb_vorbis_stream_length_in_samples(stb_vorbis *f);
171 // these functions return the total length of the vorbis stream
172 
173 //extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
174 // decode the next frame and return the number of samples. the number of
175 // channels returned are stored in *channels (which can be null--it is always
176 // the same as the number of channels reported by get_info). *output will
177 // contain an array of float* buffers, one per channel. These outputs will
178 // be overwritten on the next call to stb_vorbis_get_frame_*.
179 //
180 // You generally should not intermix calls to stb_vorbis_get_frame_*()
181 // and stb_vorbis_get_samples_*(), since the latter calls the former.
182 
183 
184 //extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
185 //extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
186 // gets num_samples samples, not necessarily on a frame boundary--this requires
187 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
188 // Returns the number of samples stored per channel; it may be less than requested
189 // at the end of the file. If there are no more samples in the file, returns 0.
190 
191 
192 ////////   ERROR CODES
193 
194 alias STBVorbisError = int;
195 enum : STBVorbisError
196 {
197    VORBIS__no_error,
198 
199    VORBIS_need_more_data=1,             // not a real error
200 
201    VORBIS_invalid_api_mixing,           // can't mix API modes
202    VORBIS_outofmem,                     // not enough memory
203    VORBIS_feature_not_supported,        // uses floor 0
204    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
205    VORBIS_file_open_failure,            // fopen() failed
206    VORBIS_seek_without_length,          // can't seek in unknown-length file
207 
208    VORBIS_unexpected_eof=10,            // file is truncated?
209    VORBIS_seek_invalid,                 // seek past EOF
210 
211    // decoding errors (corrupt/invalid stream) -- you probably
212    // don't care about the exact details of these
213 
214    // vorbis errors:
215    VORBIS_invalid_setup=20,
216    VORBIS_invalid_stream,
217 
218    // ogg errors:
219    VORBIS_missing_capture_pattern=30,
220    VORBIS_invalid_stream_structure_version,
221    VORBIS_continued_packet_flag_invalid,
222    VORBIS_incorrect_stream_serial_number,
223    VORBIS_invalid_first_page,
224    VORBIS_bad_packet_type,
225    VORBIS_cant_find_last_page,
226    VORBIS_seek_failed,
227    VORBIS_ogg_skeleton_not_supported
228 };
229 
230 // global configuration settings (e.g. set these in the project/makefile),
231 // or just set them in this file at the top (although ideally the first few
232 // should be visible when the header file is compiled too, although it's not
233 // crucial)
234 
235 // STB_VORBIS_MAX_CHANNELS [number]
236 //     globally define this to the maximum number of channels you need.
237 //     The spec does not put a restriction on channels except that
238 //     the count is stored in a byte, so 255 is the hard limit.
239 //     Reducing this saves about 16 bytes per value, so using 16 saves
240 //     (255-16)*16 or around 4KB. Plus anything other memory usage
241 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
242 //     6 (5.1 audio), or 2 (stereo only).
243 enum STB_VORBIS_MAX_CHANNELS = 8;
244 
245 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
246 //     sets the log size of the huffman-acceleration table.  Maximum
247 //     supported value is 24. with larger numbers, more decodings are O(1),
248 //     but the table size is larger so worse cache missing, so you'll have
249 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
250 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
251 
252 static assert(STB_VORBIS_MAX_CHANNELS <= 256);
253 static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24);
254 
255 enum MAX_BLOCKSIZE_LOG = 13; // from specification
256 enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG);
257 
258 alias uint8  = ubyte;
259 alias int8   = byte;
260 alias uint16 = ushort;
261 alias int16  = short;
262 alias int32  = int;
263 
264 enum int TRUE = 1;
265 enum int FALSE = 0;
266 
267 alias codetype = float;
268 
269 // @NOTE
270 //
271 // Some arrays below are tagged "//varies", which means it's actually
272 // a variable-sized piece of data, but rather than malloc I assume it's
273 // small enough it's better to just allocate it all together with the
274 // main thing
275 //
276 // Most of the variables are specified with the smallest size I could pack
277 // them into. It might give better performance to make them all full-sized
278 // integers. It should be safe to freely rearrange the structures or change
279 // the sizes larger--nothing relies on silently truncating etc., nor the
280 // order of variables.
281 
282 enum FAST_HUFFMAN_TABLE_SIZE = (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH);
283 enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE - 1);
284 
285 struct Codebook
286 {
287    int dimensions, entries;
288    uint8 *codeword_lengths;
289    float  minimum_value;
290    float  delta_value;
291    uint8  value_bits;
292    uint8  lookup_type;
293    uint8  sequence_p;
294    uint8  sparse;
295    uint lookup_values;
296    codetype *multiplicands;
297    uint *codewords;
298    int32[FAST_HUFFMAN_TABLE_SIZE]  fast_huffman;
299    uint *sorted_codewords;
300    int    *sorted_values;
301    int     sorted_entries;
302 }
303 
304 struct Floor0
305 {
306    uint8 order;
307    uint16 rate;
308    uint16 bark_map_size;
309    uint8 amplitude_bits;
310    uint8 amplitude_offset;
311    uint8 number_of_books;
312    uint8[16] book_list; // varies
313 }
314 
315 struct Floor1
316 {
317    uint8 partitions;
318    uint8[32] partition_class_list; // varies
319    uint8[16] class_dimensions; // varies
320    uint8[16] class_subclasses; // varies
321    uint8[16] class_masterbooks; // varies
322    int16[8][16] subclass_books; // varies
323    uint16[31*8+2] Xlist; // varies
324    uint8[31*8+2] sorted_order;
325    uint8[2][31*8+2] neighbors;
326    uint8 floor1_multiplier;
327    uint8 rangebits;
328    int values;
329 }
330 
331 union Floor
332 {
333    Floor0 floor0;
334    Floor1 floor1;
335 }
336 
337 struct Residue
338 {
339    uint begin, end;
340    uint part_size;
341    uint8 classifications;
342    uint8 classbook;
343    uint8 **classdata;
344    int16[8]* residue_books;
345 }
346 
347 struct MappingChannel
348 {
349    uint8 magnitude;
350    uint8 angle;
351    uint8 mux;
352 }
353 
354 struct Mapping
355 {
356    uint16 coupling_steps;
357    MappingChannel *chan;
358    uint8  submaps;
359    uint8[15] submap_floor; // varies
360    uint8[15] submap_residue; // varies
361 }
362 
363 struct Mode
364 {
365    uint8 blockflag;
366    uint8 mapping;
367    uint16 windowtype;
368    uint16 transformtype;
369 }
370 
371 struct CRCscan
372 {
373    uint  goal_crc;    // expected crc if match
374    int     bytes_left;  // bytes left in packet
375    uint  crc_so_far;  // running crc
376    int     bytes_done;  // bytes processed in _current_ chunk
377    uint  sample_loc;  // granule pos encoded in page
378 }
379 
380 struct ProbedPage
381 {
382    uint page_start, page_end;
383    uint last_decoded_sample;
384 }
385 
386 struct stb_vorbis
387 {
388    // modified to use audio-formats I/O callbacks
389    IOCallbacks* io;
390    void* userData;
391 
392    // user-accessible info
393    uint sample_rate;
394    int channels;
395 
396    uint setup_memory_required;
397    uint temp_memory_required;
398    uint setup_temp_memory_required;
399 
400    char *vendor;
401    int comment_list_length;
402    char **comment_list;
403 
404   // input config
405 
406    uint stream_len;
407 
408    // the page to seek to when seeking to start, may be zero
409    uint first_audio_page_offset;
410 
411    // p_first is the page on which the first audio packet ends
412    // (but not necessarily the page on which it starts)
413    ProbedPage p_first, p_last;
414 
415   // memory management
416    stb_vorbis_alloc alloc;
417    int setup_offset;
418    int temp_offset;
419 
420   // run-time results
421    int eof;
422    STBVorbisError error;
423 
424   // user-useful data
425 
426   // header info
427    int[2] blocksize;
428    int blocksize_0, blocksize_1;
429    int codebook_count;
430    Codebook *codebooks;
431    int floor_count;
432    uint16[64] floor_types; // varies
433    Floor *floor_config;
434    int residue_count;
435    uint16[64] residue_types; // varies
436    Residue *residue_config;
437    int mapping_count;
438    Mapping *mapping;
439    int mode_count;
440    Mode[64] mode_config;  // varies
441 
442    uint total_samples;
443 
444   // decode buffer
445    float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
446    float*[STB_VORBIS_MAX_CHANNELS] outputs        ;
447 
448    float*[STB_VORBIS_MAX_CHANNELS] previous_window;
449    int previous_length;
450 
451    int16*[STB_VORBIS_MAX_CHANNELS] finalY;
452 
453    uint current_loc; // sample location of next frame to decode
454    int    current_loc_valid;
455 
456   // per-blocksize precomputed data
457 
458    // twiddle factors
459    float*[2] A, B, C;
460    float*[2] window;
461    uint16*[2] bit_reverse;
462 
463   // current page/packet/segment streaming info
464    uint serial; // stream serial number for verification
465    int last_page;
466    int segment_count;
467    uint8[255] segments;
468    uint8 page_flag;
469    uint8 bytes_in_seg;
470    uint8 first_decode;
471    int next_seg;
472    int last_seg;  // flag that we're on the last segment
473    int last_seg_which; // what was the segment number of the last seg?
474    uint acc;
475    int valid_bits;
476    int packet_bytes;
477    int end_seg_with_known_loc;
478    uint known_loc_for_packet;
479    int discard_samples_deferred;
480    uint samples_output;
481 
482   // push mode scanning
483    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
484 
485   // sample-access
486    int channel_buffer_start;
487    int channel_buffer_end;
488 }
489 
490 alias vorb = stb_vorbis;
491 
492 int error(vorb *f, STBVorbisError e)
493 {
494    f.error = e;
495    if (!f.eof && e != VORBIS_need_more_data) {
496       f.error=e; // breakpoint for debugging
497    }
498    return 0;
499 }
500 
501 
502 // these functions are used for allocating temporary memory
503 // while decoding. if you can afford the stack space, use
504 // alloca(); otherwise, provide a temp buffer and it will
505 // allocate out of those.
506 
507 void* temp_alloc(stb_vorbis* f, size_t sz)
508 {
509     return malloc(sz);
510 }
511 
512 int temp_alloc_save(stb_vorbis* f)
513 {
514    return f.temp_offset;
515 }
516 
517 void temp_alloc_restore(stb_vorbis* f, int p)
518 {
519    f.temp_offset = p;
520 }
521 
522 
523 void* temp_block_array(stb_vorbis* f, int count, int size)
524 {
525     int array_size_required = count * (cast(int)( (void *).sizeof ) + size);
526     return make_block_array( temp_alloc(f, array_size_required), count, size);
527 }
528 
529 
530 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
531 void *make_block_array(void *mem, int count, int size)
532 {
533    int i;
534    void ** p = cast(void **) mem;
535    char *q = cast(char *) (p + count);
536    for (i=0; i < count; ++i) {
537       p[i] = q;
538       q += size;
539    }
540    return p;
541 }
542 
543 void *setup_malloc(vorb *f, size_t sz)
544 {
545      return sz ? malloc(sz) : null;
546 }
547 
548 void setup_free(vorb *f, void *p)
549 {
550      free(p);
551 }
552 
553 void *setup_temp_malloc(vorb *f, size_t sz)
554 {
555     return setup_temp_malloc(f, cast(int)sz);
556 }
557 
558 void *setup_temp_malloc(vorb *f, int sz)
559 {
560     return sz ? malloc(sz) : null; // do not use temp buffer in setup, as it leads to crashes
561 }
562 
563 void setup_temp_free(vorb *f, void *p, ulong sz)
564 {
565     free(p);
566 }
567 
568 void setup_temp_free(vorb *f, void *p, int sz)
569 {
570     free(p);
571 }
572 
573 enum CRC32_POLY = 0x04c11db7;   // from spec
574 
575 __gshared uint[256] crc_table; // TODO this should not be a global! but go into a decoder structure
576 
577 void crc32_init()
578 {
579    int i,j;
580    uint s;
581    for(i=0; i < 256; i++) {
582       for (s=cast(uint) i << 24, j=0; j < 8; ++j)
583          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
584       crc_table[i] = s;
585    }
586 }
587 
588 uint crc32_update(uint crc, uint8 byte_)
589 {
590    return (crc << 8) ^ crc_table[byte_ ^ (crc >> 24)];
591 }
592 
593 
594 // used in setup, and for huffman that doesn't go fast path
595 uint bit_reverse(uint n)
596 {
597   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
598   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
599   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
600   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
601   return (n >> 16) | (n << 16);
602 }
603 
604 float square(float x)
605 {
606    return x*x;
607 }
608 
609 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
610 // as required by the specification. fast(?) implementation from stb.h
611 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
612 int ilog(int32 n)
613 {
614    static immutable byte[16] log2_4 = [ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 ];
615 
616    if (n < 0) return 0; // signed n returns 0
617 
618    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
619    if (n < (1 << 14))
620         if (n < (1 <<  4))            return  0 + log2_4[n      ];
621         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
622              else                     return 10 + log2_4[n >> 10];
623    else if (n < (1 << 24))
624              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
625              else                     return 20 + log2_4[n >> 20];
626         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
627              else                     return 30 + log2_4[n >> 30];
628 }
629 
630 // code length assigned to a value with no huffman encoding
631 enum NO_CODE = 255;
632 
633 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
634 //
635 // these functions are only called at setup, and only a few times
636 // per file
637 
638 float float32_unpack(uint x)
639 {
640    // from the specification
641    uint mantissa = x & 0x1fffff;
642    uint sign = x & 0x80000000;
643    uint exp = (x & 0x7fe00000) >> 21;
644    double res = sign ? -cast(double)mantissa : cast(double)mantissa;
645    return cast(float) ldexp(cast(float)res, cast(int)(exp-788));
646 }
647 
648 
649 // zlib & jpeg huffman tables assume that the output symbols
650 // can either be arbitrarily arranged, or have monotonically
651 // increasing frequencies--they rely on the lengths being sorted;
652 // this makes for a very simple generation algorithm.
653 // vorbis allows a huffman table with non-sorted lengths. This
654 // requires a more sophisticated construction, since symbols in
655 // order do not map to huffman codes "in order".
656 static void add_entry(Codebook *c, uint huff_code, int symbol, int count, int len, uint *values)
657 {
658    if (!c.sparse) {
659       c.codewords      [symbol] = huff_code;
660    } else {
661       c.codewords       [count] = huff_code;
662       c.codeword_lengths[count] = cast(ubyte)len;
663       values             [count] = symbol;
664    }
665 }
666 
667 int compute_codewords(Codebook *c, uint8 *len, int n, uint *values)
668 {
669    int i,k,m=0;
670    uint[32] available;
671    memset(available.ptr, 0, (available).sizeof);
672    // find the first entry
673    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
674    if (k == n) { assert(c.sorted_entries == 0); return TRUE; }
675    // add to the list
676    add_entry(c, 0, k, m++, len[k], values);
677    // add all available leaves
678    for (i=1; i <= len[k]; ++i)
679       available[i] = 1U << (32-i);
680    // note that the above code treats the first case specially,
681    // but it's really the same as the following code, so they
682    // could probably be combined (except the initial code is 0,
683    // and I use 0 in available[] to mean 'empty')
684    for (i=k+1; i < n; ++i) {
685       uint res;
686       int z = len[i], y;
687       if (z == NO_CODE) continue;
688       // find lowest available leaf (should always be earliest,
689       // which is what the specification calls for)
690       // note that this property, and the fact we can never have
691       // more than one free leaf at a given level, isn't totally
692       // trivial to prove, but it seems true and the assert never
693       // fires, so!
694       while (z > 0 && !available[z]) --z;
695       if (z == 0) { return FALSE; }
696       res = available[z];
697       assert(z >= 0 && z < 32);
698       available[z] = 0;
699       add_entry(c, bit_reverse(res), i, m++, len[i], values);
700       // propagate availability up the tree
701       if (z != len[i]) {
702          assert(len[i] >= 0 && len[i] < 32);
703          for (y=len[i]; y > z; --y) {
704             assert(available[y] == 0);
705             available[y] = res + (1 << (32-y));
706          }
707       }
708    }
709    return TRUE;
710 }
711 
712 // accelerated huffman table allows fast O(1) match of all symbols
713 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
714 static void compute_accelerated_huffman(Codebook *c)
715 {
716    int i, len;
717    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
718       c.fast_huffman[i] = -1;
719 
720    len = c.sparse ? c.sorted_entries : c.entries;
721    for (i=0; i < len; ++i) {
722       if (c.codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
723          uint z = c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i];
724          // set table entries for all bit combinations in the higher bits
725          while (z < FAST_HUFFMAN_TABLE_SIZE) {
726              c.fast_huffman[z] = i;
727              z += 1 << c.codeword_lengths[i];
728          }
729       }
730    }
731 }
732 
733 extern(C) int uint32_compare(const void *p, const void *q)
734 {
735    uint x = * cast(uint *) p;
736    uint y = * cast(uint *) q;
737    return x < y ? -1 : x > y;
738 }
739 
740 int include_in_sort(Codebook *c, uint8 len)
741 {
742    if (c.sparse) { assert(len != NO_CODE); return TRUE; }
743    if (len == NO_CODE) return FALSE;
744    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
745    return FALSE;
746 }
747 
748 // if the fast table above doesn't work, we want to binary
749 // search them... need to reverse the bits
750 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint *values)
751 {
752    int i, len;
753    // build a list of all the entries
754    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
755    // this is kind of a frivolous optimization--I don't see any performance improvement,
756    // but it's like 4 extra lines of code, so.
757    if (!c.sparse) {
758       int k = 0;
759       for (i=0; i < c.entries; ++i)
760          if (include_in_sort(c, lengths[i]))
761             c.sorted_codewords[k++] = bit_reverse(c.codewords[i]);
762       assert(k == c.sorted_entries);
763    } else {
764       for (i=0; i < c.sorted_entries; ++i)
765          c.sorted_codewords[i] = bit_reverse(c.codewords[i]);
766    }
767 
768    qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
769    c.sorted_codewords[c.sorted_entries] = 0xffffffff;
770 
771    len = c.sparse ? c.sorted_entries : c.entries;
772    // now we need to indicate how they correspond; we could either
773    //   #1: sort a different data structure that says who they correspond to
774    //   #2: for each sorted entry, search the original list to find who corresponds
775    //   #3: for each original entry, find the sorted entry
776    // #1 requires extra storage, #2 is slow, #3 can use binary search!
777    for (i=0; i < len; ++i) {
778       int huff_len = c.sparse ? lengths[values[i]] : lengths[i];
779       if (include_in_sort(c, cast(ubyte) huff_len)) {
780          uint code = bit_reverse(c.codewords[i]);
781          int x=0, n=c.sorted_entries;
782          while (n > 1) {
783             // invariant: sc[x] <= code < sc[x+n]
784             int m = x + (n >> 1);
785             if (c.sorted_codewords[m] <= code) {
786                x = m;
787                n -= (n>>1);
788             } else {
789                n >>= 1;
790             }
791          }
792          assert(c.sorted_codewords[x] == code);
793          if (c.sparse) {
794             c.sorted_values[x] = values[i];
795             c.codeword_lengths[x] = cast(ubyte) huff_len;
796          } else {
797             c.sorted_values[x] = i;
798          }
799       }
800    }
801 }
802 
803 // only run while parsing the header (3 times)
804 int vorbis_validate(uint8 *data)
805 {
806    static immutable uint8[6] vorbis = [ 'v', 'o', 'r', 'b', 'i', 's' ];
807    return memcmp(data, vorbis.ptr, 6) == 0;
808 }
809 
810 // called from setup only, once per code book
811 // (formula implied by specification)
812 int lookup1_values(int entries, int dim)
813 {
814    int r = cast(int) floor(exp(cast(float) log(cast(float) entries) / dim));
815    if (cast(int) floor(pow(cast(float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
816       ++r;                                              // floor() to avoid _ftol() when non-CRT
817    if (pow(cast(float) r+1, dim) <= entries)
818       return -1;
819    if (cast(int) floor(pow(cast(float) r, dim)) > entries)
820       return -1;
821    return r;
822 }
823 
824 // called twice per file
825 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
826 {
827    int n4 = n >> 2, n8 = n >> 3;
828    int k,k2;
829 
830    for (k=k2=0; k < n4; ++k,k2+=2) {
831       A[k2  ] = cast(float)  cos(4*k*PI/n);
832       A[k2+1] = cast(float) -sin(4*k*PI/n);
833       B[k2  ] = cast(float)  cos((k2+1)*PI/n/2) * 0.5f;
834       B[k2+1] = cast(float)  sin((k2+1)*PI/n/2) * 0.5f;
835    }
836    for (k=k2=0; k < n8; ++k,k2+=2) {
837       C[k2  ] = cast(float)  cos(2*(k2+1)*PI/n);
838       C[k2+1] = cast(float) -sin(2*(k2+1)*PI/n);
839    }
840 }
841 
842 static void compute_window(int n, float *window)
843 {
844    int n2 = n >> 1, i;
845    for (i=0; i < n2; ++i)
846       window[i] = cast(float) sin(0.5 * PI * square(cast(float) sin((i - 0 + 0.5) / n2 * 0.5 * PI)));
847 }
848 
849 static void compute_bitreverse(int n, uint16 *rev)
850 {
851    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
852    int i, n8 = n >> 3;
853    for (i=0; i < n8; ++i)
854       rev[i] = cast(ushort)( (bit_reverse(i) >> (32-ld+3)) << 2);
855 }
856 
857 int init_blocksize(vorb *f, int b, int n)
858 {
859     int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
860     f.A[b] = cast(float *) setup_malloc(f, (float.sizeof) * n2);
861     f.B[b] = cast(float *) setup_malloc(f, (float.sizeof) * n2);
862     f.C[b] = cast(float *) setup_malloc(f, (float.sizeof) * n4);
863     if (!f.A[b] || !f.B[b] || !f.C[b]) return error(f, VORBIS_outofmem);
864     compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);
865     f.window[b] = cast(float *) setup_malloc(f, (float.sizeof) * n2);
866     if (!f.window[b]) return error(f, VORBIS_outofmem);
867     compute_window(n, f.window[b]);
868     f.bit_reverse[b] = cast(uint16 *) setup_malloc(f, (uint16.sizeof) * n8);
869     if (!f.bit_reverse[b]) return error(f, VORBIS_outofmem);
870     compute_bitreverse(n, f.bit_reverse[b]);
871     return TRUE;
872 }
873 
874 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
875 {
876     int low = -1;
877     int high = 65536;
878     int i;
879     for (i=0; i < n; ++i) 
880     {
881         if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
882         if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
883     }
884 }
885 
886 // this has been repurposed so y is now the original index instead of y
887 struct stbv__floor_ordering
888 {
889     uint16 x,id;
890 }
891 
892 extern(C) int point_compare(const void *p, const void *q)
893 {
894     stbv__floor_ordering *a = cast(stbv__floor_ordering *) p;
895     stbv__floor_ordering *b = cast(stbv__floor_ordering *) q;
896     return a.x < b.x ? -1 : a.x > b.x;
897 }
898 
899 //
900 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
901 
902 uint8 get8(vorb *z)
903 {
904     ubyte b;
905     if (1 == z.io.read(&b, 1, z.userData))
906     {
907         return b;
908     }
909     else
910     {
911         z.eof = TRUE;
912         return 0;
913     }
914 }
915 
916 uint get32(vorb *f)
917 {
918     uint x;
919     x = get8(f);
920     x += get8(f) << 8;
921     x += get8(f) << 16;
922     x += cast(uint) get8(f) << 24;
923     return x;
924 }
925 
926 int getn(vorb *z, uint8 *data, int n)
927 {
928     int read = z.io.read(data, n, z.userData);
929 
930     if (n == read)
931     {
932         return 1;
933     }
934     else
935     {
936         z.eof = 1;
937         return 0;
938     }
939 }
940 
941 void skip(vorb *z, int n)
942 {
943     bool success = z.io.skip(n, z.userData);
944     if (!success)
945         z.eof = 1;
946 }
947 
948 int set_file_offset(stb_vorbis *f, uint loc)
949 {
950     f.io.seek(loc, false, f.userData);
951     if (loc >= f.stream_len)
952     {
953         f.eof = 1;
954         return 0;
955     }
956     else
957     {
958         f.eof = 0;
959         return 1;
960     }
961 }
962 
963 static immutable uint8[4] ogg_page_header = [ 0x4f, 0x67, 0x67, 0x53 ];
964 
965 int capture_pattern(vorb *f)
966 {
967    if (0x4f != get8(f)) return FALSE;
968    if (0x67 != get8(f)) return FALSE;
969    if (0x67 != get8(f)) return FALSE;
970    if (0x53 != get8(f)) return FALSE;
971    return TRUE;
972 }
973 
974 enum PAGEFLAG_continued_packet   = 1;
975 enum PAGEFLAG_first_page         = 2;
976 enum PAGEFLAG_last_page          = 4;
977 
978 int start_page_no_capturepattern(vorb *f)
979 {
980    uint loc0,loc1,n;
981    if (f.first_decode) {
982       f.p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
983    }
984    // stream structure version
985    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
986    // header flag
987    f.page_flag = get8(f);
988    // absolute granule position
989    loc0 = get32(f);
990    loc1 = get32(f);
991    // @TODO: validate loc0,loc1 as valid positions?
992    // stream serial number -- vorbis doesn't interleave, so discard
993    get32(f);
994    //if (f.serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
995    // page sequence number
996    n = get32(f);
997    f.last_page = n;
998    // CRC32
999    get32(f);
1000    // page_segments
1001    f.segment_count = get8(f);
1002    if (!getn(f, f.segments.ptr, f.segment_count))
1003       return error(f, VORBIS_unexpected_eof);
1004    // assume we _don't_ know any the sample position of any segments
1005    f.end_seg_with_known_loc = -2;
1006    if (loc0 != ~0U || loc1 != ~0U) {
1007       int i;
1008       // determine which packet is the last one that will complete
1009       for (i=f.segment_count-1; i >= 0; --i)
1010          if (f.segments[i] < 255)
1011             break;
1012       // 'i' is now the index of the _last_ segment of a packet that ends
1013       if (i >= 0) {
1014          f.end_seg_with_known_loc = i;
1015          f.known_loc_for_packet   = loc0;
1016       }
1017    }
1018    if (f.first_decode) {
1019       int i,len;
1020       len = 0;
1021       for (i=0; i < f.segment_count; ++i)
1022          len += f.segments[i];
1023       len += 27 + f.segment_count;
1024       f.p_first.page_end = f.p_first.page_start + len;
1025       f.p_first.last_decoded_sample = loc0;
1026    }
1027    f.next_seg = 0;
1028    return TRUE;
1029 }
1030 
1031 static int start_page(vorb *f)
1032 {
1033    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1034    return start_page_no_capturepattern(f);
1035 }
1036 
1037 static int start_packet(vorb *f)
1038 {
1039    while (f.next_seg == -1) {
1040       if (!start_page(f)) return FALSE;
1041       if (f.page_flag & PAGEFLAG_continued_packet)
1042          return error(f, VORBIS_continued_packet_flag_invalid);
1043    }
1044    f.last_seg = FALSE;
1045    f.valid_bits = 0;
1046    f.packet_bytes = 0;
1047    f.bytes_in_seg = 0;
1048    // f.next_seg is now valid
1049    return TRUE;
1050 }
1051 
1052 static int maybe_start_packet(vorb *f)
1053 {
1054    if (f.next_seg == -1) {
1055       int x = get8(f);
1056       if (f.eof) return FALSE; // EOF at page boundary is not an error!
1057       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1058       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1059       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1060       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1061       if (!start_page_no_capturepattern(f)) return FALSE;
1062       if (f.page_flag & PAGEFLAG_continued_packet) {
1063          // set up enough state that we can read this packet if we want,
1064          // e.g. during recovery
1065          f.last_seg = FALSE;
1066          f.bytes_in_seg = 0;
1067          return error(f, VORBIS_continued_packet_flag_invalid);
1068       }
1069    }
1070    return start_packet(f);
1071 }
1072 
1073 static int next_segment(vorb *f)
1074 {
1075    int len;
1076    if (f.last_seg) return 0;
1077    if (f.next_seg == -1) {
1078       f.last_seg_which = f.segment_count-1; // in case start_page fails
1079       if (!start_page(f)) { f.last_seg = 1; return 0; }
1080       if (!(f.page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1081    }
1082    len = f.segments[f.next_seg++];
1083    if (len < 255) {
1084       f.last_seg = TRUE;
1085       f.last_seg_which = f.next_seg-1;
1086    }
1087    if (f.next_seg >= f.segment_count)
1088       f.next_seg = -1;
1089    assert(f.bytes_in_seg == 0);
1090    f.bytes_in_seg = cast(ubyte)len;
1091    return len;
1092 }
1093 
1094 enum EOP =    (-1);
1095 enum INVALID_BITS = (-1);
1096 
1097 int get8_packet_raw(vorb *f)
1098 {
1099    if (!f.bytes_in_seg) {  // CLANG!
1100       if (f.last_seg) return EOP;
1101       else if (!next_segment(f)) return EOP;
1102    }
1103    assert(f.bytes_in_seg > 0);
1104    --f.bytes_in_seg;
1105    ++f.packet_bytes;
1106    return get8(f);
1107 }
1108 
1109 int get8_packet(vorb *f)
1110 {
1111    int x = get8_packet_raw(f);
1112    f.valid_bits = 0;
1113    return x;
1114 }
1115 
1116 int get32_packet(vorb *f)
1117 {
1118    uint x;
1119    x = get8_packet(f);
1120    x += get8_packet(f) << 8;
1121    x += get8_packet(f) << 16;
1122    x += cast(uint) get8_packet(f) << 24;
1123    return x;
1124 }
1125 
1126 void flush_packet(vorb *f)
1127 {
1128    while (get8_packet_raw(f) != EOP)
1129    {
1130    }
1131 }
1132 
1133 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1134 // as the huffman decoder?
1135 static uint get_bits(vorb *f, int n)
1136 {
1137    uint z;
1138 
1139    if (f.valid_bits < 0) return 0;
1140    if (f.valid_bits < n) {
1141       if (n > 24) {
1142          // the accumulator technique below would not work correctly in this case
1143          z = get_bits(f, 24);
1144          z += get_bits(f, n-24) << 24;
1145          return z;
1146       }
1147       if (f.valid_bits == 0) f.acc = 0;
1148       while (f.valid_bits < n) {
1149          int z2 = get8_packet_raw(f);
1150          if (z2 == EOP) {
1151             f.valid_bits = INVALID_BITS;
1152             return 0;
1153          }
1154          f.acc += z2 << f.valid_bits;
1155          f.valid_bits += 8;
1156       }
1157    }
1158    if (f.valid_bits < 0) return 0;
1159    z = f.acc & ((1 << n)-1);
1160    f.acc >>= n;
1161    f.valid_bits -= n;
1162    return z;
1163 }
1164 
1165 // @OPTIMIZE: primary accumulator for huffman
1166 // expand the buffer to as many bits as possible without reading off end of packet
1167 // it might be nice to allow f.valid_bits and f.acc to be stored in registers,
1168 // e.g. cache them locally and decode locally
1169 static void prep_huffman(vorb *f)
1170 {
1171    if (f.valid_bits <= 24) {
1172       if (f.valid_bits == 0) f.acc = 0;
1173       do {
1174          int z;
1175          if (f.last_seg && !f.bytes_in_seg) return;
1176          z = get8_packet_raw(f);
1177          if (z == EOP) return;
1178          f.acc += cast(uint) z << f.valid_bits;
1179          f.valid_bits += 8;
1180       } while (f.valid_bits <= 24);
1181    }
1182 }
1183 
1184 enum
1185 {
1186    VORBIS_packet_id = 1,
1187    VORBIS_packet_comment = 3,
1188    VORBIS_packet_setup = 5
1189 }
1190 
1191 int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1192 {
1193    int i;
1194    prep_huffman(f);
1195 
1196    if (c.codewords == null && c.sorted_codewords == null)
1197       return -1;
1198 
1199    // cases to use binary search: sorted_codewords && !c.codewords
1200    //                             sorted_codewords && c.entries > 8
1201    if (c.entries > 8 ? c.sorted_codewords!=null : !c.codewords) {
1202       // binary search
1203       uint code = bit_reverse(f.acc);
1204       int x=0, n=c.sorted_entries, len;
1205 
1206       while (n > 1) {
1207          // invariant: sc[x] <= code < sc[x+n]
1208          int m = x + (n >> 1);
1209          if (c.sorted_codewords[m] <= code) {
1210             x = m;
1211             n -= (n>>1);
1212          } else {
1213             n >>= 1;
1214          }
1215       }
1216       // x is now the sorted index
1217       if (!c.sparse) x = c.sorted_values[x];
1218       // x is now sorted index if sparse, or symbol otherwise
1219       len = c.codeword_lengths[x];
1220       if (f.valid_bits >= len) {
1221          f.acc >>= len;
1222          f.valid_bits -= len;
1223          return x;
1224       }
1225 
1226       f.valid_bits = 0;
1227       return -1;
1228    }
1229 
1230    // if small, linear search
1231    assert(!c.sparse);
1232    for (i=0; i < c.entries; ++i) {
1233       if (c.codeword_lengths[i] == NO_CODE) continue;
1234       if (c.codewords[i] == (f.acc & ((1 << c.codeword_lengths[i])-1))) {
1235          if (f.valid_bits >= c.codeword_lengths[i]) {
1236             f.acc >>= c.codeword_lengths[i];
1237             f.valid_bits -= c.codeword_lengths[i];
1238             return i;
1239          }
1240          f.valid_bits = 0;
1241          return -1;
1242       }
1243    }
1244 
1245    error(f, VORBIS_invalid_stream);
1246    f.valid_bits = 0;
1247    return -1;
1248 }
1249 
1250 
1251 
1252 int codebook_decode_scalar(vorb *f, Codebook *c)
1253 {
1254    int i;
1255    if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1256       prep_huffman(f);
1257    // fast huffman table lookup
1258    i = f.acc & FAST_HUFFMAN_TABLE_MASK;
1259    i = c.fast_huffman[i];
1260    if (i >= 0) {
1261       f.acc >>= c.codeword_lengths[i];
1262       f.valid_bits -= c.codeword_lengths[i];
1263       if (f.valid_bits < 0) { f.valid_bits = 0; return -1; }
1264       return i;
1265    }
1266    return codebook_decode_scalar_raw(f,c);
1267 }
1268 
1269 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1270 // where we avoid one addition
1271 codetype CODEBOOK_ELEMENT_FAST(Codebook * c, int off)
1272 {
1273     return c.multiplicands[off];
1274 }
1275 
1276 int CODEBOOK_ELEMENT_BASE(Codebook * c) 
1277 { 
1278     return 0; 
1279 }
1280 
1281 int codebook_decode_start(vorb *f, Codebook *c)
1282 {
1283    int z = -1;
1284 
1285    // type 0 is only legal in a scalar context
1286    if (c.lookup_type == 0)
1287       error(f, VORBIS_invalid_stream);
1288    else {
1289       z = codebook_decode_scalar(f,c);
1290       if (c.sparse) z = c.sorted_values[z];
1291       if (c.sparse) assert(z < c.sorted_entries);
1292       if (z < 0) {  // check for EOP
1293          if (!f.bytes_in_seg)
1294             if (f.last_seg)
1295                return z;
1296          error(f, VORBIS_invalid_stream);
1297       }
1298    }
1299    return z;
1300 }
1301 
1302 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1303 {
1304    int i,z = codebook_decode_start(f,c);
1305    if (z < 0) return FALSE;
1306    if (len > c.dimensions) len = c.dimensions;
1307 
1308    if (c.lookup_type == 1) {
1309       float last = CODEBOOK_ELEMENT_BASE(c);
1310       int div = 1;
1311       for (i=0; i < len; ++i) {
1312          int off = (z / div) % c.lookup_values;
1313          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1314          output[i] += val;
1315          if (c.sequence_p) last = val + c.minimum_value;
1316          div *= c.lookup_values;
1317       }
1318       return TRUE;
1319    }
1320 
1321    z *= c.dimensions;
1322    if (c.sequence_p) {
1323       float last = CODEBOOK_ELEMENT_BASE(c);
1324       for (i=0; i < len; ++i) {
1325          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1326          output[i] += val;
1327          last = val + c.minimum_value;
1328       }
1329    } else {
1330       float last = CODEBOOK_ELEMENT_BASE(c);
1331       for (i=0; i < len; ++i) {
1332          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1333       }
1334    }
1335 
1336    return TRUE;
1337 }
1338 
1339 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1340 {
1341    int i,z = codebook_decode_start(f,c);
1342    float last = CODEBOOK_ELEMENT_BASE(c);
1343    if (z < 0) return FALSE;
1344    if (len > c.dimensions) len = c.dimensions;
1345 
1346    if (c.lookup_type == 1) {
1347       int div = 1;
1348       for (i=0; i < len; ++i) {
1349          int off = (z / div) % c.lookup_values;
1350          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1351          output[i*step] += val;
1352          if (c.sequence_p) last = val;
1353          div *= c.lookup_values;
1354       }
1355       return TRUE;
1356    }
1357 
1358    z *= c.dimensions;
1359    for (i=0; i < len; ++i) {
1360       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1361       output[i*step] += val;
1362       if (c.sequence_p) last = val;
1363    }
1364 
1365    return TRUE;
1366 }
1367 
1368 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1369 {
1370    int c_inter = *c_inter_p;
1371    int p_inter = *p_inter_p;
1372    int i,z, effective = c.dimensions;
1373 
1374    // type 0 is only legal in a scalar context
1375    if (c.lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1376 
1377    while (total_decode > 0) {
1378       float last = CODEBOOK_ELEMENT_BASE(c);
1379       z = codebook_decode_scalar(f,c);
1380       if (c.sparse) z = c.sorted_values[z];
1381 
1382       if (z < 0) {
1383          if (!f.bytes_in_seg)
1384             if (f.last_seg) return FALSE;
1385          return error(f, VORBIS_invalid_stream);
1386       }
1387 
1388       // if this will take us off the end of the buffers, stop short!
1389       // we check by computing the length of the virtual interleaved
1390       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1391       // and the length we'll be using (effective)
1392       if (c_inter + p_inter*ch + effective > len * ch) {
1393          effective = len*ch - (p_inter*ch - c_inter);
1394       }
1395 
1396       if (c.lookup_type == 1) {
1397          int div = 1;
1398          for (i=0; i < effective; ++i) {
1399             int off = (z / div) % c.lookup_values;
1400             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1401             if (outputs[c_inter])
1402                outputs[c_inter][p_inter] += val;
1403             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1404             if (c.sequence_p) last = val;
1405             div *= c.lookup_values;
1406          }
1407       } else
1408       {
1409          z *= c.dimensions;
1410          if (c.sequence_p) {
1411             for (i=0; i < effective; ++i) {
1412                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1413                if (outputs[c_inter])
1414                   outputs[c_inter][p_inter] += val;
1415                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1416                last = val;
1417             }
1418          } else {
1419             for (i=0; i < effective; ++i) {
1420                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1421                if (outputs[c_inter])
1422                   outputs[c_inter][p_inter] += val;
1423                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1424             }
1425          }
1426       }
1427 
1428       total_decode -= effective;
1429    }
1430    *c_inter_p = c_inter;
1431    *p_inter_p = p_inter;
1432    return TRUE;
1433 }
1434 
1435 int predict_point(int x, int x0, int x1, int y0, int y1)
1436 {
1437    int dy = y1 - y0;
1438    int adx = x1 - x0;
1439    int abs_dy = (dy >= 0) ? dy : -dy;
1440    int err = abs_dy * (x - x0);
1441    int off = err / adx;
1442    return dy < 0 ? y0 - off : y0 + off;
1443 }
1444 
1445 // the following table is block-copied from the specification
1446 static immutable float[256] inverse_db_table =
1447 [
1448   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1449   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1450   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1451   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1452   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1453   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1454   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1455   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1456   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1457   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1458   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1459   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1460   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1461   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1462   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1463   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1464   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1465   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1466   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1467   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1468   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1469   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1470   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1471   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1472   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1473   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1474   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1475   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1476   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1477   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1478   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1479   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1480   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1481   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1482   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1483   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1484   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
1485   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
1486   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
1487   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
1488   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
1489   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
1490   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
1491   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
1492   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
1493   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
1494   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
1495   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
1496   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
1497   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
1498   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
1499   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
1500   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
1501   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
1502   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
1503   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
1504   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
1505   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
1506   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
1507   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
1508   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
1509   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
1510   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
1511   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
1512 ];
1513 
1514 void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1515 {
1516    int dy = y1 - y0;
1517    int adx = x1 - x0;
1518    int ady = (dy >= 0) ? dy : -dy;
1519    int base;
1520    int x=x0,y=y0;
1521    int err = 0;
1522    int sy;
1523 
1524    base = dy / adx;
1525    if (dy < 0)
1526       sy = base - 1;
1527    else
1528       sy = base+1;
1529 
1530    int abase = (base >= 0) ? base : -base;
1531    ady -= abase * adx;
1532    if (x1 > n) x1 = n;
1533    if (x < x1) {
1534       output[x] *= inverse_db_table[y&255];
1535       for (++x; x < x1; ++x) {
1536          err += ady;
1537          if (err >= adx) {
1538             err -= adx;
1539             y += sy;
1540          } else
1541             y += base;
1542          output[x] *= inverse_db_table[y&255];
1543       }
1544    }
1545 }
1546 
1547 int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
1548 {
1549    int k;
1550    if (rtype == 0) {
1551       int step = n / book.dimensions;
1552       for (k=0; k < step; ++k)
1553          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
1554             return FALSE;
1555    } else {
1556       for (k=0; k < n; ) {
1557          if (!codebook_decode(f, book, target+offset, n-k))
1558             return FALSE;
1559          k += book.dimensions;
1560          offset += book.dimensions;
1561       }
1562    }
1563    return TRUE;
1564 }
1565 
1566 // n is 1/2 of the blocksize --
1567 // specification: "Correct per-vector decode length is [n]/2"
1568 void decode_residue(vorb *f, float** residue_buffers, int ch, int n, int rn, uint8 *do_not_decode)
1569 {
1570    int i,j,pass;
1571    Residue *r = f.residue_config + rn;
1572    int rtype = f.residue_types[rn];
1573    int c = r.classbook;
1574    int classwords = f.codebooks[c].dimensions;
1575    uint actual_size = rtype == 2 ? n*2 : n;
1576    uint limit_r_begin = (r.begin < actual_size ? r.begin : actual_size);
1577    uint limit_r_end   = (r.end   < actual_size ? r.end   : actual_size);
1578    int n_read = limit_r_end - limit_r_begin;
1579    int part_read = n_read / r.part_size;
1580    int temp_alloc_point = temp_alloc_save(f);
1581    uint8 ***part_classdata = cast(uint8 ***) temp_block_array(f,f.channels, part_read * cast(int)((uint*).sizeof));
1582 
1583    for (i=0; i < ch; ++i)
1584       if (!do_not_decode[i])
1585          memset(residue_buffers[i], 0, (float.sizeof) * n);
1586 
1587    if (rtype == 2 && ch != 1) {
1588       for (j=0; j < ch; ++j)
1589          if (!do_not_decode[j])
1590             break;
1591       if (j == ch)
1592          goto done;
1593 
1594       for (pass=0; pass < 8; ++pass) {
1595          int pcount = 0, class_set = 0;
1596          if (ch == 2) {
1597             while (pcount < part_read) {
1598                int z = r.begin + pcount*r.part_size;
1599                int c_inter = (z & 1), p_inter = z>>1;
1600                if (pass == 0) {
1601                   Codebook *c2 = f.codebooks+r.classbook;
1602                   int q;
1603                   q = codebook_decode_scalar(f,c2);
1604                   if (c2.sparse) q = c2.sorted_values[q];
1605                   if (q == EOP) goto done;
1606                   part_classdata[0][class_set] = r.classdata[q];
1607                }
1608                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1609                   int z2 = r.begin + pcount*r.part_size;
1610                   int c2 = part_classdata[0][class_set][i];
1611                   int b = r.residue_books[c2][pass];
1612                   if (b >= 0) {
1613                      Codebook *book = f.codebooks + b;
1614                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size))
1615                         goto done;
1616                   } else {
1617                      z2 += r.part_size;
1618                      c_inter = z2 & 1;
1619                      p_inter = z2 >> 1;
1620                   }
1621                }
1622                ++class_set;               
1623             }
1624          } else if (ch > 2) {
1625             while (pcount < part_read) {
1626                int z = r.begin + pcount*r.part_size;
1627                int c_inter = z % ch, p_inter = z/ch;
1628                if (pass == 0) {
1629                   Codebook *c2 = f.codebooks+r.classbook;
1630                   int q;
1631                   q = codebook_decode_scalar(f,c2);
1632                   if (c2.sparse) q = c2.sorted_values[q];
1633 
1634                   if (q == EOP) goto done;
1635                   part_classdata[0][class_set] = r.classdata[q];
1636                }
1637                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1638                   int z2 = r.begin + pcount*r.part_size;
1639                   int c2 = part_classdata[0][class_set][i];
1640                   int b = r.residue_books[c2][pass];
1641                   if (b >= 0) {
1642                      Codebook *book = f.codebooks + b;
1643                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size))
1644                         goto done;
1645                   } else {
1646                      z2 += r.part_size;
1647                      c_inter = z2 % ch;
1648                      p_inter = z2 / ch;
1649                   }
1650                }
1651                ++class_set;
1652             }
1653          }
1654       }
1655       goto done;
1656    }
1657 
1658    for (pass=0; pass < 8; ++pass) {
1659       int pcount = 0, class_set=0;
1660       while (pcount < part_read) {
1661          if (pass == 0) {
1662             for (j=0; j < ch; ++j) {
1663                if (!do_not_decode[j]) {
1664                   Codebook *c2 = f.codebooks+r.classbook;
1665                   int temp;
1666                   temp = codebook_decode_scalar(f,c2);
1667                   if (c2.sparse) temp = c2.sorted_values[temp];
1668 
1669                   if (temp == EOP) goto done;
1670                   part_classdata[j][class_set] = r.classdata[temp];
1671                }
1672             }
1673          }
1674          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1675             for (j=0; j < ch; ++j) {
1676                if (!do_not_decode[j]) {
1677                   int c2 = part_classdata[j][class_set][i];
1678                   int b = r.residue_books[c2][pass];
1679                   if (b >= 0) {
1680                      float *target = residue_buffers[j];
1681                      int offset = r.begin + pcount * r.part_size;
1682                      int n2 = r.part_size;
1683                      Codebook *book = f.codebooks + b;
1684                      if (!residue_decode(f, book, target, offset, n2, rtype))
1685                         goto done;
1686                   }
1687                }
1688             }
1689          }
1690          ++class_set;
1691       }
1692    }
1693   done:
1694    temp_alloc_restore(f,temp_alloc_point);
1695 }
1696 
1697 
1698 // the following were split out into separate functions while optimizing;
1699 // they could be pushed back up but eh. 
1700 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
1701 {
1702    float *ee0 = e + i_off;
1703    float *ee2 = ee0 + k_off;
1704    int i;
1705 
1706    assert((n & 3) == 0);
1707    for (i=(n>>2); i > 0; --i) {
1708       float k00_20, k01_21;
1709       k00_20  = ee0[ 0] - ee2[ 0];
1710       k01_21  = ee0[-1] - ee2[-1];
1711       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
1712       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
1713       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
1714       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
1715       A += 8;
1716 
1717       k00_20  = ee0[-2] - ee2[-2];
1718       k01_21  = ee0[-3] - ee2[-3];
1719       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
1720       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
1721       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
1722       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
1723       A += 8;
1724 
1725       k00_20  = ee0[-4] - ee2[-4];
1726       k01_21  = ee0[-5] - ee2[-5];
1727       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
1728       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
1729       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
1730       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
1731       A += 8;
1732 
1733       k00_20  = ee0[-6] - ee2[-6];
1734       k01_21  = ee0[-7] - ee2[-7];
1735       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
1736       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
1737       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
1738       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
1739       A += 8;
1740       ee0 -= 8;
1741       ee2 -= 8;
1742    }
1743 }
1744 
1745 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
1746 {
1747    int i;
1748    float k00_20, k01_21;
1749 
1750    float *e0 = e + d0;
1751    float *e2 = e0 + k_off;
1752 
1753    for (i=lim >> 2; i > 0; --i) {
1754       k00_20 = e0[-0] - e2[-0];
1755       k01_21 = e0[-1] - e2[-1];
1756       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
1757       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
1758       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
1759       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
1760 
1761       A += k1;
1762 
1763       k00_20 = e0[-2] - e2[-2];
1764       k01_21 = e0[-3] - e2[-3];
1765       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
1766       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
1767       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
1768       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
1769 
1770       A += k1;
1771 
1772       k00_20 = e0[-4] - e2[-4];
1773       k01_21 = e0[-5] - e2[-5];
1774       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
1775       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
1776       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
1777       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
1778 
1779       A += k1;
1780 
1781       k00_20 = e0[-6] - e2[-6];
1782       k01_21 = e0[-7] - e2[-7];
1783       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
1784       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
1785       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
1786       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
1787 
1788       e0 -= 8;
1789       e2 -= 8;
1790 
1791       A += k1;
1792    }
1793 }
1794 
1795 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
1796 {
1797    int i;
1798    float A0 = A[0];
1799    float A1 = A[0+1];
1800    float A2 = A[0+a_off];
1801    float A3 = A[0+a_off+1];
1802    float A4 = A[0+a_off*2+0];
1803    float A5 = A[0+a_off*2+1];
1804    float A6 = A[0+a_off*3+0];
1805    float A7 = A[0+a_off*3+1];
1806 
1807    float k00,k11;
1808 
1809    float *ee0 = e  +i_off;
1810    float *ee2 = ee0+k_off;
1811 
1812    for (i=n; i > 0; --i) {
1813       k00     = ee0[ 0] - ee2[ 0];
1814       k11     = ee0[-1] - ee2[-1];
1815       ee0[ 0] =  ee0[ 0] + ee2[ 0];
1816       ee0[-1] =  ee0[-1] + ee2[-1];
1817       ee2[ 0] = (k00) * A0 - (k11) * A1;
1818       ee2[-1] = (k11) * A0 + (k00) * A1;
1819 
1820       k00     = ee0[-2] - ee2[-2];
1821       k11     = ee0[-3] - ee2[-3];
1822       ee0[-2] =  ee0[-2] + ee2[-2];
1823       ee0[-3] =  ee0[-3] + ee2[-3];
1824       ee2[-2] = (k00) * A2 - (k11) * A3;
1825       ee2[-3] = (k11) * A2 + (k00) * A3;
1826 
1827       k00     = ee0[-4] - ee2[-4];
1828       k11     = ee0[-5] - ee2[-5];
1829       ee0[-4] =  ee0[-4] + ee2[-4];
1830       ee0[-5] =  ee0[-5] + ee2[-5];
1831       ee2[-4] = (k00) * A4 - (k11) * A5;
1832       ee2[-5] = (k11) * A4 + (k00) * A5;
1833 
1834       k00     = ee0[-6] - ee2[-6];
1835       k11     = ee0[-7] - ee2[-7];
1836       ee0[-6] =  ee0[-6] + ee2[-6];
1837       ee0[-7] =  ee0[-7] + ee2[-7];
1838       ee2[-6] = (k00) * A6 - (k11) * A7;
1839       ee2[-7] = (k11) * A6 + (k00) * A7;
1840 
1841       ee0 -= k0;
1842       ee2 -= k0;
1843    }
1844 }
1845 
1846 static void iter_54(float *z)
1847 {
1848    float k00,k11,k22,k33;
1849    float y0,y1,y2,y3;
1850 
1851    k00  = z[ 0] - z[-4];
1852    y0   = z[ 0] + z[-4];
1853    y2   = z[-2] + z[-6];
1854    k22  = z[-2] - z[-6];
1855 
1856    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
1857    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
1858 
1859    // done with y0,y2
1860 
1861    k33  = z[-3] - z[-7];
1862 
1863    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
1864    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
1865 
1866    // done with k33
1867 
1868    k11  = z[-1] - z[-5];
1869    y1   = z[-1] + z[-5];
1870    y3   = z[-3] + z[-7];
1871 
1872    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
1873    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
1874    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
1875    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
1876 }
1877 
1878 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
1879 {
1880    int a_off = base_n >> 3;
1881    float A2 = A[0+a_off];
1882    float *z = e + i_off;
1883    float *base = z - 16 * n;
1884 
1885    while (z > base) {
1886       float k00,k11;
1887 
1888       k00   = z[-0] - z[-8];
1889       k11   = z[-1] - z[-9];
1890       z[-0] = z[-0] + z[-8];
1891       z[-1] = z[-1] + z[-9];
1892       z[-8] =  k00;
1893       z[-9] =  k11 ;
1894 
1895       k00    = z[ -2] - z[-10];
1896       k11    = z[ -3] - z[-11];
1897       z[ -2] = z[ -2] + z[-10];
1898       z[ -3] = z[ -3] + z[-11];
1899       z[-10] = (k00+k11) * A2;
1900       z[-11] = (k11-k00) * A2;
1901 
1902       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
1903       k11    = z[ -5] - z[-13];
1904       z[ -4] = z[ -4] + z[-12];
1905       z[ -5] = z[ -5] + z[-13];
1906       z[-12] = k11;
1907       z[-13] = k00;
1908 
1909       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
1910       k11    = z[ -7] - z[-15];
1911       z[ -6] = z[ -6] + z[-14];
1912       z[ -7] = z[ -7] + z[-15];
1913       z[-14] = (k00+k11) * A2;
1914       z[-15] = (k00-k11) * A2;
1915 
1916       iter_54(z);
1917       iter_54(z-8);
1918       z -= 16;
1919    }
1920 }
1921 
1922 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
1923 {
1924    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
1925    int ld;
1926    // @OPTIMIZE: reduce register pressure by using fewer variables?
1927    int save_point = temp_alloc_save(f);
1928    float *buf2 = cast(float *) temp_alloc(f, n2 * float.sizeof);
1929    float* u = null, v = null;
1930    // twiddle factors
1931    float *A = f.A[blocktype];
1932 
1933    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
1934    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
1935 
1936    // kernel from paper
1937 
1938 
1939    // merged:
1940    //   copy and reflect spectral data
1941    //   step 0
1942 
1943    // note that it turns out that the items added together during
1944    // this step are, in fact, being added to themselves (as reflected
1945    // by step 0). inexplicable inefficiency! this became obvious
1946    // once I combined the passes.
1947 
1948    // so there's a missing 'times 2' here (for adding X to itself).
1949    // this propagates through linearly to the end, where the numbers
1950    // are 1/2 too small, and need to be compensated for.
1951 
1952    {
1953       float *d, e, AA, e_stop;
1954       d = &buf2[n2-2];
1955       AA = A;
1956       e = &buffer[0];
1957       e_stop = &buffer[n2];
1958       while (e != e_stop) {
1959          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
1960          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
1961          d -= 2;
1962          AA += 2;
1963          e += 4;
1964       }
1965 
1966       e = &buffer[n2-3];
1967       while (d >= buf2) {
1968          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
1969          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
1970          d -= 2;
1971          AA += 2;
1972          e -= 4;
1973       }
1974    }
1975 
1976    // now we use symbolic names for these, so that we can
1977    // possibly swap their meaning as we change which operations
1978    // are in place
1979 
1980    u = buffer;
1981    v = buf2;
1982 
1983    // step 2    (paper output is w, now u)
1984    // this could be in place, but the data ends up in the wrong
1985    // place... _somebody_'s got to swap it, so this is nominated
1986    {
1987       float *AA = &A[n2-8];
1988       float *d0,d1, e0, e1;
1989 
1990       e0 = &v[n4];
1991       e1 = &v[0];
1992 
1993       d0 = &u[n4];
1994       d1 = &u[0];
1995 
1996       while (AA >= A) {
1997          float v40_20, v41_21;
1998 
1999          v41_21 = e0[1] - e1[1];
2000          v40_20 = e0[0] - e1[0];
2001          d0[1]  = e0[1] + e1[1];
2002          d0[0]  = e0[0] + e1[0];
2003          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2004          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2005 
2006          v41_21 = e0[3] - e1[3];
2007          v40_20 = e0[2] - e1[2];
2008          d0[3]  = e0[3] + e1[3];
2009          d0[2]  = e0[2] + e1[2];
2010          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2011          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2012 
2013          AA -= 8;
2014 
2015          d0 += 4;
2016          d1 += 4;
2017          e0 += 4;
2018          e1 += 4;
2019       }
2020    }
2021 
2022    // step 3
2023    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2024 
2025    // optimized step 3:
2026 
2027    // the original step3 loop can be nested r inside s or s inside r;
2028    // it's written originally as s inside r, but this is dumb when r
2029    // iterates many times, and s few. So I have two copies of it and
2030    // switch between them halfway.
2031 
2032    // this is iteration 0 of step 3
2033    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2034    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2035 
2036    // this is iteration 1 of step 3
2037    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2038    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2039    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2040    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2041 
2042    l=2;
2043    for (; l < (ld-3)>>1; ++l) {
2044       int k0 = n >> (l+2), k0_2 = k0>>1;
2045       int lim = 1 << (l+1);
2046       int i;
2047       for (i=0; i < lim; ++i)
2048          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2049    }
2050 
2051    for (; l < ld-6; ++l) {
2052       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2053       int rlim = n >> (l+6), r;
2054       int lim = 1 << (l+1);
2055       int i_off;
2056       float *A0 = A;
2057       i_off = n2-1;
2058       for (r=rlim; r > 0; --r) {
2059          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2060          A0 += k1*4;
2061          i_off -= 8;
2062       }
2063    }
2064 
2065    // iterations with count:
2066    //   ld-6,-5,-4 all interleaved together
2067    //       the big win comes from getting rid of needless flops
2068    //         due to the constants on pass 5 & 4 being all 1 and 0;
2069    //       combining them to be simultaneous to improve cache made little difference
2070    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2071 
2072    // output is u
2073 
2074    // step 4, 5, and 6
2075    // cannot be in-place because of step 5
2076    {
2077       uint16 *bitrev = f.bit_reverse[blocktype];
2078       // weirdly, I'd have thought reading sequentially and writing
2079       // erratically would have been better than vice-versa, but in
2080       // fact that's not what my testing showed. (That is, with
2081       // j = bitreverse(i), do you read i and write j, or read j and write i.)
2082 
2083       float *d0 = &v[n4-4];
2084       float *d1 = &v[n2-4];
2085       while (d0 >= v) {
2086          int k4;
2087 
2088          k4 = bitrev[0];
2089          d1[3] = u[k4+0];
2090          d1[2] = u[k4+1];
2091          d0[3] = u[k4+2];
2092          d0[2] = u[k4+3];
2093 
2094          k4 = bitrev[1];
2095          d1[1] = u[k4+0];
2096          d1[0] = u[k4+1];
2097          d0[1] = u[k4+2];
2098          d0[0] = u[k4+3];
2099 
2100          d0 -= 4;
2101          d1 -= 4;
2102          bitrev += 2;
2103       }
2104    }
2105    // (paper output is u, now v)
2106 
2107 
2108    // data must be in buf2
2109    assert(v == buf2);
2110 
2111    // step 7   (paper output is v, now v)
2112    // this is now in place
2113    {
2114       float *C = f.C[blocktype];
2115       float *d, e;
2116 
2117       d = v;
2118       e = v + n2 - 4;
2119 
2120       while (d < e) {
2121          float a02,a11,b0,b1,b2,b3;
2122 
2123          a02 = d[0] - e[2];
2124          a11 = d[1] + e[3];
2125 
2126          b0 = C[1]*a02 + C[0]*a11;
2127          b1 = C[1]*a11 - C[0]*a02;
2128 
2129          b2 = d[0] + e[ 2];
2130          b3 = d[1] - e[ 3];
2131 
2132          d[0] = b2 + b0;
2133          d[1] = b3 + b1;
2134          e[2] = b2 - b0;
2135          e[3] = b1 - b3;
2136 
2137          a02 = d[2] - e[0];
2138          a11 = d[3] + e[1];
2139 
2140          b0 = C[3]*a02 + C[2]*a11;
2141          b1 = C[3]*a11 - C[2]*a02;
2142 
2143          b2 = d[2] + e[ 0];
2144          b3 = d[3] - e[ 1];
2145 
2146          d[2] = b2 + b0;
2147          d[3] = b3 + b1;
2148          e[0] = b2 - b0;
2149          e[1] = b1 - b3;
2150 
2151          C += 4;
2152          d += 4;
2153          e -= 4;
2154       }
2155    }
2156 
2157    // data must be in buf2
2158 
2159 
2160    // step 8+decode   (paper output is X, now buffer)
2161    // this generates pairs of data a la 8 and pushes them directly through
2162    // the decode kernel (pushing rather than pulling) to avoid having
2163    // to make another pass later
2164 
2165    // this cannot POSSIBLY be in place, so we refer to the buffers directly
2166 
2167    {
2168       float *d0,d1,d2,d3;
2169 
2170       float *B = f.B[blocktype] + n2 - 8;
2171       float *e = buf2 + n2 - 8;
2172       d0 = &buffer[0];
2173       d1 = &buffer[n2-4];
2174       d2 = &buffer[n2];
2175       d3 = &buffer[n-4];
2176       while (e >= v) {
2177          float p0,p1,p2,p3;
2178 
2179          p3 =  e[6]*B[7] - e[7]*B[6];
2180          p2 = -e[6]*B[6] - e[7]*B[7];
2181 
2182          d0[0] =   p3;
2183          d1[3] = - p3;
2184          d2[0] =   p2;
2185          d3[3] =   p2;
2186 
2187          p1 =  e[4]*B[5] - e[5]*B[4];
2188          p0 = -e[4]*B[4] - e[5]*B[5];
2189 
2190          d0[1] =   p1;
2191          d1[2] = - p1;
2192          d2[1] =   p0;
2193          d3[2] =   p0;
2194 
2195          p3 =  e[2]*B[3] - e[3]*B[2];
2196          p2 = -e[2]*B[2] - e[3]*B[3];
2197 
2198          d0[2] =   p3;
2199          d1[1] = - p3;
2200          d2[2] =   p2;
2201          d3[1] =   p2;
2202 
2203          p1 =  e[0]*B[1] - e[1]*B[0];
2204          p0 = -e[0]*B[0] - e[1]*B[1];
2205 
2206          d0[3] =   p1;
2207          d1[0] = - p1;
2208          d2[3] =   p0;
2209          d3[0] =   p0;
2210 
2211          B -= 8;
2212          e -= 8;
2213          d0 += 4;
2214          d2 += 4;
2215          d1 -= 4;
2216          d3 -= 4;
2217       }
2218    }
2219 
2220    temp_alloc_restore(f,save_point);
2221 }
2222 
2223 static float *get_window(vorb *f, int len)
2224 {
2225    len <<= 1;
2226    if (len == f.blocksize_0) return f.window[0];
2227    if (len == f.blocksize_1) return f.window[1];
2228    return null;
2229 }
2230 
2231 alias YTYPE = int16 ;
2232 
2233 int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
2234 {
2235    int n2 = n >> 1;
2236    int s = map.chan[i].mux, floor;
2237    floor = map.submap_floor[s];
2238    if (f.floor_types[floor] == 0) {
2239       return error(f, VORBIS_invalid_stream);
2240    } else {
2241       Floor1 *g = &f.floor_config[floor].floor1;
2242       int j,q;
2243       int lx = 0, ly = finalY[0] * g.floor1_multiplier;
2244       for (q=1; q < g.values; ++q) {
2245          j = g.sorted_order[q];
2246          if (finalY[j] >= 0)
2247          {
2248             int hy = finalY[j] * g.floor1_multiplier;
2249             int hx = g.Xlist[j];
2250             if (lx != hx)
2251                draw_line(target, lx,ly, hx,hy, n2);
2252             lx = hx, ly = hy;
2253          }
2254       }
2255       if (lx < n2) {
2256          // optimization of: draw_line(target, lx,ly, n,ly, n2);
2257          for (j=lx; j < n2; ++j)
2258             target[j] *= inverse_db_table[ly];
2259       }
2260    }
2261    return TRUE;
2262 }
2263 
2264 // The meaning of "left" and "right"
2265 //
2266 // For a given frame:
2267 //     we compute samples from 0..n
2268 //     window_center is n/2
2269 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2270 //     all of the samples from left_end to right_start can be output without mixing; however,
2271 //        this interval is 0-length except when transitioning between short and long frames
2272 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2273 //        which we don't have, so those get saved in a buffer
2274 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2275 //        has to be the same as frame N+1's left_end-left_start (which they are by
2276 //        construction)
2277 
2278 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
2279 {
2280    Mode *m;
2281    int i, n, prev, next, window_center;
2282    f.channel_buffer_start = f.channel_buffer_end = 0;
2283 
2284   retry:
2285    if (f.eof) return FALSE;
2286    if (!maybe_start_packet(f))
2287       return FALSE;
2288    // check packet type
2289    if (get_bits(f,1) != 0) {
2290       while (EOP != get8_packet(f)) {}
2291       goto retry;
2292    }
2293 
2294    if (f.alloc.alloc_buffer)
2295       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2296 
2297    i = get_bits(f, ilog(f.mode_count-1));
2298    if (i == EOP) return FALSE;
2299    if (i >= f.mode_count) return FALSE;
2300    *mode = i;
2301    m = f.mode_config.ptr + i;
2302    if (m.blockflag) {
2303       n = f.blocksize_1;
2304       prev = get_bits(f,1);
2305       next = get_bits(f,1);
2306    } else {
2307       prev = next = 0;
2308       n = f.blocksize_0;
2309    }
2310 
2311 // WINDOWING
2312 
2313    window_center = n >> 1;
2314    if (m.blockflag && !prev) {
2315       *p_left_start = (n - f.blocksize_0) >> 2;
2316       *p_left_end   = (n + f.blocksize_0) >> 2;
2317    } else {
2318       *p_left_start = 0;
2319       *p_left_end   = window_center;
2320    }
2321    if (m.blockflag && !next) {
2322       *p_right_start = (n*3 - f.blocksize_0) >> 2;
2323       *p_right_end   = (n*3 + f.blocksize_0) >> 2;
2324    } else {
2325       *p_right_start = window_center;
2326       *p_right_end   = n;
2327    }
2328 
2329    return TRUE;
2330 }
2331 
2332 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
2333 {
2334    Mapping *map;
2335    int i,j,k,n,n2;
2336    int[256] zero_channel;
2337    int[256] really_zero_channel;
2338 
2339 // WINDOWING
2340 
2341    n = f.blocksize[m.blockflag];
2342    map = &f.mapping[m.mapping];
2343 
2344 // FLOORS
2345    n2 = n >> 1;
2346 
2347    for (i=0; i < f.channels; ++i) {
2348       int s = map.chan[i].mux, floor;
2349       zero_channel[i] = FALSE;
2350       floor = map.submap_floor[s];
2351       if (f.floor_types[floor] == 0) {
2352          return error(f, VORBIS_invalid_stream);
2353       } else {
2354          Floor1 *g = &f.floor_config[floor].floor1;
2355          if (get_bits(f, 1)) {
2356             short *finalY;
2357             uint8[256] step2_flag;
2358             static immutable int[4] range_list = [ 256, 128, 86, 64 ];
2359             int range = range_list[g.floor1_multiplier-1];
2360             int offset = 2;
2361             finalY = f.finalY[i];
2362             finalY[0] = cast(short) get_bits(f, ilog(range)-1);
2363             finalY[1] = cast(short) get_bits(f, ilog(range)-1);
2364             for (j=0; j < g.partitions; ++j) {
2365                int pclass = g.partition_class_list[j];
2366                int cdim = g.class_dimensions[pclass];
2367                int cbits = g.class_subclasses[pclass];
2368                int csub = (1 << cbits)-1;
2369                int cval = 0;
2370                if (cbits) {
2371                   Codebook *c = f.codebooks + g.class_masterbooks[pclass];
2372                   cval = codebook_decode_scalar(f,c);
2373                   if (c.sparse) cval = c.sorted_values[cval];
2374                }
2375                for (k=0; k < cdim; ++k) {
2376                   int book = g.subclass_books[pclass][cval & csub];
2377                   cval = cval >> cbits;
2378                   if (book >= 0) {
2379                      int temp;
2380                      Codebook *c = f.codebooks + book;
2381                      temp = codebook_decode_scalar(f,c);
2382                      if (c.sparse) temp = c.sorted_values[temp];
2383                      
2384                      temp = codebook_decode_scalar(f,c);
2385                      if (c.sparse) temp = c.sorted_values[temp];
2386 
2387                      finalY[offset++] = cast(short)temp;
2388                   } else
2389                      finalY[offset++] = 0;
2390                }
2391             }
2392             if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
2393             step2_flag[0] = step2_flag[1] = 1;
2394             for (j=2; j < g.values; ++j) {
2395                int low, high, pred, highroom, lowroom, room, val;
2396                low = g.neighbors[j][0];
2397                high = g.neighbors[j][1];
2398                //neighbors(g.Xlist, j, &low, &high);
2399                pred = predict_point(g.Xlist[j], g.Xlist[low], g.Xlist[high], finalY[low], finalY[high]);
2400                val = finalY[j];
2401                highroom = range - pred;
2402                lowroom = pred;
2403                if (highroom < lowroom)
2404                   room = highroom * 2;
2405                else
2406                   room = lowroom * 2;
2407                if (val) {
2408                   step2_flag[low] = step2_flag[high] = 1;
2409                   step2_flag[j] = 1;
2410                   if (val >= room)
2411                      if (highroom > lowroom)
2412                         finalY[j] = cast(short)(val - lowroom + pred);
2413                      else
2414                         finalY[j] = cast(short)(pred - val + highroom - 1);
2415                   else
2416                      if (val & 1)
2417                         finalY[j] = cast(short)(pred - ((val+1)>>1));
2418                      else
2419                         finalY[j] = cast(short)(pred + (val>>1));
2420                } else {
2421                   step2_flag[j] = 0;
2422                   finalY[j] = cast(short)pred;
2423                }
2424             }
2425 
2426             // defer final floor computation until _after_ residue
2427             for (j=0; j < g.values; ++j) {
2428                if (!step2_flag[j])
2429                   finalY[j] = -1;
2430             }
2431          } else {
2432            error:
2433             zero_channel[i] = TRUE;
2434          }
2435          // So we just defer everything else to later
2436 
2437          // at this point we've decoded the floor into buffer
2438       }
2439    }
2440    // at this point we've decoded all floors
2441 
2442    if (f.alloc.alloc_buffer)
2443       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2444 
2445    // re-enable coupled channels if necessary
2446    memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof * f.channels);
2447    for (i=0; i < map.coupling_steps; ++i)
2448       if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
2449          zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = FALSE;
2450       }
2451 
2452 // RESIDUE DECODE
2453    for (i=0; i < map.submaps; ++i) 
2454    {
2455       float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
2456       int r;
2457       uint8[256] do_not_decode;
2458       int ch = 0;
2459       for (j=0; j < f.channels; ++j) {
2460          if (map.chan[j].mux == i) {
2461             if (zero_channel[j]) {
2462                do_not_decode[ch] = TRUE;
2463                residue_buffers[ch] = null;
2464             } else {
2465                do_not_decode[ch] = FALSE;
2466                residue_buffers[ch] = f.channel_buffers[j];
2467             }
2468             ++ch;
2469          }
2470       }
2471       r = map.submap_residue[i];
2472       decode_residue(f, residue_buffers.ptr, ch, n2, r, do_not_decode.ptr);
2473    }
2474 
2475    if (f.alloc.alloc_buffer)
2476       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2477 
2478 // INVERSE COUPLING
2479    for (i = map.coupling_steps-1; i >= 0; --i) {
2480       int n3 = n >> 1;
2481       float *mag = f.channel_buffers[map.chan[i].magnitude];
2482       float *a = f.channel_buffers[map.chan[i].angle    ];
2483       for (j=0; j < n3; ++j) {
2484          float a2,m2;
2485          if (mag[j] > 0)
2486             if (a[j] > 0)
2487                m2 = mag[j], a2 = mag[j] - a[j];
2488             else
2489                a2 = mag[j], m2 = mag[j] + a[j];
2490          else
2491             if (a[j] > 0)
2492                m2 = mag[j], a2 = mag[j] + a[j];
2493             else
2494                a2 = mag[j], m2 = mag[j] - a[j];
2495          mag[j] = m2;
2496          a[j] = a2;
2497       }
2498    }
2499 
2500    // finish decoding the floors
2501    for (i=0; i < f.channels; ++i) {
2502       if (really_zero_channel[i]) {
2503          memset(f.channel_buffers[i], 0, (*f.channel_buffers[i]).sizeof * n2);
2504       } else {
2505          do_floor(f, map, i, n, f.channel_buffers[i], f.finalY[i], null);
2506       }
2507    }
2508 
2509 // INVERSE MDCT
2510    for (i=0; i < f.channels; ++i)
2511       inverse_mdct(f.channel_buffers[i], n, f, m.blockflag);
2512 
2513    // this shouldn't be necessary, unless we exited on an error
2514    // and want to flush to get to the next packet
2515    flush_packet(f);
2516 
2517    if (f.first_decode) {
2518       // assume we start so first non-discarded sample is sample 0
2519       // this isn't to spec, but spec would require us to read ahead
2520       // and decode the size of all current frames--could be done,
2521       // but presumably it's not a commonly used feature
2522       f.current_loc = -n2; // start of first frame is positioned for discard
2523       // we might have to discard samples "from" the next frame too,
2524       // if we're lapping a large block then a small at the start?
2525       f.discard_samples_deferred = n - right_end;
2526       f.current_loc_valid = TRUE;
2527       f.first_decode = FALSE;
2528    } else if (f.discard_samples_deferred) {
2529       if (f.discard_samples_deferred >= right_start - left_start) {
2530          f.discard_samples_deferred -= (right_start - left_start);
2531          left_start = right_start;
2532          *p_left = left_start;
2533       } else {
2534          left_start += f.discard_samples_deferred;
2535          *p_left = left_start;
2536          f.discard_samples_deferred = 0;
2537       }
2538    } else if (f.previous_length == 0 && f.current_loc_valid) {
2539       // we're recovering from a seek... that means we're going to discard
2540       // the samples from this packet even though we know our position from
2541       // the last page header, so we need to update the position based on
2542       // the discarded samples here
2543       // but wait, the code below is going to add this in itself even
2544       // on a discard, so we don't need to do it here...
2545    }
2546 
2547    // check if we have ogg information about the sample # for this packet
2548    if (f.last_seg_which == f.end_seg_with_known_loc) {
2549       // if we have a valid current loc, and this is final:
2550       if (f.current_loc_valid && (f.page_flag & PAGEFLAG_last_page)) {
2551          uint current_end = f.known_loc_for_packet;
2552          // then let's infer the size of the (probably) short final frame
2553          if (current_end < f.current_loc + (right_end-left_start)) {
2554             if (current_end < f.current_loc) {
2555                // negative truncation, that's impossible!
2556                *len = 0;
2557             } else {
2558                *len = current_end - f.current_loc;
2559             }
2560             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
2561             if (*len > right_end) *len = right_end; // this should never happen
2562             f.current_loc += *len;
2563             return TRUE;
2564          }
2565       }
2566       // otherwise, just set our sample loc
2567       // guess that the ogg granule pos refers to the _middle_ of the
2568       // last frame?
2569       // set f.current_loc to the position of left_start
2570       f.current_loc = f.known_loc_for_packet - (n2-left_start);
2571       f.current_loc_valid = TRUE;
2572    }
2573    if (f.current_loc_valid)
2574       f.current_loc += (right_start - left_start);
2575 
2576    if (f.alloc.alloc_buffer)
2577       assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);
2578    *len = right_end;  // ignore samples after the window goes to 0
2579 
2580    return TRUE;
2581 }
2582 
2583 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
2584 {
2585    int mode, left_end, right_end;
2586    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
2587    return vorbis_decode_packet_rest(f, len, f.mode_config.ptr + mode, *p_left, left_end, *p_right, right_end, p_left);
2588 }
2589 
2590 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
2591 {
2592    
2593    // we use right&left (the start of the right- and left-window sin()-regions)
2594    // to determine how much to return, rather than inferring from the rules
2595    // (same result, clearer code); 'left' indicates where our sin() window
2596    // starts, therefore where the previous window's right edge starts, and
2597    // therefore where to start mixing from the previous buffer. 'right'
2598    // indicates where our sin() ending-window starts, therefore that's where
2599    // we start saving, and where our returned-data ends.
2600 
2601    // mixin from previous window
2602    if (f.previous_length) {
2603       int i,j, n = f.previous_length;
2604       float *w = get_window(f, n);
2605       if (w == null) return 0;
2606       for (i=0; i < f.channels; ++i) {
2607          for (j=0; j < n; ++j)
2608             f.channel_buffers[i][left+j] =
2609                f.channel_buffers[i][left+j]*w[    j] +
2610                f.previous_window[i][     j]*w[n-1-j];
2611       }
2612    }
2613    int prev,i,j;
2614    prev = f.previous_length;
2615 
2616    // last half of this data becomes previous window
2617    f.previous_length = len - right;
2618 
2619    // @OPTIMIZE: could avoid this copy by double-buffering the
2620    // output (flipping previous_window with channel_buffers), but
2621    // then previous_window would have to be 2x as large, and
2622    // channel_buffers couldn't be temp mem (although they're NOT
2623    // currently temp mem, they could be (unless we want to level
2624    // performance by spreading out the computation))
2625    for (i=0; i < f.channels; ++i)
2626       for (j=0; right+j < len; ++j)
2627          f.previous_window[i][j] = f.channel_buffers[i][right+j];
2628 
2629    if (!prev)
2630       // there was no previous packet, so this data isn't valid...
2631       // this isn't entirely true, only the would-have-overlapped data
2632       // isn't valid, but this seems to be what the spec requires
2633       return 0;
2634 
2635    // truncate a short frame
2636    if (len < right) right = len;
2637 
2638    f.samples_output += right-left;
2639 
2640    return right - left;
2641 }
2642 
2643 static int vorbis_pump_first_frame(stb_vorbis *f)
2644 {
2645    int len, right, left, res;
2646    res = vorbis_decode_packet(f, &len, &left, &right);
2647    if (res)
2648       vorbis_finish_frame(f, len, left, right);
2649    return res;
2650 }
2651 
2652 static int start_decoder(vorb *f)
2653 {
2654    uint8[6] header;
2655    uint8 x,y;
2656    int len,i,j,k, max_submaps = 0;
2657    int longest_floorlist=0;
2658 
2659    // first page, first packet
2660    f.first_decode = TRUE;
2661 
2662    if (!start_page(f))                              return FALSE;
2663    // validate page flag
2664    if (!(f.page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
2665    if (f.page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
2666    if (f.page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
2667    // check for expected packet length
2668    if (f.segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
2669    if (f.segments[0] != 30) {
2670       // check for the Ogg skeleton fishead identifying header to refine our error
2671       if (f.segments[0] == 64 &&
2672           getn(f, header.ptr, 6) &&
2673           header[0] == 'f' &&
2674           header[1] == 'i' &&
2675           header[2] == 's' &&
2676           header[3] == 'h' &&
2677           header[4] == 'e' &&
2678           header[5] == 'a' &&
2679           get8(f)   == 'd' &&
2680           get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
2681       else
2682                                                     return error(f, VORBIS_invalid_first_page);
2683    }
2684 
2685    // read packet
2686    // check packet header
2687    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
2688    if (!getn(f, header.ptr, 6))                         return error(f, VORBIS_unexpected_eof);
2689    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_first_page);
2690    // vorbis_version
2691    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
2692    f.channels = get8(f); if (!f.channels)         return error(f, VORBIS_invalid_first_page);
2693    if (f.channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
2694    f.sample_rate = get32(f); if (!f.sample_rate)  return error(f, VORBIS_invalid_first_page);
2695    get32(f); // bitrate_maximum
2696    get32(f); // bitrate_nominal
2697    get32(f); // bitrate_minimum
2698    x = get8(f);
2699    {
2700       int log0,log1;
2701       log0 = x & 15;
2702       log1 = x >> 4;
2703       f.blocksize_0 = 1 << log0;
2704       f.blocksize_1 = 1 << log1;
2705       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
2706       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
2707       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
2708    }
2709 
2710    // framing_flag
2711    x = get8(f);
2712    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
2713 
2714    // second packet!
2715    if (!start_page(f))                              return FALSE;
2716 
2717    if (!start_packet(f))                            return FALSE;
2718 
2719    if (!next_segment(f))                            return FALSE;
2720 
2721    if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
2722    for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f);
2723    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_setup);
2724    //file vendor
2725    len = get32_packet(f);
2726    f.vendor = cast(char*)setup_malloc(f, (char.sizeof) * (len+1));
2727    for(i=0; i < len; ++i) {
2728       f.vendor[i] = cast(char)get8_packet(f);
2729    }
2730    f.vendor[len] = cast(char)'\0';
2731    //user comments
2732    f.comment_list_length = get32_packet(f);
2733    f.comment_list = cast(char**)setup_malloc(f, (char*).sizeof * (f.comment_list_length));
2734 
2735    for(i=0; i < f.comment_list_length; ++i) {
2736       len = get32_packet(f);
2737       f.comment_list[i] = cast(char*)setup_malloc(f, (char.sizeof) * (len+1));
2738 
2739       for(j=0; j < len; ++j) {
2740          f.comment_list[i][j] = cast(char) get8_packet(f);
2741       }
2742       f.comment_list[i][len] = cast(char)'\0';
2743    }
2744 
2745    // framing_flag
2746    x = cast(ubyte) get8_packet(f);
2747    if (!(x & 1))                                    return error(f, VORBIS_invalid_setup);
2748 
2749 
2750    skip(f, f.bytes_in_seg);
2751    f.bytes_in_seg = 0;
2752 
2753    do {
2754       len = next_segment(f);
2755       skip(f, len);
2756       f.bytes_in_seg = 0;
2757    } while (len);
2758 
2759    // third packet!
2760    if (!start_packet(f))                            return FALSE;
2761 
2762    crc32_init(); // always init it, to avoid multithread race conditions
2763 
2764    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
2765    for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f);
2766    if (!vorbis_validate(header.ptr))                    return error(f, VORBIS_invalid_setup);
2767 
2768    // codebooks
2769 
2770    f.codebook_count = get_bits(f,8) + 1;
2771    f.codebooks = cast(Codebook *) setup_malloc(f, (*f.codebooks).sizeof * f.codebook_count);
2772    if (f.codebooks == null)                        return error(f, VORBIS_outofmem);
2773    memset(f.codebooks, 0, (*f.codebooks).sizeof * f.codebook_count);
2774    for (i=0; i < f.codebook_count; ++i) {
2775       uint *values;
2776       int ordered, sorted_count;
2777       int total=0;
2778       uint8 *lengths;
2779       Codebook *c = f.codebooks+i;
2780       x = cast(ubyte) get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
2781       x = cast(ubyte) get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
2782       x = cast(ubyte) get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
2783       x = cast(ubyte) get_bits(f, 8);
2784       c.dimensions = (get_bits(f, 8)<<8) + x;
2785       x = cast(ubyte) get_bits(f, 8);
2786       y = cast(ubyte) get_bits(f, 8);
2787       c.entries = (get_bits(f, 8)<<16) + (y<<8) + x;
2788       ordered = get_bits(f,1);
2789       c.sparse = cast(ubyte)( ordered ? 0 : get_bits(f,1) );
2790 
2791       if (c.dimensions == 0 && c.entries != 0)    return error(f, VORBIS_invalid_setup);
2792 
2793       if (c.sparse)
2794          lengths = cast(uint8 *) setup_temp_malloc(f, c.entries);
2795       else
2796          lengths = c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries);
2797 
2798       if (!lengths) return error(f, VORBIS_outofmem);
2799 
2800       if (ordered) {
2801          int current_entry = 0;
2802          int current_length = get_bits(f,5) + 1;
2803          while (current_entry < c.entries) {
2804             int limit = c.entries - current_entry;
2805             int n = get_bits(f, ilog(limit));
2806             if (current_length >= 32) return error(f, VORBIS_invalid_setup);
2807             if (current_entry + n > cast(int) c.entries) { return error(f, VORBIS_invalid_setup); }
2808             memset(lengths + current_entry, current_length, n);
2809             current_entry += n;
2810             ++current_length;
2811          }
2812       } else {
2813          for (j=0; j < c.entries; ++j) {
2814             int present = c.sparse ? get_bits(f,1) : 1;
2815             if (present) {
2816                lengths[j] = cast(ubyte)(get_bits(f, 5) + 1);
2817                ++total;
2818                if (lengths[j] == 32)
2819                   return error(f, VORBIS_invalid_setup);
2820             } else {
2821                lengths[j] = NO_CODE;
2822             }
2823          }
2824       }
2825 
2826       if (c.sparse && total >= c.entries >> 2) {
2827          // convert sparse items to non-sparse!
2828          if (c.entries > cast(int) f.setup_temp_memory_required)
2829             f.setup_temp_memory_required = c.entries;
2830 
2831          c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries);
2832          if (c.codeword_lengths == null) return error(f, VORBIS_outofmem);
2833          memcpy(c.codeword_lengths, lengths, c.entries);
2834          setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
2835          lengths = c.codeword_lengths;
2836          c.sparse = 0;
2837       }
2838 
2839       // compute the size of the sorted tables
2840       if (c.sparse) {
2841          sorted_count = total;
2842       } else {
2843          sorted_count = 0;
2844          for (j=0; j < c.entries; ++j)
2845             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
2846                ++sorted_count;
2847       }
2848 
2849       c.sorted_entries = sorted_count;
2850       values = null;
2851 
2852       if (!c.sparse) {
2853          c.codewords = cast(uint *) setup_malloc(f, (c.codewords[0]).sizeof * c.entries);
2854          if (!c.codewords)                  return error(f, VORBIS_outofmem);
2855       } else {
2856          uint size;
2857          if (c.sorted_entries) {
2858             c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.sorted_entries);
2859             if (!c.codeword_lengths)           return error(f, VORBIS_outofmem);
2860             c.codewords = cast(uint *) setup_temp_malloc(f, (*c.codewords).sizeof * c.sorted_entries);
2861             if (!c.codewords)                  return error(f, VORBIS_outofmem);
2862             values = cast(uint *) setup_temp_malloc(f, (*values).sizeof * c.sorted_entries);
2863             if (!values)                        return error(f, VORBIS_outofmem);
2864          }
2865          size = cast(uint)( c.entries + ((*c.codewords).sizeof + (*values).sizeof) * c.sorted_entries );
2866          if (size > f.setup_temp_memory_required)
2867             f.setup_temp_memory_required = size;
2868       }
2869 
2870       if (!compute_codewords(c, lengths, c.entries, values)) {
2871          if (c.sparse) setup_temp_free(f, values, 0);
2872          return error(f, VORBIS_invalid_setup);
2873       }
2874 
2875       if (c.sorted_entries) {
2876          // allocate an extra slot for sentinels
2877          c.sorted_codewords = cast(uint *) setup_malloc(f, (*c.sorted_codewords).sizeof * (c.sorted_entries+1));
2878          if (c.sorted_codewords == null) return error(f, VORBIS_outofmem);
2879          // allocate an extra slot at the front so that c.sorted_values[-1] is defined
2880          // so that we can catch that case without an extra if
2881          c.sorted_values    = cast( int   *) setup_malloc(f, (*c.sorted_values   ).sizeof * (c.sorted_entries+1));
2882          if (c.sorted_values == null) return error(f, VORBIS_outofmem);
2883          ++c.sorted_values;
2884          c.sorted_values[-1] = -1;
2885          compute_sorted_huffman(c, lengths, values);
2886       }
2887 
2888       if (c.sparse) {
2889          setup_temp_free(f, values, (*values).sizeof * c.sorted_entries);
2890          setup_temp_free(f, c.codewords, (*c.codewords).sizeof * c.sorted_entries);
2891          setup_temp_free(f, lengths, c.entries);
2892          c.codewords = null;
2893       }
2894 
2895       compute_accelerated_huffman(c);
2896 
2897       c.lookup_type = cast(ubyte) get_bits(f, 4);
2898       if (c.lookup_type > 2) return error(f, VORBIS_invalid_setup);
2899       if (c.lookup_type > 0) {
2900          uint16 *mults;
2901          c.minimum_value = float32_unpack(get_bits(f, 32));
2902          c.delta_value = float32_unpack(get_bits(f, 32));
2903          c.value_bits = cast(ubyte)(get_bits(f, 4)+1);
2904          c.sequence_p = cast(ubyte) get_bits(f,1);
2905          if (c.lookup_type == 1) {
2906             int values2 = lookup1_values(c.entries, c.dimensions);
2907             if (values2 < 0) return error(f, VORBIS_invalid_setup);
2908             c.lookup_values = cast(uint) values2;
2909          } else {
2910             c.lookup_values = c.entries * c.dimensions;
2911          }
2912          if (c.lookup_values == 0) return error(f, VORBIS_invalid_setup);
2913          mults = cast(uint16 *) setup_temp_malloc(f, (mults[0]).sizeof * c.lookup_values);
2914          if (mults == null) return error(f, VORBIS_outofmem);
2915          for (j=0; j < cast(int) c.lookup_values; ++j) {
2916             int q = get_bits(f, c.value_bits);
2917             if (q == EOP) { setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_invalid_setup); }
2918             mults[j] = cast(ushort)q;
2919          }
2920 
2921          {
2922             float last=0;
2923             c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.lookup_values);
2924             if (c.multiplicands == null) { setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); }
2925             for (j=0; j < cast(int) c.lookup_values; ++j) {
2926                float val = mults[j] * c.delta_value + c.minimum_value + last;
2927                c.multiplicands[j] = val;
2928                if (c.sequence_p)
2929                   last = val;
2930             }
2931          }
2932 
2933          setup_temp_free(f, mults, (mults[0]).sizeof*c.lookup_values);
2934 
2935       }
2936    }
2937 
2938    // time domain transfers (notused)
2939 
2940    x = cast(ubyte)(get_bits(f, 6) + 1);
2941    for (i=0; i < x; ++i) {
2942       uint z = get_bits(f, 16);
2943       if (z != 0) return error(f, VORBIS_invalid_setup);
2944    }
2945 
2946    // Floors
2947    f.floor_count = get_bits(f, 6)+1;
2948    f.floor_config = cast(Floor *)  setup_malloc(f, f.floor_count * (*f.floor_config).sizeof);
2949    if (f.floor_config == null) return error(f, VORBIS_outofmem);
2950    for (i=0; i < f.floor_count; ++i) {
2951       f.floor_types[i] = cast(ushort) get_bits(f, 16);
2952       if (f.floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
2953       if (f.floor_types[i] == 0) {
2954          Floor0 *g = &f.floor_config[i].floor0;
2955          g.order = cast(ubyte) get_bits(f,8);
2956          g.rate = cast(ushort) get_bits(f,16);
2957          g.bark_map_size = cast(ushort) get_bits(f,16);
2958          g.amplitude_bits = cast(ubyte) get_bits(f,6);
2959          g.amplitude_offset = cast(ubyte) get_bits(f,8);
2960          g.number_of_books = cast(ubyte) (get_bits(f,4) + 1);
2961          for (j=0; j < g.number_of_books; ++j)
2962             g.book_list[j] = cast(ubyte) get_bits(f,8);
2963          return error(f, VORBIS_feature_not_supported);
2964       } else {
2965          stbv__floor_ordering[31*8+2] p;
2966          Floor1 *g = &f.floor_config[i].floor1;
2967          int max_class = -1;
2968          g.partitions = cast(ubyte) get_bits(f, 5);
2969          for (j=0; j < g.partitions; ++j) {
2970             g.partition_class_list[j] = cast(ubyte) get_bits(f, 4);
2971             if (g.partition_class_list[j] > max_class)
2972                max_class = g.partition_class_list[j];
2973          }
2974          for (j=0; j <= max_class; ++j) {
2975             g.class_dimensions[j] = cast(ubyte) (get_bits(f, 3)+1);
2976             g.class_subclasses[j] = cast(ubyte) get_bits(f, 2);
2977             if (g.class_subclasses[j]) {
2978                g.class_masterbooks[j] = cast(ubyte) get_bits(f, 8);
2979                if (g.class_masterbooks[j] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
2980             }
2981             for (k=0; k < 1 << g.class_subclasses[j]; ++k) {
2982                g.subclass_books[j][k] = cast(short)( get_bits(f,8)-1 );
2983                if (g.subclass_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
2984             }
2985          }
2986          g.floor1_multiplier = cast(ubyte)(get_bits(f,2)+1);
2987          g.rangebits = cast(ubyte) get_bits(f,4);
2988          g.Xlist[0] = 0;
2989          g.Xlist[1] = cast(ushort)( 1 << g.rangebits );
2990          g.values = 2;
2991          for (j=0; j < g.partitions; ++j) {
2992             int c = g.partition_class_list[j];
2993             for (k=0; k < g.class_dimensions[c]; ++k) {
2994                g.Xlist[g.values] = cast(ushort) get_bits(f, g.rangebits);
2995                ++g.values;
2996             }
2997          }
2998          // precompute the sorting
2999          for (j=0; j < g.values; ++j) {
3000             p[j].x = g.Xlist[j];
3001             p[j].id = cast(ushort)j;
3002          }
3003          qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
3004          for (j=0; j < g.values-1; ++j)
3005             if (p[j].x == p[j+1].x)
3006                return error(f, VORBIS_invalid_setup);
3007          for (j=0; j < g.values; ++j)
3008             g.sorted_order[j] = cast(uint8) p[j].id;
3009          // precompute the neighbors
3010          for (j=2; j < g.values; ++j) {
3011             int low = 0,hi = 0;
3012             neighbors(g.Xlist.ptr, j, &low,&hi);
3013             g.neighbors[j][0] = cast(ubyte)low;
3014             g.neighbors[j][1] = cast(ubyte)hi;
3015          }
3016 
3017          if (g.values > longest_floorlist)
3018             longest_floorlist = g.values;
3019       }
3020    }
3021 
3022    // Residue
3023    f.residue_count = get_bits(f, 6)+1;
3024    f.residue_config = cast(Residue *) setup_malloc(f, f.residue_count * (f.residue_config[0]).sizeof);
3025    if (f.residue_config == null) return error(f, VORBIS_outofmem);
3026    memset(f.residue_config, 0, f.residue_count * (f.residue_config[0]).sizeof);
3027    for (i=0; i < f.residue_count; ++i) {
3028       uint8[64] residue_cascade;
3029       Residue *r = f.residue_config+i;
3030       f.residue_types[i] = cast(ushort) get_bits(f, 16);
3031       if (f.residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3032       r.begin = get_bits(f, 24);
3033       r.end = get_bits(f, 24);
3034       if (r.end < r.begin) return error(f, VORBIS_invalid_setup);
3035       r.part_size = get_bits(f,24)+1;
3036       r.classifications = cast(ubyte)( get_bits(f,6)+1);
3037       r.classbook = cast(ubyte) get_bits(f,8);
3038       if (r.classbook >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3039       for (j=0; j < r.classifications; ++j) {
3040          uint8 high_bits=0;
3041          uint8 low_bits=cast(ubyte) get_bits(f,3);
3042          if (get_bits(f,1))
3043             high_bits = cast(ubyte) get_bits(f,5);
3044          residue_cascade[j] = cast(ubyte)(high_bits*8 + low_bits);
3045       }
3046 
3047       r.residue_books = cast(int16[8]*) setup_malloc(f, (r.residue_books[0]).sizeof * r.classifications);
3048       if (r.residue_books == null) return error(f, VORBIS_outofmem);
3049       for (j=0; j < r.classifications; ++j) {
3050          for (k=0; k < 8; ++k) {
3051             if (residue_cascade[j] & (1 << k)) {
3052                r.residue_books[j][k] = cast(short) get_bits(f, 8);
3053                if (r.residue_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup);
3054             } else {
3055                r.residue_books[j][k] = -1;
3056             }
3057          }
3058       }
3059       // precompute the classifications[] array to avoid inner-loop mod/divide
3060       // call it 'classdata' since we already have r.classifications
3061       r.classdata = cast(uint8 **) setup_malloc(f, (*r.classdata).sizeof * f.codebooks[r.classbook].entries);
3062       if (!r.classdata) return error(f, VORBIS_outofmem);
3063       memset(r.classdata, 0, (*r.classdata).sizeof * f.codebooks[r.classbook].entries);
3064       for (j=0; j < f.codebooks[r.classbook].entries; ++j) {
3065          int classwords = f.codebooks[r.classbook].dimensions;
3066          int temp = j;
3067          r.classdata[j] = cast(uint8 *) setup_malloc(f, (r.classdata[j][0]).sizeof * classwords);
3068          if (r.classdata[j] == null) return error(f, VORBIS_outofmem);
3069          for (k=classwords-1; k >= 0; --k) {
3070             r.classdata[j][k] = cast(ubyte)(temp % r.classifications);
3071             temp /= r.classifications;
3072          }
3073       }
3074    }
3075 
3076    f.mapping_count = get_bits(f,6)+1;
3077    f.mapping = cast(Mapping *) setup_malloc(f, f.mapping_count * (*f.mapping).sizeof);
3078    if (f.mapping == null) return error(f, VORBIS_outofmem);
3079    memset(f.mapping, 0, f.mapping_count * (*f.mapping).sizeof);
3080    for (i=0; i < f.mapping_count; ++i) {
3081       Mapping *m = f.mapping + i;
3082       int mapping_type = get_bits(f,16);
3083       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3084       m.chan = cast(MappingChannel *) setup_malloc(f, f.channels * (*m.chan).sizeof);
3085       if (m.chan == null) return error(f, VORBIS_outofmem);
3086       if (get_bits(f,1))
3087          m.submaps = cast(ubyte)(get_bits(f,4)+1);
3088       else
3089          m.submaps = 1;
3090       if (m.submaps > max_submaps)
3091          max_submaps = m.submaps;
3092       if (get_bits(f,1)) {
3093          m.coupling_steps = cast(ushort)( get_bits(f,8)+1 );
3094          if (m.coupling_steps > f.channels) return error(f, VORBIS_invalid_setup);
3095          for (k=0; k < m.coupling_steps; ++k) {
3096             m.chan[k].magnitude = cast(ubyte) get_bits(f, ilog(f.channels-1));
3097             m.chan[k].angle =     cast(ubyte) get_bits(f, ilog(f.channels-1));
3098             if (m.chan[k].magnitude >= f.channels)        return error(f, VORBIS_invalid_setup);
3099             if (m.chan[k].angle     >= f.channels)        return error(f, VORBIS_invalid_setup);
3100             if (m.chan[k].magnitude == m.chan[k].angle)   return error(f, VORBIS_invalid_setup);
3101          }
3102       } else
3103          m.coupling_steps = 0;
3104 
3105       // reserved field
3106       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
3107       if (m.submaps > 1) {
3108          for (j=0; j < f.channels; ++j) {
3109             m.chan[j].mux = cast(ubyte) get_bits(f, 4);
3110             if (m.chan[j].mux >= m.submaps)                return error(f, VORBIS_invalid_setup);
3111          }
3112       } else
3113          // @SPECIFICATION: this case is missing from the spec
3114          for (j=0; j < f.channels; ++j)
3115             m.chan[j].mux = 0;
3116 
3117       for (j=0; j < m.submaps; ++j) {
3118          get_bits(f,8); // discard
3119          m.submap_floor[j] = cast(ubyte) get_bits(f,8);
3120          m.submap_residue[j] = cast(ubyte) get_bits(f,8);
3121          if (m.submap_floor[j] >= f.floor_count)      return error(f, VORBIS_invalid_setup);
3122          if (m.submap_residue[j] >= f.residue_count)  return error(f, VORBIS_invalid_setup);
3123       }
3124    }
3125 
3126    // Modes
3127    f.mode_count = get_bits(f, 6)+1;
3128    for (i=0; i < f.mode_count; ++i) {
3129       Mode *m = f.mode_config.ptr+i;
3130       m.blockflag = cast(ubyte) get_bits(f,1);
3131       m.windowtype = cast(ushort) get_bits(f,16);
3132       m.transformtype = cast(ushort) get_bits(f,16);
3133       m.mapping = cast(ubyte) get_bits(f,8);
3134       if (m.windowtype != 0)                 return error(f, VORBIS_invalid_setup);
3135       if (m.transformtype != 0)              return error(f, VORBIS_invalid_setup);
3136       if (m.mapping >= f.mapping_count)     return error(f, VORBIS_invalid_setup);
3137    }
3138 
3139    flush_packet(f);
3140 
3141    f.previous_length = 0;
3142 
3143    for (i=0; i < f.channels; ++i) 
3144    {
3145       f.channel_buffers[i] = cast(float *) setup_malloc(f, (float.sizeof) * f.blocksize_1);
3146       f.previous_window[i] = cast(float *) setup_malloc(f, (float.sizeof) * f.blocksize_1/2);
3147       f.finalY[i]          = cast(int16 *) setup_malloc(f, (int16.sizeof) * longest_floorlist);
3148       if (f.channel_buffers[i] == null || f.previous_window[i] == null || f.finalY[i] == null) return error(f, VORBIS_outofmem);
3149       memset(f.channel_buffers[i], 0, (float.sizeof) * f.blocksize_1);
3150    }
3151 
3152    if (!init_blocksize(f, 0, f.blocksize_0)) return FALSE;
3153    if (!init_blocksize(f, 1, f.blocksize_1)) return FALSE;
3154    f.blocksize[0] = f.blocksize_0;
3155    f.blocksize[1] = f.blocksize_1;
3156 
3157    // compute how much temporary memory is needed
3158 
3159    // 1.
3160    {
3161       uint imdct_mem = cast(uint)( (f.blocksize_1 * (float.sizeof) >> 1) );
3162       uint classify_mem;
3163       int i2,max_part_read=0;
3164       for (i2=0; i2 < f.residue_count; ++i2) {
3165          Residue *r = f.residue_config + i2;
3166          uint actual_size = f.blocksize_1 / 2;
3167          uint limit_r_begin = r.begin < actual_size ? r.begin : actual_size;
3168          uint limit_r_end   = r.end   < actual_size ? r.end   : actual_size;
3169          int n_read = limit_r_end - limit_r_begin;
3170          int part_read = n_read / r.part_size;
3171          if (part_read > max_part_read)
3172             max_part_read = part_read;
3173       }
3174       classify_mem = cast(uint)( f.channels * ((void*).sizeof + max_part_read * (uint8 *).sizeof) );
3175       
3176       // maximum reasonable partition size is f.blocksize_1
3177 
3178       f.temp_memory_required = classify_mem;
3179       if (imdct_mem > f.temp_memory_required)
3180          f.temp_memory_required = imdct_mem;
3181    }
3182 
3183 
3184    if (f.alloc.alloc_buffer) {
3185       assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3186       // check if there's enough temp memory so we don't error later
3187       if (f.setup_offset + (*f).sizeof + f.temp_memory_required > cast(uint) f.temp_offset)
3188          return error(f, VORBIS_outofmem);
3189    }
3190 
3191    // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
3192    // without PAGEFLAG_continued_packet, so this either points to the first page, or
3193    // the page after the end of the headers. It might be cleaner to point to a page
3194    // in the middle of the headers, when that's the page where the first audio packet
3195    // starts, but we'd have to also correctly skip the end of any continued packet in
3196    // stb_vorbis_seek_start.
3197    if (f.next_seg == -1) {
3198       f.first_audio_page_offset = stb_vorbis_get_file_offset(f);
3199    } else {
3200       f.first_audio_page_offset = 0;
3201    }
3202 
3203    return TRUE;
3204 }
3205 
3206 static void vorbis_deinit(stb_vorbis *p)
3207 {
3208    int i,j;
3209 
3210    setup_free(p, p.vendor);
3211    for (i=0; i < p.comment_list_length; ++i) {
3212       setup_free(p, p.comment_list[i]);
3213    }
3214    setup_free(p, p.comment_list);
3215 
3216    if (p.residue_config) {
3217       for (i=0; i < p.residue_count; ++i) {
3218          Residue *r = p.residue_config+i;
3219          if (r.classdata) {
3220             for (j=0; j < p.codebooks[r.classbook].entries; ++j)
3221                setup_free(p, r.classdata[j]);
3222             setup_free(p, r.classdata);
3223          }
3224          setup_free(p, r.residue_books);
3225       }
3226    }
3227 
3228    if (p.codebooks) {
3229       for (i=0; i < p.codebook_count; ++i) {
3230          Codebook *c = p.codebooks + i;
3231          setup_free(p, c.codeword_lengths);
3232          setup_free(p, c.multiplicands);
3233          setup_free(p, c.codewords);
3234          setup_free(p, c.sorted_codewords);
3235          // c.sorted_values[-1] is the first entry in the array
3236          setup_free(p, c.sorted_values ? c.sorted_values-1 : null);
3237       }
3238       setup_free(p, p.codebooks);
3239    }
3240    setup_free(p, p.floor_config);
3241    setup_free(p, p.residue_config);
3242    if (p.mapping) {
3243       for (i=0; i < p.mapping_count; ++i)
3244          setup_free(p, p.mapping[i].chan);
3245       setup_free(p, p.mapping);
3246    }
3247 
3248    for (i=0; i < p.channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
3249       setup_free(p, p.channel_buffers[i]);
3250       setup_free(p, p.previous_window[i]);
3251       setup_free(p, p.finalY[i]);
3252    }
3253    for (i=0; i < 2; ++i) {
3254       setup_free(p, p.A[i]);
3255       setup_free(p, p.B[i]);
3256       setup_free(p, p.C[i]);
3257       setup_free(p, p.window[i]);
3258       setup_free(p, p.bit_reverse[i]);
3259    }
3260 }
3261 
3262 void stb_vorbis_close(stb_vorbis *p)
3263 {
3264    if (p == null) return;
3265    vorbis_deinit(p);
3266    setup_free(p,p);
3267 }
3268 
3269 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z)
3270 {
3271    memset(p, 0, (*p).sizeof); // null out all malloc'd pointers to start
3272    if (z) {
3273       p.alloc = *z;
3274       p.alloc.alloc_buffer_length_in_bytes = (p.alloc.alloc_buffer_length_in_bytes+3) & ~3;
3275       p.temp_offset = p.alloc.alloc_buffer_length_in_bytes;
3276    }
3277    p.eof = 0;
3278    p.error = VORBIS__no_error;
3279    p.codebooks = null;
3280    p.page_crc_tests = -1;
3281 }
3282 
3283 int stb_vorbis_get_sample_offset(stb_vorbis *f)
3284 {
3285    if (f.current_loc_valid)
3286       return f.current_loc;
3287    else
3288       return -1;
3289 }
3290 
3291 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
3292 {
3293    stb_vorbis_info d;
3294    d.channels = f.channels;
3295    d.sample_rate = f.sample_rate;
3296    d.setup_memory_required = f.setup_memory_required;
3297    d.setup_temp_memory_required = f.setup_temp_memory_required;
3298    d.temp_memory_required = f.temp_memory_required;
3299    d.max_frame_size = f.blocksize_1 >> 1;
3300    return d;
3301 }
3302 
3303 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
3304 {
3305    stb_vorbis_comment d;
3306    d.vendor = f.vendor;
3307    d.comment_list_length = f.comment_list_length;
3308    d.comment_list = f.comment_list;
3309    return d;
3310 }
3311 
3312 int stb_vorbis_get_error(stb_vorbis *f)
3313 {
3314    int e = f.error;
3315    f.error = VORBIS__no_error;
3316    return e;
3317 }
3318 
3319 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
3320 {
3321    stb_vorbis *p = cast(stb_vorbis *) setup_malloc(f, stb_vorbis.sizeof);
3322    return p;
3323 }
3324 
3325 uint stb_vorbis_get_file_offset(stb_vorbis *f)
3326 {
3327     return cast(uint)( f.io.tell(f.userData) );
3328 }
3329 
3330 //
3331 // DATA-PULLING API
3332 //
3333 
3334 uint vorbis_find_page(stb_vorbis *f, uint *end, uint *last)
3335 {
3336    for(;;) {
3337       int n;
3338       if (f.eof) return 0;
3339       n = get8(f);
3340       if (n == 0x4f) { // page header candidate
3341          uint retry_loc = stb_vorbis_get_file_offset(f);
3342          int i2;
3343          // check if we're off the end of a file_section stream
3344          if (retry_loc - 25 > f.stream_len)
3345             return 0;
3346          // check the rest of the header
3347          for (i2=1; i2 < 4; ++i2)
3348             if (get8(f) != ogg_page_header[i2])
3349                break;
3350          if (f.eof) return 0;
3351          if (i2 == 4) {
3352             uint8[27] header;
3353             uint i, crc, goal, len;
3354             for (i=0; i < 4; ++i)
3355                header[i] = ogg_page_header[i];
3356             for (; i < 27; ++i)
3357                header[i] = get8(f);
3358             if (f.eof) return 0;
3359             if (header[4] != 0) goto invalid;
3360             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
3361             for (i=22; i < 26; ++i)
3362                header[i] = 0;
3363             crc = 0;
3364             for (i=0; i < 27; ++i)
3365                crc = crc32_update(crc, header[i]);
3366             len = 0;
3367             for (i=0; i < header[26]; ++i) {
3368                int s = get8(f);
3369                crc = crc32_update(crc, cast(ubyte)s);
3370                len += s;
3371             }
3372             if (len && f.eof) return 0;
3373             for (i=0; i < len; ++i)
3374                crc = crc32_update(crc, get8(f));
3375             // finished parsing probable page
3376             if (crc == goal) {
3377                // we could now check that it's either got the last
3378                // page flag set, OR it's followed by the capture
3379                // pattern, but I guess TECHNICALLY you could have
3380                // a file with garbage between each ogg page and recover
3381                // from it automatically? So even though that paranoia
3382                // might decrease the chance of an invalid decode by
3383                // another 2^32, not worth it since it would hose those
3384                // invalid-but-useful files?
3385                if (end)
3386                   *end = stb_vorbis_get_file_offset(f);
3387                if (last) {
3388                   if (header[5] & 0x04)
3389                      *last = 1;
3390                   else
3391                      *last = 0;
3392                }
3393                set_file_offset(f, retry_loc-1);
3394                return 1;
3395             }
3396          }
3397         invalid:
3398          // not a valid page, so rewind and look for next one
3399          set_file_offset(f, retry_loc);
3400       }
3401    }
3402    assert(false);
3403 }
3404 
3405 
3406 enum SAMPLE_unknown = 0xffffffff;
3407 
3408 // seeking is implemented with a binary search, which narrows down the range to
3409 // 64K, before using a linear search (because finding the synchronization
3410 // pattern can be expensive, and the chance we'd find the end page again is
3411 // relatively high for small ranges)
3412 //
3413 // two initial interpolation-style probes are used at the start of the search
3414 // to try to bound either side of the binary search sensibly, while still
3415 // working in O(log n) time if they fail.
3416 
3417 int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
3418 {
3419    uint8[27] header;
3420    uint8[255] lacing;
3421    int i,len;
3422 
3423    // record where the page starts
3424    z.page_start = stb_vorbis_get_file_offset(f);
3425 
3426    // parse the header
3427    getn(f, header.ptr, 27);
3428    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
3429       return 0;
3430    getn(f, lacing.ptr, header[26]);
3431 
3432    // determine the length of the payload
3433    len = 0;
3434    for (i=0; i < header[26]; ++i)
3435       len += lacing[i];
3436 
3437    // this implies where the page ends
3438    z.page_end = z.page_start + 27 + header[26] + len;
3439 
3440    // read the last-decoded sample out of the data
3441    z.last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
3442 
3443    // restore file state to where we were
3444    set_file_offset(f, z.page_start);
3445    return 1;
3446 }
3447 
3448 // rarely used function to seek back to the preceding page while finding the
3449 // start of a packet
3450 static int go_to_page_before(stb_vorbis *f, uint limit_offset)
3451 {
3452    uint previous_safe, end;
3453 
3454    // now we want to seek back 64K from the limit
3455    if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset)
3456       previous_safe = limit_offset - 65536;
3457    else
3458       previous_safe = f.first_audio_page_offset;
3459 
3460    set_file_offset(f, previous_safe);
3461 
3462    while (vorbis_find_page(f, &end, null)) {
3463       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
3464          return 1;
3465       set_file_offset(f, end);
3466    }
3467 
3468    return 0;
3469 }
3470 
3471 // implements the search logic for finding a page and starting decoding. if
3472 // the function succeeds, current_loc_valid will be true and current_loc will
3473 // be less than or equal to the provided sample number (the closer the
3474 // better).
3475 static int seek_to_sample_coarse(stb_vorbis *f, uint sample_number)
3476 {
3477    ProbedPage left, right, mid;
3478    int i, start_seg_with_known_loc, end_pos, page_start;
3479    uint delta, stream_length, padding, last_sample_limit;
3480    double offset = 0.0, bytes_per_sample = 0.0;
3481    int probe = 0;
3482 
3483    // find the last page and validate the target sample
3484    stream_length = stb_vorbis_stream_length_in_samples(f);
3485    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
3486    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
3487 
3488    // this is the maximum difference between the window-center (which is the
3489    // actual granule position value), and the right-start (which the spec
3490    // indicates should be the granule position (give or take one)).
3491    padding = ((f.blocksize_1 - f.blocksize_0) >> 2);
3492    if (sample_number < padding)
3493       last_sample_limit = 0;
3494    else
3495       last_sample_limit = sample_number - padding;
3496 
3497    left = f.p_first;
3498    while (left.last_decoded_sample == ~0U) {
3499       // (untested) the first page does not have a 'last_decoded_sample'
3500       set_file_offset(f, left.page_end);
3501       if (!get_seek_page_info(f, &left)) goto error;
3502    }
3503 
3504    right = f.p_last;
3505    assert(right.last_decoded_sample != ~0U);
3506 
3507    // starting from the start is handled differently
3508    if (last_sample_limit <= left.last_decoded_sample) {
3509       if (stb_vorbis_seek_start(f)) {
3510          if (f.current_loc > sample_number)
3511             return error(f, VORBIS_seek_failed);
3512          return 1;
3513       }
3514       return 0;
3515    }
3516 
3517    while (left.page_end != right.page_start) {
3518       assert(left.page_end < right.page_start);
3519       // search range in bytes
3520       delta = right.page_start - left.page_end;
3521       if (delta <= 65536) {
3522          // there's only 64K left to search - handle it linearly
3523          set_file_offset(f, left.page_end);
3524       } else {
3525          if (probe < 2) {
3526             if (probe == 0) {
3527                // first probe (interpolate)
3528                double data_bytes = right.page_end - left.page_start;
3529                bytes_per_sample = data_bytes / right.last_decoded_sample;
3530                offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
3531             } else {
3532                // second probe (try to bound the other side)
3533                double error = (cast(double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
3534                if (error >= 0 && error <  8000) error =  8000;
3535                if (error <  0 && error > -8000) error = -8000;
3536                offset += error * 2;
3537             }
3538 
3539             // ensure the offset is valid
3540             if (offset < left.page_end)
3541                offset = left.page_end;
3542             if (offset > right.page_start - 65536)
3543                offset = right.page_start - 65536;
3544 
3545             set_file_offset(f, cast(uint) offset);
3546          } else {
3547             // binary search for large ranges (offset by 32K to ensure
3548             // we don't hit the right page)
3549             set_file_offset(f, left.page_end + (delta / 2) - 32768);
3550          }
3551 
3552          if (!vorbis_find_page(f, null, null)) goto error;
3553       }
3554 
3555       for (;;) {
3556          if (!get_seek_page_info(f, &mid)) goto error;
3557          if (mid.last_decoded_sample != ~0U) break;
3558          // (untested) no frames end on this page
3559          set_file_offset(f, mid.page_end);
3560          assert(mid.page_start < right.page_start);
3561       }
3562 
3563       // if we've just found the last page again then we're in a tricky file,
3564       // and we're close enough (if it wasn't an interpolation probe).
3565       if (mid.page_start == right.page_start) {
3566          if (probe >= 2 || delta <= 65536)
3567             break;
3568       } else {
3569          if (last_sample_limit < mid.last_decoded_sample)
3570             right = mid;
3571          else
3572             left = mid;
3573       }
3574 
3575       ++probe;
3576    }
3577 
3578    // seek back to start of the last packet
3579    page_start = left.page_start;
3580    set_file_offset(f, page_start);
3581    if (!start_page(f)) return error(f, VORBIS_seek_failed);
3582    end_pos = f.end_seg_with_known_loc;
3583    assert(end_pos >= 0);
3584 
3585    for (;;) {
3586       for (i = end_pos; i > 0; --i)
3587          if (f.segments[i-1] != 255)
3588             break;
3589 
3590       start_seg_with_known_loc = i;
3591 
3592       if (start_seg_with_known_loc > 0 || !(f.page_flag & PAGEFLAG_continued_packet))
3593          break;
3594 
3595       // (untested) the final packet begins on an earlier page
3596       if (!go_to_page_before(f, page_start))
3597          goto error;
3598 
3599       page_start = stb_vorbis_get_file_offset(f);
3600       if (!start_page(f)) goto error;
3601       end_pos = f.segment_count - 1;
3602    }
3603 
3604    // prepare to start decoding
3605    f.current_loc_valid = FALSE;
3606    f.last_seg = FALSE;
3607    f.valid_bits = 0;
3608    f.packet_bytes = 0;
3609    f.bytes_in_seg = 0;
3610    f.previous_length = 0;
3611    f.next_seg = start_seg_with_known_loc;
3612 
3613    for (i = 0; i < start_seg_with_known_loc; i++)
3614       skip(f, f.segments[i]);
3615 
3616    // start decoding (optimizable - this frame is generally discarded)
3617    if (!vorbis_pump_first_frame(f))
3618       return 0;
3619    if (f.current_loc > sample_number)
3620       return error(f, VORBIS_seek_failed);
3621    return 1;
3622 
3623 error:
3624    // try to restore the file to a valid state
3625    stb_vorbis_seek_start(f);
3626    return error(f, VORBIS_seek_failed);
3627 }
3628 
3629 // the same as vorbis_decode_initial, but without advancing
3630 int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3631 {
3632    int bits_read, bytes_read;
3633 
3634    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
3635       return 0;
3636 
3637    // either 1 or 2 bytes were read, figure out which so we can rewind
3638    bits_read = 1 + ilog(f.mode_count-1);
3639    if (f.mode_config[*mode].blockflag)
3640       bits_read += 2;
3641    bytes_read = (bits_read + 7) / 8;
3642 
3643    f.bytes_in_seg += bytes_read;
3644    f.packet_bytes -= bytes_read;
3645    skip(f, -bytes_read);
3646    if (f.next_seg == -1)
3647       f.next_seg = f.segment_count - 1;
3648    else
3649       f.next_seg--;
3650    f.valid_bits = 0;
3651 
3652    return 1;
3653 }
3654 
3655 int stb_vorbis_seek_frame(stb_vorbis *f, uint sample_number)
3656 {
3657    uint max_frame_samples;
3658 
3659    // fast page-level search
3660    if (!seek_to_sample_coarse(f, sample_number))
3661       return 0;
3662 
3663    assert(f.current_loc_valid);
3664    assert(f.current_loc <= sample_number);
3665 
3666    // linear search for the relevant packet
3667    max_frame_samples = (f.blocksize_1*3 - f.blocksize_0) >> 2;
3668    while (f.current_loc < sample_number) {
3669       int left_start, left_end, right_start, right_end, mode, frame_samples;
3670       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
3671          return error(f, VORBIS_seek_failed);
3672       // calculate the number of samples returned by the next frame
3673       frame_samples = right_start - left_start;
3674       if (f.current_loc + frame_samples > sample_number) {
3675          return 1; // the next frame will contain the sample
3676       } else if (f.current_loc + frame_samples + max_frame_samples > sample_number) {
3677          // there's a chance the frame after this could contain the sample
3678          vorbis_pump_first_frame(f);
3679       } else {
3680          // this frame is too early to be relevant
3681          f.current_loc += frame_samples;
3682          f.previous_length = 0;
3683          maybe_start_packet(f);
3684          flush_packet(f);
3685       }
3686    }
3687    // the next frame should start with the sample
3688    if (f.current_loc != sample_number) return error(f, VORBIS_seek_failed);
3689    return 1;
3690 }
3691 
3692 int stb_vorbis_seek(stb_vorbis *f, uint sample_number)
3693 {
3694    if (!stb_vorbis_seek_frame(f, sample_number))
3695       return 0;
3696 
3697    if (sample_number != f.current_loc) {
3698       int n;
3699       uint frame_start = f.current_loc;
3700       stb_vorbis_get_frame_float(f, &n, null);
3701       assert(sample_number > frame_start);
3702       assert(f.channel_buffer_start + cast(int) (sample_number-frame_start) <= f.channel_buffer_end);
3703       f.channel_buffer_start += (sample_number - frame_start);
3704    }
3705 
3706    return 1;
3707 }
3708 
3709 int stb_vorbis_seek_start(stb_vorbis *f)
3710 {
3711    set_file_offset(f, f.first_audio_page_offset);
3712    f.previous_length = 0;
3713    f.first_decode = TRUE;
3714    f.next_seg = -1;
3715    return vorbis_pump_first_frame(f);
3716 }
3717 
3718 uint stb_vorbis_stream_length_in_samples(stb_vorbis *f)
3719 {
3720    uint restore_offset, previous_safe;
3721    uint end, last_page_loc;
3722 
3723    if (!f.total_samples) {
3724       uint last;
3725       uint lo,hi;
3726       char[6] header;
3727 
3728       // first, store the current decode position so we can restore it
3729       restore_offset = stb_vorbis_get_file_offset(f);
3730 
3731       // now we want to seek back 64K from the end (the last page must
3732       // be at most a little less than 64K, but let's allow a little slop)
3733       if (f.stream_len >= 65536 && f.stream_len-65536 >= f.first_audio_page_offset)
3734          previous_safe = f.stream_len - 65536;
3735       else
3736          previous_safe = f.first_audio_page_offset;
3737 
3738       set_file_offset(f, previous_safe);
3739       // previous_safe is now our candidate 'earliest known place that seeking
3740       // to will lead to the final page'
3741 
3742       if (!vorbis_find_page(f, &end, &last)) {
3743          // if we can't find a page, we're hosed!
3744          f.error = VORBIS_cant_find_last_page;
3745          f.total_samples = 0xffffffff;
3746          goto done;
3747       }
3748 
3749       // check if there are more pages
3750       last_page_loc = stb_vorbis_get_file_offset(f);
3751 
3752       // stop when the last_page flag is set, not when we reach eof;
3753       // this allows us to stop short of a 'file_section' end without
3754       // explicitly checking the length of the section
3755       while (!last) {
3756          set_file_offset(f, end);
3757          if (!vorbis_find_page(f, &end, &last)) {
3758             // the last page we found didn't have the 'last page' flag
3759             // set. whoops!
3760             break;
3761          }
3762          previous_safe = last_page_loc+1;
3763          last_page_loc = stb_vorbis_get_file_offset(f);
3764       }
3765 
3766       set_file_offset(f, last_page_loc);
3767 
3768       // parse the header
3769       getn(f, cast(ubyte*) header.ptr, 6);
3770       // extract the absolute granule position
3771       lo = get32(f);
3772       hi = get32(f);
3773       if (lo == 0xffffffff && hi == 0xffffffff) {
3774          f.error = VORBIS_cant_find_last_page;
3775          f.total_samples = SAMPLE_unknown;
3776          goto done;
3777       }
3778       if (hi)
3779          lo = 0xfffffffe; // saturate
3780       f.total_samples = lo;
3781 
3782       f.p_last.page_start = last_page_loc;
3783       f.p_last.page_end   = end;
3784       f.p_last.last_decoded_sample = lo;
3785 
3786      done:
3787       set_file_offset(f, restore_offset);
3788    }
3789    return f.total_samples == SAMPLE_unknown ? 0 : f.total_samples;
3790 }
3791 
3792 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
3793 {
3794    int len, right,left,i;
3795 
3796    if (!vorbis_decode_packet(f, &len, &left, &right)) {
3797       f.channel_buffer_start = f.channel_buffer_end = 0;
3798       return 0;
3799    }
3800 
3801    len = vorbis_finish_frame(f, len, left, right);
3802    for (i=0; i < f.channels; ++i)
3803       f.outputs[i] = f.channel_buffers[i] + left;
3804 
3805    f.channel_buffer_start = left;
3806    f.channel_buffer_end   = left+len;
3807 
3808    if (channels) *channels = f.channels;
3809    if (output)   *output = f.outputs.ptr;
3810    return len;
3811 }
3812 
3813 
3814 stb_vorbis* stb_vorbis_open_audioformats(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc)
3815 {
3816    stb_vorbis *f;
3817    stb_vorbis p;
3818    vorbis_init(&p, alloc);
3819    p.io = io;
3820    p.userData = userData;
3821    p.stream_len = cast(uint) io.getFileLength(userData);
3822 
3823    if (start_decoder(&p)) {
3824       f = vorbis_alloc(&p);
3825       if (f) 
3826       {
3827          *f = p;
3828          vorbis_pump_first_frame(f);
3829          return f;
3830       }
3831    }
3832    if (error) *error = p.error;
3833    vorbis_deinit(&p);
3834    return null;
3835 }
3836 
3837 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
3838 {
3839    float **outputs;
3840    int len = num_floats / channels;
3841    int n=0;
3842    int z = f.channels;
3843    if (z > channels) z = channels;
3844    while (n < len) {
3845       int i,j;
3846       int k = f.channel_buffer_end - f.channel_buffer_start;
3847       if (n+k >= len) k = len - n;
3848       for (j=0; j < k; ++j) {
3849          for (i=0; i < z; ++i)
3850             *buffer++ = f.channel_buffers[i][f.channel_buffer_start+j];
3851          for (   ; i < channels; ++i)
3852             *buffer++ = 0;
3853       }
3854       n += k;
3855       f.channel_buffer_start += k;
3856       if (n == len)
3857          break;
3858       if (!stb_vorbis_get_frame_float(f, null, &outputs))
3859          break;
3860    }
3861    return n;
3862 }
3863 
3864 
3865 /* Version history
3866     1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
3867                            found with Mayhem by ForAllSecure
3868     1.16    - 2019-03-04 - fix warnings
3869     1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
3870     1.14    - 2018-02-11 - delete bogus dealloca usage
3871     1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
3872     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
3873     1.11    - 2017-07-23 - fix MinGW compilation
3874     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
3875     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
3876     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
3877                            avoid discarding last frame of audio data
3878     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
3879                            some more crash fixes when out of memory or with corrupt files
3880     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
3881                            some crash fixes when out of memory or with corrupt files
3882     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
3883     1.04    - 2014-08-27 - fix missing const-correct case in API
3884     1.03    - 2014-08-07 - Warning fixes
3885     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
3886     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
3887     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
3888                            (API change) report sample rate for decode-full-file funcs
3889     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
3890     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
3891     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
3892     0.99993 - remove assert that fired on legal files with empty tables
3893     0.99992 - rewind-to-start
3894     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
3895     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
3896     0.9998 - add a full-decode function with a memory source
3897     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
3898     0.9996 - query length of vorbis stream in samples/seconds
3899     0.9995 - bugfix to another optimization that only happened in certain files
3900     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
3901     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
3902     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
3903     0.9991 - performance improvement of IMDCT
3904     0.999 - (should have been 0.9990) performance improvement of IMDCT
3905     0.998 - no-CRT support from Casey Muratori
3906     0.997 - bugfixes for bugs found by Terje Mathisen
3907     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
3908     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
3909     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
3910     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
3911     0.992 - fixes for MinGW warning
3912     0.991 - turn fast-float-conversion on by default
3913     0.990 - fix push-mode seek recovery if you seek into the headers
3914     0.98b - fix to bad release of 0.98
3915     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
3916     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
3917     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
3918     0.95 - clamping code for 16-bit functions
3919     0.94 - not publically released
3920     0.93 - fixed all-zero-floor case (was decoding garbage)
3921     0.92 - fixed a memory leak
3922     0.90 - first public release
3923 */
3924 
3925 
3926 /*
3927 ------------------------------------------------------------------------------
3928 This software is available under 2 licenses -- choose whichever you prefer.
3929 ------------------------------------------------------------------------------
3930 ALTERNATIVE A - MIT License
3931 Copyright (c) 2017 Sean Barrett
3932 Permission is hereby granted, free of charge, to any person obtaining a copy of
3933 this software and associated documentation files (the "Software"), to deal in
3934 the Software without restriction, including without limitation the rights to
3935 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
3936 of the Software, and to permit persons to whom the Software is furnished to do
3937 so, subject to the following conditions:
3938 The above copyright notice and this permission notice shall be included in all
3939 copies or substantial portions of the Software.
3940 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3941 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3942 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3943 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
3944 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
3945 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3946 SOFTWARE.
3947 ------------------------------------------------------------------------------
3948 ALTERNATIVE B - Public Domain (www.unlicense.org)
3949 This is free and unencumbered software released into the public domain.
3950 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
3951 software, either in source code form or as a compiled binary, for any purpose,
3952 commercial or non-commercial, and by any means.
3953 In jurisdictions that recognize copyright laws, the author or authors of this
3954 software dedicate any and all copyright interest in the software to the public
3955 domain. We make this dedication for the benefit of the public at large and to
3956 the detriment of our heirs and successors. We intend this dedication to be an
3957 overt act of relinquishment in perpetuity of all present and future rights to
3958 this software under copyright law.
3959 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
3960 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
3961 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
3962 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
3963 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
3964 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3965 ------------------------------------------------------------------------------
3966 */