1 // Ogg Vorbis audio decoder - v1.10 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix
33 //
34 // Partial history:
35 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
36 //    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
37 //    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
38 //                           avoid discarding last frame of audio data
39 //    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
40 //                           some more crash fixes when out of memory or with corrupt files
41 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
42 //                           some crash fixes when out of memory or with corrupt files
43 //                           fix some inappropriately signed shifts
44 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
45 //    1.04    - 2014/08/27 - fix missing const-correct case in API
46 //    1.03    - 2014/08/07 - warning fixes
47 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
48 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
49 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
50 //                           (API change) report sample rate for decode-full-file funcs
51 //    0.99996 -            - bracket #include <malloc.h> for macintosh compilation
52 //    0.99995 -            - avoid alias-optimization issue in float-to-int conversion
53 //
54 // See end of file for full version history.
55 // D translation by Ketmar // Invisible Vector
56 // stolen by adam and module renamed.
57 /++
58     Port of stb_vorbis to D. Provides .ogg audio file reading capabilities. See [arsd.simpleaudio] for code that can use this to actually load and play the file.
59 +/
60 module audioformats.vorbis;
61 
62 import audioformats.io;
63 import core.stdc.stdio : FILE;
64 
// Windows-only stand-in for a C-linkage `lrintf`.
// NOTE: the conversion is a plain `cast(int)`, so the fractional part is
// dropped (truncation toward zero) instead of being rounded.
version(Windows)
    extern(C) int lrintf(float f)
    {
        immutable int truncated = cast(int) f;
        return truncated;
    }
70 
71 nothrow /*@trusted*/:
72 @nogc { // code block, as c macro helper is not @nogc; yet it's CTFE-only
73 // import it here, as druntime has no `@nogc` on it (for a reason)
74 private extern(C) void qsort (void* base, size_t nmemb, size_t size, int function(in void*, in void*) compar);
75 
76 
// Describes a caller-provided block of memory: a pointer to the buffer and
// the buffer's length in bytes.
// NOTE(review): how the decoder consumes this is not visible in this part of
// the file -- confirm against the allocator implementation.
public struct stb_vorbis_alloc 
{
    ubyte* alloc_buffer;              // start of the buffer
    int alloc_buffer_length_in_bytes; // size of `alloc_buffer`, in bytes
}
82 
83 ////////   ERROR CODES
84 
// Status codes recorded by the decoder (see `error()`, which stores one of
// these in the decoder's `error` field). Values are grouped by category and
// each group starts at an explicit number, so members without an explicit
// value simply continue counting from the previous one.
public enum STBVorbisError {
  no_error,              // implicit value 0

  need_more_data = 1,    // not a real error

  invalid_api_mixing,    // can't mix API modes
  outofmem,              // not enough memory
  feature_not_supported, // uses floor 0
  too_many_channels,     // STB_VORBIS_MAX_CHANNELS is too small
  file_open_failure,     // fopen() failed
  seek_without_length,   // can't seek in unknown-length file

  unexpected_eof = 10,   // file is truncated?
  seek_invalid,          // seek past EOF

  // decoding errors (corrupt/invalid stream) -- you probably
  // don't care about the exact details of these

  // vorbis errors:
  invalid_setup = 20,
  invalid_stream,

  // ogg errors:
  missing_capture_pattern = 30,
  invalid_stream_structure_version,
  continued_packet_flag_invalid,
  incorrect_stream_serial_number,
  invalid_first_page,
  bad_packet_type,
  cant_find_last_page,
  seek_failed,
}
117 //
118 //  HEADER ENDS HERE
119 //
120 //////////////////////////////////////////////////////////////////////////////
121 
122 
123 // global configuration settings (e.g. set these in the project/makefile),
124 // or just set them in this file at the top (although ideally the first few
125 // should be visible when the header file is compiled too, although it's not
126 // crucial)
127 
128 // STB_VORBIS_NO_INTEGER_CONVERSION
129 //     does not compile the code for converting audio sample data from
130 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
131 //version = STB_VORBIS_NO_INTEGER_CONVERSION;
132 
133 // STB_VORBIS_NO_FAST_SCALED_FLOAT
134 //      does not use a fast float-to-int trick to accelerate float-to-int on
135 //      most platforms which requires endianness be defined correctly.
136 //version = STB_VORBIS_NO_FAST_SCALED_FLOAT;
137 
138 // STB_VORBIS_MAX_CHANNELS [number]
139 //     globally define this to the maximum number of channels you need.
140 //     The spec does not put a restriction on channels except that
141 //     the count is stored in a byte, so 255 is the hard limit.
142 //     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB. Plus any other memory usage
144 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
145 //     6 (5.1 audio), or 2 (stereo only).
146 enum STB_VORBIS_MAX_CHANNELS = 16; // enough for anyone?
147 
148 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
149 //     after a flush_pushdata(), stb_vorbis begins scanning for the
150 //     next valid page, without backtracking. when it finds something
151 //     that looks like a page, it streams through it and verifies its
152 //     CRC32. Should that validation fail, it keeps scanning. But it's
153 //     possible that _while_ streaming through to check the CRC32 of
154 //     one candidate page, it sees another candidate page. This #define
155 //     determines how many "overlapping" candidate pages it can search
156 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
157 //     garbage pages could be as big as 64KB, but probably average ~16KB.
158 //     So don't hose ourselves by scanning an apparent 64KB page and
159 //     missing a ton of real ones in the interim; so minimum of 2
160 enum STB_VORBIS_PUSHDATA_CRC_COUNT = 4;
161 
162 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
163 //     sets the log size of the huffman-acceleration table.  Maximum
164 //     supported value is 24. with larger numbers, more decodings are O(1),
165 //     but the table size is larger so worse cache missing, so you'll have
166 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
167 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
168 
169 // STB_VORBIS_FAST_BINARY_LENGTH [number]
170 //     sets the log size of the binary-search acceleration table. this
171 //     is used in similar fashion to the fast-huffman size to set initial
172 //     parameters for the binary search
173 
174 // STB_VORBIS_FAST_HUFFMAN_INT
175 //     The fast huffman tables are much more efficient if they can be
176 //     stored as 16-bit results instead of 32-bit results. This restricts
177 //     the codebooks to having only 65535 possible outcomes, though.
178 //     (At least, accelerated by the huffman table.)
179 //version = STB_VORBIS_FAST_HUFFMAN_INT;
180 version(STB_VORBIS_FAST_HUFFMAN_INT) {} else version = STB_VORBIS_FAST_HUFFMAN_SHORT;
181 
182 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
183 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
184 //     back on binary searching for the correct one. This requires storing
185 //     extra tables with the huffman codes in sorted order. Defining this
186 //     symbol trades off space for speed by forcing a linear search in the
187 //     non-fast case, except for "sparse" codebooks.
188 //version = STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH;
189 
190 // STB_VORBIS_DIVIDES_IN_RESIDUE
191 //     stb_vorbis precomputes the result of the scalar residue decoding
192 //     that would otherwise require a divide per chunk. you can trade off
193 //     space for time by defining this symbol.
194 //version = STB_VORBIS_DIVIDES_IN_RESIDUE;
195 
196 // STB_VORBIS_DIVIDES_IN_CODEBOOK
197 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
198 //     stored, or with all elements being chosen from a small range of values,
199 //     and all values possible in all elements. By default, stb_vorbis expands
200 //     this latter kind out to look like the former kind for ease of decoding,
201 //     because otherwise an integer divide-per-vector-element is required to
202 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
203 //     trade off storage for speed.
204 version = STB_VORBIS_DIVIDES_IN_CODEBOOK; // BUG: if not defined, will dirty the signal with optimizations on.
205 
206 version(STB_VORBIS_CODEBOOK_SHORTS) static assert(0, "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats");
207 
208 // STB_VORBIS_DIVIDE_TABLE
209 //     this replaces small integer divides in the floor decode loop with
210 //     table lookups. made less than 1% difference, so disabled by default.
211 //version = STB_VORBIS_DIVIDE_TABLE;
212 
213 // STB_VORBIS_NO_DEFER_FLOOR
214 //     Normally we only decode the floor without synthesizing the actual
215 //     full curve. We can instead synthesize the curve immediately. This
216 //     requires more memory and is very likely slower, so I don't think
217 //     you'd ever want to do it except for debugging.
218 //version = STB_VORBIS_NO_DEFER_FLOOR;
219 //version(STB_VORBIS_CODEBOOK_FLOATS) static assert(0);
220 
221 
222 // ////////////////////////////////////////////////////////////////////////// //
223 private:
224 static assert(STB_VORBIS_MAX_CHANNELS <= 256, "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range");
225 static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24, "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range");
226 
227 enum MAX_BLOCKSIZE_LOG = 13; // from specification
228 enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG);
229 
230 
231 alias codetype = float;
232 
233 // @NOTE
234 //
235 // Some arrays below are tagged "//varies", which means it's actually
236 // a variable-sized piece of data, but rather than malloc I assume it's
237 // small enough it's better to just allocate it all together with the
238 // main thing
239 //
240 // Most of the variables are specified with the smallest size I could pack
241 // them into. It might give better performance to make them all full-sized
242 // integers. It should be safe to freely rearrange the structures or change
243 // the sizes larger--nothing relies on silently truncating etc., nor the
244 // order of variables.
245 
246 enum FAST_HUFFMAN_TABLE_SIZE = (1<<STB_VORBIS_FAST_HUFFMAN_LENGTH);
247 enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE-1);
248 
// One codebook: its huffman code tables plus the parameters of its value
// lookup. The huffman-related members are filled in by the setup helpers in
// this file (add_entry, compute_accelerated_huffman, compute_sorted_huffman).
struct Codebook {
  int dimensions, entries;
  ubyte* codeword_lengths; // per-entry code length; rewritten per stored code when `sparse`
  float minimum_value;
  float delta_value;
  ubyte value_bits;
  ubyte lookup_type;
  ubyte sequence_p;
  ubyte sparse;            // non-zero: add_entry stores codes by running count, not by symbol
  uint lookup_values;
  codetype* multiplicands;
  uint *codewords;         // huffman codes written by add_entry
  // direct-lookup table built by compute_accelerated_huffman; -1 marks "no entry".
  // element width depends on the STB_VORBIS_FAST_HUFFMAN_* version setting
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    short[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  } else {
    int[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  }
  uint* sorted_codewords;  // bit-reversed codes in ascending order (compute_sorted_huffman)
  int* sorted_values;      // entry/symbol belonging to each slot of sorted_codewords
  int sorted_entries;      // number of codes held in sorted_codewords
}
270 
// Parameters of a type-0 floor. The limitations listed at the top of this
// file state that floor 0 is not supported by the decoder.
struct Floor0 {
  ubyte order;
  ushort rate;
  ushort bark_map_size;
  ubyte amplitude_bits;
  ubyte amplitude_offset;
  ubyte number_of_books;
  ubyte[16] book_list; // varies
}
280 
// Parameters of a type-1 floor. Arrays tagged "varies" are fixed-capacity
// storage for variable-length data (see the @NOTE earlier in this file).
// NOTE(review): the code that fills these members is outside this part of
// the file; member meanings should be confirmed against the setup parser.
struct Floor1 {
  ubyte partitions;
  ubyte[32] partition_class_list; // varies
  ubyte[16] class_dimensions; // varies
  ubyte[16] class_subclasses; // varies
  ubyte[16] class_masterbooks; // varies
  short[8][16] subclass_books; // varies
  ushort[31*8+2] Xlist; // varies
  ubyte[31*8+2] sorted_order;
  ubyte[2][31*8+2] neighbors;
  ubyte floor1_multiplier;
  ubyte rangebits;
  int values;
}
295 
// Storage for one floor configuration: the two floor types overlay the same
// memory, so only one of the members is meaningful at a time.
union Floor {
  Floor0 floor0;
  Floor1 floor1;
}
300 
// Configuration of one residue.
// NOTE(review): members are populated by setup code outside this part of the
// file; confirm their exact meaning there.
struct Residue {
  uint begin, end;
  uint part_size;
  ubyte classifications;
  ubyte classbook;
  ubyte** classdata;
  //int16 (*residue_books)[8];
  short[8]* residue_books; // pointer to rows of 8 shorts (D spelling of the C declaration above)
}
310 
// Per-channel entry of a Mapping (see Mapping.chan).
struct MappingChannel {
  ubyte magnitude;
  ubyte angle;
  ubyte mux;
}
316 
// Configuration of one mapping. Arrays tagged "varies" are fixed-capacity
// storage for variable-length data (see the @NOTE earlier in this file).
struct Mapping {
  ushort coupling_steps;
  MappingChannel* chan; // per-channel entries
  ubyte submaps;
  ubyte[15] submap_floor; // varies
  ubyte[15] submap_residue; // varies
}
324 
// Configuration of one mode.
struct Mode {
  ubyte blockflag;
  ubyte mapping;
  ushort windowtype;
  ushort transformtype;
}
331 
// Bookkeeping for checking the CRC of one candidate page while scanning
// (see the STB_VORBIS_PUSHDATA_CRC_COUNT notes earlier in this file).
struct CRCscan {
  uint goal_crc;   // expected crc if match
  int bytes_left;  // bytes left in packet
  uint crc_so_far; // running crc
  int bytes_done;  // bytes processed in _current_ chunk
  uint sample_loc; // granule pos encoded in page
}
339 
// Byte range of a page together with a sample position associated with it.
// start_page_no_capturepattern fills one of these for the first decoded page.
struct ProbedPage {
  uint page_start, page_end; // file offsets: first byte of the page / one past its end
  uint last_decoded_sample;  // set from the low 32 bits of the page's granule position
}
344 
// Record `e` as the decoder's current error.
// Always returns 0, so callers can write `return error(f, ...)` from
// functions that report failure with a zero/false result.
private int error (VorbisDecoder f, STBVorbisError e) {
  f.error = e;
  immutable bool worthBreakingOn = !f.eof && e != STBVorbisError.need_more_data;
  if (worthBreakingOn) {
    f.error = e; // breakpoint for debugging
  }
  return 0;
}
352 
353 // these functions are used for allocating temporary memory
354 // while decoding. if you can afford the stack space, use
355 // alloca(); otherwise, provide a temp buffer and it will
356 // allocate out of those.
// Ask the decoder's allocator for its current temp-allocation mark, to be
// handed back later to temp_alloc_restore.
uint temp_alloc_save (VorbisDecoder f) nothrow @nogc
{
    auto mark = f.alloc.tempSave(f);
    return mark;
}
361 
// Hand a mark `p` (as returned by temp_alloc_save) back to the decoder's
// allocator via `tempRestore`.
// NOTE(review): presumably this releases every temp allocation made since
// the mark was taken -- confirm against the allocator implementation.
void temp_alloc_restore (VorbisDecoder f, uint p) nothrow @nogc 
{ 
    f.alloc.tempRestore(p, f); 
}
366 
// Deliberately does nothing: individual temp blocks are not released here.
void temp_free (VorbisDecoder f, void* p) nothrow @nogc 
{
}
370 
// Compile-time string for the expression `count * (pointer size + size)`,
// i.e. room for `count` pointers followed by `count` blocks of `size` bytes
// (the layout make_block_array produces).
// NOTE(review): `cmacroFixVars` is defined elsewhere; presumably it
// substitutes the ${...} placeholders -- confirm.
enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
372 
// Compile-time string for an `alloca(size)` call expression.
// NOTE(review): presumably meant to be mixed in at the use site so the
// alloca happens in the caller's frame -- confirm at the call sites.
template temp_alloc(string size) {
  enum temp_alloc = q{alloca(${size})}.cmacroFixVars!("size")(size);
}
376 
// Compile-time string for a `make_block_array(...)` call whose memory
// argument is a temp_alloc of array_size_required!(count, size) bytes.
template temp_block_array(string count, string size) {
  enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
    .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
}
381 
382 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
// Carve `mem` into a pointer table followed by `count` payload blocks.
// The first `count` pointer-sized slots of `mem` become the table; slot i is
// set to the address of the i-th `size`-byte block placed right after the
// table. Returns the table (same address as `mem`).
private void* make_block_array (void* mem, int count, int size) {
  void** table = cast(void**)mem;
  char* cursor = cast(char*)(table+count); // payload starts right after the table
  int filled = 0;
  while (filled < count) {
    table[filled++] = cursor;
    cursor += size;
  }
  return table;
}
392 
// Allocate zero-filled storage for `sz` elements of type T from the
// decoder's allocator.
// Returns null when the allocator fails or when the byte size (element count
// times T.sizeof, plus padding) does not fit in a uint.
private T* setup_malloc(T) (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset;

  enum uint PAD = 8; // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte

  // the element count comes from stream data; refuse sizes that would wrap
  // around instead of silently allocating a too-small block
  if (sz > (uint.max-PAD)/T.sizeof) return null;
  sz *= T.sizeof;

  auto res = f.alloc.alloc(sz+PAD, f);
  if (res !is null) memset(res, 0, sz+PAD);
  return cast(T*)res;
}
403 
// Return a block obtained from setup_malloc to the decoder's allocator.
// A null pointer is ignored.
private void setup_free (VorbisDecoder f, void* p)
{
  if (p is null) return;
  f.alloc.free(p, f);
}
408 
// Allocate `sz` zero-filled bytes of temporary setup memory from the
// decoder's allocator.
// Returns null when the allocator fails or when `sz` plus padding does not
// fit in a uint.
private void* setup_temp_malloc (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset;

  enum uint PAD = 8; // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte

  // refuse sizes whose padded length would wrap around
  if (sz > uint.max-PAD) return null;

  auto res = f.alloc.allocTemp(sz+PAD, f);
  if (res !is null) memset(res, 0, sz+PAD);
  return res;
}
417 
// Release a block obtained from setup_temp_malloc; `sz` is the size that was
// requested (a size of 0 is reported to the allocator as 1). Null is ignored.
private void setup_temp_free (VorbisDecoder f, void* p, uint sz)
{
  if (p is null) return;
  immutable uint payload = (sz != 0 ? sz : 1);
  f.alloc.freeTemp(p, payload+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
}
422 
// Lookup table for the byte-at-a-time CRC used by crc32_update: entry i is
// the result of pushing byte i through 8 shift/xor rounds with the polynomial.
immutable uint[256] crc_table;
shared static this ()
{
  enum uint POLY = 0x04c11db7; // from spec
  enum uint TOP_BIT = 0x8000_0000;
  for (uint index = 0; index < 256; ++index) {
    uint reg = index<<24;
    for (int round = 0; round < 8; ++round) {
      // shift left by one; fold in the polynomial when a 1 bit falls off the top
      immutable bool carry = (reg&TOP_BIT) != 0;
      reg <<= 1;
      if (carry) reg ^= POLY;
    }
    crc_table[index] = reg;
  }
}
434 
// Fold one more byte `b` into the running checksum `crc` and return the new
// checksum, using crc_table.
uint crc32_update (uint crc, ubyte b) {
  immutable uint slot = b^(crc>>24); // top byte of the running crc combined with the input byte
  return crc_table[slot]^(crc<<8);
}
438 
439 // used in setup, and for huffman that doesn't go fast path
// Reverse the order of all 32 bits of `n` (bit 0 <-> bit 31, ...).
// Done by swapping neighbours at doubling widths: 1, 2, 4, 8, then 16 bits.
private uint bit_reverse (uint n) {
  uint v = n;
  v = ((v>>1)&0x5555_5555)|((v<<1)&0xAAAA_AAAA);
  v = ((v>>2)&0x3333_3333)|((v<<2)&0xCCCC_CCCC);
  v = ((v>>4)&0x0F0F_0F0F)|((v<<4)&0xF0F0_F0F0);
  v = ((v>>8)&0x00FF_00FF)|((v<<8)&0xFF00_FF00);
  return (v<<16)|(v>>16);
}
447 
// Returns x multiplied by itself.
private float square (float x) {
  return x*x;
}
451 
452 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
453 // as required by the specification. fast(?) implementation from stb.h
454 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
// log2_4[v] is the number of bits needed to represent v, for v in 0..15.
immutable byte[16] log2_4 = [0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4];

// Number of bits needed to represent `n` (0 -> 0, 1 -> 1, 2 -> 2, 4 -> 3, ...).
// Negative input yields 0.
private int ilog (int n) {
  if (n < 0) return 0; // signed n returns 0
  // pick a shift (a multiple of 5) that brings n down into the 0..15 range
  // of the table, then add the shift back onto the table result
  int shift;
  if (n < (1<<4)) shift = 0;
  else if (n < (1<<9)) shift = 5;
  else if (n < (1<<14)) shift = 10;
  else if (n < (1<<19)) shift = 15;
  else if (n < (1<<24)) shift = 20;
  else if (n < (1<<29)) shift = 25;
  else shift = 30;
  return shift+log2_4[n>>shift];
}
471 
472 
473 // code length assigned to a value with no huffman encoding
474 enum NO_CODE = 255;
475 
476 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
477 //
478 // these functions are only called at setup, and only a few times per file
// Decode a packed 32-bit value into a float:
// bits 0..20 are the mantissa, bits 21..30 the exponent (biased by 788) and
// bit 31 the sign; the result is +/- mantissa * 2^(exponent-788).
private float float32_unpack (uint x) {
  import core.math : ldexp;
  // from the specification
  immutable uint mantissaBits = x&0x1fffff;
  immutable uint exponentBits = (x&0x7fe00000)>>21;
  immutable bool negative = (x&0x80000000) != 0;
  double magnitude = cast(double)mantissaBits;
  if (negative) magnitude = -magnitude;
  return cast(float)ldexp(cast(float)magnitude, cast(int)exponentBits-788);
}
488 
489 // zlib & jpeg huffman tables assume that the output symbols
490 // can either be arbitrarily arranged, or have monotonically
491 // increasing frequencies--they rely on the lengths being sorted;
492 // this makes for a very simple generation algorithm.
493 // vorbis allows a huffman table with non-sorted lengths. This
494 // requires a more sophisticated construction, since symbols in
495 // order do not map to huffman codes "in order".
// Store one huffman code in codebook `c`.
// Dense codebooks index the code by `symbol`. Sparse codebooks append it at
// position `count` and also record its length and its symbol (in `values`).
private void add_entry (Codebook* c, uint huff_code, int symbol, int count, ubyte len, uint* values) {
  if (c.sparse) {
    c.codewords[count] = huff_code;
    c.codeword_lengths[count] = len;
    values[count] = symbol;
    return;
  }
  c.codewords[symbol] = huff_code;
}
505 
// Assign huffman codes to the `n` entries whose code lengths are given in
// `len` (NO_CODE marks an unused entry), storing them through add_entry.
//
// Returns true on success (including the case where no entry is used) and
// false when the lengths cannot form a valid code: either no free leaf is
// left for an entry, or a length is too large for the `available` table.
private int compute_codewords (Codebook* c, ubyte* len, int n, uint* values) {
  int i, k, m = 0;
  // available[d] holds the next free code at depth d, or 0 for 'none';
  // static arrays of uint start out zeroed
  uint[32] available = 0;

  // find the first entry
  for (k = 0; k < n; ++k) if (len[k] < NO_CODE) break;
  if (k == n) { assert(c.sorted_entries == 0); return true; }
  // a length that does not fit the table would index past its end below;
  // treat it as an invalid codebook instead
  if (len[k] >= available.length) return false;
  // add to the list
  add_entry(c, 0, k, m++, len[k], values);
  // add all available leaves
  for (i = 1; i <= len[k]; ++i) available[i] = 1U<<(32-i);
  // note that the above code treats the first case specially,
  // but it's really the same as the following code, so they
  // could probably be combined (except the initial code is 0,
  // and I use 0 in available[] to mean 'empty')
  for (i = k+1; i < n; ++i) {
    uint res;
    int z = len[i];
    if (z == NO_CODE) continue;
    // same guard as for the first entry: never index past `available`
    if (z >= available.length) return false;
    // find lowest available leaf (should always be earliest,
    // which is what the specification calls for)
    // note that this property, and the fact we can never have
    // more than one free leaf at a given level, isn't totally
    // trivial to prove, but it seems true and the assert never
    // fires, so!
    while (z > 0 && !available[z]) --z;
    if (z == 0) return false;
    res = available[z];
    assert(z >= 0 && z < 32);
    available[z] = 0;
    ubyte xxx = len[i];
    add_entry(c,
      bit_reverse(res),
      i,
      m++,
      xxx, // dmd bug: it reads 4 bytes without temp
      values);
    // propagate availability up the tree
    if (z != len[i]) {
      assert(len[i] >= 0 && len[i] < 32);
      for (int y = len[i]; y > z; --y) {
        assert(available[y] == 0);
        available[y] = res+(1<<(32-y));
      }
    }
  }
  return true;
}
557 
558 // accelerated huffman table allows fast O(1) match of all symbols
559 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
// Fill c.fast_huffman so that every code of length
// <= STB_VORBIS_FAST_HUFFMAN_LENGTH can be resolved with one table lookup.
// Slots not covered by such a code are left at -1.
private void compute_accelerated_huffman (Codebook* c) {
  alias Slot = typeof(c.fast_huffman[0]);
  c.fast_huffman.ptr[0..FAST_HUFFMAN_TABLE_SIZE] = -1;
  auto limit = (c.sparse ? c.sorted_entries : c.entries);
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    if (limit > 32767) limit = 32767; // largest possible value we can encode!
  }
  foreach (uint i; 0..limit) {
    immutable ubyte bits = c.codeword_lengths[i];
    if (bits > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue;
    uint slot = (c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]);
    // the code occupies the low `bits` bits of the index; every combination
    // of the remaining higher bits maps to the same entry
    immutable uint stride = 1<<bits;
    for (; slot < FAST_HUFFMAN_TABLE_SIZE; slot += stride) {
      c.fast_huffman.ptr[slot] = cast(Slot)i; //k8
    }
  }
}
578 
// qsort comparison callback: orders two uint values ascending.
// Returns -1, 0 or 1.
extern(C) int uint32_compare (const void* p, const void* q) {
  immutable uint lhs = *cast(const(uint)*)p;
  immutable uint rhs = *cast(const(uint)*)q;
  if (lhs < rhs) return -1;
  if (lhs > rhs) return 1;
  return 0;
}
584 
// Decide whether a code of length `len` belongs in the sorted (binary
// search) table: always for sparse codebooks; otherwise only for used codes
// that are too long for the fast huffman table.
private int include_in_sort (Codebook* c, uint len) {
  if (c.sparse) { assert(len != NO_CODE); return true; }
  return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH);
}
591 
592 // if the fast table above doesn't work, we want to binary
593 // search them... need to reverse the bits
// Build the binary-search tables of codebook `c`:
//   * c.sorted_codewords -- bit-reversed codes in ascending order, followed
//     by a 0xffffffff sentinel
//   * c.sorted_values    -- for each sorted slot, the entry it decodes to
// For sparse codebooks c.codeword_lengths is rewritten in sorted order too.
// `lengths` are the per-symbol code lengths; `values` maps stored sparse
// codes back to their symbols.
private void compute_sorted_huffman (Codebook* c, ubyte* lengths, uint* values) {
  immutable bool isSparse = (c.sparse != 0);

  // collect the codes to sort
  // OPTIMIZATION: dense codebooks skip the short ones, since they'll be caught by FAST_HUFFMAN.
  if (isSparse) {
    foreach (uint idx; 0..c.sorted_entries) c.sorted_codewords[idx] = bit_reverse(c.codewords[idx]);
  } else {
    int filled = 0;
    foreach (uint idx; 0..c.entries) {
      if (!include_in_sort(c, lengths[idx])) continue;
      c.sorted_codewords[filled] = bit_reverse(c.codewords[idx]);
      ++filled;
    }
    assert(filled == c.sorted_entries);
  }

  qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
  c.sorted_codewords[c.sorted_entries] = 0xffffffff;

  // for each original entry, binary-search its slot in the sorted list and
  // record which entry that slot stands for
  auto total = (isSparse ? c.sorted_entries : c.entries);
  foreach (uint idx; 0..total) {
    auto huffLen = (isSparse ? lengths[values[idx]] : lengths[idx]);
    if (!include_in_sort(c, huffLen)) continue;

    immutable uint code = bit_reverse(c.codewords[idx]);
    int lo = 0;
    int span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int mid = lo+(span>>1);
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= (span>>1);
      } else {
        span >>= 1;
      }
    }
    assert(c.sorted_codewords[lo] == code);

    if (isSparse) {
      c.sorted_values[lo] = values[idx];
      c.codeword_lengths[lo] = huffLen;
    } else {
      c.sorted_values[lo] = idx;
    }
  }
}
641 
642 // only run while parsing the header (3 times)
// Returns non-zero when the 6 bytes at `data` spell "vorbis".
private int vorbis_validate (const(void)* data) {
  static immutable char[6] magic = "vorbis";
  auto bytes = cast(const(char)*)data;
  foreach (idx, expected; magic) {
    if (bytes[idx] != expected) return false;
  }
  return true;
}
647 
648 // called from setup only, once per code book
649 // (formula implied by specification)
// Largest integer whose `dim`-th power does not exceed `entries`.
// The value is estimated in floating point and then corrected upward by one
// when the estimate turned out too low; the asserts check the final result.
private int lookup1_values (int entries, int dim) {
  import core.stdc.math : lrintf;
  import std.math : floor, exp, pow, log;
  // first guess: floor(entries^(1/dim)), computed as exp(log(entries)/dim)
  int root = cast(int)lrintf(floor(exp(cast(float)log(cast(float)entries)/dim)));
  // rounding can leave the guess one short; bump it if root+1 still fits
  immutable bool nextStillFits = lrintf(floor(pow(cast(float)root+1, dim))) <= entries; // (int) cast for MinGW warning; floor() to avoid _ftol() when non-CRT
  if (nextStillFits) ++root;
  assert(pow(cast(float)root+1, dim) > entries);
  assert(lrintf(floor(pow(cast(float)root, dim))) <= entries); // (int), floor() as above
  return root;
}
659 
660 // called twice per file
// Fill the three trigonometric tables for a block of size `n`:
//   A and B each receive n/4 (cos, sin)-style pairs (2*(n>>2) floats),
//   C receives n/8 pairs (2*(n>>3) floats).
// The B values are additionally scaled by 0.5.
private void compute_twiddle_factors (int n, float* A, float* B, float* C) {
  import std.math : cos, sin, PI;
  immutable int quarter = n>>2;
  immutable int eighth = n>>3;
  foreach (int k; 0..quarter) {
    immutable int k2 = k*2; // index of the pair's first element
    A[k2  ] = cast(float) cos(4*k*PI/n);
    A[k2+1] = cast(float)-sin(4*k*PI/n);
    B[k2  ] = cast(float) cos((k2+1)*PI/n/2)*0.5f;
    B[k2+1] = cast(float) sin((k2+1)*PI/n/2)*0.5f;
  }
  foreach (int k; 0..eighth) {
    immutable int k2 = k*2;
    C[k2  ] = cast(float) cos(2*(k2+1)*PI/n);
    C[k2+1] = cast(float)-sin(2*(k2+1)*PI/n);
  }
}
676 
// Write the first n/2 samples of the window for block size `n` into
// `window`: sin(pi/2 * sin^2((i+0.5)/(n/2) * pi/2)) for i in 0 .. n/2-1.
private void compute_window (int n, float* window) {
  import std.math : sin, PI;
  immutable int half = n>>1;
  for (int i = 0; i < half; ++i) {
    window[i] = cast(float)sin(0.5*PI*square(cast(float)sin((i-0+0.5)/half*0.5*PI)));
  }
}
682 
// Fill `rev` with n/8 entries: entry i is the bit-reversal of i, shifted
// right by (32 - (ilog(n)-1) + 3) and then left by 2, stored as a ushort.
private void compute_bitreverse (int n, ushort* rev) {
  immutable int ld = ilog(n)-1; // ilog is off-by-one from normal definitions
  immutable int shift = 32-ld+3;
  immutable int count = n>>3;
  for (int i = 0; i < count; ++i) {
    rev[i] = cast(ushort)((bit_reverse(i)>>shift)<<2); //k8
  }
}
688 
// Allocate and compute the per-blocksize tables in slot `b` of the decoder
// for block size `n`: twiddle factors (A, B, C), the window and the
// bit-reverse table.
// Returns true on success; on allocation failure records
// STBVorbisError.outofmem and returns 0.
private int init_blocksize (VorbisDecoder f, int b, int n) {
  immutable int half = n>>1;
  immutable int quarter = n>>2;
  immutable int eighth = n>>3;

  // twiddle factors: all three tables are requested before any is checked
  f.A[b] = setup_malloc!float(f, half);
  f.B[b] = setup_malloc!float(f, half);
  f.C[b] = setup_malloc!float(f, quarter);
  immutable bool twiddleOk = f.A[b] !is null && f.B[b] !is null && f.C[b] !is null;
  if (!twiddleOk) return error(f, STBVorbisError.outofmem);
  compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);

  // window
  f.window[b] = setup_malloc!float(f, half);
  if (f.window[b] is null) return error(f, STBVorbisError.outofmem);
  compute_window(n, f.window[b]);

  // bit-reverse table
  f.bit_reverse[b] = setup_malloc!ushort(f, eighth);
  if (f.bit_reverse[b] is null) return error(f, STBVorbisError.outofmem);
  compute_bitreverse(n, f.bit_reverse[b]);

  return true;
}
704 
// Among x[0 .. n-1], find the index of the largest value below x[n] (stored
// to *plow) and of the smallest value above x[n] (stored to *phigh).
// An output is left untouched when no such element exists.
private void neighbors (ushort* x, int n, ushort* plow, ushort* phigh) {
  assert(n >= 0 && n <= ushort.max);
  int bestBelow = -1;
  int bestAbove = 65536;
  immutable ushort limit = cast(ushort)n;
  for (ushort i = 0; i < limit; ++i) {
    immutable ushort candidate = x[i];
    if (candidate > bestBelow && candidate < x[n]) {
      *plow = i;
      bestBelow = candidate;
    }
    if (candidate < bestAbove && candidate > x[n]) {
      *phigh = i;
      bestAbove = candidate;
    }
  }
}
714 
715 // this has been repurposed so y is now the original index instead of y
struct Point {
  // x is the sort key used by point_compare; per the note above, y carries
  // the element's original index
  ushort x, y;
}
719 
// qsort comparison callback: orders two Point values ascending by their
// `x` member only. Returns -1, 0 or 1.
extern(C) int point_compare (const void *p, const void *q) {
  immutable ushort lhs = (cast(const(Point)*)p).x;
  immutable ushort rhs = (cast(const(Point)*)q).x;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
725 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
726 
727 // ///////////////////////////////////////////////////////////////////// //
// Read one byte from the decoder's input.
// Returns 0 (and sets f.eof on a failed read) when no byte is available,
// including when f.eof was already set on entry.
private ubyte get8 (VorbisDecoder f) {
  // must start at 0: when f.eof is already set nothing below assigns `b`,
  // so a `void` initializer would hand back an indeterminate value
  ubyte b = 0;
  if (!f.eof) {
    if (f.rawRead((&b)[0..1]) != 1) { f.eof = true; b = 0; }
  }
  return b;
}
735 
// Read a 32-bit little-endian value from the decoder's input.
// Returns 0 when f.eof is already set; on little-endian targets a failed
// read sets f.eof and also yields 0.
private uint get32 (VorbisDecoder f) {
  if (f.eof) return 0;
  uint value = 0;
  version(LittleEndian) {
    // host byte order matches the stream: read straight into the variable
    if (f.rawRead((&value)[0..1]) != value.sizeof) { f.eof = true; value = 0; }
  } else {
    // assemble byte by byte, least significant byte first
    for (int shift = 0; shift < 32; shift += 8) {
      value |= cast(uint)get8(f)<<shift;
    }
  }
  return value;
}
750 
// Read exactly `n` bytes into `data`.
// Returns false if f.eof is set, `n` is negative, or the read comes up
// short (which also sets f.eof). A zero-length read succeeds without I/O.
private bool getn (VorbisDecoder f, void* data, int n) {
  if (f.eof || n < 0) return false;
  if (n > 0) {
    auto got = f.rawRead(data[0..n]);
    if (got != n) { f.eof = true; return false; }
  }
  return true;
}
757 
// Advance the input by `n` bytes; does nothing when f.eof is set or `n` is
// not positive.
private void skip (VorbisDecoder f, int n) {
  immutable bool nothingToDo = f.eof || n <= 0;
  if (!nothingToDo) f.rawSkip(n);
}
762 
// Clear f.eof and seek the input to absolute offset `loc`.
// Offsets of 0x80000000 and above are not seeked to; f.eof is set instead.
private void set_file_offset (VorbisDecoder f, uint loc) {
  /+if (f.push_mode) return;+/
  f.eof = false;
  if (loc < 0x80000000) {
    f.rawSeek(loc);
    return;
  }
  f.eof = true;
}
769 
770 
771 immutable char[4] ogg_page_header = "OggS"; //[ 0x4f, 0x67, 0x67, 0x53 ];
772 
// Read the next 4 bytes and report whether they are the page marker "OggS".
// Returns false when the bytes cannot be read.
private bool capture_pattern (VorbisDecoder f) {
  char[4] magic = void; // fully overwritten by getn before it is inspected
  immutable bool readOk = getn(f, magic.ptr, 4);
  return readOk && magic == "OggS";
}
778 
779 enum PAGEFLAG_continued_packet = 1;
780 enum PAGEFLAG_first_page = 2;
781 enum PAGEFLAG_last_page = 4;
782 
// Parse a page header whose 4-byte capture pattern has already been
// consumed, and set up the decoder's per-page state (page_flag, last_page,
// segment table, known sample position, next_seg).
// Returns true on success; on failure records an error and returns 0.
private int start_page_no_capturepattern (VorbisDecoder f) {
  // stream structure version
  if (get8(f) != 0) return error(f, STBVorbisError.invalid_stream_structure_version);
  // header flag
  f.page_flag = get8(f);
  // absolute granule position, read as two 32-bit words; only the first
  // word is used for sample positions below
  immutable uint granuleLo = get32(f);
  immutable uint granuleHi = get32(f);
  // @TODO: validate granuleLo, granuleHi as valid positions?
  // stream serial number -- vorbis doesn't interleave, so discard
  get32(f);
  //if (f.serial != get32(f)) return error(f, STBVorbisError.incorrect_stream_serial_number);
  // page sequence number
  immutable uint pageSequence = get32(f);
  f.last_page = pageSequence;
  // CRC32
  get32(f);
  // page_segments
  f.segment_count = get8(f);
  if (!getn(f, f.segments.ptr, f.segment_count)) return error(f, STBVorbisError.unexpected_eof);

  // assume we _don't_ know the sample position of any segments
  f.end_seg_with_known_loc = -2;
  immutable bool granuleUnset = (granuleLo == ~0U && granuleHi == ~0U);
  if (!granuleUnset) {
    // determine which packet is the last one that will complete:
    // walk back to the last segment shorter than 255 bytes
    int lastEnding = f.segment_count-1;
    while (lastEnding >= 0 && f.segments.ptr[lastEnding] >= 255) --lastEnding;
    // 'lastEnding' is now the index of the _last_ segment of a packet that ends
    if (lastEnding >= 0) {
      f.end_seg_with_known_loc = lastEnding;
      f.known_loc_for_packet = granuleLo;
    }
  }

  if (f.first_decode) {
    // total page size = 27 fixed header bytes + segment table + payload
    int len = 0;
    foreach (int i; 0..f.segment_count) len += f.segments.ptr[i];
    len += 27+f.segment_count;
    ProbedPage first;
    first.page_start = f.first_audio_page_offset;
    first.page_end = first.page_start+len;
    first.last_decoded_sample = granuleLo;
    f.p_first = first;
  }

  f.next_seg = 0;
  return true;
}
830 
/// Reads a complete page header: the capture pattern first, then the rest.
/// Returns: non-zero on success; otherwise the result of `error()`.
private int start_page (VorbisDecoder f) {
  if (capture_pattern(f)) return start_page_no_capturepattern(f);
  return error(f, STBVorbisError.missing_capture_pattern);
}
835 
/// Prepares the decoder for reading a new packet.
/// If no segment is pending (`next_seg == -1`) pages are read until one
/// supplies segments; such a page must not carry the continued-packet flag.
/// Returns: non-zero on success, zero/`error()` result on failure.
private int start_packet (VorbisDecoder f) {
  for (;;) {
    if (f.next_seg != -1) break;
    if (!start_page(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  // f.next_seg is now valid; reset the per-packet reading state
  f.bytes_in_seg = 0;
  f.packet_bytes = 0;
  f.valid_bits = 0;
  f.last_seg = false;
  return true;
}
848 
/// Like `start_packet`, but a clean end of stream exactly at a page boundary
/// is reported by returning false without recording an error.
/// Returns: non-zero when a packet is ready to be read.
private int maybe_start_packet (VorbisDecoder f) {
  // segments of the current page are still pending: no page header to read
  if (f.next_seg != -1) return start_packet(f);

  auto first = get8(f);
  if (f.eof) return false; // EOF at page boundary is not an error!
  if (0x4f != first) return error(f, STBVorbisError.missing_capture_pattern);
  // remaining bytes of the capture pattern; reading stops at the first mismatch
  static immutable int[3] rest = [0x67, 0x67, 0x53];
  foreach (immutable expected; rest) {
    if (expected != get8(f)) return error(f, STBVorbisError.missing_capture_pattern);
  }
  if (!start_page_no_capturepattern(f)) return false;
  if (f.page_flag&PAGEFLAG_continued_packet) {
    // set up enough state that we can read this packet if we want,
    // e.g. during recovery
    f.last_seg = false;
    f.bytes_in_seg = 0;
    return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  return start_packet(f);
}
868 
/// Advances to the next segment of the current packet.
/// Returns: the length of the new segment, or 0 when the packet has already
/// ended or the following page could not be started. Note that a genuine
/// zero-length segment also yields 0.
private int next_segment (VorbisDecoder f) {
  if (f.last_seg) return 0;
  if (f.next_seg == -1) {
    // current page is exhausted: the packet has to continue on the next page
    f.last_seg_which = f.segment_count-1; // in case start_page fails
    if (!start_page(f)) {
      f.last_seg = true;
      return 0;
    }
    if (!(f.page_flag&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  immutable seg_len = f.segments.ptr[f.next_seg];
  ++f.next_seg;
  // a segment shorter than 255 bytes terminates the packet
  if (seg_len < 255) {
    f.last_seg = true;
    f.last_seg_which = f.next_seg-1;
  }
  // -1 marks "no more segments on this page"
  if (f.next_seg >= f.segment_count) f.next_seg = -1;
  debug(stb_vorbis) assert(f.bytes_in_seg == 0);
  f.bytes_in_seg = seg_len;
  return seg_len;
}
886 
enum {
  EOP = -1,          // returned by the packet byte readers at end of packet
  INVALID_BITS = -1, // stored in `valid_bits` once a bit read has run off the packet
}
889 
/// Reads the next byte of the current packet without touching the bit
/// accumulator state. Returns `EOP` once the packet is exhausted.
private int get8_packet_raw (VorbisDecoder f) {
  // current segment drained: either the packet is over or we need the next segment
  if (f.bytes_in_seg == 0 && (f.last_seg || !next_segment(f))) return EOP;
  debug(stb_vorbis) assert(f.bytes_in_seg > 0);
  ++f.packet_bytes;
  --f.bytes_in_seg;
  return get8(f);
}
900 
/// Byte-level packet read: fetches one byte (or `EOP`) and then zeroes
/// `valid_bits`, dropping whatever bits were buffered in the accumulator.
private int get8_packet (VorbisDecoder f) {
  immutable int value = get8_packet_raw(f);
  f.valid_bits = 0;
  return value;
}
906 
/// Reads four packet bytes and assembles them least-significant byte first.
/// Returns `EOP` (as uint) as soon as any of the four reads hits end of packet.
private uint get32_packet (VorbisDecoder f) {
  uint result = get8_packet(f);
  if (result == EOP) return EOP;
  for (int shift = 8; shift <= 24; shift += 8) {
    uint next = get8_packet(f);
    if (next == EOP) return EOP;
    result += next<<shift;
  }
  return result;
}
918 
/// Discards the rest of the current packet by reading bytes until `EOP`.
private void flush_packet (VorbisDecoder f) {
  do {} while (get8_packet_raw(f) != EOP);
}
922 
923 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
924 // as the huffman decoder?
/// Extracts the next `n` bits of the packet, least-significant bit first.
/// Returns 0 when the packet ends early; in that case `valid_bits` is set to
/// `INVALID_BITS` and every later call keeps returning 0.
private uint get_bits_main (VorbisDecoder f, int n) {
  if (f.valid_bits < 0) return 0;
  if (f.valid_bits < n) {
    if (n > 24) {
      // the accumulator technique below would not work correctly in this case,
      // so read the low 24 bits first and the remaining bits second
      immutable uint low = get_bits_main(f, 24);
      immutable uint high = get_bits_main(f, n-24);
      return low+(high<<24);
    }
    if (f.valid_bits == 0) f.acc = 0;
    // refill a byte at a time; at least one byte is needed here
    do {
      immutable uint next = get8_packet_raw(f);
      if (next == EOP) {
        f.valid_bits = INVALID_BITS;
        return 0;
      }
      f.acc += next<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits < n);
  }
  if (f.valid_bits < 0) return 0;
  immutable uint bits = f.acc&((1<<n)-1);
  f.acc >>= n;
  f.valid_bits -= n;
  return bits;
}
952 
953 // chooses minimal possible integer type
/// Reads `n` bits and returns them as the narrowest unsigned type holding `n` bits.
/// NOTE(review): `get_bits_main` returns `uint`, so widths above 32 look like
/// they cannot deliver more than 32 meaningful bits -- confirm no caller uses them.
private auto get_bits(ubyte n) (VorbisDecoder f) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else alias Result = ulong; // the template constraint caps n at 64
  return cast(Result)get_bits_main(f, n);
}
961 
962 // chooses minimal possible integer type, assume no overflow
/// Reads `n` bits, adds `add`, and casts the sum to the narrowest unsigned type
/// holding `n` bits. The sum is assumed to fit; the cast truncates otherwise.
private auto get_bits_add_no(ubyte n) (VorbisDecoder f, ubyte add) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else alias Result = ulong; // the template constraint caps n at 64
  return cast(Result)(get_bits_main(f, n)+add);
}
970 
971 // @OPTIMIZE: primary accumulator for huffman
972 // expand the buffer to as many bits as possible without reading off end of packet
973 // it might be nice to allow f.valid_bits and f.acc to be stored in registers,
974 // e.g. cache them locally and decode locally
975 //private /*__forceinline*/ void prep_huffman (VorbisDecoder f)
// Statement block to be mixed in where a variable `f` (the decoder) is in scope.
// When 24 or fewer bits are buffered it appends whole bytes from the current
// packet to `f.acc` until more than 24 bits are valid. It stops early, leaving
// `valid_bits` as it is, when the packet has no more bytes.
// The local `phmz` is declared inside the outer `if` scope.
enum PrepHuffmanMixin = q{
  if (f.valid_bits <= 24) {
    if (f.valid_bits == 0) f.acc = 0;
    int phmz = void;
    do {
      if (f.last_seg && !f.bytes_in_seg) break;
      phmz = get8_packet_raw(f);
      if (phmz == EOP) break;
      f.acc += cast(uint)phmz<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits <= 24);
  }
};
989 
// numeric tags for the id, comment and setup packets
enum VorbisPacket : int {
  id = 1,
  comment = 3,
  setup = 5,
}
995 
/// Slow-path Huffman decode of one codeword from codebook `c`.
///
/// Uses a binary search over `sorted_codewords` or, for small codebooks that
/// have a plain `codewords` table, a linear scan.
///
/// Returns: the decoded index (the sorted index for sparse codebooks, the
/// symbol otherwise), or -1 on failure. When the matched codeword is longer
/// than the bits available, or nothing matches, `valid_bits` is reset to 0.
private int codebook_decode_scalar_raw (VorbisDecoder f, Codebook *c) {
  mixin(PrepHuffmanMixin);

  if (c.codewords is null && c.sorted_codewords is null) return -1;

  // cases to use binary search: sorted_codewords && !c.codewords
  //                             sorted_codewords && c.entries > 8
  bool binary_search;
  if (c.entries > 8) binary_search = (c.sorted_codewords !is null);
  else binary_search = !c.codewords;

  if (binary_search) {
    immutable uint code = bit_reverse(f.acc);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    // lo is now the sorted index
    if (!c.sparse) lo = c.sorted_values[lo];
    // lo is now sorted index if sparse, or symbol otherwise
    immutable int code_len = c.codeword_lengths[lo];
    if (f.valid_bits < code_len) {
      // the packet ended in the middle of this codeword
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= code_len;
    f.valid_bits -= code_len;
    return lo;
  }

  // if small, linear search
  debug(stb_vorbis) assert(!c.sparse);
  foreach (uint i; 0..c.entries) {
    immutable int code_len = c.codeword_lengths[i];
    if (code_len == NO_CODE) continue;
    if (c.codewords[i] != (f.acc&((1<<code_len)-1))) continue;
    // first matching entry decides the outcome
    if (f.valid_bits < code_len) {
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= code_len;
    f.valid_bits -= code_len;
    return i;
  }

  // no entry matched the buffered bits
  error(f, STBVorbisError.invalid_stream);
  f.valid_bits = 0;
  return -1;
}
1047 
1048 
// Builds the source text of the fast Huffman decode step.
//   var -- name of an int variable that receives the result
//   c   -- name of a `Codebook*` variable
// The generated code expects a decoder variable named `f` in scope. It refills
// the accumulator when fewer than STB_VORBIS_FAST_HUFFMAN_LENGTH bits are
// buffered, looks the low accumulator bits up in `fast_huffman`, and on a miss
// (negative table entry) calls codebook_decode_scalar_raw(). `var` ends up
// as -1 when the codeword needs more bits than are available.
// NOTE(review): `${...}` substitution is done by cmacroFixVars, defined elsewhere;
// `${__temp_prefix__}` is presumably expanded by it to a collision-safe prefix -- confirm.
template DECODE_RAW(string var, string c) {
  enum DECODE_RAW = q{
    if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) { mixin(PrepHuffmanMixin); }
    // fast huffman table lookup
    ${i} = f.acc&FAST_HUFFMAN_TABLE_MASK;
    ${i} = ${c}.fast_huffman.ptr[${i}];
    if (${i} >= 0) {
      auto ${__temp_prefix__}n = ${c}.codeword_lengths[${i}];
      f.acc >>= ${__temp_prefix__}n;
      f.valid_bits -= ${__temp_prefix__}n;
      if (f.valid_bits < 0) { f.valid_bits = 0; ${i} = -1; }
    } else {
      ${i} = codebook_decode_scalar_raw(f, ${c});
    }
  }.cmacroFixVars!("i", "c")(var, c);
}
1065 
// Source text of DECODE_RAW followed by a translation step: for a sparse
// codebook the decoded value is replaced by `sorted_values[value]`.
// NOTE(review): the translation also runs when the decode failed (value == -1),
// i.e. it indexes `sorted_values[-1]`; presumably the table is set up with a
// sentinel in that slot -- verify where `sorted_values` is allocated.
enum DECODE(string var, string c) = q{
  ${DECODE_RAW}
  if (${c}.sparse) ${var} = ${c}.sorted_values[${var}];
}.cmacroFixVars!("var", "c", "DECODE_RAW")(var, c, DECODE_RAW!(var, c));
1070 
1071 
// decode macro used for vector (VQ) lookups: the translated DECODE when the
// codebook code divides at decode time, the raw DECODE_RAW otherwise
version(STB_VORBIS_DIVIDES_IN_CODEBOOK) alias DECODE_VQ = DECODE;
else alias DECODE_VQ = DECODE_RAW;
1077 
1078 
1079 
1080 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1081 // where we avoid one addition
// expression text for element `off` of codebook `c`'s multiplicands table
template CODEBOOK_ELEMENT(string c, string off) {
  enum CODEBOOK_ELEMENT = "("~c~".multiplicands["~off~"])";
}
// in this configuration the "fast" form is the same expression
template CODEBOOK_ELEMENT_FAST(string c, string off) {
  enum CODEBOOK_ELEMENT_FAST = "("~c~".multiplicands["~off~"])";
}
// starting value of the running `last` accumulator
template CODEBOOK_ELEMENT_BASE(string c) {
  enum CODEBOOK_ELEMENT_BASE = "(0)";
}
1085 
1086 
/// Decodes one entry index from `c` for use in a vector context.
/// Returns: the index, or a negative value on failure. An error is recorded
/// unless the failure is a clean end of packet (last segment fully consumed).
private int codebook_decode_start (VorbisDecoder f, Codebook* c) {
  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) {
    error(f, STBVorbisError.invalid_stream);
    return -1;
  }
  int entry = -1;
  mixin(DECODE_VQ!("entry", "c"));
  debug(stb_vorbis) if (c.sparse) assert(entry < c.sorted_entries);
  if (entry >= 0) return entry;
  // negative result: a clean EOP is not an error, anything else is
  if (f.bytes_in_seg || !f.last_seg) error(f, STBVorbisError.invalid_stream);
  return entry;
}
1102 
/// Decodes one vector from codebook `c` and adds it onto `output[0 .. count]`,
/// where `count` is the smaller of `len` and `c.dimensions`.
/// Returns: true on success, false when no entry could be decoded.
private int codebook_decode (VorbisDecoder f, Codebook* c, float* output, int len) {
  immutable int entry = codebook_decode_start(f, c);
  if (entry < 0) return false;
  immutable int count = (len > c.dimensions ? c.dimensions : len);

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      // lookup type 1: element offsets are derived from the entry by repeated division
      float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
      int div = 1;
      for (int i = 0; i < count; ++i) {
        int off = (entry/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i] += val;
        if (c.sequence_p) last = val+c.minimum_value;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // first element of this entry in the multiplicands table
  immutable int base = entry*c.dimensions;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
  if (!c.sequence_p) {
    for (int i = 0; i < count; ++i) output[i] += mixin(CODEBOOK_ELEMENT_FAST!("c", "base+i"))+last;
    return true;
  }
  // sequence mode: each value builds on the previous one
  for (int i = 0; i < count; ++i) {
    float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "base+i"))+last;
    output[i] += val;
    last = val+c.minimum_value;
  }
  return true;
}
1138 
/// Decodes one vector from codebook `c` and adds its elements onto
/// `output[0]`, `output[step]`, `output[2*step]`, ... for at most
/// min(len, c.dimensions) elements.
/// Returns: true on success, false when no entry could be decoded.
private int codebook_decode_step (VorbisDecoder f, Codebook* c, float* output, int len, int step) {
  immutable int entry = codebook_decode_start(f, c);
  if (entry < 0) return false;
  immutable int count = (len > c.dimensions ? c.dimensions : len);
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      // lookup type 1: element offsets are derived from the entry by repeated division
      int div = 1;
      for (int i = 0; i < count; ++i) {
        int off = (entry/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i*step] += val;
        if (c.sequence_p) last = val;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // first element of this entry in the multiplicands table
  immutable int base = entry*c.dimensions;
  for (int i = 0; i < count; ++i) {
    float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "base+i"))+last;
    output[i*step] += val;
    if (c.sequence_p) last = val; // sequence mode: carry the running value forward
  }
  return true;
}
1168 
/// Repeatedly decodes vectors from codebook `c` and scatters their elements
/// round-robin over `ch` channel buffers, as if writing into one interleaved
/// buffer of `len*ch` samples.
///
/// Params:
///   f            = decoder state
///   c            = codebook to decode from (lookup type 0 is rejected)
///   outputs      = per-channel buffers; a null entry means "skip this channel"
///   ch           = number of interleaved channels
///   c_inter_p    = in/out: current channel index
///   p_inter_p    = in/out: current sample index within each channel
///   len          = number of samples in each channel buffer
///   total_decode = number of interleaved values to produce
///
/// Returns: true on success. On failure returns false (clean end of packet) or
/// the result of `error()`; in both cases the positions behind `c_inter_p` and
/// `p_inter_p` are left unchanged.
private int codebook_decode_deinterleave_repeat (VorbisDecoder f, Codebook* c, ref float*[STB_VORBIS_MAX_CHANNELS] outputs, int ch, int* c_inter_p, int* p_inter_p, int len, int total_decode) {
  int c_inter = *c_inter_p;
  int p_inter = *p_inter_p;
  int z, effective = c.dimensions;

  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) return error(f, STBVorbisError.invalid_stream);

  while (total_decode > 0) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    mixin(DECODE_VQ!("z", "c"));
    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
      debug(stb_vorbis) assert(!c.sparse || z < c.sorted_entries);
    }
    if (z < 0) {
      // a clean end of packet is not an error; anything else is
      if (!f.bytes_in_seg && f.last_seg) return false;
      return error(f, STBVorbisError.invalid_stream);
    }

    // if this will take us off the end of the buffers, stop short!
    // we check by computing the length of the virtual interleaved
    // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
    // and the length we'll be using (effective)
    // (the remaining room is the total minus the offset, so c_inter must be
    // added to p_inter*ch here; subtracting it would leave `effective`
    // 2*c_inter too large and allow writes past the end of the buffers)
    if (c_inter+p_inter*ch+effective > len*ch) effective = len*ch-(p_inter*ch+c_inter);

    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
      if (c.lookup_type == 1) {
        int div = 1;
        foreach (immutable i; 0..effective) {
          int off = (z/div)%c.lookup_values;
          float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
          if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
          if (++c_inter == ch) { c_inter = 0; ++p_inter; }
          if (c.sequence_p) last = val;
          div *= c.lookup_values;
        }
        goto skipit;
      }
    }
    // first element of this entry in the multiplicands table
    z *= c.dimensions;
    if (c.sequence_p) {
      // sequence mode: each value builds on the previous one
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
        last = val;
      }
    } else {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c","z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
      }
    }
   skipit:
    total_decode -= effective;
  }
  // publish the advanced position only after a fully successful run
  *c_inter_p = c_inter;
  *p_inter_p = p_inter;
  return true;
}
1230 
1231 //private int predict_point (int x, int x0, int x1, int y0, int y1)
// Mixin-string form of `int predict_point(x, x0, x1, y0, y1)`: expands to a
// block that stores into `dest` the integer point at `x` on the line from
// (x0, y0) to (x1, y1), computed as y0 -/+ (|y1-y0|*(x-x0))/(x1-x0) with
// truncating integer division.
// The arguments are pasted in textually and several are used more than once.
// The generated block divides by (x1-x0), so x1 must differ from x0.
// NOTE(review): the block's locals are named plainly (dy, adx, err, off); an
// argument expression that mentions one of those names would presumably be
// captured by the local -- confirm how cmacroFixVars handles this.
enum predict_point(string dest, string x, string x0, string x1, string y0, string y1) = q{{
  //import std.math : abs;
  int dy = ${y1}-${y0};
  int adx = ${x1}-${x0};
  // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
  int err = /*abs(dy)*/(dy < 0 ? -dy : dy)*(${x}-${x0});
  int off = err/adx;
  /*return*/${dest} = (dy < 0 ? ${y0}-off : ${y0}+off);
}}.cmacroFixVars!("dest", "x", "x0", "x1", "y0", "y1")(dest, x, x0, x1, y0, y1);
1241 
1242 // the following table is block-copied from the specification
// 256-entry lookup table of positive, strictly increasing gains ending at 1.0.
// draw_line indexes it with the integer y value of the line being drawn.
// The values are copied from the specification -- do not round or regenerate them.
immutable float[256] inverse_db_table = [
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
];
1309 
1310 
1311 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1312 // note that you must produce bit-identical output to decode correctly;
1313 // this specific sequence of operations is specified in the spec (it's
1314 // drawing integer-quantized frequency-space lines that the encoder
1315 // expects to be exactly the same)
1316 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1317 // have to divide in the setup? sigh.
// statement text applied to every output sample by draw_line:
// plain assignment when floor is not deferred, multiplication otherwise
template LINE_OP(string a, string b) {
  version(STB_VORBIS_NO_DEFER_FLOOR) enum LINE_OP = a~" = "~b~";";
  else enum LINE_OP = a~" *= "~b~";";
}
1323 
version(STB_VORBIS_DIVIDE_TABLE) {
  // dimensions of the division table used by draw_line:
  // first index is |dy| (< DIVTAB_NUMER), second index is adx (< DIVTAB_DENOM)
  enum : int {
    DIVTAB_NUMER = 32,
    DIVTAB_DENOM = 64,
  }
  byte[DIVTAB_DENOM][DIVTAB_NUMER] integer_divide_table; // 2KB
}
1329 
1330 // nobranch abs trick
// Expression text for a branch-free absolute value: (v + (v>>31)) ^ (v>>31).
// The argument text is pasted in three times, so it is evaluated three times.
// NOTE(review): the 31-bit shift assumes `v` is a 32-bit signed int -- confirm at call sites.
enum ABS(string v) = q{(((${v})+((${v})>>31))^((${v})>>31))}.cmacroFixVars!"v"(v);
1332 
1333 //void draw_line (float* ${output}, int ${x0}, int ${y0}, int ${x1}, int ${y1}, int ${n})
// Mixin-string form of draw_line(): expands to a block that walks x from x0 up
// to (but excluding) min(x1, n) and applies LINE_OP to `output[x]` with
// `inverse_db_table[y]`, where y follows the integer line from (x0, y0)
// towards (x1, y1) using a base step plus an error accumulator.
// The arguments are pasted in textually:
//   * `x1` is assigned to (clamped to `n`), so it must name a mutable variable;
//   * the slope is computed as dy/adx, so x1 must differ from x0 on entry.
// The second copy of the loop, inside the /* */ comment at the end, is disabled code.
enum draw_line(string output, string x0, string y0, string x1, string y1, string n) = q{{
  int ${__temp_prefix__}dy = ${y1}-${y0};
  int ${__temp_prefix__}adx = ${x1}-${x0};
  int ${__temp_prefix__}ady = mixin(ABS!"${__temp_prefix__}dy");
  int ${__temp_prefix__}base;
  int ${__temp_prefix__}x = ${x0}, ${__temp_prefix__}y = ${y0};
  int ${__temp_prefix__}err = 0;
  int ${__temp_prefix__}sy;

  version(STB_VORBIS_DIVIDE_TABLE) {
    if (${__temp_prefix__}adx < DIVTAB_DENOM && ${__temp_prefix__}ady < DIVTAB_NUMER) {
      if (${__temp_prefix__}dy < 0) {
        ${__temp_prefix__}base = -integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base-1;
      } else {
        ${__temp_prefix__}base = integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base+1;
      }
    } else {
      ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
      ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
    }
  } else {
    ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
    ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
  }
  ${__temp_prefix__}ady -= mixin(ABS!"${__temp_prefix__}base")*${__temp_prefix__}adx;
  if (${x1} > ${n}) ${x1} = ${n};
  if (${__temp_prefix__}x < ${x1}) {
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
      ${__temp_prefix__}err += ${__temp_prefix__}ady;
      if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
        ${__temp_prefix__}err -= ${__temp_prefix__}adx;
        ${__temp_prefix__}y += ${__temp_prefix__}sy;
      } else {
        ${__temp_prefix__}y += ${__temp_prefix__}base;
      }
      mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    }
  }
  /*
  mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
    ${__temp_prefix__}err += ${__temp_prefix__}ady;
    if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
      ${__temp_prefix__}err -= ${__temp_prefix__}adx;
      ${__temp_prefix__}y += ${__temp_prefix__}sy;
    } else {
      ${__temp_prefix__}y += ${__temp_prefix__}base;
    }
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  }
  */
}}.cmacroFixVars!("output", "x0", "y0", "x1", "y1", "n")(output, x0, y0, x1, y1, n);
1389 
/// Decodes `n` residue values from `book` into `target`, starting at `offset`.
/// For `rtype == 0` each decoded vector is spread with a stride of
/// n/dimensions; for any other type vectors are written back to back.
/// Returns: true on success, false as soon as one vector fails to decode.
private int residue_decode (VorbisDecoder f, Codebook* book, float* target, int offset, int n, int rtype) {
  if (rtype != 0) {
    // consecutive layout: advance by one whole vector per decode
    int produced = 0;
    while (produced < n) {
      if (!codebook_decode(f, book, target+offset, n-produced)) return false;
      produced += book.dimensions;
      offset += book.dimensions;
    }
    return true;
  }
  // strided layout: vector k fills positions k, k+step, k+2*step, ...
  immutable int step = n/book.dimensions;
  for (int k = 0; k < step; ++k) {
    if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) return false;
  }
  return true;
}
1403 
/// Decodes the residue vectors of one residue configuration into the channel buffers.
///
/// Params:
///   f               = decoder state
///   residue_buffers = per-channel output buffers; each decoded channel is
///                     zeroed over `n` floats first and then accumulated into
///   ch              = number of channels handled by this call
///   n               = number of floats in each channel buffer
///   rn              = index of the residue configuration to use
///   do_not_decode   = per-channel flags; a non-zero entry skips that channel
///
/// Decoding makes up to 8 passes over the partitions. Pass 0 additionally reads
/// the classification of each partition group. Running out of packet data is
/// not treated as an error here: decoding simply stops and the buffers keep
/// whatever has been accumulated so far.
private void decode_residue (VorbisDecoder f, ref float*[STB_VORBIS_MAX_CHANNELS] residue_buffers, int ch, int n, int rn, ubyte* do_not_decode) {
  import core.stdc.stdlib : alloca;
  import core.stdc.string : memset;

  Residue* r = f.residue_config+rn;
  int rtype = f.residue_types.ptr[rn];
  int c = r.classbook;
  int classwords = f.codebooks[c].dimensions; // partitions covered by one classification codeword
  int n_read = r.end-r.begin;
  int part_read = n_read/r.part_size; // number of partitions to decode
  uint temp_alloc_point = temp_alloc_save(f);
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
    int** classifications = cast(int**)mixin(temp_block_array!("f.vrchannels", "part_read*int.sizeof"));
  } else {
    ubyte*** part_classdata = cast(ubyte***)mixin(temp_block_array!("f.vrchannels", "part_read*cast(int)(ubyte*).sizeof"));
  }

  //stb_prof(2);
  // start every decoded channel from silence; residue values are accumulated
  foreach (immutable i; 0..ch) if (!do_not_decode[i]) memset(residue_buffers.ptr[i], 0, float.sizeof*n);

  if (rtype == 2 && ch != 1) {
    // type 2: all channels are interleaved into one virtual vector;
    // nothing to do when every channel is marked do-not-decode
    int j = void;
    for (j = 0; j < ch; ++j) if (!do_not_decode[j]) break;
    if (j == ch) goto done;

    //stb_prof(3);
    foreach (immutable pass; 0..8) {
      int pcount = 0, class_set = 0;
      if (ch == 2) {
        //stb_prof(13);
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = (z&1), p_inter = z>>1;
          if (pass == 0) {
            Codebook *cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          //stb_prof(5);
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(20); // accounts for X time
              version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              } else {
                // saves 1%
                //if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r.part_size)) goto done; // according to C source
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              }
              //stb_prof(7);
            } else {
              // no codebook for this class in this pass: skip over the partition
              zz += r.part_size;
              c_inter = zz&1;
              p_inter = zz>>1;
            }
          }
          //stb_prof(8);
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else if (ch == 1) {
        // NOTE(review): the enclosing `if` requires ch != 1, so this branch looks unreachable
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = 0, p_inter = z;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = 0;
              p_inter = zz;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else {
        // general case: any channel count other than 1 or 2
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = z%ch, p_inter = z/ch;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0][i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0][pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = zz%ch;
              p_inter = zz/ch;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      }
    }
    goto done;
  }
  //stb_prof(9);

  // residue types other than the interleaved case: every channel is decoded on its own
  foreach (immutable pass; 0..8) {
    int pcount = 0, class_set=0;
    while (pcount < part_read) {
      if (pass == 0) {
        // read one classification codeword per decoded channel
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            Codebook* cc = f.codebooks+r.classbook;
            int temp;
            mixin(DECODE!("temp", "cc"));
            if (temp == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[j][i+pcount] = temp%r.classifications;
                temp /= r.classifications;
              }
            } else {
              part_classdata[j][class_set] = r.classdata[temp];
            }
          }
        }
      }
      for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[j][pcount];
            } else {
              int cc = part_classdata[j][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              float* target = residue_buffers.ptr[j];
              int offset = r.begin+pcount*r.part_size;
              int nn = r.part_size;
              Codebook* book = f.codebooks+b;
              if (!residue_decode(f, book, target, offset, nn, rtype)) goto done;
            }
          }
        }
      }
      version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
        ++class_set;
      }
    }
  }
 done:
  //stb_prof(0);
  // release the temporary classification storage on every exit path
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) temp_free(f, classifications); else temp_free(f, part_classdata);
  temp_alloc_restore(f, temp_alloc_point);
}
1620 
/// Iteration 0 of IMDCT step 3: in-place sum/difference butterflies.
///
/// Starting at `e[i_off]` and walking downward, every element is paired with
/// the element `k_off` positions away. The upper element receives the sum, the
/// lower one receives the difference rotated by a twiddle pair `(A[0], A[1])`.
/// The twiddle pointer advances by 8 floats after every pair.
///
/// Params:
///   n     = work count; `n>>2` groups of eight floats are processed
///   e     = data buffer, modified in place
///   i_off = index of the highest element touched in the upper half
///   k_off = offset from an upper element to its lower partner
///   A     = twiddle table
private void imdct_step3_iter0_loop (int n, float* e, int i_off, int k_off, float* A) {
  debug(stb_vorbis) assert((n&3) == 0);
  float* hi = e+i_off;
  float* lo = hi+k_off;
  for (int group = n>>2; group > 0; --group) {
    // four (even, odd) pairs per group, at offsets 0/-1, -2/-3, -4/-5, -6/-7
    for (int at = 0; at > -8; at -= 2) {
      immutable float diffA = hi[at]-lo[at];
      immutable float diffB = hi[at-1]-lo[at-1];
      hi[at] += lo[at];
      hi[at-1] += lo[at-1];
      lo[at] = diffA*A[0]-diffB*A[1];
      lo[at-1] = diffB*A[0]+diffA*A[1];
      A += 8;
    }
    hi -= 8;
    lo -= 8;
  }
}
1662 
/// Step-3 butterfly pass with a caller-chosen twiddle stride.
///
/// Same in-place sum / rotated-difference butterfly as the iteration-0 loop,
/// but the twiddle pointer advances by `k1` floats after every pair.
///
/// Params:
///   lim   = work count; `lim>>2` groups of eight floats are processed
///   e     = data buffer, modified in place
///   d0    = index of the highest element touched in the upper half
///   k_off = offset from an upper element to its lower partner
///   A     = twiddle table
///   k1    = twiddle stride in floats
private void imdct_step3_inner_r_loop (int lim, float* e, int d0, int k_off, float* A, int k1) {
  float* hi = e+d0;
  float* lo = hi+k_off;
  for (int group = lim>>2; group > 0; --group) {
    // four (even, odd) pairs per group, at offsets 0/-1, -2/-3, -4/-5, -6/-7
    for (int at = 0; at > -8; at -= 2) {
      immutable float diffA = hi[at]-lo[at];
      immutable float diffB = hi[at-1]-lo[at-1];
      hi[at] += lo[at];
      hi[at-1] += lo[at-1];
      lo[at] = (diffA)*A[0]-(diffB)*A[1];
      lo[at-1] = (diffB)*A[0]+(diffA)*A[1];
      A += k1;
    }
    hi -= 8;
    lo -= 8;
  }
}
1708 
/// Step-3 butterfly pass that reuses the same four twiddle pairs for every group.
///
/// The twiddles are read once, from `A[a_off*p]` and `A[a_off*p+1]` for
/// p = 0..3, and then applied to `n` groups of eight floats. Consecutive
/// groups are `k0` floats apart, walking downward from `e[i_off]`.
///
/// Params:
///   n     = number of groups to process
///   e     = data buffer, modified in place
///   i_off = index of the highest element touched in the upper half
///   k_off = offset from an upper element to its lower partner
///   A     = twiddle table
///   a_off = distance in floats between successive twiddle pairs
///   k0    = distance in floats between successive groups
private void imdct_step3_inner_s_loop (int n, float* e, int i_off, int k_off, float* A, int a_off, int k0) {
  // twiddles are loop-invariant: fetch them all before touching the data
  float[8] tw = void;
  foreach (immutable p; 0..4) {
    tw[2*p] = A[a_off*p];
    tw[2*p+1] = A[a_off*p+1];
  }

  float* hi = e+i_off;
  float* lo = hi+k_off;
  foreach (immutable _; 0..n) {
    foreach (immutable p; 0..4) {
      immutable int at = -2*p;
      immutable float diffA = hi[at]-lo[at];
      immutable float diffB = hi[at-1]-lo[at-1];
      hi[at] = hi[at]+lo[at];
      hi[at-1] = hi[at-1]+lo[at-1];
      lo[at] = (diffA)*tw[2*p]-(diffB)*tw[2*p+1];
      lo[at-1] = (diffB)*tw[2*p]+(diffA)*tw[2*p+1];
    }
    hi -= k0;
    lo -= k0;
  }
}
1754 
// iter_54: string-mixin "macro" that expands to a braced block performing an
// in-place 8-element butterfly on the floats `z[0]` down to `z[-7]`.
//
// `z` is the source text of a `float*` expression (used below as "z" and
// "z-8"); it is evaluated once into a local and all accesses go through that.
// The trailing comments on the stores spell out each output in terms of the
// original inputs.
//
// The `${...}` placeholders are filled in by `cmacroFixVars` (defined
// elsewhere in this file); `${__temp_prefix__}` presumably yields a unique
// prefix so the temporaries cannot clash with locals at the mixin site --
// TODO confirm against cmacroFixVars.
1755 enum iter_54(string z) = q{{
1756   auto ${__temp_prefix__}z = (${z});
1757   float ${__temp_prefix__}k00, ${__temp_prefix__}k11, ${__temp_prefix__}k22, ${__temp_prefix__}k33;
1758   float ${__temp_prefix__}y0, ${__temp_prefix__}y1, ${__temp_prefix__}y2, ${__temp_prefix__}y3;
1759 
1760   ${__temp_prefix__}k00 = ${__temp_prefix__}z[ 0]-${__temp_prefix__}z[-4];
1761   ${__temp_prefix__}y0  = ${__temp_prefix__}z[ 0]+${__temp_prefix__}z[-4];
1762   ${__temp_prefix__}y2  = ${__temp_prefix__}z[-2]+${__temp_prefix__}z[-6];
1763   ${__temp_prefix__}k22 = ${__temp_prefix__}z[-2]-${__temp_prefix__}z[-6];
1764 
1765   ${__temp_prefix__}z[-0] = ${__temp_prefix__}y0+${__temp_prefix__}y2;   // z0+z4+z2+z6
1766   ${__temp_prefix__}z[-2] = ${__temp_prefix__}y0-${__temp_prefix__}y2;   // z0+z4-z2-z6
1767 
1768   // done with ${__temp_prefix__}y0, ${__temp_prefix__}y2
1769 
1770   ${__temp_prefix__}k33 = ${__temp_prefix__}z[-3]-${__temp_prefix__}z[-7];
1771 
1772   ${__temp_prefix__}z[-4] = ${__temp_prefix__}k00+${__temp_prefix__}k33; // z0-z4+z3-z7
1773   ${__temp_prefix__}z[-6] = ${__temp_prefix__}k00-${__temp_prefix__}k33; // z0-z4-z3+z7
1774 
1775   // done with ${__temp_prefix__}k33
1776 
1777   ${__temp_prefix__}k11 = ${__temp_prefix__}z[-1]-${__temp_prefix__}z[-5];
1778   ${__temp_prefix__}y1  = ${__temp_prefix__}z[-1]+${__temp_prefix__}z[-5];
1779   ${__temp_prefix__}y3  = ${__temp_prefix__}z[-3]+${__temp_prefix__}z[-7];
1780 
1781   ${__temp_prefix__}z[-1] = ${__temp_prefix__}y1+${__temp_prefix__}y3;   // z1+z5+z3+z7
1782   ${__temp_prefix__}z[-3] = ${__temp_prefix__}y1-${__temp_prefix__}y3;   // z1+z5-z3-z7
1783   ${__temp_prefix__}z[-5] = ${__temp_prefix__}k11-${__temp_prefix__}k22; // z1-z5+z2-z6
1784   ${__temp_prefix__}z[-7] = ${__temp_prefix__}k11+${__temp_prefix__}k22; // z1-z5-z2+z6
1785 }}.cmacroFixVars!"z"(z);
1786 
/// Final three step-3 iterations (ld-6, ld-5, ld-4) fused into one pass.
///
/// Walks downward from `e[i_off]` in groups of 16 floats, `n` groups in all.
/// Each group first gets an 8-against-8 butterfly whose only non-trivial
/// twiddle is `A[base_n>>3]`, then both 8-float halves are finished with the
/// `iter_54` mixin.
///
/// Params:
///   n      = number of 16-float groups
///   e      = data buffer, modified in place
///   i_off  = index of the highest element touched
///   A      = twiddle table
///   base_n = transform size; only `A[base_n>>3]` is read
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
    immutable int a_off = base_n >> 3;
    immutable float A2 = A[0+a_off];
    float *z = e + i_off;
    float *stop = z - 16 * n;

    for (; z > stop; z -= 16) {
        // first quad: z[0..-3] against z[-8..-11]
        float da = z[ 0] - z[ -8];
        float db = z[-1] - z[ -9];
        float dc = z[-2] - z[-10];
        float dd = z[-3] - z[-11];
        z[ 0] = z[ 0] + z[ -8];
        z[-1] = z[-1] + z[ -9];
        z[-2] = z[-2] + z[-10];
        z[-3] = z[-3] + z[-11];
        z[ -8] = da;
        z[ -9] = db;
        z[-10] = (dc+dd) * A2;
        z[-11] = (dd-dc) * A2;

        // second quad: z[-4..-7] against z[-12..-15]
        da = z[-4] - z[-12];
        db = z[-5] - z[-13];
        dc = z[-6] - z[-14];
        dd = z[-7] - z[-15];
        z[-4] = z[-4] + z[-12];
        z[-5] = z[-5] + z[-13];
        z[-6] = z[-6] + z[-14];
        z[-7] = z[-7] + z[-15];
        z[-12] = db;
        z[-13] = -da;
        z[-14] = (dd-dc) * A2;
        z[-15] = (dc+dd) * -A2;

        // finish each 8-float half in place
        mixin(iter_54!"z");
        mixin(iter_54!"z-8");
    }
}
1829 
// Inverse MDCT of one block, computed in place in `buffer`.
//
//   buffer    - on entry the first n/2 floats are read as the spectrum;
//               on return all n floats hold the output of the last stage
//   n         - block size in samples
//   f         - decoder; supplies the tables f.A / f.B / f.C and f.bit_reverse
//               indexed by `blocktype`, and the temp allocator
//   blocktype - index selecting which table set to use
//
// A scratch buffer of n/2 floats (`buf2`) is obtained through temp_alloc and
// released again before returning. The stages below ping-pong between
// `buffer` and `buf2`; the comments at each stage say where the data lives.
1830 private void inverse_mdct (float* buffer, int n, VorbisDecoder f, int blocktype) {
1831   import core.stdc.stdlib : alloca;
1832 
1833   int n2 = n>>1, n4 = n>>2, n8 = n>>3, l;
1834   int ld;
1835   // @OPTIMIZE: reduce register pressure by using fewer variables?
1836   int save_point = temp_alloc_save(f);
1837   float *buf2;
1838   buf2 = cast(float*)mixin(temp_alloc!("n2*float.sizeof"));
1839   float *u = null, v = null;
1840   // twiddle factors
1841   float *A = f.A.ptr[blocktype];
1842 
1843   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
1844   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
1845 
1846   // kernel from paper
1847 
1848 
1849   // merged:
1850   //   copy and reflect spectral data
1851   //   step 0
1852 
1853   // note that it turns out that the items added together during
1854   // this step are, in fact, being added to themselves (as reflected
1855   // by step 0). inexplicable inefficiency! this became obvious
1856   // once I combined the passes.
1857 
1858   // so there's a missing 'times 2' here (for adding X to itself).
1859   // this propagates through linearly to the end, where the numbers
1860   // are 1/2 too small, and need to be compensated for.
1861 
1862   {
1863     float* d, e, AA, e_stop;
1864     d = &buf2[n2-2];
1865     AA = A;
1866     e = &buffer[0];
1867     e_stop = &buffer[n2];
     // first half: read buffer forward (stride 4), fill buf2 from its top downward
1868     while (e != e_stop) {
1869       d[1] = (e[0]*AA[0]-e[2]*AA[1]);
1870       d[0] = (e[0]*AA[1]+e[2]*AA[0]);
1871       d -= 2;
1872       AA += 2;
1873       e += 4;
1874     }
     // second half: read buffer backward with negated inputs, keep filling buf2 downward
1875     e = &buffer[n2-3];
1876     while (d >= buf2) {
1877       d[1] = (-e[2]*AA[0]- -e[0]*AA[1]);
1878       d[0] = (-e[2]*AA[1]+ -e[0]*AA[0]);
1879       d -= 2;
1880       AA += 2;
1881       e -= 4;
1882     }
1883   }
1884 
1885   // now we use symbolic names for these, so that we can
1886   // possibly swap their meaning as we change which operations
1887   // are in place
1888 
1889   u = buffer;
1890   v = buf2;
1891 
1892   // step 2    (paper output is w, now u)
1893   // this could be in place, but the data ends up in the wrong
1894   // place... _somebody_'s got to swap it, so this is nominated
1895   {
1896     float* AA = &A[n2-8];
1897     float* d0, d1, e0, e1;
1898     e0 = &v[n4];
1899     e1 = &v[0];
1900     d0 = &u[n4];
1901     d1 = &u[0];
     // AA walks the twiddle table from its end back to its start
1902     while (AA >= A) {
1903       float v40_20, v41_21;
1904 
1905       v41_21 = e0[1]-e1[1];
1906       v40_20 = e0[0]-e1[0];
1907       d0[1]  = e0[1]+e1[1];
1908       d0[0]  = e0[0]+e1[0];
1909       d1[1]  = v41_21*AA[4]-v40_20*AA[5];
1910       d1[0]  = v40_20*AA[4]+v41_21*AA[5];
1911 
1912       v41_21 = e0[3]-e1[3];
1913       v40_20 = e0[2]-e1[2];
1914       d0[3]  = e0[3]+e1[3];
1915       d0[2]  = e0[2]+e1[2];
1916       d1[3]  = v41_21*AA[0]-v40_20*AA[1];
1917       d1[2]  = v40_20*AA[0]+v41_21*AA[1];
1918 
1919       AA -= 8;
1920 
1921       d0 += 4;
1922       d1 += 4;
1923       e0 += 4;
1924       e1 += 4;
1925     }
1926   }
1927 
1928   // step 3
1929   ld = ilog(n)-1; // ilog is off-by-one from normal definitions
1930 
1931   // optimized step 3:
1932 
1933   // the original step3 loop can be nested r inside s or s inside r;
1934   // it's written originally as s inside r, but this is dumb when r
1935   // iterates many times, and s few. So I have two copies of it and
1936   // switch between them halfway.
1937 
1938   // this is iteration 0 of step 3
1939   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*0, -(n>>3), A);
1940   imdct_step3_iter0_loop(n>>4, u, n2-1-n4*1, -(n>>3), A);
1941 
1942   // this is iteration 1 of step 3
1943   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*0, -(n>>4), A, 16);
1944   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*1, -(n>>4), A, 16);
1945   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*2, -(n>>4), A, 16);
1946   imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*3, -(n>>4), A, 16);
1947 
   // remaining iterations: the first loop uses the r-loop form, the second
   // (which continues from the same `l`) uses the s-loop form
1948   l = 2;
1949   for (; l < (ld-3)>>1; ++l) {
1950     int k0 = n>>(l+2), k0_2 = k0>>1;
1951     int lim = 1<<(l+1);
1952     foreach (int i; 0..lim) imdct_step3_inner_r_loop(n>>(l+4), u, n2-1-k0*i, -k0_2, A, 1<<(l+3));
1953   }
1954 
1955   for (; l < ld-6; ++l) {
1956     int k0 = n>>(l+2), k1 = 1<<(l+3), k0_2 = k0>>1;
1957     int rlim = n>>(l+6);
1958     int lim = 1<<(l+1);
1959     int i_off;
1960     float *A0 = A;
1961     i_off = n2-1;
1962     foreach (immutable _; 0..rlim) {
1963       imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
1964       A0 += k1*4;
1965       i_off -= 8;
1966     }
1967   }
1968 
1969   // iterations with count:
1970   //   ld-6,-5,-4 all interleaved together
1971   //       the big win comes from getting rid of needless flops
1972   //         due to the constants on pass 5 & 4 being all 1 and 0;
1973   //       combining them to be simultaneous to improve cache made little difference
1974   imdct_step3_inner_s_loop_ld654(n>>5, u, n2-1, A, n);
1975 
1976   // output is u
1977 
1978   // step 4, 5, and 6
1979   // cannot be in-place because of step 5
1980   {
1981     ushort *bitrev = f.bit_reverse.ptr[blocktype];
1982     // weirdly, I'd have thought reading sequentially and writing
1983     // erratically would have been better than vice-versa, but in
1984     // fact that's not what my testing showed. (That is, with
1985     // j = bitreverse(i), do you read i and write j, or read j and write i.)
1986     float *d0 = &v[n4-4];
1987     float *d1 = &v[n2-4];
1988     int k4;
1989     while (d0 >= v) {
1990       k4 = bitrev[0];
1991       d1[3] = u[k4+0];
1992       d1[2] = u[k4+1];
1993       d0[3] = u[k4+2];
1994       d0[2] = u[k4+3];
1995 
1996       k4 = bitrev[1];
1997       d1[1] = u[k4+0];
1998       d1[0] = u[k4+1];
1999       d0[1] = u[k4+2];
2000       d0[0] = u[k4+3];
2001 
2002       d0 -= 4;
2003       d1 -= 4;
2004       bitrev += 2;
2005     }
2006   }
2007   // (paper output is u, now v)
2008 
2009 
2010   // data must be in buf2
2011   debug(stb_vorbis) assert(v == buf2);
2012 
2013   // step 7   (paper output is v, now v)
2014   // this is now in place
2015   {
2016     float a02, a11, b0, b1, b2, b3;
2017     float* C = f.C.ptr[blocktype];
2018     float* d, e;
2019     d = v;
2020     e = v+n2-4;
     // d climbs from the bottom of v while e descends from the top, until they meet
2021     while (d < e) {
2022       a02 = d[0]-e[2];
2023       a11 = d[1]+e[3];
2024 
2025       b0 = C[1]*a02+C[0]*a11;
2026       b1 = C[1]*a11-C[0]*a02;
2027 
2028       b2 = d[0]+e[ 2];
2029       b3 = d[1]-e[ 3];
2030 
2031       d[0] = b2+b0;
2032       d[1] = b3+b1;
2033       e[2] = b2-b0;
2034       e[3] = b1-b3;
2035 
2036       a02 = d[2]-e[0];
2037       a11 = d[3]+e[1];
2038 
2039       b0 = C[3]*a02+C[2]*a11;
2040       b1 = C[3]*a11-C[2]*a02;
2041 
2042       b2 = d[2]+e[ 0];
2043       b3 = d[3]-e[ 1];
2044 
2045       d[2] = b2+b0;
2046       d[3] = b3+b1;
2047       e[0] = b2-b0;
2048       e[1] = b1-b3;
2049 
2050       C += 4;
2051       d += 4;
2052       e -= 4;
2053     }
2054   }
2055 
2056   // data must be in buf2
2057 
2058 
2059   // step 8+decode   (paper output is X, now buffer)
2060   // this generates pairs of data a la 8 and pushes them directly through
2061   // the decode kernel (pushing rather than pulling) to avoid having
2062   // to make another pass later
2063 
2064   // this cannot POSSIBLY be in place, so we refer to the buffers directly
2065   {
2066     float p0, p1, p2, p3;
2067     float* d0, d1, d2, d3;
2068     float* B = f.B.ptr[blocktype]+n2-8;
2069     float* e = buf2+n2-8;
     // four output cursors: d0/d2 move upward from buffer[0] and buffer[n2],
     // d1/d3 move downward from buffer[n2-4] and buffer[n-4]
2070     d0 = &buffer[0];
2071     d1 = &buffer[n2-4];
2072     d2 = &buffer[n2];
2073     d3 = &buffer[n-4];
2074     while (e >= v) {
2075       p3 =  e[6]*B[7]-e[7]*B[6];
2076       p2 = -e[6]*B[6]-e[7]*B[7];
2077 
2078       d0[0] =   p3;
2079       d1[3] =  -p3;
2080       d2[0] =   p2;
2081       d3[3] =   p2;
2082 
2083       p1 =  e[4]*B[5]-e[5]*B[4];
2084       p0 = -e[4]*B[4]-e[5]*B[5];
2085 
2086       d0[1] =   p1;
2087       d1[2] = - p1;
2088       d2[1] =   p0;
2089       d3[2] =   p0;
2090 
2091       p3 =  e[2]*B[3]-e[3]*B[2];
2092       p2 = -e[2]*B[2]-e[3]*B[3];
2093 
2094       d0[2] =   p3;
2095       d1[1] = - p3;
2096       d2[2] =   p2;
2097       d3[1] =   p2;
2098 
2099       p1 =  e[0]*B[1]-e[1]*B[0];
2100       p0 = -e[0]*B[0]-e[1]*B[1];
2101 
2102       d0[3] =   p1;
2103       d1[0] = - p1;
2104       d2[3] =   p0;
2105       d3[0] =   p0;
2106 
2107       B -= 8;
2108       e -= 8;
2109       d0 += 4;
2110       d2 += 4;
2111       d1 -= 4;
2112       d3 -= 4;
2113     }
2114   }
2115 
   // release the scratch buffer and rewind the temp allocator to its entry state
2116   temp_free(f, buf2);
2117   temp_alloc_restore(f, save_point);
2118 }
2119 
/// Returns the window table for a window whose half-length is `len`.
///
/// `len` is doubled and compared against the decoder's two block sizes; any
/// other value is a programming error and hits `assert(0)`.
private float *get_window (VorbisDecoder f, int len) {
  immutable int full = len<<1;
  if (full == f.blocksize_0) return f.window.ptr[0];
  if (full == f.blocksize_1) return f.window.ptr[1];
  assert(0);
}
2126 
// Element type of the `finalY` array handed to do_floor:
// `int` when floors are not deferred, `short` otherwise.
version(STB_VORBIS_NO_DEFER_FLOOR)
  alias YTYPE = int;
else
  alias YTYPE = short;
2132 
/// Applies the decoded floor-1 curve of channel `i` to `target`.
///
/// Walks the floor points in sorted-X order, skipping unused points, and
/// applies line segments between successive used points via the `draw_line`
/// mixin. Whatever remains up to `n>>1` is filled at the last Y level.
///
/// Params:
///   f          = decoder state
///   map        = mapping that selects the floor for channel `i`
///   i          = channel index
///   n          = block size; only the first `n>>1` entries of `target` are touched
///   target     = buffer the floor is applied to (through `LINE_OP`)
///   finalY     = per-point Y values; in deferred mode a negative entry marks an unused point
///   step2_flag = per-point "used" flags; read only with STB_VORBIS_NO_DEFER_FLOOR
///
/// Returns: `true` on success; the result of `error()` if the floor is type 0.
private int do_floor (VorbisDecoder f, Mapping* map, int i, int n, float* target, YTYPE* finalY, ubyte* step2_flag) {
  int n2 = n>>1;
  immutable int floor = map.submap_floor.ptr[map.chan[i].mux];

  // floor 0 is not supported by this decoder
  if (f.floor_types.ptr[floor] == 0) return error(f, STBVorbisError.invalid_stream);

  Floor1* g = &f.floor_config[floor].floor1;
  int lx = 0;
  int ly = finalY[0]*g.floor1_multiplier;
  foreach (immutable q; 1..g.values) {
    int j = g.sorted_order.ptr[q];
    // skip points that were not explicitly coded
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      if (!step2_flag[j]) continue;
    } else {
      if (finalY[j] < 0) continue;
    }
    int hy = finalY[j]*g.floor1_multiplier;
    int hx = g.Xlist.ptr[j];
    if (lx != hx) { mixin(draw_line!("target", "lx", "ly", "hx", "hy", "n2")); }
    lx = hx;
    ly = hy;
  }

  if (lx < n2) {
    // optimization of: draw_line(target, lx, ly, n, ly, n2);
    foreach (immutable j; lx..n2) { mixin(LINE_OP!("target[j]", "inverse_db_table[ly]")); }
  }
  return true;
}
2163 
2164 // The meaning of "left" and "right"
2165 //
2166 // For a given frame:
2167 //     we compute samples from 0..n
2168 //     window_center is n/2
2169 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2170 //     all of the samples from left_end to right_start can be output without mixing; however,
2171 //        this interval is 0-length except when transitioning between short and long frames
2172 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2173 //        which we don't have, so those get saved in a buffer
2174 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2175 //        has to be the same as frame N+1's left_end-left_start (which they are by
2176 //        construction)
2177 
/// Starts decoding the next audio packet: reads its mode and computes the
/// window boundaries described in the comment above.
///
/// Packets whose leading type bit is set are consumed and skipped. Once a
/// packet with a clear type bit is found, the mode number (and, for
/// block-flagged modes, the previous/next window flags) is read.
///
/// Params:
///   f             = decoder state; channel_buffer_start/end are reset to 0
///   p_left_start  = receives the start of the left overlap region
///   p_left_end    = receives the end of the left overlap region
///   p_right_start = receives the start of the right overlap region
///   p_right_end   = receives the end of the right overlap region
///   mode          = receives the index of the packet's mode
///
/// Returns: `true` on success; `false` at end of stream, when no packet can be
/// started, or when the mode number is EOP or out of range. The window
/// outputs are written only on success.
private int vorbis_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  f.channel_buffer_start = f.channel_buffer_end = 0;

  // find the next packet whose type bit is clear, discarding any others
  for (;;) {
    if (f.eof) return false;
    if (!maybe_start_packet(f)) return false;
    // check packet type
    if (get_bits!1(f) == 0) break;
    /+if (f.push_mode) return error(f, STBVorbisError.bad_packet_type);+/
    while (get8_packet(f) != EOP) {}
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  immutable int modeIndex = get_bits_main(f, ilog(f.mode_count-1));
  if (modeIndex == EOP) return false;
  if (modeIndex >= f.mode_count) return false;
  *mode = modeIndex;
  Mode* m = f.mode_config.ptr+modeIndex;

  int n;
  int prevFlag = 0, nextFlag = 0;
  if (m.blockflag) {
    n = f.blocksize_1;
    // the two flags must be read in this order
    prevFlag = get_bits!1(f);
    nextFlag = get_bits!1(f);
  } else {
    n = f.blocksize_0;
  }

  // WINDOWING
  immutable int window_center = n>>1;
  immutable bool narrowLeft = m.blockflag && !prevFlag;
  immutable bool narrowRight = m.blockflag && !nextFlag;

  if (narrowLeft) {
    *p_left_start = (n-f.blocksize_0)>>2;
    *p_left_end   = (n+f.blocksize_0)>>2;
  } else {
    *p_left_start = 0;
    *p_left_end   = window_center;
  }

  if (narrowRight) {
    *p_right_start = (n*3-f.blocksize_0)>>2;
    *p_right_end   = (n*3+f.blocksize_0)>>2;
  } else {
    *p_right_start = window_center;
    *p_right_end   = n;
  }
  return true;
}
2227 
/// Decodes the body of an audio packet whose header has already been read by
/// vorbis_decode_initial, leaving the inverse-MDCT output of every channel in
/// `f.channel_buffers`.
///
/// Stages, in order: per-channel floor decode, re-enabling of coupled
/// channels, residue decode per submap, inverse channel coupling, floor
/// application, inverse MDCT, packet flush, and sample-position bookkeeping.
///
/// Params:
///   f           = decoder state
///   len         = receives the number of usable samples in this frame
///                 (`right_end`, or less for a truncated final frame)
///   m           = mode of the packet
///   left_start  = start of the left overlap region
///   left_end    = end of the left overlap region (not used by this function)
///   right_start = start of the right overlap region
///   right_end   = end of the right overlap region
///   p_left      = receives the adjusted left start when deferred discard
///                 samples are consumed
///
/// Returns: `true` on success; the result of `error()` if a channel uses a
/// type-0 floor.
private int vorbis_decode_packet_rest (VorbisDecoder f, int* len, Mode* m, int left_start, int left_end, int right_start, int right_end, int* p_left) {
  import core.stdc.string : memcpy, memset;

  Mapping* map;
  int n, n2;
  int[256] zero_channel;
  int[256] really_zero_channel;

  // WINDOWING
  n = f.blocksize.ptr[m.blockflag];
  map = &f.mapping[m.mapping];

  // FLOORS
  n2 = n>>1;

  //stb_prof(1);
  foreach (immutable i; 0..f.vrchannels) {
    int s = map.chan[i].mux, floor;
    zero_channel[i] = false;
    floor = map.submap_floor.ptr[s];
    if (f.floor_types.ptr[floor] == 0) {
      return error(f, STBVorbisError.invalid_stream);
    } else {
      Floor1* g = &f.floor_config[floor].floor1;
      if (get_bits!1(f)) {
        short* finalY;
        ubyte[256] step2_flag = void;
        immutable int[4] range_list = [ 256, 128, 86, 64 ];
        int range = range_list[g.floor1_multiplier-1];
        int offset = 2;
        finalY = f.finalY.ptr[i];
        finalY[0] = cast(short)get_bits_main(f, ilog(range)-1); //k8
        finalY[1] = cast(short)get_bits_main(f, ilog(range)-1); //k8
        foreach (immutable j; 0..g.partitions) {
          int pclass = g.partition_class_list.ptr[j];
          int cdim = g.class_dimensions.ptr[pclass];
          int cbits = g.class_subclasses.ptr[pclass];
          int csub = (1<<cbits)-1;
          int cval = 0;
          if (cbits) {
            Codebook *cc = f.codebooks+g.class_masterbooks.ptr[pclass];
            mixin(DECODE!("cval", "cc"));
          }
          foreach (immutable k; 0..cdim) {
            int book = g.subclass_books.ptr[pclass].ptr[cval&csub];
            cval = cval>>cbits;
            if (book >= 0) {
              int temp;
              Codebook *cc = f.codebooks+book;
              mixin(DECODE!("temp", "cc"));
              finalY[offset++] = cast(short)temp; //k8
            } else {
              finalY[offset++] = 0;
            }
          }
        }
        if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
        step2_flag[0] = step2_flag[1] = 1;
        foreach (immutable j; 2..g.values) {
          int low = g.neighbors.ptr[j].ptr[0];
          int high = g.neighbors.ptr[j].ptr[1];
          //neighbors(g.Xlist, j, &low, &high);
          int pred = void;
          mixin(predict_point!("pred", "g.Xlist.ptr[j]", "g.Xlist.ptr[low]", "g.Xlist.ptr[high]", "finalY[low]", "finalY[high]"));
          int val = finalY[j];
          int highroom = range-pred;
          int lowroom = pred;
          auto room = (highroom < lowroom ? highroom : lowroom)*2;
          if (val) {
            step2_flag[low] = step2_flag[high] = 1;
            step2_flag[j] = 1;
            if (val >= room) {
              finalY[j] = cast(short)(highroom > lowroom ? val-lowroom+pred : pred-val+highroom-1); //k8
            } else {
              finalY[j] = cast(short)(val&1 ? pred-((val+1)>>1) : pred+(val>>1)); //k8
            }
          } else {
            step2_flag[j] = 0;
            finalY[j] = cast(short)pred; //k8
          }
        }

        version(STB_VORBIS_NO_DEFER_FLOOR) {
          do_floor(f, map, i, n, f.floor_buffers.ptr[i], finalY, step2_flag);
        } else {
          // defer final floor computation until _after_ residue
          foreach (immutable j; 0..g.values) if (!step2_flag[j]) finalY[j] = -1;
        }
      } else {
  error:
        // reached when the floor is flagged unused, or via `goto error`
        // when the packet ran out of bits while decoding the floor
        zero_channel[i] = true;
      }
      // So we just defer everything else to later
      // at this point we've decoded the floor into buffer
    }
  }
  //stb_prof(0);
  // at this point we've decoded all floors

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // re-enable coupled channels if necessary
  // (really_zero_channel keeps the pre-coupling flags for the floor stage below)
  memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof*f.vrchannels);
  foreach (immutable i; 0..map.coupling_steps) {
    if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
      zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = false;
    }
  }

  // RESIDUE DECODE
  foreach (immutable i; 0..map.submaps) {
    float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
    ubyte[256] do_not_decode = void;
    int ch = 0;
    // gather the channels that belong to submap i
    foreach (immutable j; 0..f.vrchannels) {
      if (map.chan[j].mux == i) {
        if (zero_channel[j]) {
          do_not_decode[ch] = true;
          residue_buffers.ptr[ch] = null;
        } else {
          do_not_decode[ch] = false;
          residue_buffers.ptr[ch] = f.channel_buffers.ptr[j];
        }
        ++ch;
      }
    }
    int r = map.submap_residue.ptr[i];
    decode_residue(f, residue_buffers, ch, n2, r, do_not_decode.ptr);
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

   // INVERSE COUPLING
  //stb_prof(14);
  foreach_reverse (immutable i; 0..map.coupling_steps) {
    int n2n = n>>1;
    float* mm = f.channel_buffers.ptr[map.chan[i].magnitude];
    float* a = f.channel_buffers.ptr[map.chan[i].angle];
    foreach (immutable j; 0..n2n) {
      float a2, m2;
      if (mm[j] > 0) {
        if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]-a[j]; } else { a2 = mm[j]; m2 = mm[j]+a[j]; }
      } else {
        if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]+a[j]; } else { a2 = mm[j]; m2 = mm[j]-a[j]; }
      }
      mm[j] = m2;
      a[j] = a2;
    }
  }

  // finish decoding the floors
  version(STB_VORBIS_NO_DEFER_FLOOR) {
    foreach (immutable i; 0..f.vrchannels) {
      if (really_zero_channel[i]) {
        memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
      } else {
        foreach (immutable j; 0..n2) f.channel_buffers.ptr[i].ptr[j] *= f.floor_buffers.ptr[i].ptr[j];
      }
    }
  } else {
    //stb_prof(15);
    foreach (immutable i; 0..f.vrchannels) {
      if (really_zero_channel[i]) {
        memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
      } else {
        do_floor(f, map, i, n, f.channel_buffers.ptr[i], f.finalY.ptr[i], null);
      }
    }
  }

  // INVERSE MDCT
  //stb_prof(16);
  foreach (immutable i; 0..f.vrchannels) inverse_mdct(f.channel_buffers.ptr[i], n, f, m.blockflag);
  //stb_prof(0);

  // this shouldn't be necessary, unless we exited on an error
  // and want to flush to get to the next packet
  flush_packet(f);

  if (f.first_decode) {
    // assume we start so first non-discarded sample is sample 0
    // this isn't to spec, but spec would require us to read ahead
    // and decode the size of all current frames--could be done,
    // but presumably it's not a commonly used feature
    f.current_loc = -n2; // start of first frame is positioned for discard
    // we might have to discard samples "from" the next frame too,
    // if we're lapping a large block then a small at the start?
    f.discard_samples_deferred = n-right_end;
    f.current_loc_valid = true;
    f.first_decode = false;
  } else if (f.discard_samples_deferred) {
    if (f.discard_samples_deferred >= right_start-left_start) {
      f.discard_samples_deferred -= (right_start-left_start);
      left_start = right_start;
      *p_left = left_start;
    } else {
      left_start += f.discard_samples_deferred;
      *p_left = left_start;
      f.discard_samples_deferred = 0;
    }
  } else if (f.previous_length == 0 && f.current_loc_valid) {
    // we're recovering from a seek... that means we're going to discard
    // the samples from this packet even though we know our position from
    // the last page header, so we need to update the position based on
    // the discarded samples here
    // but wait, the code below is going to add this in itself even
    // on a discard, so we don't need to do it here...
  }

  // check if we have ogg information about the sample # for this packet
  if (f.last_seg_which == f.end_seg_with_known_loc) {
    // if we have a valid current loc, and this is final:
    if (f.current_loc_valid && (f.page_flag&PAGEFLAG_last_page)) {
      uint current_end = f.known_loc_for_packet-(n-right_end);
      // then let's infer the size of the (probably) short final frame
      if (current_end < f.current_loc+right_end) {
        if (current_end < f.current_loc+(right_end-left_start)) {
          // negative truncation, that's impossible!
          *len = 0;
        } else {
          *len = current_end-f.current_loc;
        }
        *len += left_start;
        if (*len > right_end) *len = right_end; // this should never happen
        f.current_loc += *len;
        return true;
      }
    }
    // otherwise, just set our sample loc
    // guess that the ogg granule pos refers to the _middle_ of the
    // last frame?
    // set f.current_loc to the position of left_start
    f.current_loc = f.known_loc_for_packet-(n2-left_start);
    f.current_loc_valid = true;
  }
  if (f.current_loc_valid) f.current_loc += (right_start-left_start);

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  *len = right_end;  // ignore samples after the window goes to 0
  return true;
}
2470 
// Decodes one audio packet in two stages: vorbis_decode_initial() parses the
// packet header and yields the window bounds and the mode index, after which
// vorbis_decode_packet_rest() does the remaining work.
//
// f:       decoder state
// len:     out, forwarded to vorbis_decode_packet_rest()
// p_left:  out, start of the left window region (may be adjusted by the
//          second stage, which receives the pointer as well as the value)
// p_right: out, start of the right window region
//
// Returns 0 when the initial stage fails; otherwise the result of
// vorbis_decode_packet_rest().
private int vorbis_decode_packet (VorbisDecoder f, int* len, int* p_left, int* p_right) {
  int modeIndex, leftEnd, rightEnd;
  if (vorbis_decode_initial(f, p_left, &leftEnd, p_right, &rightEnd, &modeIndex)) {
    auto modeCfg = f.mode_config.ptr+modeIndex;
    return vorbis_decode_packet_rest(f, len, modeCfg, *p_left, leftEnd, *p_right, rightEnd, p_left);
  }
  return 0;
}
2476 
// Completes a decoded frame: overlap-adds the tail saved from the previous
// frame, stashes this frame's tail for the next call, and reports how many
// samples are ready for output.
//
// f:     decoder state
// len:   number of valid samples in the channel buffers for this frame
// left:  index where this frame's rising window starts, i.e. where mixing
//        with the previous frame's saved tail begins
// right: index where this frame's falling window starts; samples from here
//        on are saved for the next frame, and the returned data ends here
//
// Returns the number of output samples (right-left, with right clamped to
// len for a short frame), or 0 when there was no previous frame to overlap
// with. f.samples_output is advanced by the returned amount in the non-zero
// path only.
private int vorbis_finish_frame (VorbisDecoder f, int len, int left, int right) {
  // remember the old overlap length before it gets overwritten below
  immutable int prevLen = f.previous_length;

  // blend the saved tail of the previous frame into the head of this one
  if (prevLen != 0) {
    float* win = get_window(f, prevLen);
    foreach (immutable ch; 0..f.vrchannels) {
      auto cur = f.channel_buffers.ptr[ch];
      auto old = f.previous_window.ptr[ch];
      for (int idx = 0; idx < prevLen; ++idx) {
        cur[left+idx] = cur[left+idx]*win[idx]+old[idx]*win[prevLen-1-idx];
      }
    }
  }

  // everything from 'right' onward becomes the overlap for the next frame
  f.previous_length = len-right;

  // @OPTIMIZE: this copy could be avoided by double-buffering the output
  // (swapping previous_window with channel_buffers), at the cost of making
  // previous_window twice as large and preventing channel_buffers from ever
  // living in temp memory
  foreach (immutable ch; 0..f.vrchannels) {
    auto saved = f.previous_window.ptr[ch];
    auto cur = f.channel_buffers.ptr[ch];
    uint off = 0;
    while (right+off < len) {
      saved[off] = cur[right+off];
      ++off;
    }
  }

  // without a previous packet the data is not considered valid; strictly only
  // the would-have-overlapped part is invalid, but this appears to be what
  // the spec requires
  if (prevLen == 0) return 0;

  // a short frame ends before 'right'
  immutable int produced = (len < right ? len : right)-left;
  f.samples_output += produced;
  return produced;
}
2528 
// Decodes one packet and runs it through vorbis_finish_frame() so that the
// overlap state is set up; the sample count reported by vorbis_finish_frame()
// is discarded. Returns true if the packet was decoded, false otherwise.
private bool vorbis_pump_first_frame (VorbisDecoder f) {
  int frameLen, leftEdge, rightEdge;
  if (!vorbis_decode_packet(f, &frameLen, &leftEdge, &rightEdge)) return false;
  vorbis_finish_frame(f, frameLen, leftEdge, rightEdge);
  return true;
}
2537 
2538 /+ k8: i don't need that, so it's dead
2539 private int is_whole_packet_present (VorbisDecoder f, int end_page) {
2540   import core.stdc.string : memcmp;
2541 
2542   // make sure that we have the packet available before continuing...
2543   // this requires a full ogg parse, but we know we can fetch from f.stream
2544 
2545   // instead of coding this out explicitly, we could save the current read state,
2546   // read the next packet with get8() until end-of-packet, check f.eof, then
2547   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
2548   // of state to restore (primarily the page segment table)
2549 
2550   int s = f.next_seg, first = true;
2551   ubyte *p = f.stream;
2552 
2553   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
2554     for (; s < f.segment_count; ++s) {
2555       p += f.segments[s];
2556       if (f.segments[s] < 255) break; // stop at first short segment
2557     }
2558     // either this continues, or it ends it...
2559     if (end_page && s < f.segment_count-1) return error(f, STBVorbisError.invalid_stream);
2560     if (s == f.segment_count) s = -1; // set 'crosses page' flag
2561     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2562     first = false;
2563   }
2564   while (s == -1) {
2565     ubyte* q = void;
2566     int n = void;
2567     // check that we have the page header ready
2568     if (p+26 >= f.stream_end) return error(f, STBVorbisError.need_more_data);
2569     // validate the page
2570     if (memcmp(p, ogg_page_header.ptr, 4)) return error(f, STBVorbisError.invalid_stream);
2571     if (p[4] != 0) return error(f, STBVorbisError.invalid_stream);
2572     if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
2573       if (f.previous_length && (p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2574       // if no previous length, we're resynching, so we can come in on a continued-packet,
2575       // which we'll just drop
2576     } else {
2577       if (!(p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2578     }
2579     n = p[26]; // segment counts
2580     q = p+27; // q points to segment table
2581     p = q+n; // advance past header
2582     // make sure we've read the segment table
2583     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2584     for (s = 0; s < n; ++s) {
2585       p += q[s];
2586       if (q[s] < 255) break;
2587     }
2588     if (end_page && s < n-1) return error(f, STBVorbisError.invalid_stream);
2589     if (s == n) s = -1; // set 'crosses page' flag
2590     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2591     first = false;
2592   }
2593   return true;
2594 }
2595 +/
2596 
// Reads the three Vorbis header packets from the stream and prepares the
// decoder for audio decoding.
//
//   1. identification header: channel count, sample rate, the two block sizes;
//   2. comment header: either stored into f.comment_data (when f.read_comments
//      is set) or skipped;
//   3. setup header: codebooks, time-domain transforms (must be zero), floors
//      (only floor 1 is supported), residues, mappings and modes.
//
// After the headers it allocates the per-channel work buffers, initialises
// both block sizes, computes f.temp_memory_required, and records the file
// offset of the first audio page in f.first_audio_page_offset.
//
// Returns true on success. On failure it returns false, or the value of
// error(f, ...) -- presumably a false value with the error code stored in
// the decoder; confirm against error().
private int start_decoder (VorbisDecoder f) {
  import core.stdc.string : memcpy, memset;

  ubyte[6] header;
  ubyte x, y;
  int len, max_submaps = 0;
  int longest_floorlist = 0;

  // first page, first packet

  if (!start_page(f)) return false;
  // validate page flag
  if (!(f.page_flag&PAGEFLAG_first_page)) return error(f, STBVorbisError.invalid_first_page);
  if (f.page_flag&PAGEFLAG_last_page) return error(f, STBVorbisError.invalid_first_page);
  if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.invalid_first_page);
  // check for expected packet length
  if (f.segment_count != 1) return error(f, STBVorbisError.invalid_first_page);
  if (f.segments[0] != 30) return error(f, STBVorbisError.invalid_first_page);
  // read packet
  // check packet header
  if (get8(f) != VorbisPacket.id) return error(f, STBVorbisError.invalid_first_page);
  if (!getn(f, header.ptr, 6)) return error(f, STBVorbisError.unexpected_eof);
  if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_first_page);
  // vorbis_version
  if (get32(f) != 0) return error(f, STBVorbisError.invalid_first_page);
  f.vrchannels = get8(f); if (!f.vrchannels) return error(f, STBVorbisError.invalid_first_page);
  if (f.vrchannels > STB_VORBIS_MAX_CHANNELS) return error(f, STBVorbisError.too_many_channels);
  f.sample_rate = get32(f); if (!f.sample_rate) return error(f, STBVorbisError.invalid_first_page);
  get32(f); // bitrate_maximum
  get32(f); // bitrate_nominal
  get32(f); // bitrate_minimum
  x = get8(f);
  {
    // low nibble: log2 of the short block size; high nibble: log2 of the long one
    int log0 = x&15;
    int log1 = x>>4;
    f.blocksize_0 = 1<<log0;
    f.blocksize_1 = 1<<log1;
    if (log0 < 6 || log0 > 13) return error(f, STBVorbisError.invalid_setup);
    if (log1 < 6 || log1 > 13) return error(f, STBVorbisError.invalid_setup);
    if (log0 > log1) return error(f, STBVorbisError.invalid_setup);
  }

  // framing_flag
  x = get8(f);
  if (!(x&1)) return error(f, STBVorbisError.invalid_first_page);

  // second packet! (comments)
  if (!start_page(f)) return false;

  // read comments
  if (!start_packet(f)) return false;

  if (f.read_comments) {
    /+if (f.push_mode) {
      if (!is_whole_packet_present(f, true)) {
        // convert error in ogg header to write type
        if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
        return false;
      }
    }+/
    if (get8_packet(f) != VorbisPacket.comment) return error(f, STBVorbisError.invalid_setup);
    foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
    if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);

    // skip vendor id
    uint vidsize = get32_packet(f);
    //{ import core.stdc.stdio; printf("vendor size: %u\n", vidsize); }
    if (vidsize == EOP) return error(f, STBVorbisError.invalid_setup);
    while (vidsize--) get8_packet(f);

    // read comments section
    uint cmtcount = get32_packet(f);
    if (cmtcount == EOP) return error(f, STBVorbisError.invalid_setup);
    if (cmtcount > 0) {
      uint cmtsize = 32768; // this should be enough for everyone
      f.comment_data = setup_malloc!ubyte(f, cmtsize);
      if (f.comment_data is null) return error(f, STBVorbisError.outofmem);
      auto cmtpos = 0;
      auto d = f.comment_data;
      // each stored comment is a 2-byte little-endian size (which counts the
      // two size bytes themselves) followed by the raw comment bytes
      while (cmtcount--) {
        uint linelen = get32_packet(f);
        //{ import core.stdc.stdio; printf("linelen: %u; lines left: %u\n", linelen, cmtcount); }
        if (linelen == EOP || linelen > ushort.max-2) break;
        if (linelen == 0) { continue; }
        // stop (keeping what was read so far) once the fixed buffer would overflow
        if (cmtpos+2+linelen > cmtsize) break;
        cmtpos += linelen+2;
        *d++ = (linelen+2)&0xff;
        *d++ = ((linelen+2)>>8)&0xff;
        while (linelen--) {
          auto b = get8_packet(f);
          if (b == EOP) return error(f, STBVorbisError.outofmem);
          *d++ = cast(ubyte)b;
        }
        //{ import core.stdc.stdio; printf("%u bytes of comments read\n", cmtpos); }
        f.comment_size = cmtpos;
      }
    }
    flush_packet(f);
    f.comment_rewind();
  } else {
    // skip comments
    do {
      len = next_segment(f);
      skip(f, len);
      f.bytes_in_seg = 0;
    } while (len);
  }

  // third packet!
  if (!start_packet(f)) return false;

  /+if (f.push_mode) {
    if (!is_whole_packet_present(f, true)) {
      // convert error in ogg header to write type
      if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
      return false;
    }
  }+/

  if (get8_packet(f) != VorbisPacket.setup) return error(f, STBVorbisError.invalid_setup);
  foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
  if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);

  // codebooks
  f.codebook_count = get_bits!8(f)+1;
  f.codebooks = setup_malloc!Codebook(f, f.codebook_count);
  static assert((*f.codebooks).sizeof == Codebook.sizeof);
  if (f.codebooks is null) return error(f, STBVorbisError.outofmem);
  memset(f.codebooks, 0, (*f.codebooks).sizeof*f.codebook_count);
  foreach (immutable i; 0..f.codebook_count) {
    uint* values;
    int ordered, sorted_count;
    int total = 0;
    ubyte* lengths;
    Codebook* c = f.codebooks+i;
    // every codebook starts with the sync pattern 0x42 0x43 0x56
    x = get_bits!8(f); if (x != 0x42) return error(f, STBVorbisError.invalid_setup);
    x = get_bits!8(f); if (x != 0x43) return error(f, STBVorbisError.invalid_setup);
    x = get_bits!8(f); if (x != 0x56) return error(f, STBVorbisError.invalid_setup);
    x = get_bits!8(f);
    c.dimensions = (get_bits!8(f)<<8)+x;
    x = get_bits!8(f);
    y = get_bits!8(f);
    c.entries = (get_bits!8(f)<<16)+(y<<8)+x;
    ordered = get_bits!1(f);
    c.sparse = (ordered ? 0 : get_bits!1(f));

    if (c.dimensions == 0 && c.entries != 0) return error(f, STBVorbisError.invalid_setup);

    // sparse books keep the full length table only temporarily
    if (c.sparse) {
      lengths = cast(ubyte*)setup_temp_malloc(f, c.entries);
    } else {
      lengths = c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
    }

    if (lengths is null) return error(f, STBVorbisError.outofmem);

    if (ordered) {
      // lengths are given as runs: n entries of length L, then of L+1, ...
      int current_entry = 0;
      int current_length = get_bits_add_no!5(f, 1);
      while (current_entry < c.entries) {
        int limit = c.entries-current_entry;
        int n = get_bits_main(f, ilog(limit));
        if (current_entry+n > cast(int)c.entries) return error(f, STBVorbisError.invalid_setup);
        memset(lengths+current_entry, current_length, n);
        current_entry += n;
        ++current_length;
      }
    } else {
      foreach (immutable j; 0..c.entries) {
        int present = (c.sparse ? get_bits!1(f) : 1);
        if (present) {
          lengths[j] = get_bits_add_no!5(f, 1);
          ++total;
          if (lengths[j] == 32) return error(f, STBVorbisError.invalid_setup);
        } else {
          lengths[j] = NO_CODE;
        }
      }
    }

    if (c.sparse && total >= c.entries>>2) {
      // convert sparse items to non-sparse!
      if (c.entries > cast(int)f.setup_temp_memory_required) f.setup_temp_memory_required = c.entries;
      c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
      if (c.codeword_lengths is null) return error(f, STBVorbisError.outofmem);
      memcpy(c.codeword_lengths, lengths, c.entries);
      setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
      lengths = c.codeword_lengths;
      c.sparse = 0;
    }

    // compute the size of the sorted tables
    if (c.sparse) {
      sorted_count = total;
    } else {
      sorted_count = 0;
      version(STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH) {} else {
        foreach (immutable j; 0..c.entries) if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) ++sorted_count;
      }
    }

    c.sorted_entries = sorted_count;
    values = null;

    if (!c.sparse) {
      c.codewords = setup_malloc!uint(f, c.entries);
      if (!c.codewords) return error(f, STBVorbisError.outofmem);
    } else {
      if (c.sorted_entries) {
        c.codeword_lengths = setup_malloc!ubyte(f, c.sorted_entries);
        if (!c.codeword_lengths) return error(f, STBVorbisError.outofmem);
        c.codewords = cast(uint*)setup_temp_malloc(f, cast(int)(*c.codewords).sizeof*c.sorted_entries);
        if (!c.codewords) return error(f, STBVorbisError.outofmem);
        values = cast(uint*)setup_temp_malloc(f, cast(int)(*values).sizeof*c.sorted_entries);
        if (!values) return error(f, STBVorbisError.outofmem);
      }
      // track the peak temporary memory needed during setup
      uint size = c.entries+cast(int)((*c.codewords).sizeof+(*values).sizeof)*c.sorted_entries;
      if (size > f.setup_temp_memory_required) f.setup_temp_memory_required = size;
    }

    if (!compute_codewords(c, lengths, c.entries, values)) {
      if (c.sparse) setup_temp_free(f, values, 0);
      return error(f, STBVorbisError.invalid_setup);
    }

    if (c.sorted_entries) {
      // allocate an extra slot for sentinels
      c.sorted_codewords = setup_malloc!uint(f, c.sorted_entries+1);
      if (c.sorted_codewords is null) return error(f, STBVorbisError.outofmem);
      // allocate an extra slot at the front so that c.sorted_values[-1] is defined
      // so that we can catch that case without an extra if
      c.sorted_values = setup_malloc!int(f, c.sorted_entries+1);
      if (c.sorted_values is null) return error(f, STBVorbisError.outofmem);
      ++c.sorted_values;
      c.sorted_values[-1] = -1;
      compute_sorted_huffman(c, lengths, values);
    }

    if (c.sparse) {
      // release the temporaries in reverse order of allocation
      setup_temp_free(f, values, cast(int)(*values).sizeof*c.sorted_entries);
      setup_temp_free(f, c.codewords, cast(int)(*c.codewords).sizeof*c.sorted_entries);
      setup_temp_free(f, lengths, c.entries);
      c.codewords = null;
    }

    compute_accelerated_huffman(c);

    c.lookup_type = get_bits!4(f);
    if (c.lookup_type > 2) return error(f, STBVorbisError.invalid_setup);
    if (c.lookup_type > 0) {
      ushort* mults;
      c.minimum_value = float32_unpack(get_bits!32(f));
      c.delta_value = float32_unpack(get_bits!32(f));
      c.value_bits = get_bits_add_no!4(f, 1);
      c.sequence_p = get_bits!1(f);
      if (c.lookup_type == 1) {
        c.lookup_values = lookup1_values(c.entries, c.dimensions);
      } else {
        c.lookup_values = c.entries*c.dimensions;
      }
      if (c.lookup_values == 0) return error(f, STBVorbisError.invalid_setup);
      mults = cast(ushort*)setup_temp_malloc(f, cast(int)(mults[0]).sizeof*c.lookup_values);
      if (mults is null) return error(f, STBVorbisError.outofmem);
      foreach (immutable j; 0..cast(int)c.lookup_values) {
        int q = get_bits_main(f, c.value_bits);
        if (q == EOP) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.invalid_setup); }
        mults[j] = cast(ushort)q; //k8
      }

      version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
        if (c.lookup_type == 1) {
          int sparse = c.sparse; //len
          float last = 0;
          // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
          if (sparse) {
            if (c.sorted_entries == 0) goto skip;
            c.multiplicands = setup_malloc!codetype(f, c.sorted_entries*c.dimensions);
          } else {
            c.multiplicands = setup_malloc!codetype(f, c.entries*c.dimensions);
          }
          if (c.multiplicands is null) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
          foreach (immutable j; 0..(sparse ? c.sorted_entries : c.entries)) {
            uint z = (sparse ? c.sorted_values[j] : j);
            uint div = 1;
            foreach (immutable k; 0..c.dimensions) {
              int off = (z/div)%c.lookup_values;
              float val = mults[off];
              val = val*c.delta_value+c.minimum_value+last;
              c.multiplicands[j*c.dimensions+k] = val;
              if (c.sequence_p) last = val;
              if (k+1 < c.dimensions) {
                // reject books where the next divisor would overflow 32 bits
                if (div > uint.max/cast(uint)c.lookup_values) {
                  setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
                  return error(f, STBVorbisError.invalid_setup);
                }
                div *= c.lookup_values;
              }
            }
          }
          // the table is now fully expanded, so treat it as a type-2 lookup
          c.lookup_type = 2;
          goto skip;
        }
        //else
      }
      {
        float last = 0;
        c.multiplicands = setup_malloc!codetype(f, c.lookup_values);
        if (c.multiplicands is null) { setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
        foreach (immutable j; 0..cast(int)c.lookup_values) {
          float val = mults[j]*c.delta_value+c.minimum_value+last;
          c.multiplicands[j] = val;
          if (c.sequence_p) last = val;
        }
      }
     //version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
     skip: // this is versioned out in C
      setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
    }
  }

  // time domain transfers (notused)
  x = get_bits_add_no!6(f, 1);
  foreach (immutable i; 0..x) {
    auto z = get_bits!16(f);
    if (z != 0) return error(f, STBVorbisError.invalid_setup);
  }

  // Floors
  f.floor_count = get_bits_add_no!6(f, 1);
  f.floor_config = setup_malloc!Floor(f, f.floor_count);
  if (f.floor_config is null) return error(f, STBVorbisError.outofmem);
  foreach (immutable i; 0..f.floor_count) {
    f.floor_types[i] = get_bits!16(f);
    if (f.floor_types[i] > 1) return error(f, STBVorbisError.invalid_setup);
    if (f.floor_types[i] == 0) {
      // floor 0 is parsed but then rejected as unsupported
      Floor0* g = &f.floor_config[i].floor0;
      g.order = get_bits!8(f);
      g.rate = get_bits!16(f);
      g.bark_map_size = get_bits!16(f);
      g.amplitude_bits = get_bits!6(f);
      g.amplitude_offset = get_bits!8(f);
      g.number_of_books = get_bits_add_no!4(f, 1);
      foreach (immutable j; 0..g.number_of_books) g.book_list[j] = get_bits!8(f);
      return error(f, STBVorbisError.feature_not_supported);
    } else {
      Point[31*8+2] p;
      Floor1 *g = &f.floor_config[i].floor1;
      int max_class = -1;
      g.partitions = get_bits!5(f);
      foreach (immutable j; 0..g.partitions) {
        g.partition_class_list[j] = get_bits!4(f);
        if (g.partition_class_list[j] > max_class) max_class = g.partition_class_list[j];
      }
      foreach (immutable j; 0..max_class+1) {
        g.class_dimensions[j] = get_bits_add_no!3(f, 1);
        g.class_subclasses[j] = get_bits!2(f);
        if (g.class_subclasses[j]) {
          g.class_masterbooks[j] = get_bits!8(f);
          if (g.class_masterbooks[j] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
        }
        foreach (immutable k; 0..1<<g.class_subclasses[j]) {
          g.subclass_books[j].ptr[k] = get_bits!8(f)-1;
          if (g.subclass_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
        }
      }
      g.floor1_multiplier = get_bits_add_no!2(f, 1);
      g.rangebits = get_bits!4(f);
      // the first two X positions are implicit: 0 and 2^rangebits
      g.Xlist[0] = 0;
      g.Xlist[1] = cast(ushort)(1<<g.rangebits); //k8
      g.values = 2;
      foreach (immutable j; 0..g.partitions) {
        int c = g.partition_class_list[j];
        foreach (immutable k; 0..g.class_dimensions[c]) {
          g.Xlist[g.values] = cast(ushort)get_bits_main(f, g.rangebits); //k8
          ++g.values;
        }
      }
      assert(g.values <= ushort.max);
      // precompute the sorting
      foreach (ushort j; 0..cast(ushort)g.values) {
        p[j].x = g.Xlist[j];
        p[j].y = j;
      }
      qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
      foreach (uint j; 0..g.values) g.sorted_order.ptr[j] = cast(ubyte)p.ptr[j].y;
      // precompute the neighbors
      foreach (uint j; 2..g.values) {
        ushort low = void, hi = void;
        neighbors(g.Xlist.ptr, j, &low, &hi);
        assert(low <= ubyte.max);
        assert(hi <= ubyte.max);
        g.neighbors[j].ptr[0] = cast(ubyte)low;
        g.neighbors[j].ptr[1] = cast(ubyte)hi;
      }
      // finalY buffers (allocated below) are sized for the longest floor list
      if (g.values > longest_floorlist) longest_floorlist = g.values;
    }
  }

  // Residue
  f.residue_count = get_bits_add_no!6(f, 1);
  f.residue_config = setup_malloc!Residue(f, f.residue_count);
  if (f.residue_config is null) return error(f, STBVorbisError.outofmem);
  memset(f.residue_config, 0, f.residue_count*(f.residue_config[0]).sizeof);
  foreach (immutable i; 0..f.residue_count) {
    ubyte[64] residue_cascade;
    Residue* r = f.residue_config+i;
    f.residue_types[i] = get_bits!16(f);
    if (f.residue_types[i] > 2) return error(f, STBVorbisError.invalid_setup);
    r.begin = get_bits!24(f);
    r.end = get_bits!24(f);
    if (r.end < r.begin) return error(f, STBVorbisError.invalid_setup);
    r.part_size = get_bits_add_no!24(f, 1);
    r.classifications = get_bits_add_no!6(f, 1);
    r.classbook = get_bits!8(f);
    if (r.classbook >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
    foreach (immutable j; 0..r.classifications) {
      // cascade bitmap: 3 low bits, plus 5 optional high bits
      ubyte high_bits = 0;
      ubyte low_bits = get_bits!3(f);
      if (get_bits!1(f)) high_bits = get_bits!5(f);
      assert(high_bits*8+low_bits <= ubyte.max);
      residue_cascade[j] = cast(ubyte)(high_bits*8+low_bits);
    }
    static assert(r.residue_books[0].sizeof == 16);
    r.residue_books = setup_malloc!(short[8])(f, r.classifications);
    if (r.residue_books is null) return error(f, STBVorbisError.outofmem);
    foreach (immutable j; 0..r.classifications) {
      foreach (immutable k; 0..8) {
        if (residue_cascade[j]&(1<<k)) {
          r.residue_books[j].ptr[k] = get_bits!8(f);
          if (r.residue_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
        } else {
          r.residue_books[j].ptr[k] = -1;
        }
      }
    }
    // precompute the classifications[] array to avoid inner-loop mod/divide
    // call it 'classdata' since we already have r.classifications
    r.classdata = setup_malloc!(ubyte*)(f, f.codebooks[r.classbook].entries);
    if (!r.classdata) return error(f, STBVorbisError.outofmem);
    memset(r.classdata, 0, (*r.classdata).sizeof*f.codebooks[r.classbook].entries);
    foreach (immutable j; 0..f.codebooks[r.classbook].entries) {
      int classwords = f.codebooks[r.classbook].dimensions;
      int temp = j;
      r.classdata[j] = setup_malloc!ubyte(f, classwords);
      if (r.classdata[j] is null) return error(f, STBVorbisError.outofmem);
      // split entry index j into base-'classifications' digits, last digit first
      foreach_reverse (immutable k; 0..classwords) {
        assert(temp%r.classifications >= 0 && temp%r.classifications <= ubyte.max);
        r.classdata[j][k] = cast(ubyte)(temp%r.classifications);
        temp /= r.classifications;
      }
    }
  }

  // Mappings
  f.mapping_count = get_bits_add_no!6(f, 1);
  f.mapping = setup_malloc!Mapping(f, f.mapping_count);
  if (f.mapping is null) return error(f, STBVorbisError.outofmem);
  memset(f.mapping, 0, f.mapping_count*(*f.mapping).sizeof);
  foreach (immutable i; 0..f.mapping_count) {
    Mapping* m = f.mapping+i;
    int mapping_type = get_bits!16(f);
    if (mapping_type != 0) return error(f, STBVorbisError.invalid_setup);
    m.chan = setup_malloc!MappingChannel(f, f.vrchannels);
    if (m.chan is null) return error(f, STBVorbisError.outofmem);
    m.submaps = (get_bits!1(f) ? get_bits_add_no!4(f, 1) : 1);
    if (m.submaps > max_submaps) max_submaps = m.submaps;
    if (get_bits!1(f)) {
      m.coupling_steps = get_bits_add_no!8(f, 1);
      foreach (immutable k; 0..m.coupling_steps) {
        m.chan[k].magnitude = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
        m.chan[k].angle = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
        if (m.chan[k].magnitude >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
        if (m.chan[k].angle     >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
        if (m.chan[k].magnitude == m.chan[k].angle) return error(f, STBVorbisError.invalid_setup);
      }
    } else {
      m.coupling_steps = 0;
    }

    // reserved field
    if (get_bits!2(f)) return error(f, STBVorbisError.invalid_setup);
    if (m.submaps > 1) {
      foreach (immutable j; 0..f.vrchannels) {
        m.chan[j].mux = get_bits!4(f);
        if (m.chan[j].mux >= m.submaps) return error(f, STBVorbisError.invalid_setup);
      }
    } else {
      // @SPECIFICATION: this case is missing from the spec
      foreach (immutable j; 0..f.vrchannels) m.chan[j].mux = 0;
    }
    foreach (immutable j; 0..m.submaps) {
      get_bits!8(f); // discard
      m.submap_floor[j] = get_bits!8(f);
      m.submap_residue[j] = get_bits!8(f);
      if (m.submap_floor[j] >= f.floor_count) return error(f, STBVorbisError.invalid_setup);
      if (m.submap_residue[j] >= f.residue_count) return error(f, STBVorbisError.invalid_setup);
    }
  }

  // Modes
  f.mode_count = get_bits_add_no!6(f, 1);
  foreach (immutable i; 0..f.mode_count) {
    Mode* m = f.mode_config.ptr+i;
    m.blockflag = get_bits!1(f);
    m.windowtype = get_bits!16(f);
    m.transformtype = get_bits!16(f);
    m.mapping = get_bits!8(f);
    if (m.windowtype != 0) return error(f, STBVorbisError.invalid_setup);
    if (m.transformtype != 0) return error(f, STBVorbisError.invalid_setup);
    if (m.mapping >= f.mapping_count) return error(f, STBVorbisError.invalid_setup);
  }

  flush_packet(f);

  // no overlap data exists yet
  f.previous_length = 0;

  // per-channel work buffers, sized for the long block
  foreach (immutable i; 0..f.vrchannels) {
    f.channel_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1);
    f.previous_window.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
    f.finalY.ptr[i]          = setup_malloc!short(f, longest_floorlist);
    if (f.channel_buffers.ptr[i] is null || f.previous_window.ptr[i] is null || f.finalY.ptr[i] is null) return error(f, STBVorbisError.outofmem);
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      f.floor_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
      if (f.floor_buffers.ptr[i] is null) return error(f, STBVorbisError.outofmem);
    }
  }

  if (!init_blocksize(f, 0, f.blocksize_0)) return false;
  if (!init_blocksize(f, 1, f.blocksize_1)) return false;
  f.blocksize.ptr[0] = f.blocksize_0;
  f.blocksize.ptr[1] = f.blocksize_1;

  version(STB_VORBIS_DIVIDE_TABLE) {
    // fill the shared divide table once; entry [1][1] doubles as the "initialised" marker
    if (integer_divide_table[1].ptr[1] == 0) {
      foreach (immutable i; 0..DIVTAB_NUMER) foreach (immutable j; 1..DIVTAB_DENOM) integer_divide_table[i].ptr[j] = i/j;
    }
  }

  // compute how much temporary memory is needed

  // 1.
  {
    // the requirement is the larger of the IMDCT scratch and the residue
    // classification scratch
    uint imdct_mem = (f.blocksize_1*cast(uint)(float).sizeof>>1);
    uint classify_mem;
    int max_part_read = 0;
    foreach (immutable i; 0..f.residue_count) {
      Residue* r = f.residue_config+i;
      int n_read = r.end-r.begin;
      int part_read = n_read/r.part_size;
      if (part_read > max_part_read) max_part_read = part_read;
    }
    version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
      classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(int*).sizeof);
    } else {
      classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(ubyte*).sizeof);
    }
    f.temp_memory_required = classify_mem;
    if (imdct_mem > f.temp_memory_required) f.temp_memory_required = imdct_mem;
  }

  f.first_decode = true;

  /+
  if (f.alloc.alloc_buffer) {
    debug(stb_vorbis) assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
    // check if there's enough temp memory so we don't error later
    if (f.setup_offset+ /*(*f).sizeof+*/ f.temp_memory_required > cast(uint)f.temp_offset) return error(f, STBVorbisError.outofmem);
  }
  +/

  // remember where audio data starts
  f.first_audio_page_offset = f.fileOffset();

  return true;
}
// Scan forward from the current read position for the next Ogg page whose
// stored CRC matches the CRC computed over its header, lacing table and payload.
//
// Params:
//   f    = decoder whose stream is scanned
//   end  = if non-null, receives the file offset just past the page that matched
//   last = if non-null, receives 1 when header byte 5 has bit 0x04 set, else 0
//
// Returns: 1 after rewinding the stream to `retry_loc-1` (the byte that started
// the matching candidate), or 0 when `f.eof` is hit or the stream-length check
// fails before a matching page is seen.
private uint vorbis_find_page (VorbisDecoder f, uint* end, uint* last) {
  for (;;) {
    if (f.eof) return 0;
    auto n = get8(f);
    if (n == 0x4f) { // page header candidate
      // offset of the byte following the candidate's first byte; used for both rewinds below
      uint retry_loc = f.fileOffset;
      // check if we're off the end of a file_section stream
      // NOTE(review): unsigned arithmetic -- for retry_loc < 25 the subtraction wraps
      // and this test is true; confirm that is never reached in practice
      if (retry_loc-25 > f.stream_len) return 0;
      // check the rest of the header
      int i = void;
      for (i = 1; i < 4; ++i) if (get8(f) != ogg_page_header[i]) break;
      if (f.eof) return 0;
      if (i == 4) {
        // all four capture-pattern bytes matched: read the fixed 27-byte page header
        ubyte[27] header;
        //for (i=0; i < 4; ++i) header[i] = ogg_page_header[i];
        header[0..4] = cast(immutable(ubyte)[])ogg_page_header[0..4];
        for (i = 4; i < 27; ++i) header[i] = get8(f);
        if (f.eof) return 0;
        // byte 4 must be zero for the candidate to be accepted
        if (header[4] != 0) goto invalid;
        // bytes 22..25 hold the stored CRC (little-endian); they are zeroed
        // before the CRC is recomputed over the header
        uint goal = header[22]+(header[23]<<8)+(header[24]<<16)+(header[25]<<24);
        for (i = 22; i < 26; ++i) header[i] = 0;
        uint crc = 0;
        for (i = 0; i < 27; ++i) crc = crc32_update(crc, header[i]);
        // header[26] is the number of lacing values; their sum is the payload length
        uint len = 0;
        for (i = 0; i < header[26]; ++i) {
          auto s = get8(f);
          crc = crc32_update(crc, s);
          len += s;
        }
        if (len && f.eof) return 0;
        // fold the payload bytes into the CRC as well
        for (i = 0; i < len; ++i) crc = crc32_update(crc, get8(f));
        // finished parsing probable page
        if (crc == goal) {
          // we could now check that it's either got the last
          // page flag set, OR it's followed by the capture
          // pattern, but I guess TECHNICALLY you could have
          // a file with garbage between each ogg page and recover
          // from it automatically? So even though that paranoia
          // might decrease the chance of an invalid decode by
          // another 2^32, not worth it since it would hose those
          // invalid-but-useful files?
          if (end) *end = f.fileOffset;
          if (last) *last = (header[5]&0x04 ? 1 : 0);
          // rewind onto the first byte of the candidate that just matched
          set_file_offset(f, retry_loc-1);
          return 1;
        }
      }
     invalid:
      // not a valid page, so rewind and look for next one
      set_file_offset(f, retry_loc);
    }
  }
  // not reachable: the loop above only leaves via `return`
  assert(0);
}
3224 
// sentinel sample position with all 32 bits set (the same value as the `~0U`
// that the seek code compares `last_decoded_sample` against)
enum SAMPLE_unknown = 0xffffffff;
3226 
3227 // seeking is implemented with a binary search, which narrows down the range to
3228 // 64K, before using a linear search (because finding the synchronization
3229 // pattern can be expensive, and the chance we'd find the end page again is
3230 // relatively high for small ranges)
3231 //
3232 // two initial interpolation-style probes are used at the start of the search
3233 // to try to bound either side of the binary search sensibly, while still
3234 // working in O(log n) time if they fail.
// Fill `z` with the start offset, end offset and last decoded sample of the
// page that begins at the current read position, then rewind to that position.
// Returns 1 on success, 0 if the bytes read do not begin with "OggS" (in which
// case the stream is NOT rewound and only `z.page_start` has been written).
private int get_seek_page_info (VorbisDecoder f, ProbedPage* z) {
  enum HEADER_BYTES = 27;
  ubyte[HEADER_BYTES] hdr;
  ubyte[255] segtable;

  // the page begins wherever the stream currently points
  z.page_start = f.fileOffset;

  // fixed-size header first; bail out unless it opens with the capture pattern
  getn(f, hdr.ptr, HEADER_BYTES);
  if (!(hdr[0] == 'O' && hdr[1] == 'g' && hdr[2] == 'g' && hdr[3] == 'S')) return 0;

  // hdr[26] lacing values follow; added together they give the payload size
  immutable int nsegs = hdr[26];
  getn(f, segtable.ptr, nsegs);
  uint payload = 0;
  foreach (immutable s; 0..nsegs) payload += segtable[s];

  // header + lacing table + payload
  z.page_end = z.page_start+HEADER_BYTES+nsegs+payload;

  // header bytes 6..9, little-endian
  z.last_decoded_sample = hdr[6]|(hdr[7]<<8)|(hdr[8]<<16)|(hdr[9]<<24);

  // leave the stream where we found it
  set_file_offset(f, z.page_start);
  return 1;
}
3261 
// rarely used function to seek back to the preceding page while finding the start of a packet
// Position the stream on the page that contains the byte just before
// `limit_offset`: the scan starts up to 64K earlier (never before the first
// audio page) and walks forward page by page.
// Returns 1 when such a page was found (stream left at its start), 0 otherwise.
private int go_to_page_before (VorbisDecoder f, uint limit_offset) {
  enum uint BACKTRACK = 65536;
  uint pageEnd;

  // start from the first audio page unless a full 64K step back stays at or after it
  uint scanFrom = f.first_audio_page_offset;
  if (limit_offset >= BACKTRACK && limit_offset-BACKTRACK >= f.first_audio_page_offset) {
    scanFrom = limit_offset-BACKTRACK;
  }
  set_file_offset(f, scanFrom);

  while (vorbis_find_page(f, &pageEnd, null)) {
    // accept the page that starts before the limit and ends at or after it
    immutable bool startsBefore = (f.fileOffset < limit_offset);
    if (pageEnd >= limit_offset && startsBefore) return 1;
    // otherwise continue scanning right after this page
    set_file_offset(f, pageEnd);
  }

  return 0;
}
3282 
3283 // implements the search logic for finding a page and starting decoding. if
3284 // the function succeeds, current_loc_valid will be true and current_loc will
3285 // be less than or equal to the provided sample number (the closer the
3286 // better).
// Params:
//   f             = decoder to reposition
//   sample_number = target sample; reduced by `padding` (clamped at 0) before searching
//
// Returns: 1 on success; otherwise the result of `error(f, ...)` with
// `seek_without_length`, `seek_invalid` or `seek_failed`, or 0 when
// `vorbis_pump_first_frame` fails. Failures that jump to the `error` label
// call `f.seekStart` before reporting `seek_failed`.
private int seek_to_sample_coarse (VorbisDecoder f, uint sample_number) {
  ProbedPage left, right, mid;
  int i, start_seg_with_known_loc, end_pos, page_start;
  uint delta, stream_length, padding;
  double offset, bytes_per_sample;
  int probe = 0;

  // find the last page and validate the target sample
  stream_length = f.streamLengthInSamples;
  if (stream_length == 0) return error(f, STBVorbisError.seek_without_length);
  if (sample_number > stream_length) return error(f, STBVorbisError.seek_invalid);

  // this is the maximum difference between the window-center (which is the
  // actual granule position value), and the right-start (which the spec
  // indicates should be the granule position (give or take one)).
  padding = ((f.blocksize_1-f.blocksize_0)>>2);
  if (sample_number < padding) sample_number = 0; else sample_number -= padding;

  // left bound: the first page, advanced until it carries a sample position
  left = f.p_first;
  while (left.last_decoded_sample == ~0U) {
    // (untested) the first page does not have a 'last_decoded_sample'
    set_file_offset(f, left.page_end);
    if (!get_seek_page_info(f, &left)) goto error;
  }

  // right bound: the last page
  right = f.p_last;
  debug(stb_vorbis) assert(right.last_decoded_sample != ~0U);

  // starting from the start is handled differently
  if (sample_number <= left.last_decoded_sample) {
    f.seekStart;
    return 1;
  }

  // narrow [left, right] until the two pages are adjacent
  while (left.page_end != right.page_start) {
    debug(stb_vorbis) assert(left.page_end < right.page_start);
    // search range in bytes
    delta = right.page_start-left.page_end;
    if (delta <= 65536) {
      // there's only 64K left to search - handle it linearly
      set_file_offset(f, left.page_end);
    } else {
      if (probe < 2) {
        if (probe == 0) {
          // first probe (interpolate)
          double data_bytes = right.page_end-left.page_start;
          bytes_per_sample = data_bytes/right.last_decoded_sample;
          offset = left.page_start+bytes_per_sample*(sample_number-left.last_decoded_sample);
        } else {
          // second probe (try to bound the other side)
          // the correction is at least 8000 bytes in magnitude, and is applied doubled
          double error = (cast(double)sample_number-mid.last_decoded_sample)*bytes_per_sample;
          if (error >= 0 && error <  8000) error =  8000;
          if (error <  0 && error > -8000) error = -8000;
          offset += error*2;
        }

        // ensure the offset is valid
        if (offset < left.page_end) offset = left.page_end;
        if (offset > right.page_start-65536) offset = right.page_start-65536;

        set_file_offset(f, cast(uint)offset);
      } else {
        // binary search for large ranges (offset by 32K to ensure
        // we don't hit the right page)
        set_file_offset(f, left.page_end+(delta/2)-32768);
      }

      // resynchronise on the next valid page after the probe offset
      if (!vorbis_find_page(f, null, null)) goto error;
    }

    // read page info, skipping forward over pages that carry no sample position
    for (;;) {
      if (!get_seek_page_info(f, &mid)) goto error;
      if (mid.last_decoded_sample != ~0U) break;
      // (untested) no frames end on this page
      set_file_offset(f, mid.page_end);
      debug(stb_vorbis) assert(mid.page_start < right.page_start);
    }

    // if we've just found the last page again then we're in a tricky file,
    // and we're close enough.
    if (mid.page_start == right.page_start) break;

    // keep the half of the range that still contains the target
    if (sample_number < mid.last_decoded_sample) right = mid; else left = mid;

    ++probe;
  }

  // seek back to start of the last packet
  page_start = left.page_start;
  set_file_offset(f, page_start);
  if (!start_page(f)) return error(f, STBVorbisError.seek_failed);
  end_pos = f.end_seg_with_known_loc;
  debug(stb_vorbis) assert(end_pos >= 0);

  for (;;) {
    // walk back over 255-valued lacing entries to the segment where the packet starts
    for (i = end_pos; i > 0; --i) if (f.segments.ptr[i-1] != 255) break;
    start_seg_with_known_loc = i;
    if (start_seg_with_known_loc > 0 || !(f.page_flag&PAGEFLAG_continued_packet)) break;
    // (untested) the final packet begins on an earlier page
    if (!go_to_page_before(f, page_start)) goto error;
    page_start = f.fileOffset;
    if (!start_page(f)) goto error;
    end_pos = f.segment_count-1;
  }

  // prepare to start decoding
  f.current_loc_valid = false;
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  f.previous_length = 0;
  f.next_seg = start_seg_with_known_loc;

  // skip the payload of every segment that precedes the chosen packet
  for (i = 0; i < start_seg_with_known_loc; ++i) skip(f, f.segments.ptr[i]);

  // start decoding (optimizable - this frame is generally discarded)
  if (!vorbis_pump_first_frame(f)) return 0;
  if (f.current_loc > sample_number) return error(f, STBVorbisError.seek_failed);
  return 1;

error:
  // try to restore the file to a valid state
  f.seekStart;
  return error(f, STBVorbisError.seek_failed);
}
3413 
3414 // the same as vorbis_decode_initial, but without advancing
// Runs `vorbis_decode_initial` and then puts the bytes it consumed back, so
// the packet can be decoded again from its start.
// Returns 0 if `vorbis_decode_initial` fails, 1 otherwise.
private int peek_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) return 0;

  // bits consumed: one leading bit, the mode number, plus two more bits when
  // the selected mode has its blockflag set; that is either 1 or 2 whole bytes
  immutable int bitsUsed = 1+ilog(f.mode_count-1)+(f.mode_config.ptr[*mode].blockflag ? 2 : 0);
  immutable int rewind = (bitsUsed+7)/8;

  // give those bytes back to the segment/packet counters and to the stream
  f.bytes_in_seg += rewind;
  f.packet_bytes -= rewind;
  skip(f, -rewind);

  // step back one segment; -1 is mapped to the last segment of the page
  f.next_seg = (f.next_seg == -1 ? f.segment_count-1 : f.next_seg-1);
  f.valid_bits = 0;

  return 1;
}
3431 
3432 // ////////////////////////////////////////////////////////////////////////// //
3433 // utility and supporting functions for getting s16 samples
// bit flags: which output(s) a source channel is mixed into
enum PLAYBACK_MONO  = (1<<0);
enum PLAYBACK_LEFT  = (1<<1);
enum PLAYBACK_RIGHT = (1<<2);

// shorthands used only to keep the table below readable;
// every source channel contributes to the mono mix
enum L = (PLAYBACK_LEFT |PLAYBACK_MONO);
enum C = (PLAYBACK_LEFT |PLAYBACK_RIGHT|PLAYBACK_MONO);
enum R = (PLAYBACK_RIGHT|PLAYBACK_MONO);

// channel_position[channel count][channel index] -> PLAYBACK_* mask.
// Rows are indexed by the number of source channels (0..6); entries beyond
// the listed ones are left at their default value.
immutable byte[6][7] channel_position = [
  [ 0 ],
  [ C ],
  [ L, R ],
  [ L, C, R ],
  [ L, R, L, R ],
  [ L, C, R, L, R ],
  [ L, C, R, L, R, C ],
];
3451 
3452 
// Two interchangeable sets of string mixins for float -> scaled int conversion.
// Both provide:
//   declfcvar!name                  - code declaring whatever scratch variable the conversion needs
//   FAST_SCALED_FLOAT_TO_INT!(x, s) - code that declares `int v` holding `x` scaled by 2^s
// (the placeholders are substituted by `cmacroFixVars`, defined elsewhere)
version(STB_VORBIS_NO_FAST_SCALED_FLOAT) {
  // portable variant: no scratch variable, conversion done by `lrintf`
  enum declfcvar(string name) = "{}";
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    // the multiplier below is hard-coded to 32768 (2^15), so only s == "15" is accepted
    static assert(s == "15");
    enum FAST_SCALED_FLOAT_TO_INT = q{import core.stdc.math : lrintf; int v = lrintf((${x})*32768.0f);}.cmacroFixVars!"x"(x);
  }
} else {
  //k8: actually, this is only marginally faster than using `lrintf()`, but anyway...
  // float and int views of the same 4 bytes
  align(1) union float_conv {
  align(1):
    float f;
    int i;
  }
  // declares the (uninitialised) union the conversion code below writes through
  enum declfcvar(string name) = "float_conv "~name~" = void;";
  static assert(float_conv.i.sizeof == 4 && float_conv.f.sizeof == 4);
  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
  //#define check_endianness()
  enum MAGIC(string SHIFT) = q{(1.5f*(1<<(23-${SHIFT}))+0.5f/(1<<${SHIFT}))}.cmacroFixVars!("SHIFT")(SHIFT);
  enum ADDEND(string SHIFT) = q{(((150-${SHIFT})<<23)+(1<<22))}.cmacroFixVars!("SHIFT")(SHIFT);
  // the generated code refers to a variable literally named `temp`,
  // so callers must use `mixin(declfcvar!"temp")` first
  enum FAST_SCALED_FLOAT_TO_INT(string x, string s) = q{temp.f = (${x})+${MAGIC}; int v = temp.i-${ADDEND};}
    .cmacroFixVars!("x", "s", "MAGIC", "ADDEND")(x, s, MAGIC!(s), ADDEND!(s));
}
3475 
// Convert `len` float samples from `src` into 16-bit samples in `dest`,
// scaling by 2^15 and saturating to the `short` range.
private void copy_samples (short* dest, float* src, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  for (int idx = 0; idx < len; ++idx) {
    // declares `int v` holding the scaled sample
    mixin(FAST_SCALED_FLOAT_TO_INT!("src[idx]", "15"));
    // saturate instead of wrapping
    if (v < -32768) v = -32768;
    else if (v > 32767) v = 32767;
    dest[idx] = cast(short)v; //k8
  }
}
3486 
// Mix every source channel whose `channel_position` entry intersects `mask`
// into one 16-bit output channel.
//
// Params:
//   mask     = PLAYBACK_* bits selecting which source channels contribute
//   output   = destination; receives `len` samples
//   num_c    = number of source channels (also the row used in `channel_position`)
//   data     = per-channel float sample arrays
//   d_offset = first sample to read in each `data[j]`
//   len      = number of samples to produce
private void compute_samples (int mask, short* output, int num_c, float** data, int d_offset, int len) {
  import core.stdc.string : memset;
  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] buffer;
  int n = BUFFER_SIZE;
  //check_endianness();
  mixin(declfcvar!"temp");
  // work in chunks of BUFFER_SIZE samples so the accumulator stays on the stack
  for (uint o = 0; o < len; o += BUFFER_SIZE) {
    memset(buffer.ptr, 0, (buffer).sizeof);
    // final chunk may be short
    if (o+n > len) n = len-o;
    foreach (immutable j; 0..num_c) {
      if (channel_position[num_c].ptr[j]&mask) foreach (immutable i; 0..n) buffer.ptr[i] += data[j][d_offset+o+i];
    }
    foreach (immutable i; 0..n) {
      // declares `int v` holding the scaled sample
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // saturate to the short range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o+i] = cast(short)v; //k8
    }
  }
}
3507 
// Downmix `num_c` source channels into interleaved 16-bit stereo.
//
// Params:
//   output   = destination; receives `len` left/right pairs (2*len shorts)
//   num_c    = number of source channels (also the row used in `channel_position`)
//   data     = per-channel float sample arrays
//   d_offset = first sample to read in each `data[j]`
//   len      = number of sample frames to produce
private void compute_stereo_samples (short* output, int num_c, float** data, int d_offset, int len) {
  import core.stdc.string : memset;

  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] buffer;
  // the buffer holds interleaved pairs, so a chunk is half as many frames
  int n = BUFFER_SIZE>>1;
  // o is the offset in the source data
  //check_endianness();
  mixin(declfcvar!"temp");
  for (uint o = 0; o < len; o += BUFFER_SIZE>>1) {
    // o2 is the offset in the output data
    int o2 = o<<1;
    memset(buffer.ptr, 0, buffer.sizeof);
    // final chunk may be short
    if (o+n > len) n = len-o;
    foreach (immutable j; 0..num_c) {
      // which side(s) this source channel feeds
      int m = channel_position[num_c].ptr[j]&(PLAYBACK_LEFT|PLAYBACK_RIGHT);
      if (m == (PLAYBACK_LEFT|PLAYBACK_RIGHT)) {
        foreach (immutable i; 0..n) {
          buffer.ptr[i*2+0] += data[j][d_offset+o+i];
          buffer.ptr[i*2+1] += data[j][d_offset+o+i];
        }
      } else if (m == PLAYBACK_LEFT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+0] += data[j][d_offset+o+i];
      } else if (m == PLAYBACK_RIGHT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+1] += data[j][d_offset+o+i];
      }
    }
    foreach (immutable i; 0..n<<1) {
      // declares `int v` holding the scaled sample
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // saturate to the short range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o2+i] = cast(short)v; //k8
    }
  }
}
3542 
// Convert decoded float channels into `buf_c` separate 16-bit channel buffers.
//
// When the channel counts differ and the output is mono or stereo (with at
// most 6 source channels), the sources are downmixed via `compute_samples`.
// Otherwise channels are copied one-to-one and any extra output channels are
// filled with silence.
//
// Params:
//   buf_c    = number of output channels
//   buffer   = output channel pointers
//   b_offset = first sample to write in each output channel
//   data_c   = number of source channels
//   data     = source channel pointers
//   d_offset = first sample to read in each source channel
//   samples  = number of samples per channel
private void convert_samples_short (int buf_c, short** buffer, int b_offset, int data_c, float** data, int d_offset, int samples) {
  import core.stdc.string : memset;

  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    // channel_selector[output channel count][output channel] -> PLAYBACK_* mask;
    // `static` so the table is not rebuilt on every call
    static immutable int[2][3] channel_selector = [ [0,0], [PLAYBACK_MONO,0], [PLAYBACK_LEFT, PLAYBACK_RIGHT] ];
    foreach (immutable i; 0..buf_c) compute_samples(channel_selector[buf_c].ptr[i], buffer[i]+b_offset, data_c, data, d_offset, samples);
  } else {
    int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable i; 0..limit) copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
    // output channels with no matching source are zeroed
    foreach (immutable i; limit..buf_c) memset(buffer[i]+b_offset, 0, short.sizeof*samples);
  }
}
3555 
// Convert decoded float channels into one interleaved 16-bit buffer.
//
// When the channel counts differ and the output has at most 2 channels (with
// at most 6 source channels), the sources are downmixed to stereo via
// `compute_stereo_samples`. Otherwise channels are interleaved one-to-one and
// any extra output channels are written as zero.
//
// Params:
//   buf_c    = number of interleaved output channels
//   buffer   = output; receives `len*buf_c` samples
//   data_c   = number of source channels
//   data     = source channel pointers
//   d_offset = first sample to read in each source channel
//   len      = number of sample frames
private void convert_channels_short_interleaved (int buf_c, short* buffer, int data_c, float** data, int d_offset, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    debug(stb_vorbis) assert(buf_c == 2);
    // One call already fills both interleaved channels. This used to sit in a
    // `0..buf_c` loop that repeated the identical call, doing the work twice
    // for stereo; the `buf_c > 0` guard keeps the "no call at all" behaviour
    // that loop had for non-positive channel counts.
    if (buf_c > 0) compute_stereo_samples(buffer, data_c, data, d_offset, len);
  } else {
    int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable j; 0..len) {
      foreach (immutable i; 0..limit) {
        float f = data[i][d_offset+j];
        // declares `int v` holding the scaled sample
        mixin(FAST_SCALED_FLOAT_TO_INT!("f", "15"));//data[i][d_offset+j], 15);
        // saturate to the short range
        if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
        *buffer++ = cast(short)v; //k8
      }
      // output channels with no matching source are silent
      foreach (immutable i; limit..buf_c) *buffer++ = 0;
    }
  }
}
3575 } // @nogc
3576 
3577 
3578 public class VorbisDecoder {
3579 
3580     // modified to use audio-formats I/O callbacks
3581     IOCallbacks* io;
3582     void* userData;
3583 
3584     // run-time results
3585     bool eof = true;
3586     STBVorbisError error;
3587 
3588   // return # of bytes read, 0 on eof, -1 on error
3589   // if called with `buf is null`, do `close()`
3590  // alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3591 
3592   //TODO
3593   static struct Allocator {
3594   static nothrow @nogc: // because
3595     void* alloc (uint sz, VorbisDecoder vb) {
3596       import core.stdc.stdlib : malloc;
3597       return malloc(sz);
3598     }
3599     void free (void* p, VorbisDecoder vb) {
3600       import core.stdc.stdlib : free;
3601       free(p);
3602     }
3603     void* allocTemp (uint sz, VorbisDecoder vb) {
3604       import core.stdc.stdlib : malloc;
3605       return malloc(sz);
3606     }
3607     void freeTemp (void* p, uint sz, VorbisDecoder vb) {
3608       import core.stdc.stdlib : free;
3609       free(p);
3610     }
3611     uint tempSave (VorbisDecoder vb) { return 0; }
3612     void tempRestore (uint pos, VorbisDecoder vb) {}
3613   }
3614 
3615 nothrow @nogc:
3616 private:
3617   bool isOpened;
3618   //readCB stmread;
3619   uint stlastofs = uint.max;
3620   uint stst;
3621   uint stpos;
3622   uint stend;
3623   bool stclose;
3624   FILE* stfl;
3625 
3626 private:
3627   //ubyte* stream;
3628   //ubyte* stream_start;
3629   //ubyte* stream_end;
3630   //uint stream_len;
3631 
3632   /+bool push_mode;+/
3633 
3634   uint first_audio_page_offset;
3635 
3636   ProbedPage p_first, p_last;
3637 
3638   // memory management
3639   Allocator alloc;
3640   int setup_offset;
3641   int temp_offset;
3642 
3643 
3644 
3645   // header info
3646   int[2] blocksize;
3647   int blocksize_0, blocksize_1;
3648   int codebook_count;
3649   Codebook* codebooks;
3650   int floor_count;
3651   ushort[64] floor_types; // varies
3652   Floor* floor_config;
3653   int residue_count;
3654   ushort[64] residue_types; // varies
3655   Residue* residue_config;
3656   int mapping_count;
3657   Mapping* mapping;
3658   int mode_count;
3659   Mode[64] mode_config;  // varies
3660 
3661   uint total_samples;
3662 
3663   // decode buffer
3664   float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
3665   float*[STB_VORBIS_MAX_CHANNELS] outputs;
3666 
3667   float*[STB_VORBIS_MAX_CHANNELS] previous_window;
3668   int previous_length;
3669 
3670   version(STB_VORBIS_NO_DEFER_FLOOR) {
3671     float*[STB_VORBIS_MAX_CHANNELS] floor_buffers;
3672   } else {
3673     short*[STB_VORBIS_MAX_CHANNELS] finalY;
3674   }
3675 
3676   uint current_loc; // sample location of next frame to decode
3677   int current_loc_valid;
3678 
3679   // per-blocksize precomputed data
3680 
3681   // twiddle factors
3682   float*[2] A, B, C;
3683   float*[2] window;
3684   ushort*[2] bit_reverse;
3685 
3686   // current page/packet/segment streaming info
3687   uint serial; // stream serial number for verification
3688   int last_page;
3689   int segment_count;
3690   ubyte[255] segments;
3691   ubyte page_flag;
3692   ubyte bytes_in_seg;
3693   ubyte first_decode;
3694   int next_seg;
3695   int last_seg;  // flag that we're on the last segment
3696   int last_seg_which; // what was the segment number of the last seg?
3697   uint acc;
3698   int valid_bits;
3699   int packet_bytes;
3700   int end_seg_with_known_loc;
3701   uint known_loc_for_packet;
3702   int discard_samples_deferred;
3703   uint samples_output;
3704 
3705   // push mode scanning
3706   /+
3707   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
3708   CRCscan[STB_VORBIS_PUSHDATA_CRC_COUNT] scan;
3709   +/
3710 
3711   // sample-access
3712   int channel_buffer_start;
3713   int channel_buffer_end;
3714 
3715 private: // k8: 'cause i'm evil
3716   // user-accessible info
3717   uint sample_rate;
3718   int vrchannels;
3719 
3720   uint setup_memory_required;
3721   uint temp_memory_required;
3722   uint setup_temp_memory_required;
3723 
3724   bool read_comments;
3725   ubyte* comment_data;
3726   uint comment_size;
3727 
3728   // functions to get comment data
3729   uint comment_data_pos;
3730 
3731 private:
3732 
3733   // return # of bytes read, 0 on eof, -1 on error
3734   // if called with `buf is null`, do `close()`
3735   // alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3736   int stmread(void[] buf, uint ofs, VorbisDecoder vb)
3737   {
3738     io.seek(ofs, false, userData);
3739     int bytes = io.read(buf.ptr, cast(int)(buf.length), userData);
3740     return bytes;
3741   }
3742 
3743   int rawRead (void[] buf) 
3744   {
3745     if (isOpened && buf.length > 0 && stpos < stend) 
3746     {
3747       if (stend-stpos < buf.length) 
3748           buf = buf[0..stend-stpos];
3749       auto rd = stmread(buf, stpos, this);
3750       if (rd > 0) 
3751           stpos += rd;
3752       return rd;
3753     }
3754     return 0;
3755   }
3756 
3757   void rawSkip (int n) 
3758   {
3759     if (isOpened && n > 0) 
3760     { 
3761         if ((stpos += n) > stend) 
3762             stpos = stend; 
3763     } 
3764   }
3765 
3766   void rawSeek (int n) 
3767   { 
3768     if (isOpened) 
3769     { 
3770         stpos = stst+(n < 0 ? 0 : n); 
3771         if (stpos > stend) 
3772             stpos = stend; 
3773     } 
3774   }
3775 
3776   void rawClose () 
3777   { 
3778     if (isOpened) 
3779     { 
3780         isOpened = false; 
3781         //stmread(null, 0, this); 
3782     } 
3783   }
3784 
3785 final:
3786 private:
3787   void doInit () {
3788     import core.stdc..string : memset;
3789     /*
3790     if (z) {
3791       alloc = *z;
3792       alloc.alloc_buffer_length_in_bytes = (alloc.alloc_buffer_length_in_bytes+3)&~3;
3793       temp_offset = alloc.alloc_buffer_length_in_bytes;
3794     }
3795     */
3796     eof = false;
3797     error = STBVorbisError.no_error;
3798     /+stream = null;+/
3799     codebooks = null;
3800     /+page_crc_tests = -1;+/
3801   }
3802 
3803   static int stflRead (void[] buf, uint ofs, VorbisDecoder vb) {
3804     if (buf !is null) {
3805       //{ import core.stdc.stdio; printf("stflRead: ofs=%u; len=%u\n", ofs, cast(uint)buf.length); }
3806       if (vb.stlastofs != ofs) {
3807         import core.stdc.stdio : fseek, SEEK_SET;
3808         vb.stlastofs = ofs;
3809         fseek(vb.stfl, ofs, SEEK_SET);
3810       }
3811       import core.stdc.stdio : fread;
3812       return cast(int)fread(buf.ptr, 1, buf.length, vb.stfl);
3813     } else {
3814       if (vb.stclose) {
3815         import core.stdc.stdio : fclose;
3816         if (vb.stfl !is null) fclose(vb.stfl);
3817       }
3818       vb.stfl = null;
3819       return 0;
3820     }
3821   }
3822 
3823 public:
3824   this () {}
3825   ~this () { close(); }
3826 
3827   this (IOCallbacks* io, void* userData) 
3828   {
3829     this.userData = userData;
3830 	stend = cast(uint) io.getFileLength(userData);
3831 	this.io = io;
3832 	isOpened = true;
3833 	eof = false;
3834 	read_comments = true;
3835 	if (start_decoder(this)) {
3836 		vorbis_pump_first_frame(this);
3837 		return;
3838 	}
3839   }
3840 
3841   @property bool closed () { return !isOpened; }
3842 
3843  /* void open (FILE *fl, bool doclose=true) {
3844     import core.stdc.stdio : ftell, fseek, SEEK_SET, SEEK_END;
3845     close();
3846     if (fl is null) { error = STBVorbisError.invalid_stream; return; }
3847     stclose = doclose;
3848     stst = stpos = cast(uint)ftell(fl);
3849     fseek(fl, 0, SEEK_END);
3850     stend = cast(uint)ftell(fl);
3851     stlastofs = stlastofs.max;
3852     stclose = false;
3853     stfl = fl;
3854     import std.functional : toDelegate;
3855     stmread = toDelegate(&stflRead);
3856     isOpened = true;
3857     eof = false;
3858     read_comments = true;
3859     if (start_decoder(this)) {
3860       vorbis_pump_first_frame(this);
3861       return;
3862     }
3863     auto err = error;
3864     close();
3865     error = err;
3866   }
3867 
3868   void open (const(char)[] filename) {
3869     import core.stdc.stdio : fopen;
3870     import std.internal.cstring; // sorry
3871     close();
3872     FILE* fl = fopen(filename.tempCString, "rb");
3873     if (fl is null) { error = STBVorbisError.file_open_failure; return; }
3874     open(fl, true);
3875   }*/
3876 
3877   /+
3878   void openPushdata(void* data, int data_len, // the memory available for decoding
3879                     int* data_used)           // only defined on success
3880   {
3881     close();
3882     eof = false;
3883     stream = cast(ubyte*)data;
3884     stream_end = stream+data_len;
3885     push_mode = true;
3886     if (!start_decoder(this)) {
3887       auto err = error;
3888       if (eof) err = STBVorbisError.need_more_data; else close();
3889       error = err;
3890       return;
3891     }
3892     *data_used = stream-(cast(ubyte*)data);
3893     error = STBVorbisError.no_error;
3894   }
3895   +/
3896 
3897   void close () {
3898     import core.stdc..string : memset;
3899 
3900     setup_free(this, this.comment_data);
3901     if (this.residue_config) {
3902       foreach (immutable i; 0..this.residue_count) {
3903         Residue* r = this.residue_config+i;
3904         if (r.classdata) {
3905           foreach (immutable j; 0..this.codebooks[r.classbook].entries) setup_free(this, r.classdata[j]);
3906           setup_free(this, r.classdata);
3907         }
3908         setup_free(this, r.residue_books);
3909       }
3910     }
3911 
3912     if (this.codebooks) {
3913       foreach (immutable i; 0..this.codebook_count) {
3914         Codebook* c = this.codebooks+i;
3915         setup_free(this, c.codeword_lengths);
3916         setup_free(this, c.multiplicands);
3917         setup_free(this, c.codewords);
3918         setup_free(this, c.sorted_codewords);
3919         // c.sorted_values[-1] is the first entry in the array
3920         setup_free(this, c.sorted_values ? c.sorted_values-1 : null);
3921       }
3922       setup_free(this, this.codebooks);
3923     }
3924     setup_free(this, this.floor_config);
3925     setup_free(this, this.residue_config);
3926     if (this.mapping) {
3927       foreach (immutable i; 0..this.mapping_count) setup_free(this, this.mapping[i].chan);
3928       setup_free(this, this.mapping);
3929     }
3930     foreach (immutable i; 0..(this.vrchannels > STB_VORBIS_MAX_CHANNELS ? STB_VORBIS_MAX_CHANNELS : this.vrchannels)) {
3931       setup_free(this, this.channel_buffers.ptr[i]);
3932       setup_free(this, this.previous_window.ptr[i]);
3933       version(STB_VORBIS_NO_DEFER_FLOOR) setup_free(this, this.floor_buffers.ptr[i]);
3934       setup_free(this, this.finalY.ptr[i]);
3935     }
3936     foreach (immutable i; 0..2) {
3937       setup_free(this, this.A.ptr[i]);
3938       setup_free(this, this.B.ptr[i]);
3939       setup_free(this, this.C.ptr[i]);
3940       setup_free(this, this.window.ptr[i]);
3941       setup_free(this, this.bit_reverse.ptr[i]);
3942     }
3943 
3944     rawClose();
3945     isOpened = false;
3946     stlastofs = uint.max;
3947     stst = 0;
3948     stpos = 0;
3949     stend = 0;
3950     stclose = false;
3951     stfl = null;
3952 
3953     sample_rate = 0;
3954     vrchannels = 0;
3955 
3956     setup_memory_required = 0;
3957     temp_memory_required = 0;
3958     setup_temp_memory_required = 0;
3959 
3960     read_comments = 0;
3961     comment_data = null;
3962     comment_size = 0;
3963 
3964     comment_data_pos = 0;
3965 
3966     /+
3967     stream = null;
3968     stream_start = null;
3969     stream_end = null;
3970     +/
3971 
3972     //stream_len = 0;
3973 
3974     /+push_mode = false;+/
3975 
3976     first_audio_page_offset = 0;
3977 
3978     p_first = p_first.init;
3979     p_last = p_last.init;
3980 
3981     setup_offset = 0;
3982     temp_offset = 0;
3983 
3984     eof = true;
3985     error = STBVorbisError.no_error;
3986 
3987     blocksize[] = 0;
3988     blocksize_0 = 0;
3989     blocksize_1 = 0;
3990     codebook_count = 0;
3991     codebooks = null;
3992     floor_count = 0;
3993     floor_types[] = 0;
3994     floor_config = null;
3995     residue_count = 0;
3996     residue_types[] = 0;
3997     residue_config = null;
3998     mapping_count = 0;
3999     mapping = null;
4000     mode_count = 0;
4001     mode_config[] = Mode.init;
4002 
4003     total_samples = 0;
4004 
4005     channel_buffers[] = null;
4006     outputs[] = null;
4007 
4008     previous_window[] = null;
4009     previous_length = 0;
4010 
4011     version(STB_VORBIS_NO_DEFER_FLOOR) {
4012       floor_buffers[] = null;
4013     } else {
4014       finalY[] = null;
4015     }
4016 
4017     current_loc = 0;
4018     current_loc_valid = 0;
4019 
4020     A[] = null;
4021     B[] = null;
4022     C[] = null;
4023     window[] = null;
4024     bit_reverse = null;
4025 
4026     serial = 0;
4027     last_page = 0;
4028     segment_count = 0;
4029     segments[] = 0;
4030     page_flag = 0;
4031     bytes_in_seg = 0;
4032     first_decode = 0;
4033     next_seg = 0;
4034     last_seg = 0;
4035     last_seg_which = 0;
4036     acc = 0;
4037     valid_bits = 0;
4038     packet_bytes = 0;
4039     end_seg_with_known_loc = 0;
4040     known_loc_for_packet = 0;
4041     discard_samples_deferred = 0;
4042     samples_output = 0;
4043 
4044     /+
4045     page_crc_tests = -1;
4046     scan[] = CRCscan.init;
4047     +/
4048 
4049     channel_buffer_start = 0;
4050     channel_buffer_end = 0;
4051   }
4052 
4053   @property const pure {
4054     int getSampleOffset () { return (current_loc_valid ? current_loc : -1); }
4055 
4056     @property ubyte chans () { return (isOpened ? cast(ubyte)this.vrchannels : 0); }
4057     @property uint sampleRate () { return (isOpened ? this.sample_rate : 0); }
4058     @property uint maxFrameSize () { return (isOpened ? this.blocksize_1>>1 : 0); }
4059 
4060     @property uint getSetupMemoryRequired () { return (isOpened ? this.setup_memory_required : 0); }
4061     @property uint getSetupTempMemoryRequired () { return (isOpened ? this.setup_temp_memory_required : 0); }
4062     @property uint getTempMemoryRequired () { return (isOpened ? this.temp_memory_required : 0); }
4063   }
4064 
4065   // will clear last error
4066   @property int lastError () {
4067     int e = error;
4068     error = STBVorbisError.no_error;
4069     return e;
4070   }
4071 
4072   // PUSHDATA API
4073   /+
4074   void flushPushdata () {
4075     if (push_mode) {
4076       previous_length = 0;
4077       page_crc_tests = 0;
4078       discard_samples_deferred = 0;
4079       current_loc_valid = false;
4080       first_decode = false;
4081       samples_output = 0;
4082       channel_buffer_start = 0;
4083       channel_buffer_end = 0;
4084     }
4085   }
4086 
4087   // return value: number of bytes we used
4088   int decodeFramePushdata(
4089            void* data, int data_len, // the memory available for decoding
4090            int* channels,            // place to write number of float* buffers
4091            float*** output,          // place to write float** array of float* buffers
4092            int* samples              // place to write number of output samples
4093        )
4094   {
4095     if (!this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);
4096 
4097     if (this.page_crc_tests >= 0) {
4098       *samples = 0;
4099       return vorbis_search_for_page_pushdata(this, cast(ubyte*)data, data_len);
4100     }
4101 
4102     this.stream = cast(ubyte*)data;
4103     this.stream_end = this.stream+data_len;
4104     this.error = STBVorbisError.no_error;
4105 
4106     // check that we have the entire packet in memory
4107     if (!is_whole_packet_present(this, false)) {
4108       *samples = 0;
4109       return 0;
4110     }
4111 
4112     int len, left, right;
4113 
4114     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4115       // save the actual error we encountered
4116       STBVorbisError error = this.error;
4117       if (error == STBVorbisError.bad_packet_type) {
4118         // flush and resynch
4119         this.error = STBVorbisError.no_error;
4120         while (get8_packet(this) != EOP) if (this.eof) break;
4121         *samples = 0;
4122         return this.stream-data;
4123       }
4124       if (error == STBVorbisError.continued_packet_flag_invalid) {
4125         if (this.previous_length == 0) {
4126           // we may be resynching, in which case it's ok to hit one
4127           // of these; just discard the packet
4128           this.error = STBVorbisError.no_error;
4129           while (get8_packet(this) != EOP) if (this.eof) break;
4130           *samples = 0;
4131           return this.stream-data;
4132         }
4133       }
4134       // if we get an error while parsing, what to do?
4135       // well, it DEFINITELY won't work to continue from where we are!
4136       flushPushdata();
4137       // restore the error that actually made us bail
4138       this.error = error;
4139       *samples = 0;
4140       return 1;
4141     }
4142 
4143     // success!
4144     len = vorbis_finish_frame(this, len, left, right);
4145     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4146 
4147     if (channels) *channels = this.vrchannels;
4148     *samples = len;
4149     *output = this.outputs.ptr;
4150     return this.stream-data;
4151   }
4152   +/
4153 
4154   public uint fileOffset () {
4155     if (/+push_mode ||+/ !isOpened) return 0;
4156     /+if (stream !is null) return cast(uint)(stream-stream_start);+/
4157     return (stpos > stst ? stpos-stst : 0);
4158   }
4159 
4160   public uint stream_len () { return stend-stst; }
4161 
4162   // DATA-PULLING API
4163   public int seekFrame (uint sample_number) {
4164     uint max_frame_samples;
4165 
4166     /+if (this.push_mode) return -.error(this, STBVorbisError.invalid_api_mixing);+/
4167 
4168     // fast page-level search
4169     if (!seek_to_sample_coarse(this, sample_number)) return 0;
4170 
4171     assert(this.current_loc_valid);
4172     assert(this.current_loc <= sample_number);
4173 
4174     // linear search for the relevant packet
4175     max_frame_samples = (this.blocksize_1*3-this.blocksize_0)>>2;
4176     while (this.current_loc < sample_number) {
4177       int left_start, left_end, right_start, right_end, mode, frame_samples;
4178       if (!peek_decode_initial(this, &left_start, &left_end, &right_start, &right_end, &mode)) return .error(this, STBVorbisError.seek_failed);
4179       // calculate the number of samples returned by the next frame
4180       frame_samples = right_start-left_start;
4181       if (this.current_loc+frame_samples > sample_number) {
4182         return 1; // the next frame will contain the sample
4183       } else if (this.current_loc+frame_samples+max_frame_samples > sample_number) {
4184         // there's a chance the frame after this could contain the sample
4185         vorbis_pump_first_frame(this);
4186       } else {
4187         // this frame is too early to be relevant
4188         this.current_loc += frame_samples;
4189         this.previous_length = 0;
4190         maybe_start_packet(this);
4191         flush_packet(this);
4192       }
4193     }
4194     // the next frame will start with the sample
4195     assert(this.current_loc == sample_number);
4196     return 1;
4197   }
4198 
4199   public int seek (uint sample_number) {
4200     if (!seekFrame(sample_number)) return 0;
4201     if (sample_number != this.current_loc) {
4202       int n;
4203       uint frame_start = this.current_loc;
4204       getFrameFloat(&n, null);
4205       assert(sample_number > frame_start);
4206       assert(this.channel_buffer_start+cast(int)(sample_number-frame_start) <= this.channel_buffer_end);
4207       this.channel_buffer_start += (sample_number-frame_start);
4208     }
4209     return 1;
4210   }
4211 
4212   public bool seekStart () {
4213     /+if (push_mode) { .error(this, STBVorbisError.invalid_api_mixing); return; }+/
4214     set_file_offset(this, first_audio_page_offset);
4215     previous_length = 0;
4216     first_decode = true;
4217     next_seg = -1;
4218     return vorbis_pump_first_frame(this);
4219   }
4220 
  // Returns the total stream length in samples, or 0 when it is unknown.
  //
  // The value is computed only while `total_samples` is zero and is then kept
  // in `total_samples`: the function looks for the final page starting at most
  // 64K before the end of the stream, reads the two 32-bit halves of the
  // granule position from that page's header, and records the page in `p_last`.
  // The read position in effect on entry is restored before returning.
  //
  // On failure `error` is set to `STBVorbisError.cant_find_last_page` and
  // `total_samples` is set to the "unknown" marker, so the scan is not repeated.
  public uint streamLengthInSamples () {
    uint restore_offset, previous_safe;
    uint end, last_page_loc;

    /+if (this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
    if (!this.total_samples) {
      uint last;
      uint lo, hi;
      char[6] header;

      // first, store the current decode position so we can restore it
      restore_offset = fileOffset;

      // now we want to seek back 64K from the end (the last page must
      // be at most a little less than 64K, but let's allow a little slop)
      if (this.stream_len >= 65536 && this.stream_len-65536 >= this.first_audio_page_offset) {
        previous_safe = this.stream_len-65536;
      } else {
        // short stream: never start the scan before the first audio page
        previous_safe = this.first_audio_page_offset;
      }

      set_file_offset(this, previous_safe);
      // previous_safe is now our candidate 'earliest known place that seeking
      // to will lead to the final page'

      if (!vorbis_find_page(this, &end, &last)) {
        // if we can't find a page, we're hosed!
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = 0xffffffff;
        goto done;
      }

      // check if there are more pages
      last_page_loc = fileOffset;

      // stop when the last_page flag is set, not when we reach eof;
      // this allows us to stop short of a 'file_section' end without
      // explicitly checking the length of the section
      while (!last) {
        set_file_offset(this, end);
        if (!vorbis_find_page(this, &end, &last)) {
          // the last page we found didn't have the 'last page' flag set. whoops!
          break;
        }
        previous_safe = last_page_loc+1;
        last_page_loc = fileOffset;
      }

      set_file_offset(this, last_page_loc);

      // parse the header
      // (the first 6 bytes are read only to step over them; `header` is not examined)
      getn(this, cast(ubyte*)header, 6);
      // extract the absolute granule position
      lo = get32(this);
      hi = get32(this);
      // both halves all-ones: treated as "no position available"
      if (lo == 0xffffffff && hi == 0xffffffff) {
        this.error = STBVorbisError.cant_find_last_page;
        this.total_samples = SAMPLE_unknown;
        goto done;
      }
      // positions that do not fit in 32 bits are clamped just below the "unknown" marker
      if (hi) lo = 0xfffffffe; // saturate
      this.total_samples = lo;

      // remember where the final page is and which sample it ends on
      this.p_last.page_start = last_page_loc;
      this.p_last.page_end = end;
      this.p_last.last_decoded_sample = lo;

     done:
      // common exit: put the read position back where the caller left it
      set_file_offset(this, restore_offset);
    }
    return (this.total_samples == SAMPLE_unknown ? 0 : this.total_samples);
  }
4293 
4294   public float streamLengthInSeconds () {
4295     return (isOpened ? streamLengthInSamples()/cast(float)sample_rate : 0.0f);
4296   }
4297 
4298   public int getFrameFloat (int* channels, float*** output) {
4299     int len, right, left;
4300     /+if (push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4301 
4302     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4303       channel_buffer_start = channel_buffer_end = 0;
4304       return 0;
4305     }
4306 
4307     len = vorbis_finish_frame(this, len, left, right);
4308     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4309 
4310     channel_buffer_start = left;
4311     channel_buffer_end = left+len;
4312 
4313     if (channels) *channels = this.vrchannels;
4314     if (output) *output = this.outputs.ptr;
4315     return len;
4316   }
4317 
4318   /+
4319   public VorbisDecoder stb_vorbis_open_memory (const(void)* data, int len, int* error=null, stb_vorbis_alloc* alloc=null) {
4320     VorbisDecoder this;
4321     stb_vorbis_ctx p = void;
4322     if (data is null) return null;
4323     vorbis_init(&p, alloc);
4324     p.stream = cast(ubyte*)data;
4325     p.stream_end = cast(ubyte*)data+len;
4326     p.stream_start = cast(ubyte*)p.stream;
4327     p.stream_len = len;
4328     p.push_mode = false;
4329     if (start_decoder(&p)) {
4330       this = vorbis_alloc(&p);
4331       if (this) {
4332         *this = p;
4333         vorbis_pump_first_frame(this);
4334         return this;
4335       }
4336     }
4337     if (error) *error = p.error;
4338     vorbis_deinit(&p);
4339     return null;
4340   }
4341   +/
4342 
4343   // s16 samples API
4344   int getFrameShort (int num_c, short** buffer, int num_samples) {
4345     float** output;
4346     int len = getFrameFloat(null, &output);
4347     if (len > num_samples) len = num_samples;
4348     if (len) convert_samples_short(num_c, buffer, 0, vrchannels, output, 0, len);
4349     return len;
4350   }
4351 
4352   int getFrameShortInterleaved (int num_c, short* buffer, int num_shorts) {
4353     float** output;
4354     int len;
4355     if (num_c == 1) return getFrameShort(num_c, &buffer, num_shorts);
4356     len = getFrameFloat(null, &output);
4357     if (len) {
4358       if (len*num_c > num_shorts) len = num_shorts/num_c;
4359       convert_channels_short_interleaved(num_c, buffer, vrchannels, output, 0, len);
4360     }
4361     return len;
4362   }
4363 
4364   int getSamplesShortInterleaved (int channels, short* buffer, int num_shorts) {
4365     float** outputs;
4366     int len = num_shorts/channels;
4367     int n = 0;
4368     int z = this.vrchannels;
4369     if (z > channels) z = channels;
4370     while (n < len) {
4371       int k = channel_buffer_end-channel_buffer_start;
4372       if (n+k >= len) k = len-n;
4373       if (k) convert_channels_short_interleaved(channels, buffer, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4374       buffer += k*channels;
4375       n += k;
4376       channel_buffer_start += k;
4377       if (n == len) break;
4378       if (!getFrameFloat(null, &outputs)) break;
4379     }
4380     return n;
4381   }
4382 
4383   int getSamplesShort (int channels, short** buffer, int len) {
4384     float** outputs;
4385     int n = 0;
4386     int z = this.vrchannels;
4387     if (z > channels) z = channels;
4388     while (n < len) {
4389       int k = channel_buffer_end-channel_buffer_start;
4390       if (n+k >= len) k = len-n;
4391       if (k) convert_samples_short(channels, buffer, n, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4392       n += k;
4393       channel_buffer_start += k;
4394       if (n == len) break;
4395       if (!getFrameFloat(null, &outputs)) break;
4396     }
4397     return n;
4398   }
4399 
4400   /+
4401   public int stb_vorbis_decode_filename (string filename, int* channels, int* sample_rate, short** output) {
4402     import core.stdc.stdlib : malloc, realloc;
4403 
4404     int data_len, offset, total, limit, error;
4405     short* data;
4406     VorbisDecoder v = stb_vorbis_open_filename(filename, &error, null);
4407     if (v is null) return -1;
4408     limit = v.vrchannels*4096;
4409     *channels = v.vrchannels;
4410     if (sample_rate) *sample_rate = v.sample_rate;
4411     offset = data_len = 0;
4412     total = limit;
4413     data = cast(short*)malloc(total*(*data).sizeof);
4414     if (data is null) {
4415       stb_vorbis_close(v);
4416       return -2;
4417     }
4418     for (;;) {
4419       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4420       if (n == 0) break;
4421       data_len += n;
4422       offset += n*v.vrchannels;
4423       if (offset+limit > total) {
4424         short *data2;
4425         total *= 2;
4426         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4427         if (data2 is null) {
4428           import core.stdc.stdlib : free;
4429           free(data);
4430           stb_vorbis_close(v);
4431           return -2;
4432         }
4433         data = data2;
4434       }
4435     }
4436     *output = data;
4437     stb_vorbis_close(v);
4438     return data_len;
4439   }
4440 
4441   public int stb_vorbis_decode_memory (const(void)* mem, int len, int* channels, int* sample_rate, short** output) {
4442     import core.stdc.stdlib : malloc, realloc;
4443 
4444     int data_len, offset, total, limit, error;
4445     short* data;
4446     VorbisDecoder v = stb_vorbis_open_memory(mem, len, &error, null);
4447     if (v is null) return -1;
4448     limit = v.vrchannels*4096;
4449     *channels = v.vrchannels;
4450     if (sample_rate) *sample_rate = v.sample_rate;
4451     offset = data_len = 0;
4452     total = limit;
4453     data = cast(short*)malloc(total*(*data).sizeof);
4454     if (data is null) {
4455       stb_vorbis_close(v);
4456       return -2;
4457     }
4458     for (;;) {
4459       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4460       if (n == 0) break;
4461       data_len += n;
4462       offset += n*v.vrchannels;
4463       if (offset+limit > total) {
4464         short *data2;
4465         total *= 2;
4466         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4467         if (data2 is null) {
4468           import core.stdc.stdlib : free;
4469           free(data);
4470           stb_vorbis_close(v);
4471           return -2;
4472         }
4473         data = data2;
4474       }
4475     }
4476     *output = data;
4477     stb_vorbis_close(v);
4478     return data_len;
4479   }
4480 
4481   +/
4482 
4483   public int stb_vorbis_get_samples_float_interleaved (int channels, float* buffer, int num_floats) 
4484   {
4485     float** outputs;
4486     int len = num_floats/channels;
4487     int n = 0;
4488     int z = this.vrchannels;
4489     if (z > channels) z = channels;
4490     while (n < len) {
4491         int k = this.channel_buffer_end-this.channel_buffer_start;
4492         if (n+k >= len) k = len-n;
4493         foreach (immutable j; 0..k) {
4494         foreach (immutable i; 0..z) *buffer++ = (this.channel_buffers.ptr[i])[this.channel_buffer_start+j];
4495         foreach (immutable i; z..channels) *buffer++ = 0;
4496         }
4497         n += k;
4498         this.channel_buffer_start += k;
4499         if (n == len) break;
4500         if (!getFrameFloat(null, &outputs)) break;
4501     }
4502     return n;
4503   }
4504   
4505 
4506   public int getSamplesFloat (int achans, float** buffer, int num_samples) {
4507     import core.stdc..string : memcpy, memset;
4508     float** outputs;
4509     int n = 0;
4510     int z = vrchannels;
4511     if (z > achans) z = achans;
4512     while (n < num_samples) {
4513       int k = channel_buffer_end-channel_buffer_start;
4514       if (n+k >= num_samples) k = num_samples-n;
4515       if (k) {
4516         foreach (immutable i; 0..z) memcpy(buffer[i]+n, channel_buffers.ptr[i]+channel_buffer_start, float.sizeof*k);
4517         foreach (immutable i; z..achans) memset(buffer[i]+n, 0, float.sizeof*k);
4518       }
4519       n += k;
4520       channel_buffer_start += k;
4521       if (n == num_samples) break;
4522       if (!getFrameFloat(null, &outputs)) break;
4523     }
4524     return n;
4525   }
4526 
4527 private: // k8: 'cause i'm evil
4528   private enum cmt_len_size = 2;
4529   nothrow /*@trusted*/ @nogc {
4530     public @property bool comment_empty () const pure { return (comment_get_line_len == 0); }
4531 
4532     // 0: error
4533     // includes length itself
4534     private uint comment_get_line_len () const pure {
4535       if (comment_data_pos >= comment_size) return 0;
4536       if (comment_size-comment_data_pos < cmt_len_size) return 0;
4537       uint len = comment_data[comment_data_pos];
4538       len += cast(uint)comment_data[comment_data_pos+1]<<8;
4539       return (len >= cmt_len_size && comment_data_pos+len <= comment_size ? len : 0);
4540     }
4541 
4542     public bool comment_rewind () {
4543       comment_data_pos = 0;
4544       for (;;) {
4545         auto len = comment_get_line_len();
4546         if (!len) { comment_data_pos = comment_size; return false; }
4547         if (len != cmt_len_size) return true;
4548         comment_data_pos += len;
4549       }
4550     }
4551 
4552     // true: has something to read after skip
4553     public bool comment_skip () {
4554       comment_data_pos += comment_get_line_len();
4555       for (;;) {
4556         auto len = comment_get_line_len();
4557         if (!len) { comment_data_pos = comment_size; return false; }
4558         if (len != cmt_len_size) break;
4559         comment_data_pos += len;
4560       }
4561       return true;
4562     }
4563 
4564     public const(char)[] comment_line () {
4565       auto len = comment_get_line_len();
4566       if (len < cmt_len_size) return null;
4567       if (len == cmt_len_size) return "";
4568       return (cast(char*)comment_data+comment_data_pos+cmt_len_size)[0..len-cmt_len_size];
4569     }
4570 
4571     public const(char)[] comment_name () {
4572       auto line = comment_line();
4573       if (line.length == 0) return line;
4574       uint epos = 0;
4575       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4576       return (epos < line.length ? line[0..epos] : "");
4577     }
4578 
4579     public const(char)[] comment_value () {
4580       auto line = comment_line();
4581       if (line.length == 0) return line;
4582       uint epos = 0;
4583       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4584       return (epos < line.length ? line[epos+1..$] : line);
4585     }
4586   }
4587 }
4588 
4589 
4590 // ////////////////////////////////////////////////////////////////////////// //
4591 private:
4592 // cool helper to translate C defines
// Compile-time text substitution helper.
// `T` is the list of placeholder names; the eponymous function replaces each
// `${name}` in a code string with the matching entry of its `names` argument.
template cmacroFixVars(T...) {
  /**
   * 64-bit implementation of fasthash
   *
   * Params:
   *   buf =  data buffer
   *   len =  number of bytes to hash from `buf`
   *   seed = the seed
   *
   * Returns:
   *   32-bit or 64-bit hash (the 64-bit value is folded when `size_t` is 4 bytes)
   */
  size_t hashOf (const(void)* buf, size_t len, size_t seed=0) pure nothrow @trusted @nogc {
    // expression that assembles 8 bytes at `data` into a little-endian ulong
    enum Get8Bytes = q{
      cast(ulong)data[0]|
      (cast(ulong)data[1]<<8)|
      (cast(ulong)data[2]<<16)|
      (cast(ulong)data[3]<<24)|
      (cast(ulong)data[4]<<32)|
      (cast(ulong)data[5]<<40)|
      (cast(ulong)data[6]<<48)|
      (cast(ulong)data[7]<<56)
    };
    enum m = 0x880355f21e6d1965UL;
    auto data = cast(const(ubyte)*)buf;
    ulong h = seed;
    ulong t;
    // full 8-byte groups
    foreach (immutable _; 0..len/8) {
      version(HasUnalignedOps) {
        // the direct ulong load is only used at run time; CTFE always takes the byte-wise path
        if (__ctfe) {
          t = mixin(Get8Bytes);
        } else {
          t = *cast(ulong*)data;
        }
      } else {
        t = mixin(Get8Bytes);
      }
      data += 8;
      t ^= t>>23;
      t *= 0x2127599bf4325c37UL;
      t ^= t>>47;
      h ^= t;
      h *= m;
    }

    h ^= len*m;
    t = 0;
    // remaining 0..7 bytes; every case falls through to the next lower one,
    // and the mixing step in `default` runs even when no bytes remain
    switch (len&7) {
      case 7: t ^= cast(ulong)data[6]<<48; goto case 6;
      case 6: t ^= cast(ulong)data[5]<<40; goto case 5;
      case 5: t ^= cast(ulong)data[4]<<32; goto case 4;
      case 4: t ^= cast(ulong)data[3]<<24; goto case 3;
      case 3: t ^= cast(ulong)data[2]<<16; goto case 2;
      case 2: t ^= cast(ulong)data[1]<<8; goto case 1;
      case 1: t ^= cast(ulong)data[0]; goto default;
      default:
        t ^= t>>23;
        t *= 0x2127599bf4325c37UL;
        t ^= t>>47;
        h ^= t;
        h *= m;
        break;
    }

    // final mix
    h ^= h>>23;
    h *= 0x2127599bf4325c37UL;
    h ^= h>>47;
    static if (size_t.sizeof == 4) {
      // 32-bit hash
      // the following trick converts the 64-bit hashcode to Fermat
      // residue, which shall retain information from both the higher
      // and lower parts of hashcode.
      return cast(size_t)(h-(h>>32));
    } else {
      return h;
    }
  }

  /**
   * Substitutes `${...}` placeholders in `s`.
   *
   * Every `${name}` whose name is the i-th element of `T` is replaced with
   * `names[i]`. `${__temp_prefix__}` is replaced with an identifier prefix
   * built from "_temp_macro_var_", hex digits derived from the hash of `s`
   * and a trailing "_"; the same prefix is reused for all occurrences within
   * one call. Trailing whitespace and leading blank lines of `s` are
   * dropped; the indentation of the first non-blank line is kept.
   *
   * Params:
   *   s =     text with placeholders
   *   names = replacement strings, one per element of `T` (checked with `assert`)
   *
   * Returns:
   *   the text with all placeholders replaced; an unknown placeholder name
   *   trips an `assert`
   */
  string cmacroFixVars (string s, string[] names...) {
    assert(T.length == names.length, "cmacroFixVars: names and arguments count mismatch");
    enum tmpPfxName = "__temp_prefix__";
    string res;
    string tmppfx;
    uint pos = 0;
    // skip empty lines (for pretty printing)
    // trim trailing spaces
    while (s.length > 0 && s[$-1] <= ' ') s = s[0..$-1];
    uint linestpos = 0; // start of the current line
    while (pos < s.length) {
      if (s[pos] > ' ') break;
      if (s[pos] == '\n') linestpos = pos+1;
      ++pos;
    }
    // restart from the beginning of the first non-blank line, so its indentation is copied too
    pos = linestpos;
    while (pos+2 < s.length) {
      // find the next "${"; everything before it is copied verbatim
      int epos = pos;
      while (epos+2 < s.length && (s[epos] != '$' || s[epos+1] != '{')) ++epos;
      if (epos > pos) {
        // too little text left for a placeholder: the tail is appended after the loop
        if (s.length-epos < 3) break;
        res ~= s[pos..epos];
        pos = epos;
      }
      assert(s[pos] == '$' && s[pos+1] == '{');
      pos += 2;
      bool found = false;
      if (s.length-pos >= tmpPfxName.length+1 && s[pos+tmpPfxName.length] == '}' && s[pos..pos+tmpPfxName.length] == tmpPfxName) {
        if (tmppfx.length == 0) {
          // generate temporary prefix
          auto hash = hashOf(s.ptr, s.length);
          immutable char[16] hexChars = "0123456789abcdef";
          tmppfx = "_temp_macro_var_";
          // one hex digit per nibble of the hash, lowest nibble first
          foreach_reverse (immutable idx; 0..size_t.sizeof*2) {
            tmppfx ~= hexChars[hash&0x0f];
            hash >>= 4;
          }
          tmppfx ~= "_";
        }
        pos += tmpPfxName.length+1;
        res ~= tmppfx;
        found = true;
      } else {
        // compare against every known placeholder name (unrolled at compile time over `T`)
        foreach (immutable nidx, string oname; T) {
          static assert(oname.length > 0);
          if (s.length-pos >= oname.length+1 && s[pos+oname.length] == '}' && s[pos..pos+oname.length] == oname) {
            found = true;
            pos += oname.length+1;
            res ~= names[nidx];
            break;
          }
        }
      }
      assert(found, "unknown variable in macro");
    }
    // copy whatever is left after the last placeholder
    if (pos < s.length) res ~= s[pos..$];
    return res;
  }
}
4729 
4730 // ////////////////////////////////////////////////////////////////////////// //
4731 /* Version history
4732     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
4733     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
4734                            avoid discarding last frame of audio data
4735     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
4736                            some more crash fixes when out of memory or with corrupt files
4737     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
4738                            some crash fixes when out of memory or with corrupt files
4739     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
4740     1.04    - 2014/08/27 - fix missing const-correct case in API
4741     1.03    - 2014/08/07 - Warning fixes
4742     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
4743     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
4744     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
4745                            (API change) report sample rate for decode-full-file funcs
4746     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
4747     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
4748     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
4749     0.99993 - remove assert that fired on legal files with empty tables
4750     0.99992 - rewind-to-start
4751     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
4752     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
4753     0.9998 - add a full-decode function with a memory source
4754     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
4755     0.9996 - query length of vorbis stream in samples/seconds
4756     0.9995 - bugfix to another optimization that only happened in certain files
4757     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
4758     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
4759     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
4760     0.9991 - performance improvement of IMDCT
4761     0.999 - (should have been 0.9990) performance improvement of IMDCT
4762     0.998 - no-CRT support from Casey Muratori
4763     0.997 - bugfixes for bugs found by Terje Mathisen
4764     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
4765     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
4766     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
4767     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
4768     0.992 - fixes for MinGW warning
4769     0.991 - turn fast-float-conversion on by default
4770     0.990 - fix push-mode seek recovery if you seek into the headers
4771     0.98b - fix to bad release of 0.98
4772     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
4773     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
4774     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
4775     0.95 - clamping code for 16-bit functions
    0.94 - not publicly released
4777     0.93 - fixed all-zero-floor case (was decoding garbage)
4778     0.92 - fixed a memory leak
4779     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
4780     0.90 - first public release
4781 */
4782 
4783 /*
4784 ------------------------------------------------------------------------------
4785 This software is available under 2 licenses -- choose whichever you prefer.
4786 ------------------------------------------------------------------------------
4787 ALTERNATIVE A - MIT License
4788 Copyright (c) 2017 Sean Barrett
4789 Permission is hereby granted, free of charge, to any person obtaining a copy of
4790 this software and associated documentation files (the "Software"), to deal in
4791 the Software without restriction, including without limitation the rights to
4792 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
4793 of the Software, and to permit persons to whom the Software is furnished to do
4794 so, subject to the following conditions:
4795 The above copyright notice and this permission notice shall be included in all
4796 copies or substantial portions of the Software.
4797 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4798 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4799 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4800 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4801 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
4802 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4803 SOFTWARE.
4804 ------------------------------------------------------------------------------
4805 ALTERNATIVE B - Public Domain (www.unlicense.org)
4806 This is free and unencumbered software released into the public domain.
4807 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
4808 software, either in source code form or as a compiled binary, for any purpose,
4809 commercial or non-commercial, and by any means.
4810 In jurisdictions that recognize copyright laws, the author or authors of this
4811 software dedicate any and all copyright interest in the software to the public
4812 domain. We make this dedication for the benefit of the public at large and to
4813 the detriment of our heirs and successors. We intend this dedication to be an
4814 overt act of relinquishment in perpetuity of all present and future rights to
4815 this software under copyright law.
4816 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4817 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4818 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4819 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
4820 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
4821 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4822 ------------------------------------------------------------------------------
4823 */