1 module audioformats.stb_vorbis2; 2 3 // Ogg Vorbis audio decoder - v1.22 - public domain 4 // http://nothings.org/stb_vorbis/ 5 // 6 // Original version written by Sean Barrett in 2007. 7 // 8 // Originally sponsored by RAD Game Tools. Seeking implementation 9 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker, 10 // Elias Software, Aras Pranckevicius, and Sean Barrett. 11 // 12 // Translated to D by Guillaume Piolat. 13 // 14 // LICENSE 15 // 16 // See end of file for license information. 17 // 18 // Limitations: 19 // 20 // - floor 0 not supported (used in old ogg vorbis files pre-2004) 21 // - lossless sample-truncation at beginning ignored 22 // - cannot concatenate multiple vorbis streams 23 // - sample positions are 32-bit, limiting seekable 192Khz 24 // files to around 6 hours (Ogg supports 64-bit) 25 // 26 // Feature contributors: 27 // Dougall Johnson (sample-exact seeking) 28 // 29 // Bugfix/warning contributors: 30 // Terje Mathisen Niklas Frykholm Andy Hill 31 // Casey Muratori John Bolton Gargaj 32 // Laurent Gomila Marc LeBlanc Ronny Chevalier 33 // Bernhard Wodo Evan Balster github:alxprd 34 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot 35 // Phillip Bennefall Rohit Thiago Goulart 36 // github:manxorist Saga Musix github:infatum 37 // Timur Gagiev Maxwell Koo Peter Waller 38 // github:audinowho Dougall Johnson David Reid 39 // github:Clownacy Pedro J. Estebanez Remi Verschelde 40 // AnthoFoxo github:morlat Gabriel Ravier 41 // 42 // Partial history: 43 // 1.22d - 2021-10-31 - translated to D to update the one in audio-formats, and perhaps get seeking. 44 // 1.22 - 2021-07-11 - various small fixes 45 // 1.21 - 2021-07-02 - fix bug for files with no comments 46 // 1.20 - 2020-07-11 - several small fixes 47 // 1.19 - 2020-02-05 - warnings 48 // 1.18 - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc. 49 // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure) 50 // 1.16 - 2019-03-04 - fix warnings 51 // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found 52 // 1.14 - 2018-02-11 - delete bogus dealloca usage 53 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully) 54 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files 55 // 1.11 - 2017-07-23 - fix MinGW compilation 56 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory 57 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version 58 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame 59 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const 60 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson) 61 // some crash fixes when out of memory or with corrupt files 62 // fix some inappropriately signed shifts 63 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant 64 // 1.04 - 2014-08-27 - fix missing const-correct case in API 65 // 1.03 - 2014-08-07 - warning fixes 66 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows 67 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct) 68 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel; 69 // (API change) report sample rate for decode-full-file funcs 70 // 71 // See end of file for full version history. 72 // 73 // Notes about the D translation: removed push data API. 74 75 76 /////////// THREAD SAFETY 77 78 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from 79 // them from multiple threads at the same time. However, you can have multiple 80 // stb_vorbis* handles and decode from them independently in multiple thrads. 81 82 83 /////////// MEMORY ALLOCATION 84 85 // normally stb_vorbis uses malloc() to allocate memory at startup, 86 // and alloca() to allocate temporary memory during a frame on the 87 // stack. (Memory consumption will depend on the amount of setup 88 // data in the file and how you set the compile flags for speed 89 // vs. size. In my test files the maximal-size usage is ~150KB.) 90 // 91 // You can modify the wrapper functions in the source (setup_malloc, 92 // setup_temp_malloc, temp_malloc) to change this behavior, or you 93 // can use a simpler allocation model: you pass in a buffer from 94 // which stb_vorbis will allocate _all_ its memory (including the 95 // temp memory). "open" may fail with a VORBIS_outofmem if you 96 // do not pass in enough data; there is no way to determine how 97 // much you do need except to succeed (at which point you can 98 // query get_info to find the exact amount required. yes I know 99 // this is lame). 100 // 101 // If you pass in a non-NULL buffer of the type below, allocation 102 // will occur from it as described above. Otherwise just pass NULL 103 // to use malloc()/alloca() 104 105 import core.stdc.stdlib: malloc, free, qsort, abs; 106 import core.stdc.string: memset, memcmp, memcpy; 107 import std.math: ldexp, sin, cos, floor, exp, log, pow; 108 import audioformats.io; 109 110 nothrow: 111 @nogc: 112 113 struct stb_vorbis_alloc 114 { 115 ubyte* alloc_buffer; 116 int alloc_buffer_length_in_bytes; 117 } 118 119 120 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES 121 122 struct stb_vorbis_info 123 { 124 uint sample_rate; 125 int channels; 126 127 uint setup_memory_required; 128 uint setup_temp_memory_required; 129 uint temp_memory_required; 130 131 int max_frame_size; 132 } 133 134 struct stb_vorbis_comment 135 { 136 char *vendor; 137 138 int comment_list_length; 139 char **comment_list; 140 } 141 142 //////// ERROR CODES 143 144 alias STBVorbisError = int; 145 enum : STBVorbisError 146 { 147 VORBIS__no_error, 148 149 VORBIS_need_more_data=1, // not a real error 150 151 VORBIS_invalid_api_mixing, // can't mix API modes 152 VORBIS_outofmem, // not enough memory 153 VORBIS_feature_not_supported, // uses floor 0 154 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small 155 VORBIS_file_open_failure, // fopen() failed 156 VORBIS_seek_without_length, // can't seek in unknown-length file 157 158 VORBIS_unexpected_eof=10, // file is truncated? 159 VORBIS_seek_invalid, // seek past EOF 160 161 // decoding errors (corrupt/invalid stream) -- you probably 162 // don't care about the exact details of these 163 164 // vorbis errors: 165 VORBIS_invalid_setup=20, 166 VORBIS_invalid_stream, 167 168 // ogg errors: 169 VORBIS_missing_capture_pattern=30, 170 VORBIS_invalid_stream_structure_version, 171 VORBIS_continued_packet_flag_invalid, 172 VORBIS_incorrect_stream_serial_number, 173 VORBIS_invalid_first_page, 174 VORBIS_bad_packet_type, 175 VORBIS_cant_find_last_page, 176 VORBIS_seek_failed, 177 VORBIS_ogg_skeleton_not_supported 178 } 179 180 // 181 // HEADER ENDS HERE 182 // 183 ////////////////////////////////////////////////////////////////////////////// 184 185 // global configuration settings (e.g. set these in the project/makefile), 186 // or just set them in this file at the top (although ideally the first few 187 // should be visible when the header file is compiled too, although it's not 188 // crucial) 189 190 // STB_VORBIS_NO_STDIO 191 // does not compile the code for the APIs that use FILE *s internally 192 // or externally (implied by STB_VORBIS_NO_PULLDATA_API) 193 // #define STB_VORBIS_NO_STDIO 194 195 // STB_VORBIS_NO_INTEGER_CONVERSION 196 // does not compile the code for converting audio sample data from 197 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API) 198 // #define STB_VORBIS_NO_INTEGER_CONVERSION 199 200 // STB_VORBIS_NO_FAST_SCALED_FLOAT 201 // does not use a fast float-to-int trick to accelerate float-to-int on 202 // most platforms which requires endianness be defined correctly. 203 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT 204 205 206 // STB_VORBIS_MAX_CHANNELS [number] 207 // globally define this to the maximum number of channels you need. 208 // The spec does not put a restriction on channels except that 209 // the count is stored in a byte, so 255 is the hard limit. 210 // Reducing this saves about 16 bytes per value, so using 16 saves 211 // (255-16)*16 or around 4KB. Plus anything other memory usage 212 // I forgot to account for. Can probably go as low as 8 (7.1 audio), 213 // 6 (5.1 audio), or 2 (stereo only). 214 enum STB_VORBIS_MAX_CHANNELS = 16; 215 216 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number] 217 // sets the log size of the huffman-acceleration table. Maximum 218 // supported value is 24. with larger numbers, more decodings are O(1), 219 // but the table size is larger so worse cache missing, so you'll have 220 // to probe (and try multiple ogg vorbis files) to find the sweet spot. 221 enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10; 222 223 // STB_VORBIS_FAST_BINARY_LENGTH [number] 224 // sets the log size of the binary-search acceleration table. this 225 // is used in similar fashion to the fast-huffman size to set initial 226 // parameters for the binary search 227 228 // STB_VORBIS_FAST_HUFFMAN_INT 229 // The fast huffman tables are much more efficient if they can be 230 // stored as 16-bit results instead of 32-bit results. This restricts 231 // the codebooks to having only 65535 possible outcomes, though. 232 // (At least, accelerated by the huffman table.) 233 enum STB_VORBIS_FAST_HUFFMAN_SHORT = true; 234 235 enum STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH = false; 236 237 enum STB_VORBIS_DIVIDES_IN_CODEBOOK = false; // cannot be true, support for this not translated 238 enum STB_VORBIS_DIVIDES_IN_RESIDUE = false; 239 240 enum STB_VORBIS_DIVIDE_TABLE = false; 241 enum STB_VORBIS_NO_DEFER_FLOOR = false; // STB_VORBIS_NO_DEFER_FLOOR not defined 242 enum STB_VORBIS_NO_CRT = false; 243 enum STB_VORBIS_NO_STDIO = false; 244 enum STB_VORBIS_NO_INTEGER_CONVERSION = true; 245 enum STB_VORBIS_NO_FAST_SCALED_FLOAT = true; 246 247 static assert(STB_VORBIS_MAX_CHANNELS <= 256); 248 static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24); 249 250 enum MAX_BLOCKSIZE_LOG = 13; // from specification 251 enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG); 252 253 alias uint8 = ubyte; 254 alias int8 = byte; 255 alias uint16 = ushort; 256 alias int16 = short; 257 alias uint32 = uint; 258 alias int32 = int; 259 260 enum TRUE = 1; 261 enum FALSE = 0; 262 enum void* NULL = null; 263 264 alias codetype = float; 265 266 // @NOTE 267 // 268 // Some arrays below are tagged "//varies", which means it's actually 269 // a variable-sized piece of data, but rather than malloc I assume it's 270 // small enough it's better to just allocate it all together with the 271 // main thing 272 // 273 // Most of the variables are specified with the smallest size I could pack 274 // them into. It might give better performance to make them all full-sized 275 // integers. It should be safe to freely rearrange the structures or change 276 // the sizes larger--nothing relies on silently truncating etc., nor the 277 // order of variables. 278 279 enum FAST_HUFFMAN_TABLE_SIZE = (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH); 280 enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE - 1); 281 282 struct Codebook 283 { 284 int dimensions, entries; 285 uint8 *codeword_lengths; 286 float minimum_value; 287 float delta_value; 288 uint8 value_bits; 289 uint8 lookup_type; 290 uint8 sequence_p; 291 uint8 sparse; 292 uint32 lookup_values; 293 codetype *multiplicands; 294 uint32 *codewords; 295 int16[FAST_HUFFMAN_TABLE_SIZE] fast_huffman; 296 uint32 *sorted_codewords; 297 int *sorted_values; 298 int sorted_entries; 299 } ; 300 301 struct Floor0 302 { 303 uint8 order; 304 uint16 rate; 305 uint16 bark_map_size; 306 uint8 amplitude_bits; 307 uint8 amplitude_offset; 308 uint8 number_of_books; 309 uint8[16] book_list; // varies 310 } 311 312 struct Floor1 313 { 314 uint8 partitions; 315 uint8[32] partition_class_list; // varies 316 uint8[16] class_dimensions; // varies 317 uint8[16] class_subclasses; // varies 318 uint8[16] class_masterbooks; // varies 319 int16[8][16] subclass_books; // varies 320 uint16[31*8+2] Xlist; // varies 321 uint8[31*8+2] sorted_order; 322 uint8[2][31*8+2] neighbors; 323 uint8 floor1_multiplier; 324 uint8 rangebits; 325 int values; 326 } 327 328 union Floor 329 { 330 Floor0 floor0; 331 Floor1 floor1; 332 } 333 334 struct Residue 335 { 336 uint32 begin, end; 337 uint32 part_size; 338 uint8 classifications; 339 uint8 classbook; 340 uint8 **classdata; 341 int16[8]*residue_books; 342 } ; 343 344 struct MappingChannel 345 { 346 uint8 magnitude; 347 uint8 angle; 348 uint8 mux; 349 } ; 350 351 struct Mapping 352 { 353 uint16 coupling_steps; 354 MappingChannel *chan; 355 uint8 submaps; 356 uint8[15] submap_floor; // varies 357 uint8[15] submap_residue; // varies 358 } ; 359 360 struct Mode 361 { 362 uint8 blockflag; 363 uint8 mapping; 364 uint16 windowtype; 365 uint16 transformtype; 366 } 367 368 struct CRCscan 369 { 370 uint32 goal_crc; // expected crc if match 371 int bytes_left; // bytes left in packet 372 uint32 crc_so_far; // running crc 373 int bytes_done; // bytes processed in _current_ chunk 374 uint32 sample_loc; // granule pos encoded in page 375 } 376 377 struct ProbedPage 378 { 379 uint32 page_start, page_end; 380 uint32 last_decoded_sample; 381 } 382 383 struct stb_vorbis 384 { 385 // user-accessible info 386 uint sample_rate; 387 int channels; 388 389 uint setup_memory_required; 390 uint temp_memory_required; 391 uint setup_temp_memory_required; 392 393 char *vendor; 394 int comment_list_length; 395 char **comment_list; 396 397 // input config 398 IOCallbacks* _io; 399 void* _userData; 400 401 /*uint8 *stream; 402 uint8 *stream_start; 403 uint8 *stream_end;*/ 404 405 uint32 stream_len; 406 407 // the page to seek to when seeking to start, may be zero 408 uint32 first_audio_page_offset; 409 410 // p_first is the page on which the first audio packet ends 411 // (but not necessarily the page on which it starts) 412 ProbedPage p_first, p_last; 413 414 // memory management 415 stb_vorbis_alloc alloc; 416 int setup_offset; 417 int temp_offset; 418 419 // run-time results 420 int eof; 421 STBVorbisError error; 422 423 // user-useful data 424 425 // header info 426 int[2] blocksize; 427 int blocksize_0, blocksize_1; 428 int codebook_count; 429 Codebook *codebooks; 430 int floor_count; 431 uint16[64] floor_types; // varies 432 Floor *floor_config; 433 int residue_count; 434 uint16[64] residue_types; // varies 435 Residue *residue_config; 436 int mapping_count; 437 Mapping *mapping; 438 int mode_count; 439 Mode[64] mode_config; // varies 440 441 uint32 total_samples; 442 443 // decode buffer 444 float*[STB_VORBIS_MAX_CHANNELS] channel_buffers; 445 float*[STB_VORBIS_MAX_CHANNELS] outputs ; 446 447 float*[STB_VORBIS_MAX_CHANNELS] previous_window; 448 int previous_length; 449 450 int16*[STB_VORBIS_MAX_CHANNELS] finalY; 451 452 long current_sample_loc; // sample location of next sample to decode 453 454 455 uint32 current_loc; // sample location of next frame to decode 456 int current_loc_valid; 457 458 // per-blocksize precomputed data 459 460 // twiddle factors 461 float*[2] A, B, C; 462 float*[2] window; 463 uint16*[2] bit_reverse; 464 465 // current page/packet/segment streaming info 466 uint32 serial; // stream serial number for verification 467 int last_page; 468 int segment_count; 469 uint8[255] segments; 470 uint8 page_flag; 471 uint8 bytes_in_seg; 472 uint8 first_decode; 473 int next_seg; 474 int last_seg; // flag that we're on the last segment 475 int last_seg_which; // what was the segment number of the last seg? 476 uint32 acc; 477 int valid_bits; 478 int packet_bytes; 479 int end_seg_with_known_loc; 480 uint32 known_loc_for_packet; 481 int discard_samples_deferred; 482 uint32 samples_output; 483 484 // sample-access 485 int channel_buffer_start; 486 int channel_buffer_end; 487 488 uint32[256] crc_table; // was a global, moved here 489 }; 490 491 alias vorb = stb_vorbis; 492 493 static int error(vorb *f, STBVorbisError e) 494 { 495 f.error = e; 496 if (!f.eof && e != VORBIS_need_more_data) { 497 f.error=e; // breakpoint for debugging 498 } 499 return 0; 500 } 501 502 503 // these functions are used for allocating temporary memory 504 // while decoding. if you can afford the stack space, use 505 // alloca(); otherwise, provide a temp buffer and it will 506 // allocate out of those. 507 508 size_t array_size_required(size_t count, size_t size) 509 { 510 return (count * ((void *).sizeof + size)); 511 } 512 513 void* temp_alloc(vorb *f, size_t size) 514 { 515 assert(f.alloc.alloc_buffer); 516 return setup_temp_malloc(f, cast(int)size); // note: never use alloca 517 } 518 519 void temp_free(vorb *f, void* p) 520 { 521 } 522 523 int temp_alloc_save(vorb *f) 524 { 525 return f.temp_offset; 526 } 527 528 void temp_alloc_restore(vorb *f, int p) 529 { 530 f.temp_offset = p; 531 } 532 533 void* temp_block_array(vorb *f, size_t count, size_t size) 534 { 535 return make_block_array(temp_alloc(f, cast(int) array_size_required(count,size)), cast(int) count, cast(int)size); 536 } 537 538 // given a sufficiently large block of memory, make an array of pointers to subblocks of it 539 static void *make_block_array(void *mem, int count, int size) 540 { 541 int i; 542 void ** p = cast(void **) mem; 543 char *q = cast(char *) (p + count); 544 for (i=0; i < count; ++i) { 545 p[i] = q; 546 q += size; 547 } 548 return p; 549 } 550 551 void *setup_malloc(vorb *f, size_t sz) 552 { 553 return setup_malloc(f, cast(int)sz); 554 } 555 556 void *setup_malloc(vorb *f, int sz) 557 { 558 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs. 559 f.setup_memory_required += sz; 560 if (f.alloc.alloc_buffer) { 561 void *p = cast(char *) f.alloc.alloc_buffer + f.setup_offset; 562 if (f.setup_offset + sz > f.temp_offset) return NULL; 563 f.setup_offset += sz; 564 return p; 565 } 566 return sz ? malloc(sz) : NULL; 567 } 568 569 static void setup_free(vorb *f, void *p) 570 { 571 if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack 572 free(p); 573 } 574 575 void *setup_temp_malloc(vorb *f, int sz) 576 { 577 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs. 578 if (f.alloc.alloc_buffer) { 579 if (f.temp_offset - sz < f.setup_offset) 580 return NULL; 581 f.temp_offset -= sz; 582 return cast(char *) f.alloc.alloc_buffer + f.temp_offset; 583 } 584 return malloc(sz); 585 } 586 587 void setup_temp_free(vorb *f, void *p, size_t sz) 588 { 589 if (f.alloc.alloc_buffer) 590 { 591 f.temp_offset += (sz+7)&~7; 592 return; 593 } 594 free(p); 595 } 596 597 enum uint CRC32_POLY = 0x04c11db7; // from spec 598 599 void crc32_init(vorb* f) 600 { 601 int i,j; 602 uint32 s; 603 for(i=0; i < 256; i++) { 604 for (s=cast(uint32) i << 24, j=0; j < 8; ++j) 605 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0); 606 f.crc_table[i] = s; 607 } 608 } 609 610 uint32 crc32_update(vorb* f, uint32 crc, uint8 byte_) 611 { 612 return (crc << 8) ^ f.crc_table[byte_ ^ (crc >> 24)]; 613 } 614 615 616 // used in setup, and for huffman that doesn't go fast path 617 static uint bit_reverse(uint n) 618 { 619 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1); 620 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2); 621 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4); 622 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8); 623 return (n >> 16) | (n << 16); 624 } 625 626 static float square(float x) 627 { 628 return x*x; 629 } 630 631 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3 632 // as required by the specification. fast(?) implementation from stb.h 633 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup 634 static int ilog(int32 n) 635 { 636 static immutable byte[16] log2_4 = [ 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 ]; 637 638 if (n < 0) return 0; // signed n returns 0 639 640 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29) 641 if (n < (1 << 14)) 642 if (n < (1 << 4)) return 0 + log2_4[n ]; 643 else if (n < (1 << 9)) return 5 + log2_4[n >> 5]; 644 else return 10 + log2_4[n >> 10]; 645 else if (n < (1 << 24)) 646 if (n < (1 << 19)) return 15 + log2_4[n >> 15]; 647 else return 20 + log2_4[n >> 20]; 648 else if (n < (1 << 29)) return 25 + log2_4[n >> 25]; 649 else return 30 + log2_4[n >> 30]; 650 } 651 652 enum M_PI = 3.14159265358979323846264f; // from CRC 653 654 // code length assigned to a value with no huffman encoding 655 enum NO_CODE = 255; 656 657 /////////////////////// LEAF SETUP FUNCTIONS ////////////////////////// 658 // 659 // these functions are only called at setup, and only a few times 660 // per file 661 662 static float float32_unpack(uint32 x) 663 { 664 // from the specification 665 uint32 mantissa = x & 0x1fffff; 666 uint32 sign = x & 0x80000000; 667 uint32 exp = (x & 0x7fe00000) >> 21; 668 double res = sign ? -cast(double)mantissa : cast(double)mantissa; 669 return cast(float) ldexp(cast(float)res, cast(int)exp-788); 670 } 671 672 673 // zlib & jpeg huffman tables assume that the output symbols 674 // can either be arbitrarily arranged, or have monotonically 675 // increasing frequencies--they rely on the lengths being sorted; 676 // this makes for a very simple generation algorithm. 677 // vorbis allows a huffman table with non-sorted lengths. This 678 // requires a more sophisticated construction, since symbols in 679 // order do not map to huffman codes "in order". 680 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values) 681 { 682 if (!c.sparse) { 683 c.codewords [symbol] = huff_code; 684 } else { 685 c.codewords [count] = huff_code; 686 c.codeword_lengths[count] = cast(ubyte)len; 687 values [count] = symbol; 688 } 689 } 690 691 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) 692 { 693 int i,k,m=0; 694 uint32[32] available; 695 696 memset(available.ptr, 0, 32); 697 // find the first entry 698 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break; 699 if (k == n) { assert(c.sorted_entries == 0); return TRUE; } 700 assert(len[k] < 32); // no error return required, code reading lens checks this 701 // add to the list 702 add_entry(c, 0, k, m++, len[k], values); 703 // add all available leaves 704 for (i=1; i <= len[k]; ++i) 705 available[i] = 1U << (32-i); 706 // note that the above code treats the first case specially, 707 // but it's really the same as the following code, so they 708 // could probably be combined (except the initial code is 0, 709 // and I use 0 in available[] to mean 'empty') 710 for (i=k+1; i < n; ++i) { 711 uint32 res; 712 int z = len[i], y; 713 if (z == NO_CODE) continue; 714 assert(z < 32); // no error return required, code reading lens checks this 715 // find lowest available leaf (should always be earliest, 716 // which is what the specification calls for) 717 // note that this property, and the fact we can never have 718 // more than one free leaf at a given level, isn't totally 719 // trivial to prove, but it seems true and the assert never 720 // fires, so! 721 while (z > 0 && !available[z]) --z; 722 if (z == 0) { return FALSE; } 723 res = available[z]; 724 available[z] = 0; 725 add_entry(c, bit_reverse(res), i, m++, len[i], values); 726 // propagate availability up the tree 727 if (z != len[i]) { 728 for (y=len[i]; y > z; --y) { 729 assert(available[y] == 0); 730 available[y] = res + (1 << (32-y)); 731 } 732 } 733 } 734 return TRUE; 735 } 736 737 // accelerated huffman table allows fast O(1) match of all symbols 738 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH 739 static void compute_accelerated_huffman(Codebook *c) 740 { 741 int i, len; 742 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i) 743 c.fast_huffman[i] = -1; 744 745 len = c.sparse ? c.sorted_entries : c.entries; 746 if (len > 32767) len = 32767; // largest possible value we can encode! 747 for (i=0; i < len; ++i) { 748 if (c.codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) { 749 uint32 z = c.sparse ? bit_reverse(c.sorted_codewords[i]) : c.codewords[i]; 750 // set table entries for all bit combinations in the higher bits 751 while (z < FAST_HUFFMAN_TABLE_SIZE) { 752 c.fast_huffman[z] = cast(short)i; 753 z += 1 << c.codeword_lengths[i]; 754 } 755 } 756 } 757 } 758 759 extern(C) int uint32_compare_2(const void *p, const void *q) 760 { 761 uint32 x = *cast(uint32 *) p; 762 uint32 y = *cast(uint32 *) q; 763 return x < y ? -1 : x > y; 764 } 765 766 static int include_in_sort(Codebook *c, uint8 len) 767 { 768 if (c.sparse) { assert(len != NO_CODE); return TRUE; } 769 if (len == NO_CODE) return FALSE; 770 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE; 771 return FALSE; 772 } 773 774 // if the fast table above doesn't work, we want to binary 775 // search them... need to reverse the bits 776 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values) 777 { 778 int i, len; 779 // build a list of all the entries 780 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN. 781 // this is kind of a frivolous optimization--I don't see any performance improvement, 782 // but it's like 4 extra lines of code, so. 783 if (!c.sparse) { 784 int k = 0; 785 for (i=0; i < c.entries; ++i) 786 if (include_in_sort(c, lengths[i])) 787 c.sorted_codewords[k++] = bit_reverse(c.codewords[i]); 788 assert(k == c.sorted_entries); 789 } else { 790 for (i=0; i < c.sorted_entries; ++i) 791 c.sorted_codewords[i] = bit_reverse(c.codewords[i]); 792 } 793 794 qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare_2); 795 c.sorted_codewords[c.sorted_entries] = 0xffffffff; 796 797 len = c.sparse ? c.sorted_entries : c.entries; 798 // now we need to indicate how they correspond; we could either 799 // #1: sort a different data structure that says who they correspond to 800 // #2: for each sorted entry, search the original list to find who corresponds 801 // #3: for each original entry, find the sorted entry 802 // #1 requires extra storage, #2 is slow, #3 can use binary search! 803 for (i=0; i < len; ++i) { 804 ubyte huff_len = c.sparse ? lengths[values[i]] : lengths[i]; 805 if (include_in_sort(c, huff_len)) { 806 uint32 code = bit_reverse(c.codewords[i]); 807 int x=0, n=c.sorted_entries; 808 while (n > 1) { 809 // invariant: sc[x] <= code < sc[x+n] 810 int m = x + (n >> 1); 811 if (c.sorted_codewords[m] <= code) { 812 x = m; 813 n -= (n>>1); 814 } else { 815 n >>= 1; 816 } 817 } 818 assert(c.sorted_codewords[x] == code); 819 if (c.sparse) { 820 c.sorted_values[x] = values[i]; 821 c.codeword_lengths[x] = huff_len; 822 } else { 823 c.sorted_values[x] = i; 824 } 825 } 826 } 827 } 828 829 // only run while parsing the header (3 times) 830 static int vorbis_validate(uint8 *data) 831 { 832 static immutable char[6] vorbis = "vorbis"; 833 return memcmp(data, vorbis.ptr, 6) == 0; 834 } 835 836 // called from setup only, once per code book 837 // (formula implied by specification) 838 static int lookup1_values(int entries, int dim) 839 { 840 int r = cast(int) floor(exp(cast(float) log(cast(float) entries) / dim)); 841 if (cast(int) floor(pow(cast(float) r+1, dim)) <= entries) // (int) cast for MinGW warning; 842 ++r; // floor() to avoid _ftol() when non-CRT 843 if (pow(cast(float) r+1, dim) <= entries) 844 return -1; 845 if (cast(int) floor(pow(cast(float) r, dim)) > entries) 846 return -1; 847 return r; 848 } 849 850 // called twice per file 851 static void compute_twiddle_factors(int n, float *A, float *B, float *C) 852 { 853 int n4 = n >> 2, n8 = n >> 3; 854 int k,k2; 855 856 for (k=k2=0; k < n4; ++k,k2+=2) { 857 A[k2 ] = cast(float) cos(4*k*M_PI/n); 858 A[k2+1] = cast(float) -sin(4*k*M_PI/n); 859 B[k2 ] = cast(float) cos((k2+1)*M_PI/n/2) * 0.5f; 860 B[k2+1] = cast(float) sin((k2+1)*M_PI/n/2) * 0.5f; 861 } 862 for (k=k2=0; k < n8; ++k,k2+=2) { 863 C[k2 ] = cast(float) cos(2*(k2+1)*M_PI/n); 864 C[k2+1] = cast(float) -sin(2*(k2+1)*M_PI/n); 865 } 866 } 867 868 static void compute_window(int n, float *window) 869 { 870 int n2 = n >> 1, i; 871 for (i=0; i < n2; ++i) 872 window[i] = cast(float) sin(0.5 * M_PI * square(cast(float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI))); 873 } 874 875 static void compute_bitreverse(int n, uint16 *rev) 876 { 877 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions 878 int i, n8 = n >> 3; 879 for (i=0; i < n8; ++i) 880 rev[i] = cast(ushort) ( (bit_reverse(i) >> (32-ld+3)) << 2 ); 881 } 882 883 static int init_blocksize(vorb *f, int b, int n) 884 { 885 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3; 886 f.A[b] = cast(float *) setup_malloc(f, 4 * n2); 887 f.B[b] = cast(float *) setup_malloc(f, 4 * n2); 888 f.C[b] = cast(float *) setup_malloc(f, 4 * n4); 889 if (!f.A[b] || !f.B[b] || !f.C[b]) return error(f, VORBIS_outofmem); 890 compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]); 891 f.window[b] = cast(float *) setup_malloc(f, 4 * n2); 892 if (!f.window[b]) return error(f, VORBIS_outofmem); 893 compute_window(n, f.window[b]); 894 f.bit_reverse[b] = cast(uint16 *) setup_malloc(f, 2 * n8); 895 if (!f.bit_reverse[b]) return error(f, VORBIS_outofmem); 896 compute_bitreverse(n, f.bit_reverse[b]); 897 return TRUE; 898 } 899 900 static void neighbors(uint16 *x, int n, int *plow, int *phigh) 901 { 902 int low = -1; 903 int high = 65536; 904 int i; 905 for (i=0; i < n; ++i) { 906 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; } 907 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; } 908 } 909 } 910 911 // this has been repurposed so y is now the original index instead of y 912 struct stbv__floor_ordering 913 { 914 uint16 x,id; 915 } 916 917 extern(C) int point_compare_2(const void *p, const void *q) 918 { 919 stbv__floor_ordering *a = cast(stbv__floor_ordering *) p; 920 stbv__floor_ordering *b = cast(stbv__floor_ordering *) q; 921 return a.x < b.x ? -1 : a.x > b.x; 922 } 923 924 // 925 /////////////////////// END LEAF SETUP FUNCTIONS ////////////////////////// 926 927 uint8 get8(vorb *z) 928 { 929 ubyte r; 930 if (z._io.read(&r, 1, z._userData) == 1) 931 return r; 932 else 933 { 934 z.eof = TRUE; 935 return 0; 936 } 937 } 938 939 static uint32 get32(vorb *f) 940 { 941 uint32 x; 942 x = get8(f); 943 x += get8(f) << 8; 944 x += get8(f) << 16; 945 x += cast(uint32) get8(f) << 24; 946 return x; 947 } 948 949 static int getn(vorb *z, uint8 *data, int n) 950 { 951 ubyte r; 952 if (z._io.read(data, n, z._userData) == n) 953 return 1; 954 else 955 { 956 z.eof = 1; 957 return 0; 958 } 959 } 960 961 static void skip(vorb *z, int n) 962 { 963 z._io.skip(n, z._userData); 964 } 965 966 static int set_file_offset(stb_vorbis *f, uint loc) 967 { 968 if (f._io.seek( loc, false, f._userData)) 969 { 970 return 1; 971 } 972 else 973 { 974 f.eof = 1; 975 f._io.seek(f.stream_len, false, f._userData); // stb_vorbis set to end of the file, for some reason. 976 return 0; 977 } 978 } 979 980 981 static immutable uint8[4] ogg_page_header = [ 0x4f, 0x67, 0x67, 0x53 ]; 982 983 static int capture_pattern(vorb *f) 984 { 985 if (0x4f != get8(f)) return FALSE; 986 if (0x67 != get8(f)) return FALSE; 987 if (0x67 != get8(f)) return FALSE; 988 if (0x53 != get8(f)) return FALSE; 989 return TRUE; 990 } 991 992 enum PAGEFLAG_continued_packet = 1; 993 enum PAGEFLAG_first_page = 2; 994 enum PAGEFLAG_last_page = 4; 995 996 static int start_page_no_capturepattern(vorb *f) 997 { 998 uint32 loc0,loc1,n; 999 if (f.first_decode) { 1000 f.p_first.page_start = stb_vorbis_get_file_offset(f) - 4; 1001 } 1002 // stream structure version 1003 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version); 1004 // header flag 1005 f.page_flag = get8(f); 1006 // absolute granule position 1007 loc0 = get32(f); 1008 loc1 = get32(f); 1009 // @TODO: validate loc0,loc1 as valid positions? 1010 // stream serial number -- vorbis doesn't interleave, so discard 1011 get32(f); 1012 //if (f.serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number); 1013 // page sequence number 1014 n = get32(f); 1015 f.last_page = n; 1016 // CRC32 1017 get32(f); 1018 // page_segments 1019 f.segment_count = get8(f); 1020 if (!getn(f, f.segments.ptr, f.segment_count)) 1021 return error(f, VORBIS_unexpected_eof); 1022 // assume we _don't_ know any the sample position of any segments 1023 f.end_seg_with_known_loc = -2; 1024 if (loc0 != ~0U || loc1 != ~0U) { 1025 int i; 1026 // determine which packet is the last one that will complete 1027 for (i=f.segment_count-1; i >= 0; --i) 1028 if (f.segments[i] < 255) 1029 break; 1030 // 'i' is now the index of the _last_ segment of a packet that ends 1031 if (i >= 0) { 1032 f.end_seg_with_known_loc = i; 1033 f.known_loc_for_packet = loc0; 1034 } 1035 } 1036 if (f.first_decode) { 1037 int i,len; 1038 len = 0; 1039 for (i=0; i < f.segment_count; ++i) 1040 len += f.segments[i]; 1041 len += 27 + f.segment_count; 1042 f.p_first.page_end = f.p_first.page_start + len; 1043 f.p_first.last_decoded_sample = loc0; 1044 } 1045 f.next_seg = 0; 1046 return TRUE; 1047 } 1048 1049 static int start_page(vorb *f) 1050 { 1051 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern); 1052 return start_page_no_capturepattern(f); 1053 } 1054 1055 static int start_packet(vorb *f) 1056 { 1057 while (f.next_seg == -1) { 1058 if (!start_page(f)) return FALSE; 1059 if (f.page_flag & PAGEFLAG_continued_packet) 1060 return error(f, VORBIS_continued_packet_flag_invalid); 1061 } 1062 f.last_seg = FALSE; 1063 f.valid_bits = 0; 1064 f.packet_bytes = 0; 1065 f.bytes_in_seg = 0; 1066 // f.next_seg is now valid 1067 return TRUE; 1068 } 1069 1070 int maybe_start_packet(vorb *f) 1071 { 1072 if (f.next_seg == -1) { 1073 int x = get8(f); 1074 if (f.eof) return FALSE; // EOF at page boundary is not an error! 1075 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern); 1076 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1077 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1078 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1079 if (!start_page_no_capturepattern(f)) return FALSE; 1080 if (f.page_flag & PAGEFLAG_continued_packet) { 1081 // set up enough state that we can read this packet if we want, 1082 // e.g. during recovery 1083 f.last_seg = FALSE; 1084 f.bytes_in_seg = 0; 1085 return error(f, VORBIS_continued_packet_flag_invalid); 1086 } 1087 } 1088 return start_packet(f); 1089 } 1090 1091 static int next_segment(vorb *f) 1092 { 1093 int len; 1094 if (f.last_seg) return 0; 1095 if (f.next_seg == -1) { 1096 f.last_seg_which = f.segment_count-1; // in case start_page fails 1097 if (!start_page(f)) { f.last_seg = 1; return 0; } 1098 if (!(f.page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid); 1099 } 1100 len = f.segments[f.next_seg++]; 1101 if (len < 255) { 1102 f.last_seg = TRUE; 1103 f.last_seg_which = f.next_seg-1; 1104 } 1105 if (f.next_seg >= f.segment_count) 1106 f.next_seg = -1; 1107 assert(f.bytes_in_seg == 0); 1108 f.bytes_in_seg = cast(ubyte)len; 1109 return len; 1110 } 1111 1112 enum EOP = (-1); 1113 enum INVALID_BITS = (-1); 1114 1115 static int get8_packet_raw(vorb *f) 1116 { 1117 if (!f.bytes_in_seg) { // CLANG! 1118 if (f.last_seg) return EOP; 1119 else if (!next_segment(f)) return EOP; 1120 } 1121 assert(f.bytes_in_seg > 0); 1122 --f.bytes_in_seg; 1123 ++f.packet_bytes; 1124 return get8(f); 1125 } 1126 1127 static int get8_packet(vorb *f) 1128 { 1129 int x = get8_packet_raw(f); 1130 f.valid_bits = 0; 1131 return x; 1132 } 1133 1134 static int get32_packet(vorb *f) 1135 { 1136 uint32 x; 1137 x = get8_packet(f); 1138 x += get8_packet(f) << 8; 1139 x += get8_packet(f) << 16; 1140 x += cast(uint32) get8_packet(f) << 24; 1141 return x; 1142 } 1143 1144 static void flush_packet(vorb *f) 1145 { 1146 while (get8_packet_raw(f) != EOP) 1147 { 1148 } 1149 } 1150 1151 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important 1152 // as the huffman decoder? 1153 static uint32 get_bits(vorb *f, int n) 1154 { 1155 uint32 z; 1156 1157 if (f.valid_bits < 0) return 0; 1158 if (f.valid_bits < n) { 1159 if (n > 24) { 1160 // the accumulator technique below would not work correctly in this case 1161 z = get_bits(f, 24); 1162 z += get_bits(f, n-24) << 24; 1163 return z; 1164 } 1165 if (f.valid_bits == 0) f.acc = 0; 1166 while (f.valid_bits < n) { 1167 int z2 = get8_packet_raw(f); 1168 if (z2 == EOP) { 1169 f.valid_bits = INVALID_BITS; 1170 return 0; 1171 } 1172 f.acc += z2 << f.valid_bits; 1173 f.valid_bits += 8; 1174 } 1175 } 1176 1177 assert(f.valid_bits >= n); 1178 z = f.acc & ((1 << n)-1); 1179 f.acc >>= n; 1180 f.valid_bits -= n; 1181 return z; 1182 } 1183 1184 // @OPTIMIZE: primary accumulator for huffman 1185 // expand the buffer to as many bits as possible without reading off end of packet 1186 // it might be nice to allow f.valid_bits and f.acc to be stored in registers, 1187 // e.g. cache them locally and decode locally 1188 static void prep_huffman(vorb *f) 1189 { 1190 if (f.valid_bits <= 24) { 1191 if (f.valid_bits == 0) f.acc = 0; 1192 do { 1193 int z; 1194 if (f.last_seg && !f.bytes_in_seg) return; 1195 z = get8_packet_raw(f); 1196 if (z == EOP) return; 1197 f.acc += cast(uint) z << f.valid_bits; 1198 f.valid_bits += 8; 1199 } while (f.valid_bits <= 24); 1200 } 1201 } 1202 1203 enum 1204 { 1205 VORBIS_packet_id = 1, 1206 VORBIS_packet_comment = 3, 1207 VORBIS_packet_setup = 5 1208 }; 1209 1210 static int codebook_decode_scalar_raw(vorb *f, Codebook *c) 1211 { 1212 int i; 1213 prep_huffman(f); 1214 1215 if (c.codewords == NULL && c.sorted_codewords == NULL) 1216 return -1; 1217 1218 // cases to use binary search: sorted_codewords && !c.codewords 1219 // sorted_codewords && c.entries > 8 1220 if (c.entries > 8 ? c.sorted_codewords!=NULL : !c.codewords) { 1221 // binary search 1222 uint32 code = bit_reverse(f.acc); 1223 int x=0, n=c.sorted_entries, len; 1224 1225 while (n > 1) { 1226 // invariant: sc[x] <= code < sc[x+n] 1227 int m = x + (n >> 1); 1228 if (c.sorted_codewords[m] <= code) { 1229 x = m; 1230 n -= (n>>1); 1231 } else { 1232 n >>= 1; 1233 } 1234 } 1235 // x is now the sorted index 1236 if (!c.sparse) x = c.sorted_values[x]; 1237 // x is now sorted index if sparse, or symbol otherwise 1238 len = c.codeword_lengths[x]; 1239 if (f.valid_bits >= len) { 1240 f.acc >>= len; 1241 f.valid_bits -= len; 1242 return x; 1243 } 1244 1245 f.valid_bits = 0; 1246 return -1; 1247 } 1248 1249 // if small, linear search 1250 assert(!c.sparse); 1251 for (i=0; i < c.entries; ++i) { 1252 if (c.codeword_lengths[i] == NO_CODE) continue; 1253 if (c.codewords[i] == (f.acc & ((1 << c.codeword_lengths[i])-1))) { 1254 if (f.valid_bits >= c.codeword_lengths[i]) { 1255 f.acc >>= c.codeword_lengths[i]; 1256 f.valid_bits -= c.codeword_lengths[i]; 1257 return i; 1258 } 1259 f.valid_bits = 0; 1260 return -1; 1261 } 1262 } 1263 1264 error(f, VORBIS_invalid_stream); 1265 f.valid_bits = 0; 1266 return -1; 1267 } 1268 1269 1270 static int codebook_decode_scalar(vorb *f, Codebook *c) 1271 { 1272 int i; 1273 if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) 1274 prep_huffman(f); 1275 // fast huffman table lookup 1276 i = f.acc & FAST_HUFFMAN_TABLE_MASK; 1277 i = c.fast_huffman[i]; 1278 if (i >= 0) { 1279 f.acc >>= c.codeword_lengths[i]; 1280 f.valid_bits -= c.codeword_lengths[i]; 1281 if (f.valid_bits < 0) { f.valid_bits = 0; return -1; } 1282 return i; 1283 } 1284 return codebook_decode_scalar_raw(f,c); 1285 } 1286 1287 void DECODE_RAW(ref int var, vorb* f, Codebook* c) 1288 { 1289 var = codebook_decode_scalar(f,c); 1290 } 1291 1292 void DECODE(ref int var, vorb* f, Codebook* c) 1293 { 1294 var = codebook_decode_scalar(f,c); 1295 if (c.sparse) 1296 var = c.sorted_values[var]; 1297 } 1298 1299 void DECODE_VQ(ref int var, vorb* f, Codebook* c) 1300 { 1301 DECODE_RAW(var, f, c); 1302 } 1303 1304 1305 1306 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case 1307 // where we avoid one addition 1308 codetype CODEBOOK_ELEMENT(Codebook* c, int off) 1309 { 1310 return (c.multiplicands[off]); 1311 } 1312 1313 codetype CODEBOOK_ELEMENT_FAST(Codebook* c, int off) 1314 { 1315 return (c.multiplicands[off]); 1316 } 1317 1318 codetype CODEBOOK_ELEMENT_BASE(Codebook* c) 1319 { 1320 return 0; 1321 } 1322 1323 static int codebook_decode_start(vorb *f, Codebook *c) 1324 { 1325 int z = -1; 1326 1327 // type 0 is only legal in a scalar context 1328 if (c.lookup_type == 0) 1329 error(f, VORBIS_invalid_stream); 1330 else { 1331 DECODE_VQ(z,f,c); 1332 if (c.sparse) assert(z < c.sorted_entries); 1333 if (z < 0) { // check for EOP 1334 if (!f.bytes_in_seg) 1335 if (f.last_seg) 1336 return z; 1337 error(f, VORBIS_invalid_stream); 1338 } 1339 } 1340 return z; 1341 } 1342 1343 static int codebook_decode(vorb *f, Codebook *c, float *output, int len) 1344 { 1345 int i,z = codebook_decode_start(f,c); 1346 if (z < 0) return FALSE; 1347 if (len > c.dimensions) len = c.dimensions; 1348 1349 z *= c.dimensions; 1350 if (c.sequence_p) 1351 { 1352 float last = CODEBOOK_ELEMENT_BASE(c); 1353 for (i=0; i < len; ++i) 1354 { 1355 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1356 output[i] += val; 1357 last = val + c.minimum_value; 1358 } 1359 } 1360 else 1361 { 1362 float last = CODEBOOK_ELEMENT_BASE(c); 1363 for (i=0; i < len; ++i) 1364 { 1365 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1366 } 1367 } 1368 return TRUE; 1369 } 1370 1371 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step) 1372 { 1373 int i,z = codebook_decode_start(f,c); 1374 float last = CODEBOOK_ELEMENT_BASE(c); 1375 if (z < 0) return FALSE; 1376 if (len > c.dimensions) len = c.dimensions; 1377 1378 z *= c.dimensions; 1379 for (i=0; i < len; ++i) { 1380 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1381 output[i*step] += val; 1382 if (c.sequence_p) last = val; 1383 } 1384 1385 return TRUE; 1386 } 1387 1388 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode) 1389 { 1390 int c_inter = *c_inter_p; 1391 int p_inter = *p_inter_p; 1392 int i,z, effective = c.dimensions; 1393 1394 // type 0 is only legal in a scalar context 1395 if (c.lookup_type == 0) return error(f, VORBIS_invalid_stream); 1396 1397 while (total_decode > 0) { 1398 float last = CODEBOOK_ELEMENT_BASE(c); 1399 DECODE_VQ(z,f,c); 1400 static if (!STB_VORBIS_DIVIDES_IN_CODEBOOK) 1401 { 1402 assert(!c.sparse || z < c.sorted_entries); 1403 } 1404 if (z < 0) { 1405 if (!f.bytes_in_seg) 1406 if (f.last_seg) return FALSE; 1407 return error(f, VORBIS_invalid_stream); 1408 } 1409 1410 // if this will take us off the end of the buffers, stop short! 1411 // we check by computing the length of the virtual interleaved 1412 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter), 1413 // and the length we'll be using (effective) 1414 if (c_inter + p_inter*ch + effective > len * ch) { 1415 effective = len*ch - (p_inter*ch - c_inter); 1416 } 1417 1418 { 1419 z *= c.dimensions; 1420 if (c.sequence_p) { 1421 for (i=0; i < effective; ++i) { 1422 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1423 if (outputs[c_inter]) 1424 outputs[c_inter][p_inter] += val; 1425 if (++c_inter == ch) { c_inter = 0; ++p_inter; } 1426 last = val; 1427 } 1428 } else { 1429 for (i=0; i < effective; ++i) { 1430 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1431 if (outputs[c_inter]) 1432 outputs[c_inter][p_inter] += val; 1433 if (++c_inter == ch) { c_inter = 0; ++p_inter; } 1434 } 1435 } 1436 } 1437 1438 total_decode -= effective; 1439 } 1440 *c_inter_p = c_inter; 1441 *p_inter_p = p_inter; 1442 return TRUE; 1443 } 1444 1445 static int predict_point(int x, int x0, int x1, int y0, int y1) 1446 { 1447 int dy = y1 - y0; 1448 int adx = x1 - x0; 1449 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86? 1450 int err = abs(dy) * (x - x0); 1451 int off = err / adx; 1452 return dy < 0 ? y0 - off : y0 + off; 1453 } 1454 1455 // the following table is block-copied from the specification 1456 static immutable float[256] inverse_db_table = 1457 [ 1458 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, 1459 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, 1460 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, 1461 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, 1462 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, 1463 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, 1464 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, 1465 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, 1466 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, 1467 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, 1468 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, 1469 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, 1470 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, 1471 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, 1472 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, 1473 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, 1474 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, 1475 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, 1476 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, 1477 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, 1478 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, 1479 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, 1480 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, 1481 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, 1482 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, 1483 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, 1484 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, 1485 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, 1486 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, 1487 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, 1488 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, 1489 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, 1490 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, 1491 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, 1492 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, 1493 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, 1494 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f, 1495 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f, 1496 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f, 1497 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f, 1498 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f, 1499 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f, 1500 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f, 1501 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f, 1502 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f, 1503 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f, 1504 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f, 1505 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f, 1506 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f, 1507 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f, 1508 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f, 1509 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f, 1510 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f, 1511 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f, 1512 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f, 1513 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f, 1514 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f, 1515 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f, 1516 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f, 1517 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f, 1518 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f, 1519 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f, 1520 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f, 1521 0.82788260f, 0.88168307f, 0.9389798f, 1.0f 1522 ]; 1523 1524 1525 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine, 1526 // note that you must produce bit-identical output to decode correctly; 1527 // this specific sequence of operations is specified in the spec (it's 1528 // drawing integer-quantized frequency-space lines that the encoder 1529 // expects to be exactly the same) 1530 // ... also, isn't the whole point of Bresenham's algorithm to NOT 1531 // have to divide in the setup? sigh. 1532 1533 static void draw_line(float *output, int x0, int y0, int x1, int y1, int n) 1534 { 1535 int dy = y1 - y0; 1536 int adx = x1 - x0; 1537 int ady = abs(dy); 1538 int base; 1539 int x=x0,y=y0; 1540 int err = 0; 1541 int sy; 1542 1543 base = dy / adx; 1544 if (dy < 0) 1545 sy = base - 1; 1546 else 1547 sy = base+1; 1548 ady -= abs(base) * adx; 1549 if (x1 > n) x1 = n; 1550 if (x < x1) { 1551 output[x] *= inverse_db_table[y&255]; 1552 for (++x; x < x1; ++x) { 1553 err += ady; 1554 if (err >= adx) { 1555 err -= adx; 1556 y += sy; 1557 } else 1558 y += base; 1559 output[x] *= inverse_db_table[y&255]; 1560 } 1561 } 1562 } 1563 1564 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype) 1565 { 1566 int k; 1567 if (rtype == 0) { 1568 int step = n / book.dimensions; 1569 for (k=0; k < step; ++k) 1570 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) 1571 return FALSE; 1572 } else { 1573 for (k=0; k < n; ) { 1574 if (!codebook_decode(f, book, target+offset, n-k)) 1575 return FALSE; 1576 k += book.dimensions; 1577 offset += book.dimensions; 1578 } 1579 } 1580 return TRUE; 1581 } 1582 1583 // n is 1/2 of the blocksize -- 1584 // specification: "Correct per-vector decode length is [n]/2" 1585 static void decode_residue(vorb *f, float **residue_buffers, int ch, int n, int rn, uint8 *do_not_decode) 1586 { 1587 int i,j,pass; 1588 Residue *r = f.residue_config + rn; 1589 int rtype = f.residue_types[rn]; 1590 int c = r.classbook; 1591 int classwords = f.codebooks[c].dimensions; 1592 uint actual_size = rtype == 2 ? n*2 : n; 1593 uint limit_r_begin = (r.begin < actual_size ? r.begin : actual_size); 1594 uint limit_r_end = (r.end < actual_size ? r.end : actual_size); 1595 int n_read = limit_r_end - limit_r_begin; 1596 int part_read = n_read / r.part_size; 1597 int temp_alloc_point = temp_alloc_save(f); 1598 uint8 ***part_classdata = cast(uint8 ***) temp_block_array(f,f.channels, part_read * (uint8*).sizeof); 1599 1600 for (i=0; i < ch; ++i) 1601 if (!do_not_decode[i]) 1602 memset(residue_buffers[i], 0, float.sizeof * n); 1603 1604 if (rtype == 2 && ch != 1) { 1605 for (j=0; j < ch; ++j) 1606 if (!do_not_decode[j]) 1607 break; 1608 if (j == ch) 1609 goto done; 1610 1611 for (pass=0; pass < 8; ++pass) { 1612 int pcount = 0, class_set = 0; 1613 if (ch == 2) { 1614 while (pcount < part_read) { 1615 int z = r.begin + pcount*r.part_size; 1616 int c_inter = (z & 1), p_inter = z>>1; 1617 if (pass == 0) { 1618 Codebook *c2 = f.codebooks+r.classbook; 1619 int q; 1620 DECODE(q,f,c2); 1621 if (q == EOP) goto done; 1622 part_classdata[0][class_set] = r.classdata[q]; 1623 } 1624 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) 1625 { 1626 int z2 = r.begin + pcount*r.part_size; 1627 int c2 = part_classdata[0][class_set][i]; 1628 int b = r.residue_books[c2][pass]; 1629 if (b >= 0) { 1630 Codebook *book = f.codebooks + b; 1631 1632 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) 1633 goto done; 1634 1635 } 1636 else 1637 { 1638 z2 += r.part_size; 1639 c_inter = z2 & 1; 1640 p_inter = z2 >> 1; 1641 } 1642 } 1643 ++class_set; 1644 } 1645 } else if (ch > 2) { 1646 while (pcount < part_read) { 1647 int z = r.begin + pcount*r.part_size; 1648 int c_inter = z % ch, p_inter = z/ch; 1649 if (pass == 0) { 1650 Codebook *c3 = f.codebooks+r.classbook; 1651 int q; 1652 DECODE(q,f,c3); 1653 if (q == EOP) goto done; 1654 part_classdata[0][class_set] = r.classdata[q]; 1655 } 1656 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 1657 int z3 = r.begin + pcount*r.part_size; 1658 int c3 = part_classdata[0][class_set][i]; 1659 int b = r.residue_books[c3][pass]; 1660 if (b >= 0) { 1661 Codebook *book = f.codebooks + b; 1662 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) 1663 goto done; 1664 } else { 1665 z3 += r.part_size; 1666 c_inter = z3 % ch; 1667 p_inter = z3 / ch; 1668 } 1669 } 1670 ++class_set; 1671 } 1672 } 1673 } 1674 goto done; 1675 } 1676 1677 for (pass=0; pass < 8; ++pass) { 1678 int pcount = 0, class_set=0; 1679 while (pcount < part_read) { 1680 if (pass == 0) { 1681 for (j=0; j < ch; ++j) { 1682 if (!do_not_decode[j]) { 1683 Codebook *c4 = f.codebooks+r.classbook; 1684 int temp; 1685 DECODE(temp,f,c4); 1686 if (temp == EOP) goto done; 1687 part_classdata[j][class_set] = r.classdata[temp]; 1688 } 1689 } 1690 } 1691 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 1692 for (j=0; j < ch; ++j) { 1693 if (!do_not_decode[j]) { 1694 int c5 = part_classdata[j][class_set][i]; 1695 int b = r.residue_books[c5][pass]; 1696 if (b >= 0) { 1697 float *target = residue_buffers[j]; 1698 int offset = r.begin + pcount * r.part_size; 1699 int n2 = r.part_size; 1700 Codebook *book = f.codebooks + b; 1701 if (!residue_decode(f, book, target, offset, n2, rtype)) 1702 goto done; 1703 } 1704 } 1705 } 1706 } 1707 ++class_set; 1708 } 1709 } 1710 done: 1711 1712 temp_free(f,part_classdata); 1713 temp_alloc_restore(f,temp_alloc_point); 1714 } 1715 1716 // the following were split out into separate functions while optimizing; 1717 // they could be pushed back up but eh. __forceinline showed no change; 1718 // they're probably already being inlined. 1719 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A) 1720 { 1721 float *ee0 = e + i_off; 1722 float *ee2 = ee0 + k_off; 1723 int i; 1724 1725 assert((n & 3) == 0); 1726 for (i=(n>>2); i > 0; --i) { 1727 float k00_20, k01_21; 1728 k00_20 = ee0[ 0] - ee2[ 0]; 1729 k01_21 = ee0[-1] - ee2[-1]; 1730 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0]; 1731 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1]; 1732 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1]; 1733 ee2[-1] = k01_21 * A[0] + k00_20 * A[1]; 1734 A += 8; 1735 1736 k00_20 = ee0[-2] - ee2[-2]; 1737 k01_21 = ee0[-3] - ee2[-3]; 1738 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2]; 1739 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3]; 1740 ee2[-2] = k00_20 * A[0] - k01_21 * A[1]; 1741 ee2[-3] = k01_21 * A[0] + k00_20 * A[1]; 1742 A += 8; 1743 1744 k00_20 = ee0[-4] - ee2[-4]; 1745 k01_21 = ee0[-5] - ee2[-5]; 1746 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4]; 1747 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5]; 1748 ee2[-4] = k00_20 * A[0] - k01_21 * A[1]; 1749 ee2[-5] = k01_21 * A[0] + k00_20 * A[1]; 1750 A += 8; 1751 1752 k00_20 = ee0[-6] - ee2[-6]; 1753 k01_21 = ee0[-7] - ee2[-7]; 1754 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6]; 1755 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7]; 1756 ee2[-6] = k00_20 * A[0] - k01_21 * A[1]; 1757 ee2[-7] = k01_21 * A[0] + k00_20 * A[1]; 1758 A += 8; 1759 ee0 -= 8; 1760 ee2 -= 8; 1761 } 1762 } 1763 1764 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1) 1765 { 1766 int i; 1767 float k00_20, k01_21; 1768 1769 float *e0 = e + d0; 1770 float *e2 = e0 + k_off; 1771 1772 for (i=lim >> 2; i > 0; --i) { 1773 k00_20 = e0[-0] - e2[-0]; 1774 k01_21 = e0[-1] - e2[-1]; 1775 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0]; 1776 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1]; 1777 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1]; 1778 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1]; 1779 1780 A += k1; 1781 1782 k00_20 = e0[-2] - e2[-2]; 1783 k01_21 = e0[-3] - e2[-3]; 1784 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2]; 1785 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3]; 1786 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1]; 1787 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1]; 1788 1789 A += k1; 1790 1791 k00_20 = e0[-4] - e2[-4]; 1792 k01_21 = e0[-5] - e2[-5]; 1793 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4]; 1794 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5]; 1795 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1]; 1796 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1]; 1797 1798 A += k1; 1799 1800 k00_20 = e0[-6] - e2[-6]; 1801 k01_21 = e0[-7] - e2[-7]; 1802 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6]; 1803 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7]; 1804 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1]; 1805 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1]; 1806 1807 e0 -= 8; 1808 e2 -= 8; 1809 1810 A += k1; 1811 } 1812 } 1813 1814 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0) 1815 { 1816 int i; 1817 float A0 = A[0]; 1818 float A1 = A[0+1]; 1819 float A2 = A[0+a_off]; 1820 float A3 = A[0+a_off+1]; 1821 float A4 = A[0+a_off*2+0]; 1822 float A5 = A[0+a_off*2+1]; 1823 float A6 = A[0+a_off*3+0]; 1824 float A7 = A[0+a_off*3+1]; 1825 1826 float k00,k11; 1827 1828 float *ee0 = e +i_off; 1829 float *ee2 = ee0+k_off; 1830 1831 for (i=n; i > 0; --i) { 1832 k00 = ee0[ 0] - ee2[ 0]; 1833 k11 = ee0[-1] - ee2[-1]; 1834 ee0[ 0] = ee0[ 0] + ee2[ 0]; 1835 ee0[-1] = ee0[-1] + ee2[-1]; 1836 ee2[ 0] = (k00) * A0 - (k11) * A1; 1837 ee2[-1] = (k11) * A0 + (k00) * A1; 1838 1839 k00 = ee0[-2] - ee2[-2]; 1840 k11 = ee0[-3] - ee2[-3]; 1841 ee0[-2] = ee0[-2] + ee2[-2]; 1842 ee0[-3] = ee0[-3] + ee2[-3]; 1843 ee2[-2] = (k00) * A2 - (k11) * A3; 1844 ee2[-3] = (k11) * A2 + (k00) * A3; 1845 1846 k00 = ee0[-4] - ee2[-4]; 1847 k11 = ee0[-5] - ee2[-5]; 1848 ee0[-4] = ee0[-4] + ee2[-4]; 1849 ee0[-5] = ee0[-5] + ee2[-5]; 1850 ee2[-4] = (k00) * A4 - (k11) * A5; 1851 ee2[-5] = (k11) * A4 + (k00) * A5; 1852 1853 k00 = ee0[-6] - ee2[-6]; 1854 k11 = ee0[-7] - ee2[-7]; 1855 ee0[-6] = ee0[-6] + ee2[-6]; 1856 ee0[-7] = ee0[-7] + ee2[-7]; 1857 ee2[-6] = (k00) * A6 - (k11) * A7; 1858 ee2[-7] = (k11) * A6 + (k00) * A7; 1859 1860 ee0 -= k0; 1861 ee2 -= k0; 1862 } 1863 } 1864 1865 static void iter_54(float *z) 1866 { 1867 float k00,k11,k22,k33; 1868 float y0,y1,y2,y3; 1869 1870 k00 = z[ 0] - z[-4]; 1871 y0 = z[ 0] + z[-4]; 1872 y2 = z[-2] + z[-6]; 1873 k22 = z[-2] - z[-6]; 1874 1875 z[-0] = y0 + y2; // z0 + z4 + z2 + z6 1876 z[-2] = y0 - y2; // z0 + z4 - z2 - z6 1877 1878 // done with y0,y2 1879 1880 k33 = z[-3] - z[-7]; 1881 1882 z[-4] = k00 + k33; // z0 - z4 + z3 - z7 1883 z[-6] = k00 - k33; // z0 - z4 - z3 + z7 1884 1885 // done with k33 1886 1887 k11 = z[-1] - z[-5]; 1888 y1 = z[-1] + z[-5]; 1889 y3 = z[-3] + z[-7]; 1890 1891 z[-1] = y1 + y3; // z1 + z5 + z3 + z7 1892 z[-3] = y1 - y3; // z1 + z5 - z3 - z7 1893 z[-5] = k11 - k22; // z1 - z5 + z2 - z6 1894 z[-7] = k11 + k22; // z1 - z5 - z2 + z6 1895 } 1896 1897 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n) 1898 { 1899 int a_off = base_n >> 3; 1900 float A2 = A[0+a_off]; 1901 float *z = e + i_off; 1902 float *base = z - 16 * n; 1903 1904 while (z > base) { 1905 float k00,k11; 1906 float l00,l11; 1907 1908 k00 = z[-0] - z[ -8]; 1909 k11 = z[-1] - z[ -9]; 1910 l00 = z[-2] - z[-10]; 1911 l11 = z[-3] - z[-11]; 1912 z[ -0] = z[-0] + z[ -8]; 1913 z[ -1] = z[-1] + z[ -9]; 1914 z[ -2] = z[-2] + z[-10]; 1915 z[ -3] = z[-3] + z[-11]; 1916 z[ -8] = k00; 1917 z[ -9] = k11; 1918 z[-10] = (l00+l11) * A2; 1919 z[-11] = (l11-l00) * A2; 1920 1921 k00 = z[ -4] - z[-12]; 1922 k11 = z[ -5] - z[-13]; 1923 l00 = z[ -6] - z[-14]; 1924 l11 = z[ -7] - z[-15]; 1925 z[ -4] = z[ -4] + z[-12]; 1926 z[ -5] = z[ -5] + z[-13]; 1927 z[ -6] = z[ -6] + z[-14]; 1928 z[ -7] = z[ -7] + z[-15]; 1929 z[-12] = k11; 1930 z[-13] = -k00; 1931 z[-14] = (l11-l00) * A2; 1932 z[-15] = (l00+l11) * -A2; 1933 1934 iter_54(z); 1935 iter_54(z-8); 1936 z -= 16; 1937 } 1938 } 1939 1940 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) 1941 { 1942 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; 1943 int ld; 1944 // @OPTIMIZE: reduce register pressure by using fewer variables? 1945 int save_point = temp_alloc_save(f); 1946 float *buf2 = cast(float *) temp_alloc(f, n2 * float.sizeof); 1947 float* u = null; 1948 float* v = null; 1949 // twiddle factors 1950 float *A = f.A[blocktype]; 1951 1952 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" 1953 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function. 1954 1955 // kernel from paper 1956 1957 1958 // merged: 1959 // copy and reflect spectral data 1960 // step 0 1961 1962 // note that it turns out that the items added together during 1963 // this step are, in fact, being added to themselves (as reflected 1964 // by step 0). inexplicable inefficiency! this became obvious 1965 // once I combined the passes. 1966 1967 // so there's a missing 'times 2' here (for adding X to itself). 1968 // this propagates through linearly to the end, where the numbers 1969 // are 1/2 too small, and need to be compensated for. 1970 1971 { 1972 float *d, e, AA, e_stop; 1973 d = &buf2[n2-2]; 1974 AA = A; 1975 e = &buffer[0]; 1976 e_stop = &buffer[n2]; 1977 while (e != e_stop) { 1978 d[1] = (e[0] * AA[0] - e[2]*AA[1]); 1979 d[0] = (e[0] * AA[1] + e[2]*AA[0]); 1980 d -= 2; 1981 AA += 2; 1982 e += 4; 1983 } 1984 1985 e = &buffer[n2-3]; 1986 while (d >= buf2) { 1987 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]); 1988 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]); 1989 d -= 2; 1990 AA += 2; 1991 e -= 4; 1992 } 1993 } 1994 1995 // now we use symbolic names for these, so that we can 1996 // possibly swap their meaning as we change which operations 1997 // are in place 1998 1999 u = buffer; 2000 v = buf2; 2001 2002 // step 2 (paper output is w, now u) 2003 // this could be in place, but the data ends up in the wrong 2004 // place... _somebody_'s got to swap it, so this is nominated 2005 { 2006 float *AA = &A[n2-8]; 2007 float *d0, d1, e0, e1; 2008 2009 e0 = &v[n4]; 2010 e1 = &v[0]; 2011 2012 d0 = &u[n4]; 2013 d1 = &u[0]; 2014 2015 while (AA >= A) { 2016 float v40_20, v41_21; 2017 2018 v41_21 = e0[1] - e1[1]; 2019 v40_20 = e0[0] - e1[0]; 2020 d0[1] = e0[1] + e1[1]; 2021 d0[0] = e0[0] + e1[0]; 2022 d1[1] = v41_21*AA[4] - v40_20*AA[5]; 2023 d1[0] = v40_20*AA[4] + v41_21*AA[5]; 2024 2025 v41_21 = e0[3] - e1[3]; 2026 v40_20 = e0[2] - e1[2]; 2027 d0[3] = e0[3] + e1[3]; 2028 d0[2] = e0[2] + e1[2]; 2029 d1[3] = v41_21*AA[0] - v40_20*AA[1]; 2030 d1[2] = v40_20*AA[0] + v41_21*AA[1]; 2031 2032 AA -= 8; 2033 2034 d0 += 4; 2035 d1 += 4; 2036 e0 += 4; 2037 e1 += 4; 2038 } 2039 } 2040 2041 // step 3 2042 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions 2043 2044 // optimized step 3: 2045 2046 // the original step3 loop can be nested r inside s or s inside r; 2047 // it's written originally as s inside r, but this is dumb when r 2048 // iterates many times, and s few. So I have two copies of it and 2049 // switch between them halfway. 2050 2051 // this is iteration 0 of step 3 2052 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A); 2053 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A); 2054 2055 // this is iteration 1 of step 3 2056 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16); 2057 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16); 2058 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16); 2059 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16); 2060 2061 l=2; 2062 for (; l < (ld-3)>>1; ++l) { 2063 int k0 = n >> (l+2), k0_2 = k0>>1; 2064 int lim = 1 << (l+1); 2065 int i; 2066 for (i=0; i < lim; ++i) 2067 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3)); 2068 } 2069 2070 for (; l < ld-6; ++l) { 2071 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1; 2072 int rlim = n >> (l+6), r; 2073 int lim = 1 << (l+1); 2074 int i_off; 2075 float *A0 = A; 2076 i_off = n2-1; 2077 for (r=rlim; r > 0; --r) { 2078 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0); 2079 A0 += k1*4; 2080 i_off -= 8; 2081 } 2082 } 2083 2084 // iterations with count: 2085 // ld-6,-5,-4 all interleaved together 2086 // the big win comes from getting rid of needless flops 2087 // due to the constants on pass 5 & 4 being all 1 and 0; 2088 // combining them to be simultaneous to improve cache made little difference 2089 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n); 2090 2091 // output is u 2092 2093 // step 4, 5, and 6 2094 // cannot be in-place because of step 5 2095 { 2096 uint16 *bitrev = f.bit_reverse[blocktype]; 2097 // weirdly, I'd have thought reading sequentially and writing 2098 // erratically would have been better than vice-versa, but in 2099 // fact that's not what my testing showed. (That is, with 2100 // j = bitreverse(i), do you read i and write j, or read j and write i.) 2101 2102 float *d0 = &v[n4-4]; 2103 float *d1 = &v[n2-4]; 2104 while (d0 >= v) { 2105 int k4; 2106 2107 k4 = bitrev[0]; 2108 d1[3] = u[k4+0]; 2109 d1[2] = u[k4+1]; 2110 d0[3] = u[k4+2]; 2111 d0[2] = u[k4+3]; 2112 2113 k4 = bitrev[1]; 2114 d1[1] = u[k4+0]; 2115 d1[0] = u[k4+1]; 2116 d0[1] = u[k4+2]; 2117 d0[0] = u[k4+3]; 2118 2119 d0 -= 4; 2120 d1 -= 4; 2121 bitrev += 2; 2122 } 2123 } 2124 // (paper output is u, now v) 2125 2126 2127 // data must be in buf2 2128 assert(v == buf2); 2129 2130 // step 7 (paper output is v, now v) 2131 // this is now in place 2132 { 2133 float *C = f.C[blocktype]; 2134 float *d, e; 2135 2136 d = v; 2137 e = v + n2 - 4; 2138 2139 while (d < e) { 2140 float a02,a11,b0,b1,b2,b3; 2141 2142 a02 = d[0] - e[2]; 2143 a11 = d[1] + e[3]; 2144 2145 b0 = C[1]*a02 + C[0]*a11; 2146 b1 = C[1]*a11 - C[0]*a02; 2147 2148 b2 = d[0] + e[ 2]; 2149 b3 = d[1] - e[ 3]; 2150 2151 d[0] = b2 + b0; 2152 d[1] = b3 + b1; 2153 e[2] = b2 - b0; 2154 e[3] = b1 - b3; 2155 2156 a02 = d[2] - e[0]; 2157 a11 = d[3] + e[1]; 2158 2159 b0 = C[3]*a02 + C[2]*a11; 2160 b1 = C[3]*a11 - C[2]*a02; 2161 2162 b2 = d[2] + e[ 0]; 2163 b3 = d[3] - e[ 1]; 2164 2165 d[2] = b2 + b0; 2166 d[3] = b3 + b1; 2167 e[0] = b2 - b0; 2168 e[1] = b1 - b3; 2169 2170 C += 4; 2171 d += 4; 2172 e -= 4; 2173 } 2174 } 2175 2176 // data must be in buf2 2177 2178 2179 // step 8+decode (paper output is X, now buffer) 2180 // this generates pairs of data a la 8 and pushes them directly through 2181 // the decode kernel (pushing rather than pulling) to avoid having 2182 // to make another pass later 2183 2184 // this cannot POSSIBLY be in place, so we refer to the buffers directly 2185 2186 { 2187 float *d0, d1, d2, d3; 2188 2189 float *B = f.B[blocktype] + n2 - 8; 2190 float *e = buf2 + n2 - 8; 2191 d0 = &buffer[0]; 2192 d1 = &buffer[n2-4]; 2193 d2 = &buffer[n2]; 2194 d3 = &buffer[n-4]; 2195 while (e >= v) { 2196 float p0,p1,p2,p3; 2197 2198 p3 = e[6]*B[7] - e[7]*B[6]; 2199 p2 = -e[6]*B[6] - e[7]*B[7]; 2200 2201 d0[0] = p3; 2202 d1[3] = - p3; 2203 d2[0] = p2; 2204 d3[3] = p2; 2205 2206 p1 = e[4]*B[5] - e[5]*B[4]; 2207 p0 = -e[4]*B[4] - e[5]*B[5]; 2208 2209 d0[1] = p1; 2210 d1[2] = - p1; 2211 d2[1] = p0; 2212 d3[2] = p0; 2213 2214 p3 = e[2]*B[3] - e[3]*B[2]; 2215 p2 = -e[2]*B[2] - e[3]*B[3]; 2216 2217 d0[2] = p3; 2218 d1[1] = - p3; 2219 d2[2] = p2; 2220 d3[1] = p2; 2221 2222 p1 = e[0]*B[1] - e[1]*B[0]; 2223 p0 = -e[0]*B[0] - e[1]*B[1]; 2224 2225 d0[3] = p1; 2226 d1[0] = - p1; 2227 d2[3] = p0; 2228 d3[0] = p0; 2229 2230 B -= 8; 2231 e -= 8; 2232 d0 += 4; 2233 d2 += 4; 2234 d1 -= 4; 2235 d3 -= 4; 2236 } 2237 } 2238 2239 temp_free(f,buf2); 2240 temp_alloc_restore(f,save_point); 2241 } 2242 2243 2244 static float *get_window(vorb *f, int len) 2245 { 2246 len <<= 1; 2247 if (len == f.blocksize_0) return f.window[0]; 2248 if (len == f.blocksize_1) return f.window[1]; 2249 return null; 2250 } 2251 2252 alias YTYPE = int16; 2253 2254 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag) 2255 { 2256 int n2 = n >> 1; 2257 int s = map.chan[i].mux, floor; 2258 floor = map.submap_floor[s]; 2259 if (f.floor_types[floor] == 0) { 2260 return error(f, VORBIS_invalid_stream); 2261 } else { 2262 Floor1 *g = &f.floor_config[floor].floor1; 2263 int j,q; 2264 int lx = 0, ly = finalY[0] * g.floor1_multiplier; 2265 for (q=1; q < g.values; ++q) { 2266 j = g.sorted_order[q]; 2267 if (finalY[j] >= 0) 2268 { 2269 int hy = finalY[j] * g.floor1_multiplier; 2270 int hx = g.Xlist[j]; 2271 if (lx != hx) 2272 draw_line(target, lx,ly, hx,hy, n2); 2273 lx = hx, ly = hy; 2274 } 2275 } 2276 if (lx < n2) { 2277 // optimization of: draw_line(target, lx,ly, n,ly, n2); 2278 for (j=lx; j < n2; ++j) 2279 target[j] *= inverse_db_table[ly]; 2280 } 2281 } 2282 return TRUE; 2283 } 2284 2285 // The meaning of "left" and "right" 2286 // 2287 // For a given frame: 2288 // we compute samples from 0..n 2289 // window_center is n/2 2290 // we'll window and mix the samples from left_start to left_end with data from the previous frame 2291 // all of the samples from left_end to right_start can be output without mixing; however, 2292 // this interval is 0-length except when transitioning between short and long frames 2293 // all of the samples from right_start to right_end need to be mixed with the next frame, 2294 // which we don't have, so those get saved in a buffer 2295 // frame N's right_end-right_start, the number of samples to mix with the next frame, 2296 // has to be the same as frame N+1's left_end-left_start (which they are by 2297 // construction) 2298 2299 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) 2300 { 2301 Mode *m; 2302 int i, n, prev, next, window_center; 2303 f.channel_buffer_start = f.channel_buffer_end = 0; 2304 2305 retry: 2306 if (f.eof) return FALSE; 2307 if (!maybe_start_packet(f)) 2308 return FALSE; 2309 // check packet type 2310 if (get_bits(f,1) != 0) { 2311 while (EOP != get8_packet(f)) {} 2312 goto retry; 2313 } 2314 2315 if (f.alloc.alloc_buffer) 2316 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2317 2318 i = get_bits(f, ilog(f.mode_count-1)); 2319 if (i == EOP) return FALSE; 2320 if (i >= f.mode_count) return FALSE; 2321 *mode = i; 2322 m = f.mode_config.ptr + i; 2323 if (m.blockflag) { 2324 n = f.blocksize_1; 2325 prev = get_bits(f,1); 2326 next = get_bits(f,1); 2327 } else { 2328 prev = next = 0; 2329 n = f.blocksize_0; 2330 } 2331 2332 // WINDOWING 2333 2334 window_center = n >> 1; 2335 if (m.blockflag && !prev) { 2336 *p_left_start = (n - f.blocksize_0) >> 2; 2337 *p_left_end = (n + f.blocksize_0) >> 2; 2338 } else { 2339 *p_left_start = 0; 2340 *p_left_end = window_center; 2341 } 2342 if (m.blockflag && !next) { 2343 *p_right_start = (n*3 - f.blocksize_0) >> 2; 2344 *p_right_end = (n*3 + f.blocksize_0) >> 2; 2345 } else { 2346 *p_right_start = window_center; 2347 *p_right_end = n; 2348 } 2349 2350 return TRUE; 2351 } 2352 2353 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left) 2354 { 2355 Mapping *map; 2356 int i,j,k,n,n2; 2357 int[256] zero_channel; 2358 int[256] really_zero_channel; 2359 2360 // WINDOWING 2361 2362 n = f.blocksize[m.blockflag]; 2363 map = &f.mapping[m.mapping]; 2364 2365 // FLOORS 2366 n2 = n >> 1; 2367 2368 for (i=0; i < f.channels; ++i) { 2369 int s = map.chan[i].mux, floor; 2370 zero_channel[i] = FALSE; 2371 floor = map.submap_floor[s]; 2372 if (f.floor_types[floor] == 0) { 2373 return error(f, VORBIS_invalid_stream); 2374 } else { 2375 Floor1 *g = &f.floor_config[floor].floor1; 2376 if (get_bits(f, 1)) { 2377 short *finalY; 2378 uint8[256] step2_flag; 2379 static immutable int[4] range_list = [ 256, 128, 86, 64 ]; 2380 int range = range_list[g.floor1_multiplier-1]; 2381 int offset = 2; 2382 finalY = f.finalY[i]; 2383 finalY[0] = cast(short) get_bits(f, ilog(range)-1); 2384 finalY[1] = cast(short) get_bits(f, ilog(range)-1); 2385 for (j=0; j < g.partitions; ++j) { 2386 int pclass = g.partition_class_list[j]; 2387 int cdim = g.class_dimensions[pclass]; 2388 int cbits = g.class_subclasses[pclass]; 2389 int csub = (1 << cbits)-1; 2390 int cval = 0; 2391 if (cbits) { 2392 Codebook *c = f.codebooks + g.class_masterbooks[pclass]; 2393 DECODE(cval,f,c); 2394 } 2395 for (k=0; k < cdim; ++k) { 2396 int book = g.subclass_books[pclass][cval & csub]; 2397 cval = cval >> cbits; 2398 if (book >= 0) { 2399 int temp; 2400 Codebook *c = f.codebooks + book; 2401 DECODE(temp,f,c); 2402 finalY[offset++] = cast(short)temp; 2403 } else 2404 finalY[offset++] = 0; 2405 } 2406 } 2407 if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec 2408 step2_flag[0] = step2_flag[1] = 1; 2409 for (j=2; j < g.values; ++j) { 2410 int low, high, pred, highroom, lowroom, room, val; 2411 low = g.neighbors[j][0]; 2412 high = g.neighbors[j][1]; 2413 //neighbors(g.Xlist, j, &low, &high); 2414 pred = predict_point(g.Xlist[j], g.Xlist[low], g.Xlist[high], finalY[low], finalY[high]); 2415 val = finalY[j]; 2416 highroom = range - pred; 2417 lowroom = pred; 2418 if (highroom < lowroom) 2419 room = highroom * 2; 2420 else 2421 room = lowroom * 2; 2422 if (val) { 2423 step2_flag[low] = step2_flag[high] = 1; 2424 step2_flag[j] = 1; 2425 if (val >= room) 2426 if (highroom > lowroom) 2427 finalY[j] = cast(short)(val - lowroom + pred); 2428 else 2429 finalY[j] = cast(short)(pred - val + highroom - 1); 2430 else 2431 if (val & 1) 2432 finalY[j] = cast(short)(pred - ((val+1)>>1)); 2433 else 2434 finalY[j] = cast(short)(pred + (val>>1)); 2435 } else { 2436 step2_flag[j] = 0; 2437 finalY[j] = cast(short)pred; 2438 } 2439 } 2440 2441 // defer final floor computation until _after_ residue 2442 for (j=0; j < g.values; ++j) { 2443 if (!step2_flag[j]) 2444 finalY[j] = -1; 2445 } 2446 } else { 2447 error: 2448 zero_channel[i] = TRUE; 2449 } 2450 // So we just defer everything else to later 2451 2452 // at this point we've decoded the floor into buffer 2453 } 2454 } 2455 2456 // at this point we've decoded all floors 2457 2458 if (f.alloc.alloc_buffer) 2459 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2460 2461 // re-enable coupled channels if necessary 2462 memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof * f.channels); 2463 for (i=0; i < map.coupling_steps; ++i) 2464 if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) { 2465 zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = FALSE; 2466 } 2467 2468 // RESIDUE DECODE 2469 for (i=0; i < map.submaps; ++i) { 2470 float *[STB_VORBIS_MAX_CHANNELS] residue_buffers; 2471 int r; 2472 uint8[256] do_not_decode; 2473 int ch = 0; 2474 for (j=0; j < f.channels; ++j) { 2475 if (map.chan[j].mux == i) { 2476 if (zero_channel[j]) { 2477 do_not_decode[ch] = TRUE; 2478 residue_buffers[ch] = null; 2479 } else { 2480 do_not_decode[ch] = FALSE; 2481 residue_buffers[ch] = f.channel_buffers[j]; 2482 } 2483 ++ch; 2484 } 2485 } 2486 r = map.submap_residue[i]; 2487 decode_residue(f, residue_buffers.ptr, ch, n2, r, do_not_decode.ptr); 2488 } 2489 2490 if (f.alloc.alloc_buffer) 2491 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2492 2493 // INVERSE COUPLING 2494 for (i = map.coupling_steps-1; i >= 0; --i) { 2495 int n3 = n >> 1; 2496 float *m_ = f.channel_buffers[map.chan[i].magnitude]; 2497 float *a = f.channel_buffers[map.chan[i].angle ]; 2498 for (j=0; j < n3; ++j) { 2499 float a2,m2; 2500 if (m_[j] > 0) 2501 if (a[j] > 0) 2502 m2 = m_[j], a2 = m_[j] - a[j]; 2503 else 2504 a2 = m_[j], m2 = m_[j] + a[j]; 2505 else 2506 if (a[j] > 0) 2507 m2 = m_[j], a2 = m_[j] + a[j]; 2508 else 2509 a2 = m_[j], m2 = m_[j] - a[j]; 2510 m_[j] = m2; 2511 a[j] = a2; 2512 } 2513 } 2514 2515 // finish decoding the floors 2516 for (i=0; i < f.channels; ++i) { 2517 if (really_zero_channel[i]) { 2518 memset(f.channel_buffers[i], 0, (*f.channel_buffers[i]).sizeof * n2); 2519 } else { 2520 do_floor(f, map, i, n, f.channel_buffers[i], f.finalY[i], null); 2521 } 2522 } 2523 2524 // INVERSE MDCT 2525 for (i=0; i < f.channels; ++i) 2526 inverse_mdct(f.channel_buffers[i], n, f, m.blockflag); 2527 2528 // this shouldn't be necessary, unless we exited on an error 2529 // and want to flush to get to the next packet 2530 flush_packet(f); 2531 2532 if (f.first_decode) { 2533 // assume we start so first non-discarded sample is sample 0 2534 // this isn't to spec, but spec would require us to read ahead 2535 // and decode the size of all current frames--could be done, 2536 // but presumably it's not a commonly used feature 2537 f.current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around) 2538 // we might have to discard samples "from" the next frame too, 2539 // if we're lapping a large block then a small at the start? 2540 f.discard_samples_deferred = n - right_end; 2541 f.current_loc_valid = TRUE; 2542 f.first_decode = FALSE; 2543 } else if (f.discard_samples_deferred) { 2544 if (f.discard_samples_deferred >= right_start - left_start) { 2545 f.discard_samples_deferred -= (right_start - left_start); 2546 left_start = right_start; 2547 *p_left = left_start; 2548 } else { 2549 left_start += f.discard_samples_deferred; 2550 *p_left = left_start; 2551 f.discard_samples_deferred = 0; 2552 } 2553 } else if (f.previous_length == 0 && f.current_loc_valid) { 2554 // we're recovering from a seek... that means we're going to discard 2555 // the samples from this packet even though we know our position from 2556 // the last page header, so we need to update the position based on 2557 // the discarded samples here 2558 // but wait, the code below is going to add this in itself even 2559 // on a discard, so we don't need to do it here... 2560 } 2561 2562 // check if we have ogg information about the sample # for this packet 2563 if (f.last_seg_which == f.end_seg_with_known_loc) { 2564 // if we have a valid current loc, and this is final: 2565 if (f.current_loc_valid && (f.page_flag & PAGEFLAG_last_page)) { 2566 uint32 current_end = f.known_loc_for_packet; 2567 // then let's infer the size of the (probably) short final frame 2568 if (current_end < f.current_loc + (right_end-left_start)) { 2569 if (current_end < f.current_loc) { 2570 // negative truncation, that's impossible! 2571 *len = 0; 2572 } else { 2573 *len = current_end - f.current_loc; 2574 } 2575 *len += left_start; // this doesn't seem right, but has no ill effect on my test files 2576 if (*len > right_end) *len = right_end; // this should never happen 2577 f.current_loc += *len; 2578 return TRUE; 2579 } 2580 } 2581 // otherwise, just set our sample loc 2582 // guess that the ogg granule pos refers to the _middle_ of the 2583 // last frame? 2584 // set f.current_loc to the position of left_start 2585 f.current_loc = f.known_loc_for_packet - (n2-left_start); 2586 f.current_loc_valid = TRUE; 2587 } 2588 if (f.current_loc_valid) 2589 f.current_loc += (right_start - left_start); 2590 2591 if (f.alloc.alloc_buffer) 2592 assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset); 2593 *len = right_end; // ignore samples after the window goes to 0 2594 2595 return TRUE; 2596 } 2597 2598 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right) 2599 { 2600 int mode, left_end, right_end; 2601 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0; 2602 return vorbis_decode_packet_rest(f, len, f.mode_config.ptr + mode, *p_left, left_end, *p_right, right_end, p_left); 2603 } 2604 2605 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right) 2606 { 2607 int prev,i,j; 2608 // we use right&left (the start of the right- and left-window sin()-regions) 2609 // to determine how much to return, rather than inferring from the rules 2610 // (same result, clearer code); 'left' indicates where our sin() window 2611 // starts, therefore where the previous window's right edge starts, and 2612 // therefore where to start mixing from the previous buffer. 'right' 2613 // indicates where our sin() ending-window starts, therefore that's where 2614 // we start saving, and where our returned-data ends. 2615 2616 // mixin from previous window 2617 if (f.previous_length) { 2618 int i2, j2, n = f.previous_length; 2619 float *w = get_window(f, n); 2620 if (w == NULL) return 0; 2621 for (i2=0; i2 < f.channels; ++i2) { 2622 for (j2=0; j2 < n; ++j2) 2623 f.channel_buffers[i2][left+j2] = 2624 f.channel_buffers[i2][left+j2]*w[ j2] + 2625 f.previous_window[i2][ j2]*w[n-1-j2]; 2626 } 2627 } 2628 2629 prev = f.previous_length; 2630 2631 // last half of this data becomes previous window 2632 f.previous_length = len - right; 2633 2634 // @OPTIMIZE: could avoid this copy by double-buffering the 2635 // output (flipping previous_window with channel_buffers), but 2636 // then previous_window would have to be 2x as large, and 2637 // channel_buffers couldn't be temp mem (although they're NOT 2638 // currently temp mem, they could be (unless we want to level 2639 // performance by spreading out the computation)) 2640 for (i=0; i < f.channels; ++i) 2641 for (j=0; right+j < len; ++j) 2642 f.previous_window[i][j] = f.channel_buffers[i][right+j]; 2643 2644 if (!prev) 2645 // there was no previous packet, so this data isn't valid... 2646 // this isn't entirely true, only the would-have-overlapped data 2647 // isn't valid, but this seems to be what the spec requires 2648 return 0; 2649 2650 // truncate a short frame 2651 if (len < right) right = len; 2652 2653 f.samples_output += right-left; 2654 2655 return right - left; 2656 } 2657 2658 static int vorbis_pump_first_frame(stb_vorbis *f) 2659 { 2660 int len, right, left, res; 2661 res = vorbis_decode_packet(f, &len, &left, &right); 2662 if (res) 2663 vorbis_finish_frame(f, len, left, right); 2664 return res; 2665 } 2666 2667 2668 static int start_decoder(vorb *f) 2669 { 2670 uint8[6] header; 2671 uint8 x,y; 2672 int len,i,j,k, max_submaps = 0; 2673 int longest_floorlist=0; 2674 2675 // first page, first packet 2676 f.first_decode = TRUE; 2677 2678 if (!start_page(f)) return FALSE; 2679 // validate page flag 2680 if (!(f.page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page); 2681 if (f.page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page); 2682 if (f.page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page); 2683 // check for expected packet length 2684 if (f.segment_count != 1) return error(f, VORBIS_invalid_first_page); 2685 if (f.segments[0] != 30) { 2686 // check for the Ogg skeleton fishead identifying header to refine our error 2687 if (f.segments[0] == 64 && 2688 getn(f, header.ptr, 6) && 2689 header[0] == 'f' && 2690 header[1] == 'i' && 2691 header[2] == 's' && 2692 header[3] == 'h' && 2693 header[4] == 'e' && 2694 header[5] == 'a' && 2695 get8(f) == 'd' && 2696 get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported); 2697 else 2698 return error(f, VORBIS_invalid_first_page); 2699 } 2700 2701 // read packet 2702 // check packet header 2703 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page); 2704 if (!getn(f, header.ptr, 6)) return error(f, VORBIS_unexpected_eof); 2705 if (!vorbis_validate(header.ptr)) return error(f, VORBIS_invalid_first_page); 2706 // vorbis_version 2707 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page); 2708 f.channels = get8(f); if (!f.channels) return error(f, VORBIS_invalid_first_page); 2709 if (f.channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels); 2710 f.sample_rate = get32(f); if (!f.sample_rate) return error(f, VORBIS_invalid_first_page); 2711 get32(f); // bitrate_maximum 2712 get32(f); // bitrate_nominal 2713 get32(f); // bitrate_minimum 2714 x = get8(f); 2715 { 2716 int log0,log1; 2717 log0 = x & 15; 2718 log1 = x >> 4; 2719 f.blocksize_0 = 1 << log0; 2720 f.blocksize_1 = 1 << log1; 2721 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup); 2722 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup); 2723 if (log0 > log1) return error(f, VORBIS_invalid_setup); 2724 } 2725 2726 // framing_flag 2727 x = get8(f); 2728 if (!(x & 1)) return error(f, VORBIS_invalid_first_page); 2729 2730 // second packet! 2731 if (!start_page(f)) return FALSE; 2732 2733 if (!start_packet(f)) return FALSE; 2734 2735 if (!next_segment(f)) return FALSE; 2736 2737 if (get8_packet(f) != VORBIS_packet_comment) return error(f, VORBIS_invalid_setup); 2738 for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f); 2739 if (!vorbis_validate(header.ptr)) return error(f, VORBIS_invalid_setup); 2740 //file vendor 2741 len = get32_packet(f); 2742 f.vendor = cast(char*)setup_malloc(f, len+1); 2743 if (f.vendor == NULL) return error(f, VORBIS_outofmem); 2744 for(i=0; i < len; ++i) { 2745 f.vendor[i] = cast(char) get8_packet(f); 2746 } 2747 f.vendor[len] = cast(char)'\0'; 2748 //user comments 2749 f.comment_list_length = get32_packet(f); 2750 f.comment_list = null; 2751 if (f.comment_list_length > 0) 2752 { 2753 f.comment_list = cast(char**) setup_malloc(f, cast(int)( (char*).sizeof * (f.comment_list_length) )); 2754 if (f.comment_list == NULL) return error(f, VORBIS_outofmem); 2755 } 2756 2757 for(i=0; i < f.comment_list_length; ++i) { 2758 len = get32_packet(f); 2759 f.comment_list[i] = cast(char*)setup_malloc(f, len+1); 2760 if (f.comment_list[i] == null) return error(f, VORBIS_outofmem); 2761 2762 for(j=0; j < len; ++j) { 2763 f.comment_list[i][j] = cast(char) get8_packet(f); 2764 } 2765 f.comment_list[i][len] = cast(char)'\0'; 2766 } 2767 2768 // framing_flag 2769 x = cast(ubyte) get8_packet(f); 2770 if (!(x & 1)) return error(f, VORBIS_invalid_setup); 2771 2772 2773 skip(f, f.bytes_in_seg); 2774 f.bytes_in_seg = 0; 2775 2776 do { 2777 len = next_segment(f); 2778 skip(f, len); 2779 f.bytes_in_seg = 0; 2780 } while (len); 2781 2782 // third packet! 2783 if (!start_packet(f)) return FALSE; 2784 2785 crc32_init(f); 2786 2787 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup); 2788 for (i=0; i < 6; ++i) header[i] = cast(ubyte) get8_packet(f); 2789 if (!vorbis_validate(header.ptr)) return error(f, VORBIS_invalid_setup); 2790 2791 // codebooks 2792 2793 f.codebook_count = get_bits(f,8) + 1; 2794 f.codebooks = cast(Codebook *) setup_malloc(f, cast(int)( (*f.codebooks).sizeof * f.codebook_count) ); 2795 if (f.codebooks == null) return error(f, VORBIS_outofmem); 2796 memset(f.codebooks, 0, (*f.codebooks).sizeof * f.codebook_count); 2797 for (i=0; i < f.codebook_count; ++i) { 2798 uint32 *values; 2799 int ordered, sorted_count; 2800 int total=0; 2801 uint8 *lengths; 2802 Codebook *c = f.codebooks+i; 2803 x = cast(ubyte) get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup); 2804 x = cast(ubyte) get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup); 2805 x = cast(ubyte) get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup); 2806 x = cast(ubyte) get_bits(f, 8); 2807 c.dimensions = (get_bits(f, 8)<<8) + x; 2808 x = cast(ubyte) get_bits(f, 8); 2809 y = cast(ubyte) get_bits(f, 8); 2810 c.entries = (get_bits(f, 8)<<16) + (y<<8) + x; 2811 ordered = get_bits(f,1); 2812 c.sparse = ordered ? 0 : cast(ubyte) get_bits(f,1); 2813 2814 if (c.dimensions == 0 && c.entries != 0) return error(f, VORBIS_invalid_setup); 2815 2816 if (c.sparse) 2817 lengths = cast(uint8 *) setup_temp_malloc(f, c.entries); 2818 else 2819 lengths = c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries); 2820 2821 if (!lengths) return error(f, VORBIS_outofmem); 2822 2823 if (ordered) { 2824 int current_entry = 0; 2825 int current_length = get_bits(f,5) + 1; 2826 while (current_entry < c.entries) { 2827 int limit = c.entries - current_entry; 2828 int n = get_bits(f, ilog(limit)); 2829 if (current_length >= 32) return error(f, VORBIS_invalid_setup); 2830 if (current_entry + n > cast(int) c.entries) { return error(f, VORBIS_invalid_setup); } 2831 memset(lengths + current_entry, current_length, n); 2832 current_entry += n; 2833 ++current_length; 2834 } 2835 } else { 2836 for (j=0; j < c.entries; ++j) { 2837 int present = c.sparse ? get_bits(f,1) : 1; 2838 if (present) { 2839 lengths[j] = cast(ubyte) (get_bits(f, 5) + 1); 2840 ++total; 2841 if (lengths[j] == 32) 2842 return error(f, VORBIS_invalid_setup); 2843 } else { 2844 lengths[j] = NO_CODE; 2845 } 2846 } 2847 } 2848 2849 if (c.sparse && total >= c.entries >> 2) { 2850 // convert sparse items to non-sparse! 2851 if (c.entries > cast(int) f.setup_temp_memory_required) 2852 f.setup_temp_memory_required = c.entries; 2853 2854 c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.entries); 2855 if (c.codeword_lengths == null) return error(f, VORBIS_outofmem); 2856 memcpy(c.codeword_lengths, lengths, c.entries); 2857 setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs! 2858 lengths = c.codeword_lengths; 2859 c.sparse = 0; 2860 } 2861 2862 // compute the size of the sorted tables 2863 if (c.sparse) { 2864 sorted_count = total; 2865 } else { 2866 sorted_count = 0; 2867 for (j=0; j < c.entries; ++j) 2868 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) 2869 ++sorted_count; 2870 } 2871 2872 c.sorted_entries = sorted_count; 2873 values = null; 2874 2875 if (!c.sparse) { 2876 c.codewords = cast(uint32 *) setup_malloc(f, cast(int)( (c.codewords[0]).sizeof * c.entries ) ); 2877 if (!c.codewords) return error(f, VORBIS_outofmem); 2878 } else { 2879 uint size; 2880 if (c.sorted_entries) { 2881 c.codeword_lengths = cast(uint8 *) setup_malloc(f, c.sorted_entries); 2882 if (!c.codeword_lengths) return error(f, VORBIS_outofmem); 2883 c.codewords = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*c.codewords).sizeof * c.sorted_entries )); 2884 if (!c.codewords) return error(f, VORBIS_outofmem); 2885 values = cast(uint32 *) setup_temp_malloc(f, cast(int)( (*values).sizeof * c.sorted_entries ) ); 2886 if (!values) return error(f, VORBIS_outofmem); 2887 } 2888 size = cast(uint)( c.entries + ((*c.codewords).sizeof + (*values).sizeof) * c.sorted_entries ); 2889 if (size > f.setup_temp_memory_required) 2890 f.setup_temp_memory_required = size; 2891 } 2892 2893 if (!compute_codewords(c, lengths, c.entries, values)) { 2894 if (c.sparse) setup_temp_free(f, values, 0); 2895 return error(f, VORBIS_invalid_setup); 2896 } 2897 2898 if (c.sorted_entries) { 2899 // allocate an extra slot for sentinels 2900 c.sorted_codewords = cast(uint32 *) setup_malloc(f, cast(int)( (*c.sorted_codewords).sizeof * (c.sorted_entries+1) )); 2901 if (c.sorted_codewords == null) return error(f, VORBIS_outofmem); 2902 // allocate an extra slot at the front so that c.sorted_values[-1] is defined 2903 // so that we can catch that case without an extra if 2904 c.sorted_values = cast( int *) setup_malloc(f, cast(int)( (*c.sorted_values ).sizeof * (c.sorted_entries+1) ) ); 2905 if (c.sorted_values == null) return error(f, VORBIS_outofmem); 2906 ++c.sorted_values; 2907 c.sorted_values[-1] = -1; 2908 compute_sorted_huffman(c, lengths, values); 2909 } 2910 2911 if (c.sparse) { 2912 setup_temp_free(f, values, (*values).sizeof*c.sorted_entries); 2913 setup_temp_free(f, c.codewords, (*c.codewords).sizeof*c.sorted_entries); 2914 setup_temp_free(f, lengths, c.entries); 2915 c.codewords = null; 2916 } 2917 2918 compute_accelerated_huffman(c); 2919 2920 c.lookup_type = cast(ubyte) get_bits(f, 4); 2921 if (c.lookup_type > 2) return error(f, VORBIS_invalid_setup); 2922 if (c.lookup_type > 0) { 2923 uint16 *mults; 2924 c.minimum_value = float32_unpack(get_bits(f, 32)); 2925 c.delta_value = float32_unpack(get_bits(f, 32)); 2926 c.value_bits = cast(ubyte) (get_bits(f, 4)+1); 2927 c.sequence_p = cast(ubyte) get_bits(f,1); 2928 if (c.lookup_type == 1) { 2929 int values2 = lookup1_values(c.entries, c.dimensions); 2930 if (values2 < 0) return error(f, VORBIS_invalid_setup); 2931 c.lookup_values = cast(uint32) values2; 2932 } else { 2933 c.lookup_values = c.entries * c.dimensions; 2934 } 2935 if (c.lookup_values == 0) return error(f, VORBIS_invalid_setup); 2936 mults = cast(uint16 *) setup_temp_malloc(f, cast(int)( (mults[0]).sizeof * c.lookup_values)); 2937 if (mults == null) return error(f, VORBIS_outofmem); 2938 for (j=0; j < cast(int) c.lookup_values; ++j) { 2939 int q = get_bits(f, c.value_bits); 2940 if (q == EOP) 2941 { 2942 setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); 2943 return error(f, VORBIS_invalid_setup); 2944 } 2945 mults[j] = cast(ushort)q; 2946 } 2947 2948 if (c.lookup_type == 1) { 2949 int len2, sparse = c.sparse; 2950 float last=0; 2951 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop 2952 if (sparse) { 2953 if (c.sorted_entries == 0) goto skip; 2954 c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.sorted_entries * c.dimensions); 2955 } else 2956 c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.entries * c.dimensions); 2957 if (c.multiplicands == null) { setup_temp_free(f,mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); } 2958 len2 = sparse ? c.sorted_entries : c.entries; 2959 for (j=0; j < len2; ++j) { 2960 uint z = sparse ? c.sorted_values[j] : j; 2961 uint div=1; 2962 for (k=0; k < c.dimensions; ++k) { 2963 int off = (z / div) % c.lookup_values; 2964 float val = mults[off]*c.delta_value + c.minimum_value + last; 2965 c.multiplicands[j*c.dimensions + k] = val; 2966 if (c.sequence_p) 2967 last = val; 2968 if (k+1 < c.dimensions) { 2969 if (div > uint.max / cast(uint) c.lookup_values) { 2970 setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values); 2971 return error(f, VORBIS_invalid_setup); 2972 } 2973 div *= c.lookup_values; 2974 } 2975 } 2976 } 2977 c.lookup_type = 2; 2978 } 2979 else 2980 { 2981 float last=0; 2982 c.multiplicands = cast(codetype *) setup_malloc(f, (c.multiplicands[0]).sizeof * c.lookup_values); 2983 if (c.multiplicands == null) { setup_temp_free(f, mults,(mults[0]).sizeof*c.lookup_values); return error(f, VORBIS_outofmem); } 2984 for (j=0; j < cast(int) c.lookup_values; ++j) { 2985 float val = mults[j] * c.delta_value + c.minimum_value + last; 2986 c.multiplicands[j] = val; 2987 if (c.sequence_p) 2988 last = val; 2989 } 2990 } 2991 skip:; 2992 setup_temp_free(f, mults, (mults[0]).sizeof * c.lookup_values); 2993 2994 } 2995 } 2996 2997 // time domain transfers (notused) 2998 2999 x = cast(ubyte)(get_bits(f, 6) + 1); 3000 for (i=0; i < x; ++i) { 3001 uint32 z = get_bits(f, 16); 3002 if (z != 0) return error(f, VORBIS_invalid_setup); 3003 } 3004 3005 // Floors 3006 f.floor_count = get_bits(f, 6)+1; 3007 f.floor_config = cast(Floor *) setup_malloc(f, f.floor_count * (*f.floor_config).sizeof); 3008 if (f.floor_config == null) return error(f, VORBIS_outofmem); 3009 for (i=0; i < f.floor_count; ++i) { 3010 f.floor_types[i] = cast(ushort) get_bits(f, 16); 3011 if (f.floor_types[i] > 1) return error(f, VORBIS_invalid_setup); 3012 if (f.floor_types[i] == 0) { 3013 Floor0 *g = &f.floor_config[i].floor0; 3014 g.order = cast(ubyte) get_bits(f,8); 3015 g.rate = cast(ushort) get_bits(f,16); 3016 g.bark_map_size = cast(ushort) get_bits(f,16); 3017 g.amplitude_bits = cast(ubyte) get_bits(f,6); 3018 g.amplitude_offset = cast(ubyte) get_bits(f,8); 3019 g.number_of_books = cast(ubyte)(get_bits(f,4) + 1); 3020 for (j=0; j < g.number_of_books; ++j) 3021 g.book_list[j] = cast(ubyte) get_bits(f,8); 3022 return error(f, VORBIS_feature_not_supported); 3023 } else { 3024 stbv__floor_ordering[31*8+2] p; 3025 Floor1 *g = &f.floor_config[i].floor1; 3026 int max_class = -1; 3027 g.partitions = cast(ubyte) get_bits(f, 5); 3028 for (j=0; j < g.partitions; ++j) { 3029 g.partition_class_list[j] = cast(ubyte) get_bits(f, 4); 3030 if (g.partition_class_list[j] > max_class) 3031 max_class = g.partition_class_list[j]; 3032 } 3033 for (j=0; j <= max_class; ++j) { 3034 g.class_dimensions[j] = cast(ubyte)(get_bits(f, 3)+1); 3035 g.class_subclasses[j] = cast(ubyte)get_bits(f, 2); 3036 if (g.class_subclasses[j]) { 3037 g.class_masterbooks[j] = cast(ubyte) get_bits(f, 8); 3038 if (g.class_masterbooks[j] >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3039 } 3040 for (k=0; k < 1 << g.class_subclasses[j]; ++k) { 3041 g.subclass_books[j][k] = cast(int16) (get_bits(f,8)-1); 3042 if (g.subclass_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3043 } 3044 } 3045 g.floor1_multiplier = cast(ubyte)(get_bits(f,2)+1); 3046 g.rangebits = cast(ubyte) get_bits(f,4); 3047 g.Xlist[0] = 0; 3048 g.Xlist[1] = cast(ushort)(1 << g.rangebits); 3049 g.values = 2; 3050 for (j=0; j < g.partitions; ++j) { 3051 int c = g.partition_class_list[j]; 3052 for (k=0; k < g.class_dimensions[c]; ++k) { 3053 g.Xlist[g.values] = cast(ushort) get_bits(f, g.rangebits); 3054 ++g.values; 3055 } 3056 } 3057 // precompute the sorting 3058 for (j=0; j < g.values; ++j) { 3059 p[j].x = g.Xlist[j]; 3060 p[j].id = cast(ushort) j; 3061 } 3062 qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare_2); 3063 for (j=0; j < g.values-1; ++j) 3064 if (p[j].x == p[j+1].x) 3065 return error(f, VORBIS_invalid_setup); 3066 for (j=0; j < g.values; ++j) 3067 g.sorted_order[j] = cast(uint8) p[j].id; 3068 // precompute the neighbors 3069 for (j=2; j < g.values; ++j) { 3070 int low = 0,hi = 0; 3071 neighbors(g.Xlist.ptr, j, &low,&hi); 3072 g.neighbors[j][0] = cast(ubyte)low; 3073 g.neighbors[j][1] = cast(ubyte)hi; 3074 } 3075 3076 if (g.values > longest_floorlist) 3077 longest_floorlist = g.values; 3078 } 3079 } 3080 3081 // Residue 3082 f.residue_count = get_bits(f, 6)+1; 3083 f.residue_config = cast(Residue *) setup_malloc(f, f.residue_count * (f.residue_config[0]).sizeof); 3084 if (f.residue_config == null) return error(f, VORBIS_outofmem); 3085 memset(f.residue_config, 0, f.residue_count * (f.residue_config[0]).sizeof); 3086 for (i=0; i < f.residue_count; ++i) { 3087 uint8[64] residue_cascade; 3088 Residue *r = f.residue_config+i; 3089 f.residue_types[i] = cast(ushort) get_bits(f, 16); 3090 if (f.residue_types[i] > 2) return error(f, VORBIS_invalid_setup); 3091 r.begin = get_bits(f, 24); 3092 r.end = get_bits(f, 24); 3093 if (r.end < r.begin) return error(f, VORBIS_invalid_setup); 3094 r.part_size = get_bits(f,24)+1; 3095 r.classifications = cast(ubyte)(get_bits(f,6)+1); 3096 r.classbook = cast(ubyte) get_bits(f,8); 3097 if (r.classbook >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3098 for (j=0; j < r.classifications; ++j) { 3099 uint8 high_bits=0; 3100 uint8 low_bits = cast(ubyte) get_bits(f,3); 3101 if (get_bits(f,1)) 3102 high_bits = cast(ubyte) get_bits(f,5); 3103 residue_cascade[j] = cast(ubyte)(high_bits*8 + low_bits); 3104 } 3105 r.residue_books = cast(int16[8]*) setup_malloc(f, (r.residue_books[0]).sizeof * r.classifications); 3106 if (r.residue_books == null) return error(f, VORBIS_outofmem); 3107 for (j=0; j < r.classifications; ++j) { 3108 for (k=0; k < 8; ++k) { 3109 if (residue_cascade[j] & (1 << k)) { 3110 r.residue_books[j][k] = cast(short) get_bits(f, 8); 3111 if (r.residue_books[j][k] >= f.codebook_count) return error(f, VORBIS_invalid_setup); 3112 } else { 3113 r.residue_books[j][k] = -1; 3114 } 3115 } 3116 } 3117 // precompute the classifications[] array to avoid inner-loop mod/divide 3118 // call it 'classdata' since we already have r.classifications 3119 r.classdata = cast(uint8 **) setup_malloc(f, (*r.classdata).sizeof * f.codebooks[r.classbook].entries); 3120 if (!r.classdata) return error(f, VORBIS_outofmem); 3121 memset(r.classdata, 0, (*r.classdata).sizeof * f.codebooks[r.classbook].entries); 3122 for (j=0; j < f.codebooks[r.classbook].entries; ++j) { 3123 int classwords = f.codebooks[r.classbook].dimensions; 3124 int temp = j; 3125 r.classdata[j] = cast(uint8 *) setup_malloc(f, (r.classdata[j][0]).sizeof * classwords); 3126 if (r.classdata[j] == null) return error(f, VORBIS_outofmem); 3127 for (k=classwords-1; k >= 0; --k) { 3128 r.classdata[j][k] = cast(ubyte)(temp % r.classifications); 3129 temp /= r.classifications; 3130 } 3131 } 3132 } 3133 3134 f.mapping_count = get_bits(f,6)+1; 3135 f.mapping = cast(Mapping *) setup_malloc(f, f.mapping_count * (*f.mapping).sizeof); 3136 if (f.mapping == null) return error(f, VORBIS_outofmem); 3137 memset(f.mapping, 0, f.mapping_count * (*f.mapping).sizeof); 3138 for (i=0; i < f.mapping_count; ++i) { 3139 Mapping *m = f.mapping + i; 3140 int mapping_type = get_bits(f,16); 3141 if (mapping_type != 0) return error(f, VORBIS_invalid_setup); 3142 m.chan = cast(MappingChannel *) setup_malloc(f, f.channels * (*m.chan).sizeof); 3143 if (m.chan == null) return error(f, VORBIS_outofmem); 3144 if (get_bits(f,1)) 3145 m.submaps = cast(ubyte)(get_bits(f,4)+1); 3146 else 3147 m.submaps = 1; 3148 if (m.submaps > max_submaps) 3149 max_submaps = m.submaps; 3150 if (get_bits(f,1)) { 3151 m.coupling_steps = cast(ushort)(get_bits(f,8)+1); 3152 if (m.coupling_steps > f.channels) return error(f, VORBIS_invalid_setup); 3153 for (k=0; k < m.coupling_steps; ++k) { 3154 m.chan[k].magnitude = cast(ubyte) get_bits(f, ilog(f.channels-1)); 3155 m.chan[k].angle = cast(ubyte) get_bits(f, ilog(f.channels-1)); 3156 if (m.chan[k].magnitude >= f.channels) return error(f, VORBIS_invalid_setup); 3157 if (m.chan[k].angle >= f.channels) return error(f, VORBIS_invalid_setup); 3158 if (m.chan[k].magnitude == m.chan[k].angle) return error(f, VORBIS_invalid_setup); 3159 } 3160 } else 3161 m.coupling_steps = 0; 3162 3163 // reserved field 3164 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup); 3165 if (m.submaps > 1) { 3166 for (j=0; j < f.channels; ++j) { 3167 m.chan[j].mux = cast(ubyte) get_bits(f, 4); 3168 if (m.chan[j].mux >= m.submaps) return error(f, VORBIS_invalid_setup); 3169 } 3170 } else 3171 // @SPECIFICATION: this case is missing from the spec 3172 for (j=0; j < f.channels; ++j) 3173 m.chan[j].mux = 0; 3174 3175 for (j=0; j < m.submaps; ++j) { 3176 get_bits(f,8); // discard 3177 m.submap_floor[j] = cast(ubyte) get_bits(f,8); 3178 m.submap_residue[j] = cast(ubyte) get_bits(f,8); 3179 if (m.submap_floor[j] >= f.floor_count) return error(f, VORBIS_invalid_setup); 3180 if (m.submap_residue[j] >= f.residue_count) return error(f, VORBIS_invalid_setup); 3181 } 3182 } 3183 3184 // Modes 3185 f.mode_count = get_bits(f, 6)+1; 3186 for (i=0; i < f.mode_count; ++i) { 3187 Mode *m = f.mode_config.ptr+i; 3188 m.blockflag = cast(ubyte) get_bits(f,1); 3189 m.windowtype = cast(ushort) get_bits(f,16); 3190 m.transformtype = cast(ushort) get_bits(f,16); 3191 m.mapping = cast(ubyte) get_bits(f,8); 3192 if (m.windowtype != 0) return error(f, VORBIS_invalid_setup); 3193 if (m.transformtype != 0) return error(f, VORBIS_invalid_setup); 3194 if (m.mapping >= f.mapping_count) return error(f, VORBIS_invalid_setup); 3195 } 3196 3197 flush_packet(f); 3198 3199 f.previous_length = 0; 3200 3201 for (i=0; i < f.channels; ++i) { 3202 f.channel_buffers[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1); 3203 f.previous_window[i] = cast(float *) setup_malloc(f, (float).sizeof * f.blocksize_1/2); 3204 f.finalY[i] = cast(int16 *) setup_malloc(f, (int16).sizeof * longest_floorlist); 3205 if (f.channel_buffers[i] == null || f.previous_window[i] == null || f.finalY[i] == null) return error(f, VORBIS_outofmem); 3206 memset(f.channel_buffers[i], 0, (float).sizeof * f.blocksize_1); 3207 } 3208 3209 if (!init_blocksize(f, 0, f.blocksize_0)) return FALSE; 3210 if (!init_blocksize(f, 1, f.blocksize_1)) return FALSE; 3211 f.blocksize[0] = f.blocksize_0; 3212 f.blocksize[1] = f.blocksize_1; 3213 3214 // compute how much temporary memory is needed 3215 3216 // 1. 3217 { 3218 uint32 imdct_mem = cast(uint)( (f.blocksize_1 * (float.sizeof) >> 1)); 3219 uint32 classify_mem; 3220 int i2,max_part_read=0; 3221 for (i2=0; i2 < f.residue_count; ++i2) { 3222 Residue *r = f.residue_config + i2; 3223 uint actual_size = f.blocksize_1 / 2; 3224 uint limit_r_begin = r.begin < actual_size ? r.begin : actual_size; 3225 uint limit_r_end = r.end < actual_size ? r.end : actual_size; 3226 int n_read = limit_r_end - limit_r_begin; 3227 int part_read = n_read / r.part_size; 3228 if (part_read > max_part_read) 3229 max_part_read = part_read; 3230 } 3231 classify_mem = cast(uint)( f.channels * ((void*).sizeof + max_part_read * (uint8 *).sizeof)); 3232 // maximum reasonable partition size is f.blocksize_1 3233 3234 f.temp_memory_required = classify_mem; 3235 if (imdct_mem > f.temp_memory_required) 3236 f.temp_memory_required = imdct_mem; 3237 } 3238 3239 3240 if (f.alloc.alloc_buffer) { 3241 assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes); 3242 // check if there's enough temp memory so we don't error later 3243 if (f.setup_offset + (*f).sizeof + f.temp_memory_required > cast(uint) f.temp_offset) 3244 return error(f, VORBIS_outofmem); 3245 } 3246 3247 // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page 3248 // without PAGEFLAG_continued_packet, so this either points to the first page, or 3249 // the page after the end of the headers. It might be cleaner to point to a page 3250 // in the middle of the headers, when that's the page where the first audio packet 3251 // starts, but we'd have to also correctly skip the end of any continued packet in 3252 // stb_vorbis_seek_start. 3253 if (f.next_seg == -1) { 3254 f.first_audio_page_offset = stb_vorbis_get_file_offset(f); 3255 } else { 3256 f.first_audio_page_offset = 0; 3257 } 3258 3259 return TRUE; 3260 } 3261 3262 static void vorbis_deinit(stb_vorbis *p) 3263 { 3264 int i,j; 3265 3266 setup_free(p, p.vendor); 3267 for (i=0; i < p.comment_list_length; ++i) { 3268 setup_free(p, p.comment_list[i]); 3269 } 3270 setup_free(p, p.comment_list); 3271 3272 if (p.residue_config) { 3273 for (i=0; i < p.residue_count; ++i) { 3274 Residue *r = p.residue_config+i; 3275 if (r.classdata) { 3276 for (j=0; j < p.codebooks[r.classbook].entries; ++j) 3277 setup_free(p, r.classdata[j]); 3278 setup_free(p, r.classdata); 3279 } 3280 setup_free(p, r.residue_books); 3281 } 3282 } 3283 3284 if (p.codebooks) { 3285 for (i=0; i < p.codebook_count; ++i) { 3286 Codebook *c = p.codebooks + i; 3287 setup_free(p, c.codeword_lengths); 3288 setup_free(p, c.multiplicands); 3289 setup_free(p, c.codewords); 3290 setup_free(p, c.sorted_codewords); 3291 // c.sorted_values[-1] is the first entry in the array 3292 setup_free(p, c.sorted_values ? c.sorted_values-1 : null); 3293 } 3294 setup_free(p, p.codebooks); 3295 } 3296 setup_free(p, p.floor_config); 3297 setup_free(p, p.residue_config); 3298 if (p.mapping) { 3299 for (i=0; i < p.mapping_count; ++i) 3300 setup_free(p, p.mapping[i].chan); 3301 setup_free(p, p.mapping); 3302 } 3303 for (i=0; i < p.channels && i < STB_VORBIS_MAX_CHANNELS; ++i) { 3304 setup_free(p, p.channel_buffers[i]); 3305 setup_free(p, p.previous_window[i]); 3306 setup_free(p, p.finalY[i]); 3307 } 3308 for (i=0; i < 2; ++i) { 3309 setup_free(p, p.A[i]); 3310 setup_free(p, p.B[i]); 3311 setup_free(p, p.C[i]); 3312 setup_free(p, p.window[i]); 3313 setup_free(p, p.bit_reverse[i]); 3314 } 3315 } 3316 3317 void stb_vorbis_close(stb_vorbis *p) 3318 { 3319 vorbis_deinit(p); 3320 setup_free(p,p); 3321 } 3322 3323 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z) 3324 { 3325 memset(p, 0, (*p).sizeof); // NULL out all malloc'd pointers to start 3326 if (z) { 3327 p.alloc = *z; 3328 p.alloc.alloc_buffer_length_in_bytes &= ~7; 3329 p.temp_offset = p.alloc.alloc_buffer_length_in_bytes; 3330 } 3331 p.eof = 0; 3332 p.error = VORBIS__no_error; 3333 p.codebooks = null; 3334 } 3335 3336 long stb_vorbis_get_sample_offset(stb_vorbis *f) 3337 { 3338 // Note: this is rewritten vs original stb_vorbis, who wasn't returning something good. 3339 if (f.current_loc_valid) 3340 { 3341 return f.current_sample_loc; 3342 } 3343 else 3344 return -1; 3345 } 3346 3347 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f) 3348 { 3349 stb_vorbis_info d; 3350 d.channels = f.channels; 3351 d.sample_rate = f.sample_rate; 3352 d.setup_memory_required = f.setup_memory_required; 3353 d.setup_temp_memory_required = f.setup_temp_memory_required; 3354 d.temp_memory_required = f.temp_memory_required; 3355 d.max_frame_size = f.blocksize_1 >> 1; 3356 return d; 3357 } 3358 3359 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f) 3360 { 3361 stb_vorbis_comment d; 3362 d.vendor = f.vendor; 3363 d.comment_list_length = f.comment_list_length; 3364 d.comment_list = f.comment_list; 3365 return d; 3366 } 3367 3368 int stb_vorbis_get_error(stb_vorbis *f) 3369 { 3370 int e = f.error; 3371 f.error = VORBIS__no_error; 3372 return e; 3373 } 3374 3375 static stb_vorbis * vorbis_alloc(stb_vorbis *f) 3376 { 3377 stb_vorbis *p = cast(stb_vorbis *) setup_malloc(f, stb_vorbis.sizeof); 3378 return p; 3379 } 3380 3381 uint stb_vorbis_get_file_offset(stb_vorbis *f) 3382 { 3383 return cast(uint) f._io.tell(f._userData); 3384 } 3385 3386 // 3387 // DATA-PULLING API 3388 // 3389 3390 uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last) 3391 { 3392 for(;;) { 3393 int n; 3394 if (f.eof) return 0; 3395 n = get8(f); 3396 if (n == 0x4f) { // page header candidate 3397 uint retry_loc = stb_vorbis_get_file_offset(f); 3398 int i; 3399 // check if we're off the end of a file_section stream 3400 if (retry_loc - 25 > f.stream_len) 3401 return 0; 3402 // check the rest of the header 3403 for (i=1; i < 4; ++i) 3404 if (get8(f) != ogg_page_header[i]) 3405 break; 3406 if (f.eof) return 0; 3407 if (i == 4) { 3408 uint8[27] header; 3409 uint32 i2, crc, goal, len; 3410 for (i2=0; i2 < 4; ++i2) 3411 header[i2] = ogg_page_header[i2]; 3412 for (; i2 < 27; ++i2) 3413 header[i2] = get8(f); 3414 if (f.eof) return 0; 3415 if (header[4] != 0) goto invalid; 3416 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (cast(uint32)header[25]<<24); 3417 for (i2=22; i2 < 26; ++i2) 3418 header[i2] = 0; 3419 crc = 0; 3420 for (i2=0; i2 < 27; ++i2) 3421 crc = crc32_update(f, crc, header[i2]); 3422 len = 0; 3423 for (i2=0; i2 < header[26]; ++i2) { 3424 int s = get8(f); 3425 crc = crc32_update(f, crc,cast(ubyte)s); 3426 len += s; 3427 } 3428 if (len && f.eof) return 0; 3429 for (i2=0; i2 < len; ++i2) 3430 crc = crc32_update(f, crc, get8(f)); 3431 // finished parsing probable page 3432 if (crc == goal) { 3433 // we could now check that it's either got the last 3434 // page flag set, OR it's followed by the capture 3435 // pattern, but I guess TECHNICALLY you could have 3436 // a file with garbage between each ogg page and recover 3437 // from it automatically? So even though that paranoia 3438 // might decrease the chance of an invalid decode by 3439 // another 2^32, not worth it since it would hose those 3440 // invalid-but-useful files? 3441 if (end) 3442 *end = stb_vorbis_get_file_offset(f); 3443 if (last) { 3444 if (header[5] & 0x04) 3445 *last = 1; 3446 else 3447 *last = 0; 3448 } 3449 set_file_offset(f, retry_loc-1); 3450 return 1; 3451 } 3452 } 3453 invalid: 3454 // not a valid page, so rewind and look for next one 3455 set_file_offset(f, retry_loc); 3456 } 3457 } 3458 assert(false); 3459 } 3460 3461 3462 enum SAMPLE_unknown = 0xffffffff; 3463 3464 // seeking is implemented with a binary search, which narrows down the range to 3465 // 64K, before using a linear search (because finding the synchronization 3466 // pattern can be expensive, and the chance we'd find the end page again is 3467 // relatively high for small ranges) 3468 // 3469 // two initial interpolation-style probes are used at the start of the search 3470 // to try to bound either side of the binary search sensibly, while still 3471 // working in O(log n) time if they fail. 3472 3473 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z) 3474 { 3475 uint8[27] header; 3476 uint8[255] lacing; 3477 int i,len; 3478 3479 // record where the page starts 3480 z.page_start = stb_vorbis_get_file_offset(f); 3481 3482 // parse the header 3483 getn(f, header.ptr, 27); 3484 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S') 3485 return 0; 3486 getn(f, lacing.ptr, header[26]); 3487 3488 // determine the length of the payload 3489 len = 0; 3490 for (i=0; i < header[26]; ++i) 3491 len += lacing[i]; 3492 3493 // this implies where the page ends 3494 z.page_end = z.page_start + 27 + header[26] + len; 3495 3496 // read the last-decoded sample out of the data 3497 z.last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24); 3498 3499 // restore file state to where we were 3500 set_file_offset(f, z.page_start); 3501 return 1; 3502 } 3503 3504 // rarely used function to seek back to the preceding page while finding the 3505 // start of a packet 3506 static int go_to_page_before(stb_vorbis *f, uint limit_offset) 3507 { 3508 uint previous_safe, end; 3509 3510 // now we want to seek back 64K from the limit 3511 if (limit_offset >= 65536 && limit_offset-65536 >= f.first_audio_page_offset) 3512 previous_safe = limit_offset - 65536; 3513 else 3514 previous_safe = f.first_audio_page_offset; 3515 3516 set_file_offset(f, previous_safe); 3517 3518 while (vorbis_find_page(f, &end, null)) { 3519 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset) 3520 return 1; 3521 set_file_offset(f, end); 3522 } 3523 3524 return 0; 3525 } 3526 3527 // implements the search logic for finding a page and starting decoding. if 3528 // the function succeeds, current_loc_valid will be true and current_loc will 3529 // be less than or equal to the provided sample number (the closer the 3530 // better). 3531 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) 3532 { 3533 ProbedPage left, right, mid; 3534 int i, start_seg_with_known_loc, end_pos, page_start; 3535 uint32 delta, stream_length, padding, last_sample_limit; 3536 double offset = 0.0, bytes_per_sample = 0.0; 3537 int probe = 0; 3538 3539 // find the last page and validate the target sample 3540 stream_length = stb_vorbis_stream_length_in_samples(f); 3541 if (stream_length == 0) return error(f, VORBIS_seek_without_length); 3542 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid); 3543 3544 // this is the maximum difference between the window-center (which is the 3545 // actual granule position value), and the right-start (which the spec 3546 // indicates should be the granule position (give or take one)). 3547 padding = ((f.blocksize_1 - f.blocksize_0) >> 2); 3548 if (sample_number < padding) 3549 last_sample_limit = 0; 3550 else 3551 last_sample_limit = sample_number - padding; 3552 3553 left = f.p_first; 3554 while (left.last_decoded_sample == ~0U) { 3555 // (untested) the first page does not have a 'last_decoded_sample' 3556 set_file_offset(f, left.page_end); 3557 if (!get_seek_page_info(f, &left)) goto error; 3558 } 3559 3560 right = f.p_last; 3561 assert(right.last_decoded_sample != ~0U); 3562 3563 // starting from the start is handled differently 3564 if (last_sample_limit <= left.last_decoded_sample) { 3565 if (stb_vorbis_seek_start(f)) { 3566 if (f.current_loc > sample_number) 3567 return error(f, VORBIS_seek_failed); 3568 return 1; 3569 } 3570 return 0; 3571 } 3572 3573 while (left.page_end != right.page_start) { 3574 assert(left.page_end < right.page_start); 3575 // search range in bytes 3576 delta = right.page_start - left.page_end; 3577 if (delta <= 65536) { 3578 // there's only 64K left to search - handle it linearly 3579 set_file_offset(f, left.page_end); 3580 } else { 3581 if (probe < 2) { 3582 if (probe == 0) { 3583 // first probe (interpolate) 3584 double data_bytes = right.page_end - left.page_start; 3585 bytes_per_sample = data_bytes / right.last_decoded_sample; 3586 offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample); 3587 } else { 3588 // second probe (try to bound the other side) 3589 double error = (cast(double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample; 3590 if (error >= 0 && error < 8000) error = 8000; 3591 if (error < 0 && error > -8000) error = -8000; 3592 offset += error * 2; 3593 } 3594 3595 // ensure the offset is valid 3596 if (offset < left.page_end) 3597 offset = left.page_end; 3598 if (offset > right.page_start - 65536) 3599 offset = right.page_start - 65536; 3600 3601 set_file_offset(f, cast(uint) offset); 3602 } else { 3603 // binary search for large ranges (offset by 32K to ensure 3604 // we don't hit the right page) 3605 set_file_offset(f, left.page_end + (delta / 2) - 32768); 3606 } 3607 3608 if (!vorbis_find_page(f, null, null)) goto error; 3609 } 3610 3611 for (;;) { 3612 if (!get_seek_page_info(f, &mid)) goto error; 3613 if (mid.last_decoded_sample != ~0U) break; 3614 // (untested) no frames end on this page 3615 set_file_offset(f, mid.page_end); 3616 assert(mid.page_start < right.page_start); 3617 } 3618 3619 // if we've just found the last page again then we're in a tricky file, 3620 // and we're close enough (if it wasn't an interpolation probe). 3621 if (mid.page_start == right.page_start) { 3622 if (probe >= 2 || delta <= 65536) 3623 break; 3624 } else { 3625 if (last_sample_limit < mid.last_decoded_sample) 3626 right = mid; 3627 else 3628 left = mid; 3629 } 3630 3631 ++probe; 3632 } 3633 3634 // seek back to start of the last packet 3635 page_start = left.page_start; 3636 set_file_offset(f, page_start); 3637 if (!start_page(f)) return error(f, VORBIS_seek_failed); 3638 end_pos = f.end_seg_with_known_loc; 3639 assert(end_pos >= 0); 3640 3641 for (;;) { 3642 for (i = end_pos; i > 0; --i) 3643 if (f.segments[i-1] != 255) 3644 break; 3645 3646 start_seg_with_known_loc = i; 3647 3648 if (start_seg_with_known_loc > 0 || !(f.page_flag & PAGEFLAG_continued_packet)) 3649 break; 3650 3651 // (untested) the final packet begins on an earlier page 3652 if (!go_to_page_before(f, page_start)) 3653 goto error; 3654 3655 page_start = stb_vorbis_get_file_offset(f); 3656 if (!start_page(f)) goto error; 3657 end_pos = f.segment_count - 1; 3658 } 3659 3660 // prepare to start decoding 3661 f.current_loc_valid = FALSE; 3662 f.last_seg = FALSE; 3663 f.valid_bits = 0; 3664 f.packet_bytes = 0; 3665 f.bytes_in_seg = 0; 3666 f.previous_length = 0; 3667 f.next_seg = start_seg_with_known_loc; 3668 3669 for (i = 0; i < start_seg_with_known_loc; i++) 3670 skip(f, f.segments[i]); 3671 3672 // start decoding (optimizable - this frame is generally discarded) 3673 if (!vorbis_pump_first_frame(f)) 3674 return 0; 3675 if (f.current_loc > sample_number) 3676 return error(f, VORBIS_seek_failed); 3677 return 1; 3678 3679 error: 3680 // try to restore the file to a valid state 3681 stb_vorbis_seek_start(f); 3682 return error(f, VORBIS_seek_failed); 3683 } 3684 3685 // the same as vorbis_decode_initial, but without advancing 3686 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) 3687 { 3688 int bits_read, bytes_read; 3689 3690 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) 3691 return 0; 3692 3693 // either 1 or 2 bytes were read, figure out which so we can rewind 3694 bits_read = 1 + ilog(f.mode_count-1); 3695 if (f.mode_config[*mode].blockflag) 3696 bits_read += 2; 3697 bytes_read = (bits_read + 7) / 8; 3698 3699 f.bytes_in_seg += bytes_read; 3700 f.packet_bytes -= bytes_read; 3701 skip(f, -bytes_read); 3702 if (f.next_seg == -1) 3703 f.next_seg = f.segment_count - 1; 3704 else 3705 f.next_seg--; 3706 f.valid_bits = 0; 3707 3708 return 1; 3709 } 3710 3711 int stb_vorbis_seek_frame(stb_vorbis *f, uint sample_number) 3712 { 3713 uint32 max_frame_samples; 3714 3715 // fast page-level search 3716 if (!seek_to_sample_coarse(f, sample_number)) 3717 return 0; 3718 3719 assert(f.current_loc_valid); 3720 assert(f.current_loc <= sample_number); 3721 3722 // linear search for the relevant packet 3723 max_frame_samples = (f.blocksize_1*3 - f.blocksize_0) >> 2; 3724 while (f.current_loc < sample_number) { 3725 int left_start, left_end, right_start, right_end, mode, frame_samples; 3726 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode)) 3727 return error(f, VORBIS_seek_failed); 3728 // calculate the number of samples returned by the next frame 3729 frame_samples = right_start - left_start; 3730 if (f.current_loc + frame_samples > sample_number) { 3731 return 1; // the next frame will contain the sample 3732 } else if (f.current_loc + frame_samples + max_frame_samples > sample_number) { 3733 // there's a chance the frame after this could contain the sample 3734 vorbis_pump_first_frame(f); 3735 } else { 3736 // this frame is too early to be relevant 3737 f.current_loc += frame_samples; 3738 f.previous_length = 0; 3739 maybe_start_packet(f); 3740 flush_packet(f); 3741 } 3742 } 3743 // the next frame should start with the sample 3744 if (f.current_loc != sample_number) return error(f, VORBIS_seek_failed); 3745 return 1; 3746 } 3747 3748 int stb_vorbis_seek(stb_vorbis *f, long sample_number) 3749 { 3750 if (sample_number < 0) 3751 return 0; 3752 3753 if (sample_number > uint.max) 3754 return 0; 3755 3756 if (!stb_vorbis_seek_frame(f, cast(uint)sample_number)) 3757 return 0; 3758 3759 if (sample_number != f.current_loc) { 3760 int n; 3761 uint32 frame_start = f.current_loc; 3762 stb_vorbis_get_frame_float(f, &n, null); 3763 assert(sample_number > frame_start); 3764 assert(f.channel_buffer_start + cast(int) (sample_number-frame_start) <= f.channel_buffer_end); 3765 f.channel_buffer_start += (sample_number - frame_start); 3766 } 3767 f.current_sample_loc = sample_number; 3768 3769 return 1; 3770 } 3771 3772 int stb_vorbis_seek_start(stb_vorbis *f) 3773 { 3774 set_file_offset(f, f.first_audio_page_offset); 3775 f.previous_length = 0; 3776 f.first_decode = TRUE; 3777 f.next_seg = -1; 3778 return vorbis_pump_first_frame(f); 3779 } 3780 3781 uint stb_vorbis_stream_length_in_samples(stb_vorbis *f) 3782 { 3783 uint restore_offset, previous_safe; 3784 uint end, last_page_loc; 3785 3786 if (!f.total_samples) { 3787 uint last; 3788 uint32 lo,hi; 3789 char[6] header; 3790 3791 // first, store the current decode position so we can restore it 3792 restore_offset = stb_vorbis_get_file_offset(f); 3793 3794 // now we want to seek back 64K from the end (the last page must 3795 // be at most a little less than 64K, but let's allow a little slop) 3796 if (f.stream_len >= 65536 && f.stream_len-65536 >= f.first_audio_page_offset) 3797 previous_safe = f.stream_len - 65536; 3798 else 3799 previous_safe = f.first_audio_page_offset; 3800 3801 set_file_offset(f, previous_safe); 3802 // previous_safe is now our candidate 'earliest known place that seeking 3803 // to will lead to the final page' 3804 3805 if (!vorbis_find_page(f, &end, &last)) { 3806 // if we can't find a page, we're hosed! 3807 f.error = VORBIS_cant_find_last_page; 3808 f.total_samples = 0xffffffff; 3809 goto done; 3810 } 3811 3812 // check if there are more pages 3813 last_page_loc = stb_vorbis_get_file_offset(f); 3814 3815 // stop when the last_page flag is set, not when we reach eof; 3816 // this allows us to stop short of a 'file_section' end without 3817 // explicitly checking the length of the section 3818 while (!last) { 3819 set_file_offset(f, end); 3820 if (!vorbis_find_page(f, &end, &last)) { 3821 // the last page we found didn't have the 'last page' flag 3822 // set. whoops! 3823 break; 3824 } 3825 //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging 3826 last_page_loc = stb_vorbis_get_file_offset(f); 3827 } 3828 3829 set_file_offset(f, last_page_loc); 3830 3831 // parse the header 3832 getn(f, cast(ubyte*)header.ptr, 6); 3833 // extract the absolute granule position 3834 lo = get32(f); 3835 hi = get32(f); 3836 if (lo == 0xffffffff && hi == 0xffffffff) { 3837 f.error = VORBIS_cant_find_last_page; 3838 f.total_samples = SAMPLE_unknown; 3839 goto done; 3840 } 3841 if (hi) 3842 lo = 0xfffffffe; // saturate 3843 f.total_samples = lo; 3844 3845 f.p_last.page_start = last_page_loc; 3846 f.p_last.page_end = end; 3847 f.p_last.last_decoded_sample = lo; 3848 3849 done: 3850 set_file_offset(f, restore_offset); 3851 } 3852 return f.total_samples == SAMPLE_unknown ? 0 : f.total_samples; 3853 } 3854 3855 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f) 3856 { 3857 return stb_vorbis_stream_length_in_samples(f) / cast(float) f.sample_rate; 3858 } 3859 3860 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output) 3861 { 3862 int len, right,left,i; 3863 3864 if (!vorbis_decode_packet(f, &len, &left, &right)) { 3865 f.channel_buffer_start = f.channel_buffer_end = 0; 3866 return 0; 3867 } 3868 3869 len = vorbis_finish_frame(f, len, left, right); 3870 for (i=0; i < f.channels; ++i) 3871 f.outputs[i] = f.channel_buffers[i] + left; 3872 3873 f.channel_buffer_start = left; 3874 f.channel_buffer_end = left+len; 3875 3876 if (channels) *channels = f.channels; 3877 if (output) *output = f.outputs.ptr; 3878 return len; 3879 } 3880 3881 stb_vorbis* stb_vorbis_open_file_section(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc, uint length) 3882 { 3883 stb_vorbis *f; 3884 stb_vorbis p; 3885 vorbis_init(&p, alloc); 3886 p._io = io; 3887 p._userData = userData; 3888 p.stream_len = length; 3889 if (start_decoder(&p)) 3890 { 3891 f = vorbis_alloc(&p); 3892 if (f) 3893 { 3894 *f = p; 3895 vorbis_pump_first_frame(f); 3896 return f; 3897 } 3898 } 3899 if (error) *error = p.error; 3900 vorbis_deinit(&p); 3901 return null; 3902 } 3903 3904 stb_vorbis* stb_vorbis_open_file(IOCallbacks* io, void* userData, int *error, stb_vorbis_alloc *alloc) 3905 { 3906 uint len = cast(uint) io.getFileLength(userData); 3907 return stb_vorbis_open_file_section(io, userData, error, alloc, len); 3908 } 3909 3910 3911 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats) 3912 { 3913 float **outputs; 3914 int len = num_floats / channels; 3915 int n=0; 3916 int z = f.channels; 3917 if (z > channels) z = channels; 3918 while (n < len) { 3919 int i,j; 3920 int k = f.channel_buffer_end - f.channel_buffer_start; 3921 if (n+k >= len) k = len - n; 3922 for (j=0; j < k; ++j) { 3923 for (i=0; i < z; ++i) 3924 *buffer++ = f.channel_buffers[i][f.channel_buffer_start+j]; 3925 for ( ; i < channels; ++i) 3926 *buffer++ = 0; 3927 } 3928 n += k; 3929 f.channel_buffer_start += k; 3930 if (n == len) 3931 break; 3932 if (!stb_vorbis_get_frame_float(f, null, &outputs)) 3933 break; 3934 } 3935 return n; 3936 } 3937 3938 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples) 3939 { 3940 float **outputs; 3941 int n=0; 3942 int z = f.channels; 3943 if (z > channels) z = channels; 3944 while (n < num_samples) { 3945 int i; 3946 int k = f.channel_buffer_end - f.channel_buffer_start; 3947 if (n+k >= num_samples) k = num_samples - n; 3948 if (k) { 3949 for (i=0; i < z; ++i) 3950 memcpy(buffer[i]+n, f.channel_buffers[i]+f.channel_buffer_start, (float.sizeof)*k); 3951 for ( ; i < channels; ++i) 3952 memset(buffer[i]+n, 0, (float.sizeof) * k); 3953 } 3954 n += k; 3955 f.channel_buffer_start += k; 3956 if (n == num_samples) 3957 break; 3958 if (!stb_vorbis_get_frame_float(f, null, &outputs)) 3959 break; 3960 } 3961 return n; 3962 } 3963 3964 /* Version history 3965 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223 3966 found with Mayhem by ForAllSecure 3967 1.16 - 2019-03-04 - fix warnings 3968 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found 3969 1.14 - 2018-02-11 - delete bogus dealloca usage 3970 1.13 - 2018-01-29 - fix truncation of last frame (hopefully) 3971 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files 3972 1.11 - 2017-07-23 - fix MinGW compilation 3973 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory 3974 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version 3975 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks; 3976 avoid discarding last frame of audio data 3977 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API 3978 some more crash fixes when out of memory or with corrupt files 3979 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson) 3980 some crash fixes when out of memory or with corrupt files 3981 1.05 - 2015-04-19 - don't define __forceinline if it's redundant 3982 1.04 - 2014-08-27 - fix missing const-correct case in API 3983 1.03 - 2014-08-07 - Warning fixes 3984 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows 3985 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float 3986 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel 3987 (API change) report sample rate for decode-full-file funcs 3988 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila 3989 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem 3990 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence 3991 0.99993 - remove assert that fired on legal files with empty tables 3992 0.99992 - rewind-to-start 3993 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo 3994 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++ 3995 0.9998 - add a full-decode function with a memory source 3996 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition 3997 0.9996 - query length of vorbis stream in samples/seconds 3998 0.9995 - bugfix to another optimization that only happened in certain files 3999 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors 4000 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation 4001 0.9992 - performance improvement of IMDCT; now performs close to reference implementation 4002 0.9991 - performance improvement of IMDCT 4003 0.999 - (should have been 0.9990) performance improvement of IMDCT 4004 0.998 - no-CRT support from Casey Muratori 4005 0.997 - bugfixes for bugs found by Terje Mathisen 4006 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen 4007 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen 4008 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen 4009 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen 4010 0.992 - fixes for MinGW warning 4011 0.991 - turn fast-float-conversion on by default 4012 0.990 - fix push-mode seek recovery if you seek into the headers 4013 0.98b - fix to bad release of 0.98 4014 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode 4015 0.97 - builds under c++ (typecasting, don't use 'class' keyword) 4016 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code 4017 0.95 - clamping code for 16-bit functions 4018 0.94 - not publically released 4019 0.93 - fixed all-zero-floor case (was decoding garbage) 4020 0.92 - fixed a memory leak 4021 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION 4022 0.90 - first public release 4023 */ 4024 4025 /* 4026 ------------------------------------------------------------------------------ 4027 This software is available under 2 licenses -- choose whichever you prefer. 4028 ------------------------------------------------------------------------------ 4029 ALTERNATIVE A - MIT License 4030 Copyright (c) 2017 Sean Barrett 4031 Permission is hereby granted, free of charge, to any person obtaining a copy of 4032 this software and associated documentation files (the "Software"), to deal in 4033 the Software without restriction, including without limitation the rights to 4034 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 4035 of the Software, and to permit persons to whom the Software is furnished to do 4036 so, subject to the following conditions: 4037 The above copyright notice and this permission notice shall be included in all 4038 copies or substantial portions of the Software. 4039 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 4040 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 4041 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 4042 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 4043 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 4044 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 4045 SOFTWARE. 4046 ------------------------------------------------------------------------------ 4047 ALTERNATIVE B - Public Domain (www.unlicense.org) 4048 This is free and unencumbered software released into the public domain. 4049 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this 4050 software, either in source code form or as a compiled binary, for any purpose, 4051 commercial or non-commercial, and by any means. 4052 In jurisdictions that recognize copyright laws, the author or authors of this 4053 software dedicate any and all copyright interest in the software to the public 4054 domain. We make this dedication for the benefit of the public at large and to 4055 the detriment of our heirs and successors. We intend this dedication to be an 4056 overt act of relinquishment in perpetuity of all present and future rights to 4057 this software under copyright law. 4058 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 4059 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 4060 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 4061 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 4062 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 4063 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 4064 ------------------------------------------------------------------------------ 4065 */