The OpenD Programming Language

1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  *
40  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
41  * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
42  * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
43  */
44 module arsd.jpeg;
45 
46 @system:
47 
48 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
49 // This is slower, but results in higher quality on images with highly saturated colors.
50 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
51 
52 /// Input stream interface.
53 /// This delegate is called when the internal input buffer is empty.
54 /// Parameters:
55 ///   pBuf - input buffer
56 ///   max_bytes_to_read - maximum bytes that can be written to pBuf
57 ///   pEOF_flag - set this to true if at end of stream (no more bytes remaining)
58 ///   Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
59 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
60 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
61 
62 
63 // ////////////////////////////////////////////////////////////////////////// //
64 private:
/// Allocates `nSize` bytes with the C runtime allocator; returns whatever `malloc` returns (null on failure).
void* jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* mem = malloc(nSize);
  return mem;
}
/// Releases memory with the C runtime `free`; a null pointer is ignored.
void jpgd_free (void* p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
67 
68 // Success/failure error codes.
// jpgd_status is a plain int; the codes below live in an anonymous enum, so they are ordinary int constants.
// JPGD_SUCCESS / JPGD_FAILED / JPGD_DONE are the generic results. The detailed error codes start at -256 and
// every following member is implicitly the previous one plus one (JPGD_BAD_DHT_INDEX == -255, and so on),
// which means the declaration order below fixes their numeric values - append new codes at the end only.
69 alias jpgd_status = int;
70 enum /*jpgd_status*/ {
71   JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
72   JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
73   JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
74   JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
75   JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
76   JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
77   JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
78   JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
79   JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM,
80 }
81 
// Compile-time limits of the decoder. JPGD_IN_BUF_SIZE, JPGD_MAX_BLOCKS_PER_MCU, JPGD_MAX_HUFF_TABLES,
// JPGD_MAX_QUANT_TABLES, JPGD_MAX_COMPONENTS and JPGD_MAX_COMPS_IN_SCAN are used as the lengths of the
// fixed-size arrays declared in jpeg_decoder.
// NOTE(review): JPGD_MAX_BLOCKS_PER_ROW / JPGD_MAX_HEIGHT / JPGD_MAX_WIDTH are presumably validation bounds
// checked while parsing the frame header - confirm against the marker-parsing code.
82 enum {
83   JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
84   JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384,
85 }
86 
87 // DCT coefficients are stored in this sequence.
// Zig-zag scan order of an 8x8 block: g_ZAG[k] is the row-major position (row*8 + col) of the k-th
// coefficient of the zig-zag walk (0, 1, 8, 16, 9, 2, ...). The 64 entries are a permutation of 0..63.
88 static immutable int[64] g_ZAG = [  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
89 
// Marker codes as plain int constants (anonymous enum).
// Notes on the values as declared here:
//   - M_DHT (0xC4) and M_DAC (0xCC) occupy the two gaps in the M_SOFn numbering, and M_JPG is 0xC8.
//   - RST0 is a second name for the same value as M_RST0 (0xD0).
//   - M_ERROR (0x100) does not fit in one byte, so it can never collide with a value read from the stream.
90 alias JPEG_MARKER = int;
91 enum /*JPEG_MARKER*/ {
92   M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
93   M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
94   M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
95   M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
96   M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0,
97   M_APP1  = 0xE1,
98 }
99 
// Scan/subsampling layouts as int constants numbered 0..4 in declaration order.
// These are the values jpeg_decoder.decode() switches on (via m_scan_type) to pick a colour-conversion routine.
100 alias JPEG_SUBSAMPLING = int;
101 enum /*JPEG_SUBSAMPLING*/ { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }
102 
// Fixed-point setup for the integer IDCT below.
// CONST_BITS: number of fractional bits in the FIX_* multipliers, i.e. FIX(x) == round(x * 2^13)
//             (for example 0.541196100 * 8192 ~= 4433).
// PASS1_BITS: extra fractional bits kept in the output of the row pass; Row.idct descales by
//             CONST_BITS-PASS1_BITS and Col.idct removes them again by descaling with CONST_BITS+PASS1_BITS+3.
// SCALEDONE:  the integer 1, shifted by DESCALE()/DESCALE_ZEROSHIFT() to build the rounding term.
103 enum CONST_BITS = 13;
104 enum PASS1_BITS = 2;
105 enum SCALEDONE = cast(int)1;
106 
107 enum FIX_0_298631336 = cast(int)2446;  /* FIX(0.298631336) */
108 enum FIX_0_390180644 = cast(int)3196;  /* FIX(0.390180644) */
109 enum FIX_0_541196100 = cast(int)4433;  /* FIX(0.541196100) */
110 enum FIX_0_765366865 = cast(int)6270;  /* FIX(0.765366865) */
111 enum FIX_0_899976223 = cast(int)7373;  /* FIX(0.899976223) */
112 enum FIX_1_175875602 = cast(int)9633;  /* FIX(1.175875602) */
113 enum FIX_1_501321110 = cast(int)12299; /* FIX(1.501321110) */
114 enum FIX_1_847759065 = cast(int)15137; /* FIX(1.847759065) */
115 enum FIX_1_961570560 = cast(int)16069; /* FIX(1.961570560) */
116 enum FIX_2_053119869 = cast(int)16819; /* FIX(2.053119869) */
117 enum FIX_2_562915447 = cast(int)20995; /* FIX(2.562915447) */
118 enum FIX_3_072711026 = cast(int)25172; /* FIX(3.072711026) */
119 
/// Rounding right shift: adds half of 2^n (SCALEDONE << (n-1)) to `x`, then shifts right by `n`.
/// `n` must be at least 1 for the rounding term to be well defined.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = SCALEDONE << (n - 1);
  return (x + half) >> n;
}
/// Like DESCALE, but also adds 128 (pre-scaled by 2^n) before shifting, so the result is
/// the rounded value of x / 2^n plus 128.
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  immutable int levelShift = 128 << n;
  immutable int half = SCALEDONE << (n - 1);
  return (x + levelShift + half) >> n;
}
/// Saturates `i` to the range 0..255: negative values become 0, values above 255 become 255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  // A single unsigned compare catches both "negative" and "greater than 255".
  if (cast(uint)i <= 255) return cast(ubyte)i;
  // ~i is non-negative for negative i (sign fill 0 -> result 0) and negative for i > 255 (sign fill -1 -> result 255).
  return cast(ubyte)(((~i) >> 31) & 0xFF);
}
123 
124 
125 // Compiler creates a fast path 1D IDCT for X non-zero columns
// Row pass of the separable 8x8 IDCT, specialised at compile time on how many leading
// coefficients of the row may be non-zero.
//   pSrc  - 8 coefficients of one row; only pSrc[0 .. NONZERO_COLS] is read, the rest is taken as 0.
//   pTemp - receives the 8 intermediate results, still scaled up by 2^PASS1_BITS.
126 struct Row(int NONZERO_COLS) {
127 pure nothrow @trusted @nogc:
128   static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
129     static if (NONZERO_COLS == 0) {
      // All-zero row: nothing is written, so pTemp keeps whatever the caller put there
      // (the callers in this file pass a freshly declared, hence zero-initialised, int[64]).
130       // nothing
131     } else static if (NONZERO_COLS == 1) {
      // DC-only row: every output equals the DC term scaled to the row-pass precision.
132       immutable int dcval = (pSrc[0] << PASS1_BITS);
133       pTemp[0] = dcval;
134       pTemp[1] = dcval;
135       pTemp[2] = dcval;
136       pTemp[3] = dcval;
137       pTemp[4] = dcval;
138       pTemp[5] = dcval;
139       pTemp[6] = dcval;
140       pTemp[7] = dcval;
141     } else {
142       // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
143       //#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
      // The template yields a source-code string that is pasted in with mixin().
144       template ACCESS_COL(int x) {
145         static if (x < NONZERO_COLS) enum ACCESS_COL = "cast(int)pSrc["~x.stringof~"]"; else enum ACCESS_COL = "0";
146       }
147 
      // Even part: columns 0, 2, 4 and 6.
148       immutable int z2 = mixin(ACCESS_COL!2), z3 = mixin(ACCESS_COL!6);
149 
150       immutable int z1 = (z2 + z3)*FIX_0_541196100;
151       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
152       immutable int tmp3 = z1 + z2*FIX_0_765366865;
153 
154       immutable int tmp0 = (mixin(ACCESS_COL!0) + mixin(ACCESS_COL!4)) << CONST_BITS;
155       immutable int tmp1 = (mixin(ACCESS_COL!0) - mixin(ACCESS_COL!4)) << CONST_BITS;
156 
157       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
158 
      // Odd part: columns 7, 5, 3 and 1 (in that order).
159       immutable int atmp0 = mixin(ACCESS_COL!7), atmp1 = mixin(ACCESS_COL!5), atmp2 = mixin(ACCESS_COL!3), atmp3 = mixin(ACCESS_COL!1);
160 
161       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
162       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
163 
164       immutable int az1 = bz1*(-FIX_0_899976223);
165       immutable int az2 = bz2*(-FIX_2_562915447);
166       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
167       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
168 
169       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
170       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
171       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
172       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
173 
      // Combine even and odd parts: outputs k and 7-k are the sum and the difference of the same pair.
      // Descaling by CONST_BITS-PASS1_BITS leaves PASS1_BITS fractional bits for the column pass.
174       pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
175       pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
176       pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
177       pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
178       pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
179       pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
180       pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
181       pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
182     }
183   }
184 }
185 
186 
187 // Compiler creates a fast path 1D IDCT for X non-zero rows
// Column pass of the separable 8x8 IDCT, specialised at compile time on how many leading
// rows of the intermediate buffer may be non-zero (at least 1).
//   pTemp    - top of one column in the row-pass output; element r of the column is pTemp[r*8],
//              and only rows below NONZERO_ROWS are read.
//   pDst_ptr - top of the same column in the 8x8 output block; sample r is stored at pDst_ptr[r*8]
//              after descaling, adding 128 and clamping to 0..255.
188 struct Col (int NONZERO_ROWS) {
189 pure nothrow @trusted @nogc:
190   static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
191     static assert(NONZERO_ROWS > 0);
192     static if (NONZERO_ROWS == 1) {
      // Only the first row is populated: the whole column takes the same sample value.
193       int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
194       immutable ubyte dcval_clamped = cast(ubyte)CLAMP(dcval);
195       pDst_ptr[0*8] = dcval_clamped;
196       pDst_ptr[1*8] = dcval_clamped;
197       pDst_ptr[2*8] = dcval_clamped;
198       pDst_ptr[3*8] = dcval_clamped;
199       pDst_ptr[4*8] = dcval_clamped;
200       pDst_ptr[5*8] = dcval_clamped;
201       pDst_ptr[6*8] = dcval_clamped;
202       pDst_ptr[7*8] = dcval_clamped;
203     } else {
204       // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
205       //#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
      // The template yields a source-code string that is pasted in with mixin().
206       template ACCESS_ROW(int x) {
207         static if (x < NONZERO_ROWS) enum ACCESS_ROW = "pTemp["~(x*8).stringof~"]"; else enum ACCESS_ROW = "0";
208       }
209 
      // Even part: rows 0, 2, 4 and 6.
210       immutable int z2 = mixin(ACCESS_ROW!2);
211       immutable int z3 = mixin(ACCESS_ROW!6);
212 
213       immutable int z1 = (z2 + z3)*FIX_0_541196100;
214       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
215       immutable int tmp3 = z1 + z2*FIX_0_765366865;
216 
217       immutable int tmp0 = (mixin(ACCESS_ROW!0) + mixin(ACCESS_ROW!4)) << CONST_BITS;
218       immutable int tmp1 = (mixin(ACCESS_ROW!0) - mixin(ACCESS_ROW!4)) << CONST_BITS;
219 
220       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
221 
      // Odd part: rows 7, 5, 3 and 1 (in that order).
222       immutable int atmp0 = mixin(ACCESS_ROW!7), atmp1 = mixin(ACCESS_ROW!5), atmp2 = mixin(ACCESS_ROW!3), atmp3 = mixin(ACCESS_ROW!1);
223 
224       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
225       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
226 
227       immutable int az1 = bz1*(-FIX_0_899976223);
228       immutable int az2 = bz2*(-FIX_2_562915447);
229       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
230       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
231 
232       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
233       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
234       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
235       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
236 
      // Outputs k and 7-k are the sum and the difference of the same even/odd pair. Each one is
      // descaled by CONST_BITS+PASS1_BITS+3, shifted up by 128 (DESCALE_ZEROSHIFT) and clamped.
237       int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
238       pDst_ptr[8*0] = cast(ubyte)CLAMP(i);
239 
240       i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
241       pDst_ptr[8*7] = cast(ubyte)CLAMP(i);
242 
243       i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
244       pDst_ptr[8*1] = cast(ubyte)CLAMP(i);
245 
246       i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
247       pDst_ptr[8*6] = cast(ubyte)CLAMP(i);
248 
249       i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
250       pDst_ptr[8*2] = cast(ubyte)CLAMP(i);
251 
252       i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
253       pDst_ptr[8*5] = cast(ubyte)CLAMP(i);
254 
255       i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
256       pDst_ptr[8*3] = cast(ubyte)CLAMP(i);
257 
258       i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
259       pDst_ptr[8*4] = cast(ubyte)CLAMP(i);
260     }
261   }
262 }
263 
264 
// Dispatch table for the row pass of idct(): 64 groups of 8 entries.
// Group (block_max_zag - 1) is selected by idct(); entry r of that group is the NONZERO_COLS
// argument (0..8) of the Row!(...) specialisation used for coefficient row r.
265 static immutable ubyte[512] s_idct_row_table = [
266   1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
267   4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
268   6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
269   6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
270   8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
271   8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
272   8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
273   8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
274 ];
275 
// Dispatch table for the column pass of idct(): entry (block_max_zag - 1) is the NONZERO_ROWS
// argument (1..8) of the Col!(...) specialisation used for every column of the block.
276 static immutable ubyte[64] s_idct_col_table = [ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ];
277 
/// Inverse DCT of one 8x8 coefficient block.
///
/// Params:
///   pSrc_ptr      = 64 coefficients in row-major order
///   pDst_ptr      = receives 64 samples (row-major, 0..255); no alignment is required
///   block_max_zag = value in 1..64 used to index s_idct_row_table / s_idct_col_table, which
///                   select the cheapest Row/Col specialisation for the block
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  if (block_max_zag <= 1) {
    // DC-only block: all 64 samples share one value. Fill bytewise instead of storing packed
    // ints through a cast pointer, so the result does not depend on pDst_ptr being int-aligned.
    immutable ubyte dc = CLAMP(((pSrc_ptr[0] + 4) >> 3) + 128);
    pDst_ptr[0 .. 64] = dc;
    return;
  }

  // Intermediate row-pass results. Zero-initialised, which matters because Row!(0) writes nothing.
  int[64] temp;

  const(jpeg_decoder.jpgd_block_t)* pSrc = pSrc_ptr;
  int* pTemp = temp.ptr;

  // Pass 1: rows. The table entry for each row says how many leading columns can be non-zero.
  const(ubyte)* pRow_tab = &s_idct_row_table.ptr[(block_max_zag - 1) * 8];
  for (int row = 8; row > 0; row--, pRow_tab++) {
    switch (*pRow_tab) {
      case 0: Row!(0).idct(pTemp, pSrc); break;
      case 1: Row!(1).idct(pTemp, pSrc); break;
      case 2: Row!(2).idct(pTemp, pSrc); break;
      case 3: Row!(3).idct(pTemp, pSrc); break;
      case 4: Row!(4).idct(pTemp, pSrc); break;
      case 5: Row!(5).idct(pTemp, pSrc); break;
      case 6: Row!(6).idct(pTemp, pSrc); break;
      case 7: Row!(7).idct(pTemp, pSrc); break;
      case 8: Row!(8).idct(pTemp, pSrc); break;
      default: assert(0);
    }

    pSrc += 8;
    pTemp += 8;
  }

  // Pass 2: columns. The number of non-zero rows is the same for every column of the block.
  pTemp = temp.ptr;

  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  for (int col = 8; col > 0; col--) {
    switch (nonzero_rows) {
      case 1: Col!(1).idct(pDst_ptr, pTemp); break;
      case 2: Col!(2).idct(pDst_ptr, pTemp); break;
      case 3: Col!(3).idct(pDst_ptr, pTemp); break;
      case 4: Col!(4).idct(pDst_ptr, pTemp); break;
      case 5: Col!(5).idct(pDst_ptr, pTemp); break;
      case 6: Col!(6).idct(pDst_ptr, pTemp); break;
      case 7: Col!(7).idct(pDst_ptr, pTemp); break;
      case 8: Col!(8).idct(pDst_ptr, pTemp); break;
      default: assert(0);
    }

    pTemp++;
    pDst_ptr++;
  }
}
347 
/// Inverse DCT for a block in which only the top-left 4x4 coefficients are used:
/// the row pass runs on the first four rows only (four columns each), then the column
/// pass produces all eight output columns from those four rows.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] scratch; // zero-initialised; rows 4..7 are never written and never read

  foreach (immutable row; 0 .. 4) {
    Row!(4).idct(scratch.ptr + row*8, pSrc_ptr + row*8);
  }

  foreach (immutable col; 0 .. 8) {
    Col!(4).idct(pDst_ptr + col, scratch.ptr + col);
  }
}
368 
369 
370 // ////////////////////////////////////////////////////////////////////////// //
371 struct jpeg_decoder {
372 private import core.stdc.string : memcpy, memset;
373 private:
374   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
375   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
376 
377   alias jpgd_quant_t = short;
378   alias jpgd_block_t = short;
379   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
380 
381   static struct huff_tables {
    // Lookup data for one Huffman table, as consumed by the two huff_decode() overloads.
    // NOTE(review): ac_table presumably marks AC (as opposed to DC) tables - it is not read in the decode paths shown here.
382     bool ac_table;
    // Indexed by the top 8 bits of the bit buffer. Read as a signed int: a non-negative entry is the
    // decoded symbol, a negative entry is the starting point of a walk through `tree`.
383     uint[256] look_up;
    // Same indexing as look_up. A non-negative entry packs: bits 0..7 the symbol (its low nibble is the
    // number of extra bits), bits 8..12 the number of bits to consume, bit 15 a flag saying the extra
    // bits are already stored in bits 16 and up.
384     uint[256] look_up2;
    // Indexed by symbol: length in bits of that symbol's code.
385     ubyte[256] code_size;
    // Overflow tree for codes longer than 8 bits; indexed by the negated current value plus the next
    // input bit. Entries are followed until a non-negative value (the symbol) is reached.
386     uint[512] tree;
387   }
388 
389   static struct coeff_buf {
    // Descriptor of a coefficient buffer; jpeg_decoder keeps one pointer per component for DC and
    // for AC data (m_dc_coeffs / m_ac_coeffs).
    // NOTE(review): the field meanings below are inferred from their names only - confirm against the
    // code that allocates and indexes these buffers.
390     ubyte* pData;                  // presumably the raw storage for all blocks
391     int block_num_x, block_num_y;  // presumably the number of blocks horizontally / vertically
392     int block_len_x, block_len_y;  // presumably the dimensions of a single block
393     int block_size;                // presumably the size in bytes of a single block
394   }
395 
396   static struct mem_block {
    // Header of one node in the singly linked list that starts at jpeg_decoder.m_pMem_blocks.
    // NOTE(review): presumably each node is over-allocated so that m_data is the first byte of a
    // payload of m_size bytes, of which m_used_count are handed out - confirm against the allocator.
397     mem_block* m_pNext;
398     size_t m_used_count;
399     size_t m_size;
400     char[1] m_data;
401   }
402 
403   mem_block* m_pMem_blocks;
404   int m_image_x_size;
405   int m_image_y_size;
406   JpegStreamReadFunc readfn;
407   int m_progressive_flag;
408   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
409   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
410   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
411   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
412   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
413   int m_comps_in_frame;                         // # of components in frame
414   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
415   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
416   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
417   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
418   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
419   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
420   int m_comps_in_scan;                          // # of components in scan
421   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
422   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
423   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
424   int m_spectral_start;                         // spectral selection start
425   int m_spectral_end;                           // spectral selection end
426   int m_successive_low;                         // successive approximation low
427   int m_successive_high;                        // successive approximation high
428   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
429   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
430   int m_blocks_per_mcu;
431   int m_max_blocks_per_row;
432   int m_mcus_per_row, m_mcus_per_col;
433   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
434   int m_total_lines_left;                       // total # lines left in image
435   int m_mcu_lines_left;                         // total # lines left in this MCU
436   int m_real_dest_bytes_per_scan_line;
437   int m_dest_bytes_per_scan_line;               // rounded up
438   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
439   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
440   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
441   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
442   int m_eob_run;
443   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
444   ubyte* m_pIn_buf_ofs;
445   int m_in_buf_left;
446   int m_tem_flag;
447   bool m_eof_flag;
448   ubyte[128] m_in_buf_pad_start;
449   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
450   ubyte[128] m_in_buf_pad_end;
451   int m_bits_left;
452   uint m_bit_buf;
453   int m_restart_interval;
454   int m_restarts_left;
455   int m_next_restart_num;
456   int m_max_mcus_per_row;
457   int m_max_blocks_per_mcu;
458   int m_expanded_blocks_per_mcu;
459   int m_expanded_blocks_per_row;
460   int m_expanded_blocks_per_component;
461   bool m_freq_domain_chroma_upsample;
462   int m_max_mcus_per_col;
463   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
464   jpgd_block_t* m_pMCU_coefficients;
465   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
466   ubyte* m_pSample_buf;
467   int[256] m_crr;
468   int[256] m_cbb;
469   int[256] m_crg;
470   int[256] m_cbg;
471   ubyte* m_pScan_line_0;
472   ubyte* m_pScan_line_1;
473   jpgd_status m_error_code;
474   bool m_ready_flag;
475   int m_total_bytes_read;
476 
477 public:
478   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
479   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
480   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
481 
482   ~this () { free_all_blocks(); }
483 
484   @disable this (this); // no copies
485 
486   // Call this method after constructing the object to begin decompression.
487   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
488   int begin_decoding () {
489     if (m_ready_flag) return JPGD_SUCCESS;
490     if (m_error_code) return JPGD_FAILED;
491     try {
492       decode_start();
493       m_ready_flag = true;
494       return JPGD_SUCCESS;
495     } catch (Exception e) {
496       //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("ERROR: %.*s...\n", cast(int)e.msg.length, e.msg.ptr); }}
497       version(jpegd_test) {{ import std.stdio; stderr.writeln(e.toString); }}
498     }
499     return JPGD_FAILED;
500   }
501 
502   // Returns the next scan line.
503   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
504   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
505   // Returns JPGD_SUCCESS if a scan line has been returned.
506   // Returns JPGD_DONE if all scan lines have been returned.
507   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for more info.
508   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
509     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
510     if (m_total_lines_left == 0) return JPGD_DONE;
511     try {
512       if (m_mcu_lines_left == 0) {
513         if (m_progressive_flag) load_next_row(); else decode_next_row();
514         // Find the EOI marker if that was the last row.
515         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
516         m_mcu_lines_left = m_max_mcu_y_size;
517       }
518       if (m_freq_domain_chroma_upsample) {
519         expanded_convert();
520         *pScan_line = m_pScan_line_0;
521       } else {
522         switch (m_scan_type) {
523           case JPGD_YH2V2:
524             if ((m_mcu_lines_left & 1) == 0) {
525               H2V2Convert();
526               *pScan_line = m_pScan_line_0;
527             } else {
528               *pScan_line = m_pScan_line_1;
529             }
530             break;
531           case JPGD_YH2V1:
532             H2V1Convert();
533             *pScan_line = m_pScan_line_0;
534             break;
535           case JPGD_YH1V2:
536             if ((m_mcu_lines_left & 1) == 0) {
537               H1V2Convert();
538               *pScan_line = m_pScan_line_0;
539             } else {
540               *pScan_line = m_pScan_line_1;
541             }
542             break;
543           case JPGD_YH1V1:
544             H1V1Convert();
545             *pScan_line = m_pScan_line_0;
546             break;
547           case JPGD_GRAYSCALE:
548             gray_convert();
549             *pScan_line = m_pScan_line_0;
550             break;
551           default:
552         }
553       }
554       *pScan_line_len = m_real_dest_bytes_per_scan_line;
555       --m_mcu_lines_left;
556       --m_total_lines_left;
557       return JPGD_SUCCESS;
558     } catch (Exception) {}
559     return JPGD_FAILED;
560   }
561 
562   @property const pure nothrow @trusted @nogc {
563     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
564 
565     int width () { pragma(inline, true); return m_image_x_size; }
566     int height () { pragma(inline, true); return m_image_y_size; }
567 
568     int num_components () { pragma(inline, true); return m_comps_in_frame; }
569 
570     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
571     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
572 
573     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
574     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
575   }
576 
577 private:
578   // Retrieve one character from the input stream.
579   uint get_char () {
580     // Any bytes remaining in buffer?
581     if (!m_in_buf_left) {
582       // Try to get more bytes.
583       prep_in_buffer();
584       // Still nothing to get?
585       if (!m_in_buf_left) {
586         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
587         int t = m_tem_flag;
588         m_tem_flag ^= 1;
589         return (t ? 0xD9 : 0xFF);
590       }
591     }
592     uint c = *m_pIn_buf_ofs++;
593     --m_in_buf_left;
594     return c;
595   }
596 
597   // Same as previous method, except can indicate if the character is a pad character or not.
598   uint get_char (bool* pPadding_flag) {
599     if (!m_in_buf_left) {
600       prep_in_buffer();
601       if (!m_in_buf_left) {
602         *pPadding_flag = true;
603         int t = m_tem_flag;
604         m_tem_flag ^= 1;
605         return (t ? 0xD9 : 0xFF);
606       }
607     }
608     *pPadding_flag = false;
609     uint c = *m_pIn_buf_ofs++;
610     --m_in_buf_left;
611     return c;
612   }
613 
614   // Inserts a previously retrieved character back into the input buffer.
615   void stuff_char (ubyte q) {
616     *(--m_pIn_buf_ofs) = q;
617     m_in_buf_left++;
618   }
619 
620   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
621   ubyte get_octet () {
622     bool padding_flag;
623     int c = get_char(&padding_flag);
624     if (c == 0xFF) {
625       if (padding_flag) return 0xFF;
626       c = get_char(&padding_flag);
627       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
628       if (c == 0x00) return 0xFF;
629       stuff_char(cast(ubyte)(c));
630       stuff_char(0xFF);
631       return 0xFF;
632     }
633     return cast(ubyte)(c);
634   }
635 
636   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
637   uint get_bits (int num_bits) {
    // Returns the next num_bits bits as an unsigned value (0 when num_bits is 0).
    // Input bytes are fetched with get_char(), so 0xFF bytes get no special treatment here.
638     if (!num_bits) return 0;
    // Pending bits sit at the most-significant end of m_bit_buf.
639     uint i = m_bit_buf >> (32 - num_bits);
640     if ((m_bits_left -= num_bits) <= 0) {
      // m_bits_left is now <= 0, so num_bits + m_bits_left is the bit count that was available
      // before this call. Shift by just that much so the low 16 bits are free for two new bytes.
641       m_bit_buf <<= (num_bits += m_bits_left);
642       uint c1 = get_char();
643       uint c2 = get_char();
644       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
      // Consume the part of the request that had to come out of the freshly loaded bytes.
645       m_bit_buf <<= -m_bits_left;
646       m_bits_left += 16;
647       assert(m_bits_left >= 0);
648     } else {
649       m_bit_buf <<= num_bits;
650     }
651     return i;
652   }
653 
654   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
655   uint get_bits_no_markers (int num_bits) {
    // Same bit-buffer bookkeeping as get_bits(); only the way the two refill bytes are obtained differs.
656     if (!num_bits) return 0;
657     uint i = m_bit_buf >> (32 - num_bits);
658     if ((m_bits_left -= num_bits) <= 0) {
      // num_bits + m_bits_left is the bit count that was available before this call.
659       m_bit_buf <<= (num_bits += m_bits_left);
      // Slow path when fewer than two bytes are buffered or either of the next two is 0xFF:
      // get_octet() deals with refills, 0xFF 0x00 pairs and markers.
660       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
661         uint c1 = get_octet();
662         uint c2 = get_octet();
663         m_bit_buf |= (c1 << 8) | c2;
664       } else {
        // Fast path: take the two bytes straight from the input buffer.
665         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
666         m_in_buf_left -= 2;
667         m_pIn_buf_ofs += 2;
668       }
669       m_bit_buf <<= -m_bits_left;
670       m_bits_left += 16;
671       assert(m_bits_left >= 0);
672     } else {
673       m_bit_buf <<= num_bits;
674     }
675     return i;
676   }
677 
678   // Decodes a Huffman encoded symbol.
679   int huff_decode (huff_tables *pH) {
    // Decodes one symbol using table pH and consumes exactly the bits of its code.
680     int symbol;
681     // Check first 8-bits: do we have a complete symbol?
682     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
683       // Decode more bits, use a tree traversal to find symbol.
      // Bits 23, 22, ... of the bit buffer are examined one per step; each step indexes the tree
      // with the negated current value plus that bit, until a non-negative entry is found.
684       int ofs = 23;
685       do {
686         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
687         --ofs;
688       } while (symbol < 0);
      // 8 lookup bits plus one bit per tree step (23 - ofs steps were taken).
689       get_bits_no_markers(8 + (23 - ofs));
690     } else {
      // Direct hit: consume just the code length of the symbol.
691       get_bits_no_markers(pH.code_size.ptr[symbol]);
692     }
693     return symbol;
694   }
695 
696   // Decodes a Huffman encoded symbol.
697   int huff_decode (huff_tables *pH, ref int extra_bits) {
    // Decodes one symbol and, in the same step, the extra bits that follow it in the stream.
    // The number of extra bits is the low nibble of the symbol; their value is stored in extra_bits.
698     int symbol;
699     // Check first 8-bits: do we have a complete symbol?
700     if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
701       // Use a tree traversal to find symbol.
702       int ofs = 23;
703       do {
704         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
705         --ofs;
706       } while (symbol < 0);
      // Consume the code (8 lookup bits plus one per tree step), then read the extra bits separately.
707       get_bits_no_markers(8 + (23 - ofs));
708       extra_bits = get_bits_no_markers(symbol & 0xF);
709     } else {
      // Packed entry: bits 8..12 hold the bit count to consume; when flag 0x8000 is set that count
      // already includes the extra bits, otherwise it is the code length alone.
710       assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
711       if (symbol & 0x8000) {
        // Code and extra bits both fitted in the 8 lookup bits: the extra-bit value is stored in bits 16+.
712         get_bits_no_markers((symbol >> 8) & 31);
713         extra_bits = symbol >> 16;
714       } else {
715         int code_size = (symbol >> 8) & 31;
716         int num_extra_bits = symbol & 0xF;
717         int bits = code_size + num_extra_bits;
        // Fetch code and extra bits with one call when the combined length is within m_bits_left + 16,
        // keeping only the low num_extra_bits of the result; otherwise use two separate calls.
718         if (bits <= (m_bits_left + 16)) {
719           extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
720         } else {
721           get_bits_no_markers(code_size);
722           extra_bits = get_bits_no_markers(num_extra_bits);
723         }
724       }
      // Strip the packed fields; only the symbol byte is returned.
725       symbol &= 0xFF;
726     }
727     return symbol;
728   }
729 
730   // Tables and macro used to fully decode the DPCM differences.
731   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
732   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
733   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
734   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
735   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
736   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
737 
738   // Clamps a value between 0-255.
739   //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
740   alias clamp = CLAMP;
741 
742   static struct DCT_Upsample {
743   static:
744     static struct Matrix44 {
745     pure nothrow @trusted @nogc:
746       alias Element_Type = int;
747       enum { NUM_ROWS = 4, NUM_COLS = 4 }
748 
749       Element_Type[NUM_COLS][NUM_ROWS] v;
750 
751       this() (const scope auto ref Matrix44 m) {
752         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
753       }
754 
755       //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
756       //@property int cols () const { pragma(inline, true); return NUM_COLS; }
757 
758       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
759 
760       ref Matrix44 opOpAssign(string op:"+") (const scope auto ref Matrix44 a) {
761         foreach (int r; 0..NUM_ROWS) {
762           at(r, 0) += a.at(r, 0);
763           at(r, 1) += a.at(r, 1);
764           at(r, 2) += a.at(r, 2);
765           at(r, 3) += a.at(r, 3);
766         }
767         return this;
768       }
769 
770       ref Matrix44 opOpAssign(string op:"-") (const scope auto ref Matrix44 a) {
771         foreach (int r; 0..NUM_ROWS) {
772           at(r, 0) -= a.at(r, 0);
773           at(r, 1) -= a.at(r, 1);
774           at(r, 2) -= a.at(r, 2);
775           at(r, 3) -= a.at(r, 3);
776         }
777         return this;
778       }
779 
780       Matrix44 opBinary(string op:"+") (const scope auto ref Matrix44 b) const {
781         alias a = this;
782         Matrix44 ret;
783         foreach (int r; 0..NUM_ROWS) {
784           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
785           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
786           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
787           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
788         }
789         return ret;
790       }
791 
792       Matrix44 opBinary(string op:"-") (const scope auto ref Matrix44 b) const {
793         alias a = this;
794         Matrix44 ret;
795         foreach (int r; 0..NUM_ROWS) {
796           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
797           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
798           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
799           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
800         }
801         return ret;
802       }
803 
804       static void add_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
805         foreach (int r; 0..4) {
806           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
807           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
808           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
809           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
810         }
811       }
812 
813       static void sub_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
814         foreach (int r; 0..4) {
815           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
816           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
817           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
818           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
819         }
820       }
821     }
822 
    // Fixed-point parameters for the transforms below: coefficients are scaled by
    // SCALE = 2^FRACT_BITS before being used in integer arithmetic.
    enum FRACT_BITS = 10;
    enum SCALE = 1 << FRACT_BITS;

    alias Temp_Type = int;
    //TODO: convert defines to mixins
    //#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
    //#define F(i) ((int)((i) * SCALE + .5f))
    // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
    //#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])

    // D: adds half of SCALE to `i` and shifts right by FRACT_BITS (scaled value -> integer).
    static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
    // F: compile-time conversion of a float coefficient to its scaled integer form (i * SCALE + 0.5, truncated by the cast).
    enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
835 
    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    // P_Q.calc fills the 4x4 matrices P and Q from the coefficient block at `pSrc`
    // (element (c, r) is read from pSrc[c + r*8]).
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT yields source text for mixin: the literal "0" when (c, r) lies outside the
        // NUM_COLS x NUM_ROWS region, otherwise the corresponding pSrc[...] access.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        // X0ij: i = output row 0..3, j = source row index 0..7.
        // Rows 0 and 2 are plain copies of source columns 0 and 4; rows 1 and 3 are
        // fixed-point weighted sums of source columns 1, 3, 5 and 7.
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // P: columns 0 and 2 copy X0i0 / X0i4; columns 1 and 3 combine X0i1, X0i3, X0i5, X0i7.
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // Q: columns 1 and 3 copy X0i2 / X0i6; columns 0 and 2 combine X0i1, X0i3, X0i5, X0i7.
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }
916 
    // R_S.calc fills the 4x4 matrices R and S from the coefficient block at `pSrc`
    // (element (c, r) is read from pSrc[c + r*8]).
    // NUM_ROWS/NUM_COLS bound the region of `pSrc` that is read; everything outside is taken as 0.
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT yields source text for mixin: the literal "0" when (c, r) lies outside the
        // NUM_COLS x NUM_ROWS region, otherwise the corresponding pSrc[...] access.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        // X1ij: i = output row 0..3, j = source row index 0..7.
        // Rows 1 and 3 are plain copies of source columns 2 and 6; rows 0 and 2 are
        // fixed-point weighted sums of source columns 1, 3, 5 and 7.
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // R: columns 0 and 2 copy X1i0 / X1i4; columns 1 and 3 combine X1i1, X1i3, X1i5, X1i7.
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        // S: columns 1 and 3 copy X1i2 / X1i6; columns 0 and 2 combine X1i1, X1i3, X1i5, X1i7.
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
996   } // end namespace DCT_Upsample
997 
998   // Unconditionally frees all allocated m_blocks.
999   void free_all_blocks () {
1000     //m_pStream = null;
1001     readfn = null;
1002     for (mem_block *b = m_pMem_blocks; b; ) {
1003       mem_block* n = b.m_pNext;
1004       jpgd_free(b);
1005       b = n;
1006     }
1007     m_pMem_blocks = null;
1008   }
1009 
1010   // This method handles all errors. It will never return.
1011   // It could easily be changed to use C++ exceptions.
1012   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status, size_t line=__LINE__) {
1013     m_error_code = status;
1014     free_all_blocks();
1015     //longjmp(m_jmp_state, status);
1016     throw new Exception("jpeg decoding error", __FILE__, line);
1017   }
1018 
1019   void* alloc (size_t nSize, bool zero=false) {
1020     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1021     char *rv = null;
1022     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1023     {
1024       if ((b.m_used_count + nSize) <= b.m_size)
1025       {
1026         rv = b.m_data.ptr + b.m_used_count;
1027         b.m_used_count += nSize;
1028         break;
1029       }
1030     }
1031     if (!rv)
1032     {
1033       size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1034       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1035       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1036       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1037       b.m_used_count = nSize;
1038       b.m_size = capacity;
1039       rv = b.m_data.ptr;
1040     }
1041     if (zero) memset(rv, 0, nSize);
1042     return rv;
1043   }
1044 
1045   void word_clear (void *p, ushort c, uint n) {
1046     ubyte *pD = cast(ubyte*)p;
1047     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1048     while (n)
1049     {
1050       pD[0] = l; pD[1] = h; pD += 2;
1051       n--;
1052     }
1053   }
1054 
1055   // Refill the input buffer.
1056   // This method will sit in a loop until (A) the buffer is full or (B)
1057   // the stream's read() method reports and end of file condition.
1058   void prep_in_buffer () {
1059     m_in_buf_left = 0;
1060     m_pIn_buf_ofs = m_in_buf.ptr;
1061 
1062     if (m_eof_flag)
1063       return;
1064 
1065     do
1066     {
1067       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1068       if (bytes_read == -1)
1069         stop_decoding(JPGD_STREAM_READ);
1070 
1071       m_in_buf_left += bytes_read;
1072     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1073 
1074     m_total_bytes_read += m_in_buf_left;
1075 
1076     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1077     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1078     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1079   }
1080 
1081   // Read a Huffman code table.
1082   void read_dht_marker () {
1083     int i, index, count;
1084     ubyte[17] huff_num;
1085     ubyte[256] huff_val;
1086 
1087     uint num_left = get_bits(16);
1088 
1089     if (num_left < 2)
1090       stop_decoding(JPGD_BAD_DHT_MARKER);
1091 
1092     num_left -= 2;
1093 
1094     while (num_left)
1095     {
1096       index = get_bits(8);
1097 
1098       huff_num.ptr[0] = 0;
1099 
1100       count = 0;
1101 
1102       for (i = 1; i <= 16; i++)
1103       {
1104         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1105         count += huff_num.ptr[i];
1106       }
1107 
1108       if (count > 255)
1109         stop_decoding(JPGD_BAD_DHT_COUNTS);
1110 
1111       for (i = 0; i < count; i++)
1112         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1113 
1114       i = 1 + 16 + count;
1115 
1116       if (num_left < cast(uint)i)
1117         stop_decoding(JPGD_BAD_DHT_MARKER);
1118 
1119       num_left -= i;
1120 
1121       if ((index & 0x10) > 0x10)
1122         stop_decoding(JPGD_BAD_DHT_INDEX);
1123 
1124       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1125 
1126       if (index >= JPGD_MAX_HUFF_TABLES)
1127         stop_decoding(JPGD_BAD_DHT_INDEX);
1128 
1129       if (!m_huff_num.ptr[index])
1130         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1131 
1132       if (!m_huff_val.ptr[index])
1133         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1134 
1135       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1136       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1137       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1138     }
1139   }
1140 
1141   // Read a quantization table.
1142   void read_dqt_marker () {
1143     int n, i, prec;
1144     uint num_left;
1145     uint temp;
1146 
1147     num_left = get_bits(16);
1148 
1149     if (num_left < 2)
1150       stop_decoding(JPGD_BAD_DQT_MARKER);
1151 
1152     num_left -= 2;
1153 
1154     while (num_left)
1155     {
1156       n = get_bits(8);
1157       prec = n >> 4;
1158       n &= 0x0F;
1159 
1160       if (n >= JPGD_MAX_QUANT_TABLES)
1161         stop_decoding(JPGD_BAD_DQT_TABLE);
1162 
1163       if (!m_quant.ptr[n])
1164         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1165 
1166       // read quantization entries, in zag order
1167       for (i = 0; i < 64; i++)
1168       {
1169         temp = get_bits(8);
1170 
1171         if (prec)
1172           temp = (temp << 8) + get_bits(8);
1173 
1174         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1175       }
1176 
1177       i = 64 + 1;
1178 
1179       if (prec)
1180         i += 64;
1181 
1182       if (num_left < cast(uint)i)
1183         stop_decoding(JPGD_BAD_DQT_LENGTH);
1184 
1185       num_left -= i;
1186     }
1187   }
1188 
1189   // Read the start of frame (SOF) marker.
1190   void read_sof_marker () {
1191     int i;
1192     uint num_left;
1193 
1194     num_left = get_bits(16);
1195 
1196     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1197       stop_decoding(JPGD_BAD_PRECISION);
1198 
1199     m_image_y_size = get_bits(16);
1200 
1201     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1202       stop_decoding(JPGD_BAD_HEIGHT);
1203 
1204     m_image_x_size = get_bits(16);
1205 
1206     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1207       stop_decoding(JPGD_BAD_WIDTH);
1208 
1209     m_comps_in_frame = get_bits(8);
1210 
1211     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1212       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1213 
1214     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1215       stop_decoding(JPGD_BAD_SOF_LENGTH);
1216 
1217     for (i = 0; i < m_comps_in_frame; i++)
1218     {
1219       m_comp_ident.ptr[i]  = get_bits(8);
1220       m_comp_h_samp.ptr[i] = get_bits(4);
1221       m_comp_v_samp.ptr[i] = get_bits(4);
1222       m_comp_quant.ptr[i]  = get_bits(8);
1223     }
1224   }
1225 
1226   private void exif_enforce(bool what) {
1227 	if(!what)
1228 		throw new Exception("jpeg exif data format error");
1229   }
1230 
1231   void read_exif_marker() {
1232     uint num_left;
1233 
1234     num_left = get_bits(16);
1235 
1236     if (num_left < 2)
1237       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1238 
1239     num_left -= 2;
1240 
1241     ubyte[] data;
1242     data.length = num_left;
1243     int offset;
1244 
1245     while (num_left)
1246     {
1247       data[offset++] = cast(ubyte) get_bits(8);
1248       num_left--;
1249     }
1250 
1251     if(data.length > 4 && data[0 .. 4] == "Exif") {
1252 	data = data[4 .. $];
1253 	while(data.length && data[0] == 0)
1254 		data = data[1 .. $];
1255 	if(data.length < 8)
1256 		return; // abandon the parse, no tiff header
1257 
1258 	int offsetAdjustment = 0;
1259 
1260 	bool bigEndian = data[0] == 'M';
1261 	// should be MM or II
1262 	exif_enforce(data[0] == data[1]);
1263 	if(!bigEndian)
1264 		exif_enforce(data[0] == 'I');
1265 	data = data[2 .. $];
1266 	offsetAdjustment += 2;
1267 
1268 	uint read4() {
1269 		exif_enforce(data.length >= 4);
1270 
1271 		uint ret;
1272 		if(bigEndian) {
1273 			ret |= data[0] << 24;
1274 			ret |= data[1] << 16;
1275 			ret |= data[2] <<  8;
1276 			ret |= data[3] <<  0;
1277 		} else {
1278 			ret |= data[3] << 24;
1279 			ret |= data[2] << 16;
1280 			ret |= data[1] <<  8;
1281 			ret |= data[0] <<  0;
1282 		}
1283 
1284 		data = data[4 .. $];
1285 		offsetAdjustment += 4;
1286 		return ret;
1287 	}
1288 
1289 	ushort read2() {
1290 		exif_enforce(data.length >= 2);
1291 
1292 		ushort ret;
1293 		if(bigEndian) {
1294 			ret |= data[0] << 8;
1295 			ret |= data[1] << 0;
1296 		} else {
1297 			ret |= data[1] << 8;
1298 			ret |= data[0] << 0;
1299 		}
1300 
1301 		data = data[2 .. $];
1302 		offsetAdjustment += 2;
1303 		return ret;
1304 	}
1305 
1306 	ubyte read1() {
1307 		exif_enforce(data.length >= 1);
1308 		ubyte ret = data[0];
1309 		data = data[1 .. $];
1310 		offsetAdjustment += 1;
1311 		return ret;
1312 	}
1313 
1314 	void jumpOffset(uint offset) {
1315 		exif_enforce(offsetAdjustment <= offset);
1316 		offset -= offsetAdjustment;
1317 		data = data[offset .. $];
1318 		offsetAdjustment += offset;
1319 	}
1320 
1321 	exif_enforce(read2() == 42);
1322 
1323 	while(data.length) {
1324 		auto nextIfdOffset = read4();
1325 		if(nextIfdOffset == 0)
1326 			return;
1327 		jumpOffset(nextIfdOffset);
1328 
1329 		// reading an ifd now
1330 		auto numberOfIfdEntries = read2();
1331 		foreach(item; 0 .. numberOfIfdEntries) {
1332 			auto tagId = read2();
1333 			auto fieldType = read2();
1334 			auto countOfType = read4();
1335 			auto valueOrOffset = read4();
1336 
1337 			// https://exiftool.org/TagNames/EXIF.html
1338 
1339 			// FIXME we could read a LOT more of this, but for now all i care about is orientation lol
1340 			if(tagId == 0x0112 && fieldType == 3 && countOfType == 1) {
1341 				/+
1342 					valueOrOffset can be:
1343 
1344 					1 = Horizontal (normal)
1345 					2 = Mirror horizontal
1346 					3 = Rotate 180
1347 					4 = Mirror vertical
1348 					5 = Mirror horizontal and rotate 270 CW
1349 					6 = Rotate 90 CW
1350 					7 = Mirror horizontal and rotate 90 CW
1351 					8 = Rotate 270 CW
1352 				+/
1353 
1354 				// it stores the data inline but packed into the first bytes
1355 				// so since this is a 16 bit thing packed to the left, we want to move it
1356 				// down to right slot based on endinanness. woof but meh.
1357 				if(bigEndian) {
1358 					this.orientation = valueOrOffset >> 16;
1359 				} else {
1360 					this.orientation = valueOrOffset;
1361 				}
1362 			}
1363 
1364 			// import std.stdio; writefln("%04x %d %d %d", tagId, fieldType, countOfType, valueOrOffset);
1365 		}
1366 	}
1367     }
1368 
1369     // format: Exif\0\0<tiff file bytes here>
1370     // are those two zero bytes just padding?
1371     /+
1372 	tiff file:
1373 
1374 	II or MM for byte order
1375 	then 16 bit number 42 (0x2a 0x00)
1376 	32 bit number containing byte offset of first IFD (should prolly be 8, saying it starts right after the header)
1377 
1378 	IFD:
1379 		16 bit number of fields
1380 		12-byte entries
1381 		4 byte offset of next ifd (0 if none)
1382 
1383 	IFD entry:
1384 		16 bit tag id
1385 		16 bit field type
1386 			1 = byte
1387 			2 = ascii stringz
1388 			3 = 16 bit ushort
1389 			4 = 32 bit ulong
1390 			5 = rational; numerator then denominator
1391 
1392 			and others, see https://web.archive.org/web/20210108174645/https://www.adobe.io/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
1393 		32 bit number of values (count of the type)
1394 		32 bit value or offset (must be even number, can point anywhere in file, but if the type is 4 bytes or less it is just packed in here, left-aligned)
1395     +/
1396   }
1397 
    /++
	The exif orientation value from the file, if present (0 if it was not present).

	Values follow the exif orientation tag (0x0112): 1 = normal, 2 = mirror horizontal,
	3 = rotate 180, 4 = mirror vertical, 5 = mirror horizontal and rotate 270 CW,
	6 = rotate 90 CW, 7 = mirror horizontal and rotate 90 CW, 8 = rotate 270 CW.

	You do not have to look at this if you leave [autoRotateBasedOnExifOrientation] as the default `true` value.

	History:
		Added May 6, 2025
    +/
    public int orientation = 0;

    /++
	If true (the default), the image will have the orientation automatically applied to the pixels before returning.

	Otherwise, you must see [orientation] to know the intended look.

	History:
		Added May 7, 2025
    +/
    public bool autoRotateBasedOnExifOrientation = true;
1417 
1418   // Used to skip unrecognized markers.
1419   void skip_variable_marker () {
1420     uint num_left;
1421 
1422     num_left = get_bits(16);
1423 
1424     if (num_left < 2)
1425       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1426 
1427     num_left -= 2;
1428 
1429     while (num_left)
1430     {
1431       get_bits(8);
1432       num_left--;
1433     }
1434   }
1435 
1436   // Read a define restart interval (DRI) marker.
1437   void read_dri_marker () {
1438     if (get_bits(16) != 4)
1439       stop_decoding(JPGD_BAD_DRI_LENGTH);
1440 
1441     m_restart_interval = get_bits(16);
1442   }
1443 
1444   // Read a start of scan (SOS) marker.
1445   void read_sos_marker () {
1446     uint num_left;
1447     int i, ci, n, c, cc;
1448 
1449     num_left = get_bits(16);
1450 
1451     n = get_bits(8);
1452 
1453     m_comps_in_scan = n;
1454 
1455     num_left -= 3;
1456 
1457     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1458       stop_decoding(JPGD_BAD_SOS_LENGTH);
1459 
1460     for (i = 0; i < n; i++)
1461     {
1462       cc = get_bits(8);
1463       c = get_bits(8);
1464       num_left -= 2;
1465 
1466       for (ci = 0; ci < m_comps_in_frame; ci++)
1467         if (cc == m_comp_ident.ptr[ci])
1468           break;
1469 
1470       if (ci >= m_comps_in_frame)
1471         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1472 
1473       m_comp_list.ptr[i]    = ci;
1474       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1475       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1476     }
1477 
1478     m_spectral_start  = get_bits(8);
1479     m_spectral_end    = get_bits(8);
1480     m_successive_high = get_bits(4);
1481     m_successive_low  = get_bits(4);
1482 
1483     if (!m_progressive_flag)
1484     {
1485       m_spectral_start = 0;
1486       m_spectral_end = 63;
1487     }
1488 
1489     num_left -= 3;
1490 
1491     /* read past whatever is num_left */
1492     while (num_left)
1493     {
1494       get_bits(8);
1495       num_left--;
1496     }
1497   }
1498 
1499   // Finds the next marker.
1500   int next_marker () {
1501     uint c, bytes;
1502 
1503     bytes = 0;
1504 
1505     do
1506     {
1507       do
1508       {
1509         bytes++;
1510         c = get_bits(8);
1511       } while (c != 0xFF);
1512 
1513       do
1514       {
1515         c = get_bits(8);
1516       } while (c == 0xFF);
1517 
1518     } while (c == 0);
1519 
1520     // If bytes > 0 here, there where extra bytes before the marker (not good).
1521 
1522     return c;
1523   }
1524 
1525   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1526   // encountered.
1527   int process_markers (bool allow_restarts = false) {
1528     int c;
1529 
1530     for ( ; ; ) {
1531       c = next_marker();
1532 
1533       switch (c)
1534       {
1535         case M_SOF0:
1536         case M_SOF1:
1537         case M_SOF2:
1538         case M_SOF3:
1539         case M_SOF5:
1540         case M_SOF6:
1541         case M_SOF7:
1542         //case M_JPG:
1543         case M_SOF9:
1544         case M_SOF10:
1545         case M_SOF11:
1546         case M_SOF13:
1547         case M_SOF14:
1548         case M_SOF15:
1549         case M_SOI:
1550         case M_EOI:
1551         case M_SOS:
1552           return c;
1553         case M_DHT:
1554           read_dht_marker();
1555           break;
1556         // No arithmitic support - dumb patents!
1557         case M_DAC:
1558           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1559           break;
1560         case M_DQT:
1561           read_dqt_marker();
1562           break;
1563         case M_DRI:
1564           read_dri_marker();
1565           break;
1566 	case M_APP1: /* likely EXIF data */
1567           read_exif_marker();
1568 
1569 	break;
1570         //case M_APP0:  /* no need to read the JFIF marker */
1571 
1572         case M_RST0:    /* no parameters */
1573         case M_RST1:
1574         case M_RST2:
1575         case M_RST3:
1576         case M_RST4:
1577         case M_RST5:
1578         case M_RST6:
1579         case M_RST7:
1580 		if(allow_restarts)
1581 			continue;
1582 		else
1583 			goto case;
1584         case M_JPG:
1585         case M_TEM:
1586           stop_decoding(JPGD_UNEXPECTED_MARKER);
1587           break;
1588         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1589           skip_variable_marker();
1590           break;
1591       }
1592     }
1593 
1594     assert(0);
1595   }
1596 
1597   // Finds the start of image (SOI) marker.
  // This code is rather defensive: it only scans roughly the first 4096 bytes
  // to avoid false positives.
1600   void locate_soi_marker () {
1601     uint lastchar, thischar;
1602     uint bytesleft;
1603 
1604     lastchar = get_bits(8);
1605 
1606     thischar = get_bits(8);
1607 
1608     /* ok if it's a normal JPEG file without a special header */
1609 
1610     if ((lastchar == 0xFF) && (thischar == M_SOI))
1611       return;
1612 
1613     bytesleft = 4096; //512;
1614 
1615     for ( ; ; )
1616     {
1617       if (--bytesleft == 0)
1618         stop_decoding(JPGD_NOT_JPEG);
1619 
1620       lastchar = thischar;
1621 
1622       thischar = get_bits(8);
1623 
1624       if (lastchar == 0xFF)
1625       {
1626         if (thischar == M_SOI)
1627           break;
1628         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1629           stop_decoding(JPGD_NOT_JPEG);
1630       }
1631     }
1632 
1633     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1634     thischar = (m_bit_buf >> 24) & 0xFF;
1635 
1636     if (thischar != 0xFF)
1637       stop_decoding(JPGD_NOT_JPEG);
1638   }
1639 
1640   // Find a start of frame (SOF) marker.
1641   void locate_sof_marker () {
1642     locate_soi_marker();
1643 
1644     int c = process_markers();
1645 
1646     switch (c)
1647     {
1648       case M_SOF2:
1649         m_progressive_flag = true;
1650         goto case;
1651       case M_SOF0:  /* baseline DCT */
1652       case M_SOF1:  /* extended sequential DCT */
1653         read_sof_marker();
1654         break;
1655       case M_SOF9:  /* Arithmitic coding */
1656         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1657         break;
1658       default:
1659         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1660         break;
1661     }
1662   }
1663 
1664   // Find a start of scan (SOS) marker.
1665   int locate_sos_marker () {
1666     int c;
1667 
1668     c = process_markers();
1669 
1670     if (c == M_EOI)
1671       return false;
1672     else if (c != M_SOS)
1673       stop_decoding(JPGD_UNEXPECTED_MARKER);
1674 
1675     read_sos_marker();
1676 
1677     return true;
1678   }
1679 
1680   // Reset everything to default/uninitialized state.
1681   void initit (JpegStreamReadFunc rfn) {
1682     m_pMem_blocks = null;
1683     m_error_code = JPGD_SUCCESS;
1684     m_ready_flag = false;
1685     m_image_x_size = m_image_y_size = 0;
1686     readfn = rfn;
1687     m_progressive_flag = false;
1688 
1689     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1690     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1691     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1692     memset(m_quant.ptr, 0, m_quant.sizeof);
1693 
1694     m_scan_type = 0;
1695     m_comps_in_frame = 0;
1696 
1697     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1698     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1699     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1700     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1701     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1702     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1703 
1704     m_comps_in_scan = 0;
1705     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1706     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1707     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1708 
1709     m_spectral_start = 0;
1710     m_spectral_end = 0;
1711     m_successive_low = 0;
1712     m_successive_high = 0;
1713     m_max_mcu_x_size = 0;
1714     m_max_mcu_y_size = 0;
1715     m_blocks_per_mcu = 0;
1716     m_max_blocks_per_row = 0;
1717     m_mcus_per_row = 0;
1718     m_mcus_per_col = 0;
1719     m_expanded_blocks_per_component = 0;
1720     m_expanded_blocks_per_mcu = 0;
1721     m_expanded_blocks_per_row = 0;
1722     m_freq_domain_chroma_upsample = false;
1723 
1724     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1725 
1726     m_total_lines_left = 0;
1727     m_mcu_lines_left = 0;
1728     m_real_dest_bytes_per_scan_line = 0;
1729     m_dest_bytes_per_scan_line = 0;
1730     m_dest_bytes_per_pixel = 0;
1731 
1732     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1733 
1734     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1735     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1736     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1737 
1738     m_eob_run = 0;
1739 
1740     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1741 
1742     m_pIn_buf_ofs = m_in_buf.ptr;
1743     m_in_buf_left = 0;
1744     m_eof_flag = false;
1745     m_tem_flag = 0;
1746 
1747     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1748     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1749     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1750 
1751     m_restart_interval = 0;
1752     m_restarts_left    = 0;
1753     m_next_restart_num = 0;
1754 
1755     m_max_mcus_per_row = 0;
1756     m_max_blocks_per_mcu = 0;
1757     m_max_mcus_per_col = 0;
1758 
1759     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1760     m_pMCU_coefficients = null;
1761     m_pSample_buf = null;
1762 
1763     m_total_bytes_read = 0;
1764 
1765     m_pScan_line_0 = null;
1766     m_pScan_line_1 = null;
1767 
1768     // Ready the input buffer.
1769     prep_in_buffer();
1770 
1771     // Prime the bit buffer.
1772     m_bits_left = 16;
1773     m_bit_buf = 0;
1774 
1775     get_bits(16);
1776     get_bits(16);
1777 
1778     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1779       m_mcu_block_max_zag.ptr[i] = 64;
1780   }
1781 
1782   enum SCALEBITS = 16;
1783   enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
1784   enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1785 
1786   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1787   void create_look_ups () {
1788     for (int i = 0; i <= 255; i++)
1789     {
1790       int k = i - 128;
1791       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1792       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1793       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1794       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1795     }
1796   }
1797 
  // This method throws back into the stream any bytes that were read
  // into the bit buffer during initial marker scanning.
1800   void fix_in_buffer () {
1801     // In case any 0xFF's where pulled into the buffer during marker scanning.
1802     assert((m_bits_left & 7) == 0);
1803 
1804     if (m_bits_left == 16)
1805       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1806 
1807     if (m_bits_left >= 8)
1808       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1809 
1810     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1811     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1812 
1813     m_bits_left = 16;
1814     get_bits_no_markers(16);
1815     get_bits_no_markers(16);
1816   }
1817 
1818   void transform_mcu (int mcu_row) {
1819     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1820     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1821 
1822     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1823     {
1824       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1825       pSrc_ptr += 64;
1826       pDst_ptr += 64;
1827     }
1828   }
1829 
1830   static immutable ubyte[64] s_max_rc = [
1831     17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1832     102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1833     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1834     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1835   ];
1836 
  void transform_mcu_expand (int mcu_row) {
    // Inverse-transforms the current MCU into the "expanded" sample layout:
    // the first m_expanded_blocks_per_component blocks go through the regular
    // IDCT, and each of the two following source blocks is turned into four
    // output blocks (upsampling done on the coefficients, see below).
    // `mcu_row` selects which MCU slot of the sample buffer is written.
    jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
    ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;

    // Y IDCT
    int mcu_block;
    for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
    {
      idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
      pSrc_ptr += 64;
      pDst_ptr += 64;
    }

    // Chroma IDCT, with upsampling
    // Scratch block reused for each of the four outputs per chroma block.
    jpgd_block_t[64] temp_block;

    // Two chroma source blocks follow the luma blocks; mcu_block keeps
    // counting from where the loop above stopped.
    for (int i = 0; i < 2; i++)
    {
      DCT_Upsample.Matrix44 P, Q, R, S;

      assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
      assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);

      // Convert the 1-based "coefficients in use" count to a 0-based index into s_max_rc.
      int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
      if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
      // s_max_rc packs two small numbers as a*16 + b; each case forwards them
      // as the template arguments of P_Q / R_S, so the specialization that is
      // called depends on how many coefficients the block actually uses.
      switch (s_max_rc.ptr[max_zag])
      {
      case 1*16+1:
        DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
        break;
      case 1*16+2:
        DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
        break;
      case 2*16+2:
        DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+2:
        DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
        break;
      case 3*16+3:
        DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
        break;
      case 3*16+4:
        DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
        break;
      case 4*16+4:
        DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+4:
        DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
        break;
      case 5*16+5:
        DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
        break;
      case 5*16+6:
        DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
        break;
      case 6*16+6:
        DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+6:
        DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
        break;
      case 7*16+7:
        DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
        break;
      case 7*16+8:
        DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
        break;
      case 8*16+8:
        DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
        DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
        break;
      default:
        // s_max_rc only contains the values handled above.
        assert(false);
      }

      // Form a = P+Q, b = P-Q, c = R+S, d = R-S. P and R are overwritten in
      // place, so b and d are simply pointers to them.
      auto a = DCT_Upsample.Matrix44(P + Q);
      P -= Q;
      DCT_Upsample.Matrix44* b = &P;
      auto c = DCT_Upsample.Matrix44(R + S);
      R -= S;
      DCT_Upsample.Matrix44* d = &R;

      // The four output blocks come from a+c, a-c, b+d and b-d, in that order,
      // each run through the 4x4 IDCT into its own 64-sample slot.
      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
      idct_4x4(temp_block.ptr, pDst_ptr);
      pDst_ptr += 64;

      // Advance to the next chroma source block.
      pSrc_ptr += 64;
    }
  }
1954 
1955   // Loads and dequantizes the next row of (already decoded) coefficients.
1956   // Progressive images only.
1957   void load_next_row () {
1958     int i;
1959     jpgd_block_t *p;
1960     jpgd_quant_t *q;
1961     int mcu_row, mcu_block, row_block = 0;
1962     int component_num, component_id;
1963     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1964 
1965     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1966 
1967     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1968     {
1969       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1970 
1971       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1972       {
1973         component_id = m_mcu_org.ptr[mcu_block];
1974         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1975 
1976         p = m_pMCU_coefficients + 64 * mcu_block;
1977 
1978         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1979         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1980         p[0] = pDC[0];
1981         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1982 
1983         for (i = 63; i > 0; i--)
1984           if (p[g_ZAG[i]])
1985             break;
1986 
1987         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1988 
1989         for ( ; i >= 0; i--)
1990           if (p[g_ZAG[i]])
1991             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1992 
1993         row_block++;
1994 
1995         if (m_comps_in_scan == 1)
1996           block_x_mcu.ptr[component_id]++;
1997         else
1998         {
1999           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
2000           {
2001             block_x_mcu_ofs = 0;
2002 
2003             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
2004             {
2005               block_y_mcu_ofs = 0;
2006 
2007               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
2008             }
2009           }
2010         }
2011       }
2012 
2013       if (m_freq_domain_chroma_upsample)
2014         transform_mcu_expand(mcu_row);
2015       else
2016         transform_mcu(mcu_row);
2017     }
2018 
2019     if (m_comps_in_scan == 1)
2020       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
2021     else
2022     {
2023       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2024       {
2025         component_id = m_comp_list.ptr[component_num];
2026 
2027         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
2028       }
2029     }
2030   }
2031 
2032   // Restart interval processing.
2033   void process_restart () {
2034     int i;
2035     int c = 0;
2036 
2037     // Align to a byte boundry
2038     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
2039     //get_bits_no_markers(m_bits_left & 7);
2040 
2041     // Let's scan a little bit to find the marker, but not _too_ far.
2042     // 1536 is a "fudge factor" that determines how much to scan.
2043     for (i = 1536; i > 0; i--)
2044       if (get_char() == 0xFF)
2045         break;
2046 
2047     if (i == 0)
2048       stop_decoding(JPGD_BAD_RESTART_MARKER);
2049 
2050     for ( ; i > 0; i--)
2051       if ((c = get_char()) != 0xFF)
2052         break;
2053 
2054     if (i == 0)
2055       stop_decoding(JPGD_BAD_RESTART_MARKER);
2056 
2057     // Is it the expected marker? If not, something bad happened.
2058     if (c != (m_next_restart_num + M_RST0))
2059       stop_decoding(JPGD_BAD_RESTART_MARKER);
2060 
2061     // Reset each component's DC prediction values.
2062     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
2063 
2064     m_eob_run = 0;
2065 
2066     m_restarts_left = m_restart_interval;
2067 
2068     m_next_restart_num = (m_next_restart_num + 1) & 7;
2069 
2070     // Get the bit buffer going again...
2071 
2072     m_bits_left = 16;
2073     get_bits_no_markers(16);
2074     get_bits_no_markers(16);
2075   }
2076 
2077   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
2078 
2079   // Decodes and dequantizes the next row of coefficients.
  void decode_next_row () {
    // Decodes one row of MCUs. For every MCU, each block's coefficients are
    // Huffman-decoded and dequantized into m_pMCU_coefficients, after which
    // the MCU is inverse-transformed straight away.
    // row_block is only counted here; this function never reads it back.
    int row_block = 0;

    for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
    {
      // With a restart interval in effect, resynchronise on the restart
      // marker once the per-interval counter has run out.
      if ((m_restart_interval) && (m_restarts_left == 0))
        process_restart();

      jpgd_block_t* p = m_pMCU_coefficients;
      for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
      {
        int component_id = m_mcu_org.ptr[mcu_block];
        jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];

        // DC term: the decoded value is a difference that is added to the
        // previous DC value of the same component.
        int r, s;
        s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
        s = JPGD_HUFF_EXTEND(r, s);

        m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);

        p[0] = cast(jpgd_block_t)(s * q[0]);

        // Slot count recorded the last time this block buffer was filled.
        // Slots below this bound may hold stale values, so any of them that
        // this block skips must be zeroed explicitly.
        int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];

        huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];

        // AC terms, walked in zigzag order via g_ZAG.
        int k;
        for (k = 1; k < 64; k++)
        {
          int extra_bits;
          s = huff_decode(pH, extra_bits);

          // Split the symbol into its high nibble (r) and low nibble (s).
          // NOTE(review): presumably run-length and size category as in baseline JPEG — confirm.
          r = s >> 4;
          s &= 15;

          if (s)
          {
            // A coefficient value follows, preceded by r skipped positions.
            if (r)
            {
              if ((k + r) > 63)
                stop_decoding(JPGD_DECODE_ERROR);

              // Zero only the skipped slots that fall below prev_num_set.
              if (k < prev_num_set)
              {
                int n = JPGD_MIN(r, prev_num_set - k);
                int kt = k;
                while (n--)
                  p[g_ZAG[kt++]] = 0;
              }

              k += r;
            }

            s = JPGD_HUFF_EXTEND(extra_bits, s);

            assert(k < 64);

            p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
          }
          else
          {
            // s == 0: either a skip of 16 positions (r == 15) or the end of this block.
            if (r == 15)
            {
              if ((k + 16) > 64)
                stop_decoding(JPGD_DECODE_ERROR);

              if (k < prev_num_set)
              {
                int n = JPGD_MIN(16, prev_num_set - k);
                int kt = k;
                while (n--)
                {
                  assert(kt <= 63);
                  p[g_ZAG[kt++]] = 0;
                }
              }

              k += 16 - 1; // - 1 because the loop counter is k
              assert(p[g_ZAG[k]] == 0);
            }
            else
              break;
          }
        }

        // Clear whatever is left over between the end of this block's data
        // and the previously recorded slot count.
        if (k < prev_num_set)
        {
          int kt = k;
          while (kt < prev_num_set)
            p[g_ZAG[kt++]] = 0;
        }

        // Record the new slot count for the IDCT and for the next pass over this buffer.
        m_mcu_block_max_zag.ptr[mcu_block] = k;

        row_block++;
      }

      if (m_freq_domain_chroma_upsample)
        transform_mcu_expand(mcu_row);
      else
        transform_mcu(mcu_row);

      // Decremented for every MCU, whether or not a restart interval is set.
      m_restarts_left--;
    }
  }
2185 
2186   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
2187   void H1V1Convert () {
2188     int row = m_max_mcu_y_size - m_mcu_lines_left;
2189     ubyte *d = m_pScan_line_0;
2190     ubyte *s = m_pSample_buf + row * 8;
2191 
2192     for (int i = m_max_mcus_per_row; i > 0; i--)
2193     {
2194       for (int j = 0; j < 8; j++)
2195       {
2196         int y = s[j];
2197         int cb = s[64+j];
2198         int cr = s[128+j];
2199 
2200         d[0] = clamp(y + m_crr.ptr[cr]);
2201         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2202         d[2] = clamp(y + m_cbb.ptr[cb]);
2203         d[3] = 255;
2204 
2205         d += 4;
2206       }
2207 
2208       s += 64*3;
2209     }
2210   }
2211 
2212   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2213   void H2V1Convert () {
2214     int row = m_max_mcu_y_size - m_mcu_lines_left;
2215     ubyte *d0 = m_pScan_line_0;
2216     ubyte *y = m_pSample_buf + row * 8;
2217     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2218 
2219     for (int i = m_max_mcus_per_row; i > 0; i--)
2220     {
2221       for (int l = 0; l < 2; l++)
2222       {
2223         for (int j = 0; j < 4; j++)
2224         {
2225           int cb = c[0];
2226           int cr = c[64];
2227 
2228           int rc = m_crr.ptr[cr];
2229           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2230           int bc = m_cbb.ptr[cb];
2231 
2232           int yy = y[j<<1];
2233           d0[0] = clamp(yy+rc);
2234           d0[1] = clamp(yy+gc);
2235           d0[2] = clamp(yy+bc);
2236           d0[3] = 255;
2237 
2238           yy = y[(j<<1)+1];
2239           d0[4] = clamp(yy+rc);
2240           d0[5] = clamp(yy+gc);
2241           d0[6] = clamp(yy+bc);
2242           d0[7] = 255;
2243 
2244           d0 += 8;
2245 
2246           c++;
2247         }
2248         y += 64;
2249       }
2250 
2251       y += 64*4 - 64*2;
2252       c += 64*4 - 8;
2253     }
2254   }
2255 
  // YCbCr H1V2 (1x2:1:1, 4 m_blocks per MCU) to RGB
2257   void H1V2Convert () {
2258     int row = m_max_mcu_y_size - m_mcu_lines_left;
2259     ubyte *d0 = m_pScan_line_0;
2260     ubyte *d1 = m_pScan_line_1;
2261     ubyte *y;
2262     ubyte *c;
2263 
2264     if (row < 8)
2265       y = m_pSample_buf + row * 8;
2266     else
2267       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2268 
2269     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2270 
2271     for (int i = m_max_mcus_per_row; i > 0; i--)
2272     {
2273       for (int j = 0; j < 8; j++)
2274       {
2275         int cb = c[0+j];
2276         int cr = c[64+j];
2277 
2278         int rc = m_crr.ptr[cr];
2279         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2280         int bc = m_cbb.ptr[cb];
2281 
2282         int yy = y[j];
2283         d0[0] = clamp(yy+rc);
2284         d0[1] = clamp(yy+gc);
2285         d0[2] = clamp(yy+bc);
2286         d0[3] = 255;
2287 
2288         yy = y[8+j];
2289         d1[0] = clamp(yy+rc);
2290         d1[1] = clamp(yy+gc);
2291         d1[2] = clamp(yy+bc);
2292         d1[3] = 255;
2293 
2294         d0 += 4;
2295         d1 += 4;
2296       }
2297 
2298       y += 64*4;
2299       c += 64*4;
2300     }
2301   }
2302 
2303   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2304   void H2V2Convert () {
2305     int row = m_max_mcu_y_size - m_mcu_lines_left;
2306     ubyte *d0 = m_pScan_line_0;
2307     ubyte *d1 = m_pScan_line_1;
2308     ubyte *y;
2309     ubyte *c;
2310 
2311     if (row < 8)
2312       y = m_pSample_buf + row * 8;
2313     else
2314       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2315 
2316     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2317 
2318     for (int i = m_max_mcus_per_row; i > 0; i--)
2319     {
2320       for (int l = 0; l < 2; l++)
2321       {
2322         for (int j = 0; j < 8; j += 2)
2323         {
2324           int cb = c[0];
2325           int cr = c[64];
2326 
2327           int rc = m_crr.ptr[cr];
2328           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2329           int bc = m_cbb.ptr[cb];
2330 
2331           int yy = y[j];
2332           d0[0] = clamp(yy+rc);
2333           d0[1] = clamp(yy+gc);
2334           d0[2] = clamp(yy+bc);
2335           d0[3] = 255;
2336 
2337           yy = y[j+1];
2338           d0[4] = clamp(yy+rc);
2339           d0[5] = clamp(yy+gc);
2340           d0[6] = clamp(yy+bc);
2341           d0[7] = 255;
2342 
2343           yy = y[j+8];
2344           d1[0] = clamp(yy+rc);
2345           d1[1] = clamp(yy+gc);
2346           d1[2] = clamp(yy+bc);
2347           d1[3] = 255;
2348 
2349           yy = y[j+8+1];
2350           d1[4] = clamp(yy+rc);
2351           d1[5] = clamp(yy+gc);
2352           d1[6] = clamp(yy+bc);
2353           d1[7] = 255;
2354 
2355           d0 += 8;
2356           d1 += 8;
2357 
2358           c++;
2359         }
2360         y += 64;
2361       }
2362 
2363       y += 64*6 - 64*2;
2364       c += 64*6 - 8;
2365     }
2366   }
2367 
2368   // Y (1 block per MCU) to 8-bit grayscale
2369   void gray_convert () {
2370     int row = m_max_mcu_y_size - m_mcu_lines_left;
2371     ubyte *d = m_pScan_line_0;
2372     ubyte *s = m_pSample_buf + row * 8;
2373 
2374     for (int i = m_max_mcus_per_row; i > 0; i--)
2375     {
2376       *cast(uint*)d = *cast(uint*)s;
2377       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2378 
2379       s += 64;
2380       d += 8;
2381     }
2382   }
2383 
2384   void expanded_convert () {
2385     int row = m_max_mcu_y_size - m_mcu_lines_left;
2386 
2387     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2388 
2389     ubyte* d = m_pScan_line_0;
2390 
2391     for (int i = m_max_mcus_per_row; i > 0; i--)
2392     {
2393       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2394       {
2395         immutable int Y_ofs = k * 8;
2396         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2397         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2398         for (int j = 0; j < 8; j++)
2399         {
2400           int y = Py[Y_ofs + j];
2401           int cb = Py[Cb_ofs + j];
2402           int cr = Py[Cr_ofs + j];
2403 
2404           d[0] = clamp(y + m_crr.ptr[cr]);
2405           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2406           d[2] = clamp(y + m_cbb.ptr[cb]);
2407           d[3] = 255;
2408 
2409           d += 4;
2410         }
2411       }
2412 
2413       Py += 64 * m_expanded_blocks_per_mcu;
2414     }
2415   }
2416 
2417   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2418   void find_eoi () {
2419     if (!m_progressive_flag)
2420     {
2421       // Attempt to read the EOI marker.
2422       //get_bits_no_markers(m_bits_left & 7);
2423 
2424       // Prime the bit buffer
2425       m_bits_left = 16;
2426       get_bits(16);
2427       get_bits(16);
2428 
2429       // The next marker _should_ be EOI
2430       process_markers(true); // but restarts are allowed as we can harmlessly skip them at the end of the stream
2431     }
2432 
2433     m_total_bytes_read -= m_in_buf_left;
2434   }
2435 
2436   // Creates the tables needed for efficient Huffman decoding.
2437   void make_huff_table (int index, huff_tables *pH) {
2438     int p, i, l, si;
2439     ubyte[257] huffsize;
2440     uint[257] huffcode;
2441     uint code;
2442     uint subtree;
2443     int code_size;
2444     int lastp;
2445     int nextfreeentry;
2446     int currententry;
2447 
2448     pH.ac_table = m_huff_ac.ptr[index] != 0;
2449 
2450     p = 0;
2451 
2452     for (l = 1; l <= 16; l++)
2453     {
2454       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2455         huffsize.ptr[p++] = cast(ubyte)(l);
2456     }
2457 
2458     huffsize.ptr[p] = 0;
2459 
2460     lastp = p;
2461 
2462     code = 0;
2463     si = huffsize.ptr[0];
2464     p = 0;
2465 
2466     while (huffsize.ptr[p])
2467     {
2468       while (huffsize.ptr[p] == si)
2469       {
2470         huffcode.ptr[p++] = code;
2471         code++;
2472       }
2473 
2474       code <<= 1;
2475       si++;
2476     }
2477 
2478     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2479     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2480     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2481     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2482 
2483     nextfreeentry = -1;
2484 
2485     p = 0;
2486 
2487     while (p < lastp)
2488     {
2489       i = m_huff_val.ptr[index][p];
2490       code = huffcode.ptr[p];
2491       code_size = huffsize.ptr[p];
2492 
2493       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2494 
2495       if (code_size <= 8)
2496       {
2497         code <<= (8 - code_size);
2498 
2499         for (l = 1 << (8 - code_size); l > 0; l--)
2500         {
2501           assert(i < 256);
2502 
2503           pH.look_up.ptr[code] = i;
2504 
2505           bool has_extrabits = false;
2506           int extra_bits = 0;
2507           int num_extra_bits = i & 15;
2508 
2509           int bits_to_fetch = code_size;
2510           if (num_extra_bits)
2511           {
2512             int total_codesize = code_size + num_extra_bits;
2513             if (total_codesize <= 8)
2514             {
2515               has_extrabits = true;
2516               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2517               assert(extra_bits <= 0x7FFF);
2518               bits_to_fetch += num_extra_bits;
2519             }
2520           }
2521 
2522           if (!has_extrabits)
2523             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2524           else
2525             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2526 
2527           code++;
2528         }
2529       }
2530       else
2531       {
2532         subtree = (code >> (code_size - 8)) & 0xFF;
2533 
2534         currententry = pH.look_up.ptr[subtree];
2535 
2536         if (currententry == 0)
2537         {
2538           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2539           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2540 
2541           nextfreeentry -= 2;
2542         }
2543 
2544         code <<= (16 - (code_size - 8));
2545 
2546         for (l = code_size; l > 9; l--)
2547         {
2548           if ((code & 0x8000) == 0)
2549             currententry--;
2550 
2551           if (pH.tree.ptr[-currententry - 1] == 0)
2552           {
2553             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2554 
2555             currententry = nextfreeentry;
2556 
2557             nextfreeentry -= 2;
2558           }
2559           else
2560             currententry = pH.tree.ptr[-currententry - 1];
2561 
2562           code <<= 1;
2563         }
2564 
2565         if ((code & 0x8000) == 0)
2566           currententry--;
2567 
2568         pH.tree.ptr[-currententry - 1] = i;
2569       }
2570 
2571       p++;
2572     }
2573   }
2574 
2575   // Verifies the quantization tables needed for this scan are available.
2576   void check_quant_tables () {
2577     for (int i = 0; i < m_comps_in_scan; i++)
2578       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2579         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2580   }
2581 
2582   // Verifies that all the Huffman tables needed for this scan are available.
2583   void check_huff_tables () {
2584     for (int i = 0; i < m_comps_in_scan; i++)
2585     {
2586       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2587         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2588 
2589       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2590         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2591     }
2592 
2593     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2594       if (m_huff_num.ptr[i])
2595       {
2596         if (!m_pHuff_tabs.ptr[i])
2597           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2598 
2599         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2600       }
2601   }
2602 
2603   // Determines the component order inside each MCU.
2604   // Also calcs how many MCU's are on each row, etc.
2605   void calc_mcu_block_order () {
2606     int component_num, component_id;
2607     int max_h_samp = 0, max_v_samp = 0;
2608 
2609     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2610     {
2611       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2612         max_h_samp = m_comp_h_samp.ptr[component_id];
2613 
2614       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2615         max_v_samp = m_comp_v_samp.ptr[component_id];
2616     }
2617 
2618     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2619     {
2620       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2621       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2622     }
2623 
2624     if (m_comps_in_scan == 1)
2625     {
2626       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2627       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2628     }
2629     else
2630     {
2631       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2632       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2633     }
2634 
2635     if (m_comps_in_scan == 1)
2636     {
2637       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2638 
2639       m_blocks_per_mcu = 1;
2640     }
2641     else
2642     {
2643       m_blocks_per_mcu = 0;
2644 
2645       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2646       {
2647         int num_blocks;
2648 
2649         component_id = m_comp_list.ptr[component_num];
2650 
2651         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2652 
2653         while (num_blocks--)
2654           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2655       }
2656     }
2657   }
2658 
2659   // Starts a new scan.
2660   int init_scan () {
2661     if (!locate_sos_marker())
2662       return false;
2663 
2664     calc_mcu_block_order();
2665 
2666     check_huff_tables();
2667 
2668     check_quant_tables();
2669 
2670     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2671 
2672     m_eob_run = 0;
2673 
2674     if (m_restart_interval)
2675     {
2676       m_restarts_left = m_restart_interval;
2677       m_next_restart_num = 0;
2678     }
2679 
2680     fix_in_buffer();
2681 
2682     return true;
2683   }
2684 
2685   // Starts a frame. Determines if the number of components or sampling factors
2686   // are supported.
2687   void init_frame () {
2688     int i;
2689 
2690     if (m_comps_in_frame == 1)
2691     {
2692       version(jpegd_test) {{ import std.stdio; stderr.writeln("m_comp_h_samp=", m_comp_h_samp.ptr[0], "; m_comp_v_samp=", m_comp_v_samp.ptr[0]); }}
2693 
2694       //if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
2695       //  stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2696 
2697       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2698       {
2699         m_scan_type = JPGD_GRAYSCALE;
2700         m_max_blocks_per_mcu = 1;
2701         m_max_mcu_x_size = 8;
2702         m_max_mcu_y_size = 8;
2703       }
2704       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2705       {
2706         //k8: i added this, and i absolutely don't know what it means; but it decoded two sample images i found
2707         m_scan_type = JPGD_GRAYSCALE;
2708         m_max_blocks_per_mcu = 4;
2709         m_max_mcu_x_size = 8;
2710         m_max_mcu_y_size = 8;
2711       }
2712       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2713       {
2714       	// adr added this. idk if it is right seems wrong since it the same as above but..... meh ship it.
2715         m_scan_type = JPGD_GRAYSCALE;
2716         m_max_blocks_per_mcu = 4;
2717         m_max_mcu_x_size = 8;
2718         m_max_mcu_y_size = 8;
2719       }
2720       else {
2721       // code -231 brings us here
2722       //import std.conv;
2723       //assert(0, to!string(m_comp_h_samp) ~ to!string(m_comp_v_samp));
2724         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2725       }
2726     }
2727     else if (m_comps_in_frame == 3)
2728     {
2729       if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
2730            ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
2731         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2732 
2733       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2734       {
2735         m_scan_type = JPGD_YH1V1;
2736 
2737         m_max_blocks_per_mcu = 3;
2738         m_max_mcu_x_size = 8;
2739         m_max_mcu_y_size = 8;
2740       }
2741       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2742       {
2743         m_scan_type = JPGD_YH2V1;
2744         m_max_blocks_per_mcu = 4;
2745         m_max_mcu_x_size = 16;
2746         m_max_mcu_y_size = 8;
2747       }
2748       else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
2749       {
2750         m_scan_type = JPGD_YH1V2;
2751         m_max_blocks_per_mcu = 4;
2752         m_max_mcu_x_size = 8;
2753         m_max_mcu_y_size = 16;
2754       }
2755       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2756       {
2757         m_scan_type = JPGD_YH2V2;
2758         m_max_blocks_per_mcu = 6;
2759         m_max_mcu_x_size = 16;
2760         m_max_mcu_y_size = 16;
2761       }
2762       else
2763         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2764     }
2765     else
2766       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2767 
2768     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2769     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2770 
2771     // These values are for the *destination* pixels: after conversion.
2772     if (m_scan_type == JPGD_GRAYSCALE)
2773       m_dest_bytes_per_pixel = 1;
2774     else
2775       m_dest_bytes_per_pixel = 4;
2776 
2777     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2778 
2779     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2780 
2781     // Initialize two scan line buffers.
2782     m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2783     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2784       m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2785 
2786     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2787 
2788     // Should never happen
2789     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2790       stop_decoding(JPGD_ASSERTION_ERROR);
2791 
2792     // Allocate the coefficient buffer, enough for one MCU
2793     m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);
2794 
2795     for (i = 0; i < m_max_blocks_per_mcu; i++)
2796       m_mcu_block_max_zag.ptr[i] = 64;
2797 
2798     m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
2799     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2800     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2801     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2802     m_freq_domain_chroma_upsample = false;
2803     version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
2804       m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2805     }
2806 
2807     if (m_freq_domain_chroma_upsample)
2808       m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
2809     else
2810       m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);
2811 
2812     m_total_lines_left = m_image_y_size;
2813 
2814     m_mcu_lines_left = 0;
2815 
2816     create_look_ups();
2817   }
2818 
2819   // The coeff_buf series of methods originally stored the coefficients
2820   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2821   // was used to make this process more efficient. Now, we can store the entire
2822   // thing in RAM.
2823   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2824     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2825 
2826     cb.block_num_x = block_num_x;
2827     cb.block_num_y = block_num_y;
2828     cb.block_len_x = block_len_x;
2829     cb.block_len_y = block_len_y;
2830     cb.block_size = cast(int)((block_len_x * block_len_y) * jpgd_block_t.sizeof);
2831     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2832     return cb;
2833   }
2834 
2835   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2836     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2837     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2838   }
2839 
2840   // The following methods decode the various types of m_blocks encountered
2841   // in progressively encoded images.
2842   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2843     int s, r;
2844     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2845 
2846     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2847     {
2848       r = pD.get_bits_no_markers(s);
2849       s = JPGD_HUFF_EXTEND(r, s);
2850     }
2851 
2852     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2853 
2854     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2855   }
2856 
2857   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2858     if (pD.get_bits_no_markers(1))
2859     {
2860       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2861 
2862       p[0] |= (1 << pD.m_successive_low);
2863     }
2864   }
2865 
2866   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2867     int k, s, r;
2868 
2869     if (pD.m_eob_run)
2870     {
2871       pD.m_eob_run--;
2872       return;
2873     }
2874 
2875     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2876 
2877     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2878     {
2879       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2880 
2881       r = s >> 4;
2882       s &= 15;
2883 
2884       if (s)
2885       {
2886         if ((k += r) > 63)
2887           pD.stop_decoding(JPGD_DECODE_ERROR);
2888 
2889         r = pD.get_bits_no_markers(s);
2890         s = JPGD_HUFF_EXTEND(r, s);
2891 
2892         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2893       }
2894       else
2895       {
2896         if (r == 15)
2897         {
2898           if ((k += 15) > 63)
2899             pD.stop_decoding(JPGD_DECODE_ERROR);
2900         }
2901         else
2902         {
2903           pD.m_eob_run = 1 << r;
2904 
2905           if (r)
2906             pD.m_eob_run += pD.get_bits_no_markers(r);
2907 
2908           pD.m_eob_run--;
2909 
2910           break;
2911         }
2912       }
2913     }
2914   }
2915 
2916   static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2917     int s, k, r;
2918     int p1 = 1 << pD.m_successive_low;
2919     int m1 = (-1) << pD.m_successive_low;
2920     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2921 
2922     assert(pD.m_spectral_end <= 63);
2923 
2924     k = pD.m_spectral_start;
2925 
2926     if (pD.m_eob_run == 0)
2927     {
2928       for ( ; k <= pD.m_spectral_end; k++)
2929       {
2930         s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2931 
2932         r = s >> 4;
2933         s &= 15;
2934 
2935         if (s)
2936         {
2937           if (s != 1)
2938             pD.stop_decoding(JPGD_DECODE_ERROR);
2939 
2940           if (pD.get_bits_no_markers(1))
2941             s = p1;
2942           else
2943             s = m1;
2944         }
2945         else
2946         {
2947           if (r != 15)
2948           {
2949             pD.m_eob_run = 1 << r;
2950 
2951             if (r)
2952               pD.m_eob_run += pD.get_bits_no_markers(r);
2953 
2954             break;
2955           }
2956         }
2957 
2958         do
2959         {
2960           jpgd_block_t *this_coef = p + g_ZAG[k & 63];
2961 
2962           if (*this_coef != 0)
2963           {
2964             if (pD.get_bits_no_markers(1))
2965             {
2966               if ((*this_coef & p1) == 0)
2967               {
2968                 if (*this_coef >= 0)
2969                   *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2970                 else
2971                   *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2972               }
2973             }
2974           }
2975           else
2976           {
2977             if (--r < 0)
2978               break;
2979           }
2980 
2981           k++;
2982 
2983         } while (k <= pD.m_spectral_end);
2984 
2985         if ((s) && (k < 64))
2986         {
2987           p[g_ZAG[k]] = cast(jpgd_block_t)(s);
2988         }
2989       }
2990     }
2991 
2992     if (pD.m_eob_run > 0)
2993     {
2994       for ( ; k <= pD.m_spectral_end; k++)
2995       {
2996         jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
2997 
2998         if (*this_coef != 0)
2999         {
3000           if (pD.get_bits_no_markers(1))
3001           {
3002             if ((*this_coef & p1) == 0)
3003             {
3004               if (*this_coef >= 0)
3005                 *this_coef = cast(jpgd_block_t)(*this_coef + p1);
3006               else
3007                 *this_coef = cast(jpgd_block_t)(*this_coef + m1);
3008             }
3009           }
3010         }
3011       }
3012 
3013       pD.m_eob_run--;
3014     }
3015   }
3016 
3017   // Decode a scan in a progressively encoded image.
3018   void decode_scan (pDecode_block_func decode_block_func) {
3019     int mcu_row, mcu_col, mcu_block;
3020     int[JPGD_MAX_COMPONENTS] block_x_mcu;
3021     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
3022 
3023     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
3024 
3025     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
3026     {
3027       int component_num, component_id;
3028 
3029       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
3030 
3031       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
3032       {
3033         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
3034 
3035         if ((m_restart_interval) && (m_restarts_left == 0))
3036           process_restart();
3037 
3038         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
3039         {
3040           component_id = m_mcu_org.ptr[mcu_block];
3041 
3042           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
3043 
3044           if (m_comps_in_scan == 1)
3045             block_x_mcu.ptr[component_id]++;
3046           else
3047           {
3048             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
3049             {
3050               block_x_mcu_ofs = 0;
3051 
3052               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
3053               {
3054                 block_y_mcu_ofs = 0;
3055                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
3056               }
3057             }
3058           }
3059         }
3060 
3061         m_restarts_left--;
3062       }
3063 
3064       if (m_comps_in_scan == 1)
3065         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
3066       else
3067       {
3068         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
3069         {
3070           component_id = m_comp_list.ptr[component_num];
3071           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
3072         }
3073       }
3074     }
3075   }
3076 
3077   // Decode a progressively encoded image.
3078   void init_progressive () {
3079     int i;
3080 
3081     if (m_comps_in_frame == 4)
3082       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
3083 
3084     // Allocate the coefficient buffers.
3085     for (i = 0; i < m_comps_in_frame; i++)
3086     {
3087       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
3088       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
3089     }
3090 
3091     for ( ; ; )
3092     {
3093       int dc_only_scan, refinement_scan;
3094       pDecode_block_func decode_block_func;
3095 
3096       if (!init_scan())
3097         break;
3098 
3099       dc_only_scan = (m_spectral_start == 0);
3100       refinement_scan = (m_successive_high != 0);
3101 
3102       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
3103         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3104 
3105       if (dc_only_scan)
3106       {
3107         if (m_spectral_end)
3108           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3109       }
3110       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
3111         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3112 
3113       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
3114         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
3115 
3116       if (dc_only_scan)
3117       {
3118         if (refinement_scan)
3119           decode_block_func = &decode_block_dc_refine;
3120         else
3121           decode_block_func = &decode_block_dc_first;
3122       }
3123       else
3124       {
3125         if (refinement_scan)
3126           decode_block_func = &decode_block_ac_refine;
3127         else
3128           decode_block_func = &decode_block_ac_first;
3129       }
3130 
3131       decode_scan(decode_block_func);
3132 
3133       m_bits_left = 16;
3134       get_bits(16);
3135       get_bits(16);
3136     }
3137 
3138     m_comps_in_scan = m_comps_in_frame;
3139 
3140     for (i = 0; i < m_comps_in_frame; i++)
3141       m_comp_list.ptr[i] = i;
3142 
3143     calc_mcu_block_order();
3144   }
3145 
3146   void init_sequential () {
3147     if (!init_scan())
3148       stop_decoding(JPGD_UNEXPECTED_MARKER);
3149   }
3150 
3151   void decode_start () {
3152     init_frame();
3153 
3154     if (m_progressive_flag)
3155       init_progressive();
3156     else
3157       init_sequential();
3158   }
3159 
3160   void decode_init (JpegStreamReadFunc rfn) {
3161     initit(rfn);
3162     locate_sof_marker();
3163   }
3164 }
3165 
3166 
3167 // ////////////////////////////////////////////////////////////////////////// //
/// Reads a JPEG image header from the stream behind `rfn` and reports the
/// image dimensions and the number of components.
/// Returns `false` if `rfn` is null or the decoder reports an error (most
/// likely: not a JPEG); the `out` parameters then keep their initial zeros.
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  if (rfn is null) return false;

  auto decoder = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable bool ok = (decoder.error_code == JPGD_SUCCESS);
  if (ok) {
    width = decoder.width;
    height = decoder.height;
    actual_comps = decoder.num_components;
  }
  return ok;
}
3180 
3181 
3182 // ////////////////////////////////////////////////////////////////////////// //
/// Reads a JPEG image header from the file `filename` and reports the image
/// dimensions and the number of components.
/// Returns `false` if the image is not JPEG (i hope).
/// Throws: `Exception` when `filename` is empty or the file cannot be opened.
public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a zero-terminated name: short names are copied to the
  // stack, long ones to a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    buffer[0..filename.length] = filename[];
    buffer[filename.length] = 0;
    m_pFile = fopen(buffer.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    char* tfn = cast(char*)malloc(filename.length+1);
    // test the raw pointer: a slice of a null pointer with a non-zero
    // length does not compare as `null`
    if (tfn !is null) {
      scope(exit) free(tfn);
      // the name has to be copied in before the buffer is handed to fopen()
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return detect_jpeg_image_from_stream(
    // read callback: -1 on error, 0 with *pEOF_flag set once the end of the
    // file was reached, otherwise the number of bytes read
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // a short read is either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps);
}
3231 
3232 
3233 // ////////////////////////////////////////////////////////////////////////// //
/// Reads a JPEG image header from the memory buffer `buf` and reports the
/// image dimensions and the number of components.
/// Returns `false` if the image is not JPEG (i hope).
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  size_t consumed; // number of bytes of `buf` already handed to the decoder

  // read callback: copies the next chunk of `buf`; reports EOF when nothing is left
  int feed (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
    import core.stdc.string : memcpy;
    if (consumed >= buf.length) {
      *pEOF_flag = true;
      return 0;
    }
    immutable size_t remaining = buf.length-consumed;
    if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;
    memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+consumed, max_bytes_to_read);
    consumed += max_bytes_to_read;
    return max_bytes_to_read;
  }

  return detect_jpeg_image_from_stream(&feed, width, height, actual_comps);
}
3252 
3253 
3254 // ////////////////////////////////////////////////////////////////////////// //
/// Decompresses a JPEG image read through `rfn`.
/// `req_comps` selects the number of components per output pixel: 1, 3 or 4,
/// or -1 (the default) to use the component count of the image itself.
/// Returns the pixel data, or `null` on a null `rfn`, an unsupported
/// `req_comps`, or a decoder error. With `useMalloc` the result is allocated
/// with `jpgd_malloc` instead of the GC.
public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.string : memcpy;

  if (rfn is null) return null;
  switch (req_comps) {
    case -1, 1, 3, 4: break;
    default: return null;
  }

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  immutable int dst_bpl = image_width*req_comps; // bytes per output line

  static if (useMalloc) {
    ubyte* pImage_data = cast(ubyte*)jpgd_malloc(dst_bpl*image_height);
    if (pImage_data is null) return null;
    auto idata = pImage_data[0..dst_bpl*image_height];
  } else {
    auto idata = new ubyte[](dst_bpl*image_height);
    auto pImage_data = idata.ptr;
  }

  // gives the output buffer back to whichever allocator produced it
  void discard_pixels () {
    static if (useMalloc) {
      jpgd_free(pImage_data);
    } else {
      import core.memory : GC;
      GC.free(idata.ptr);
      idata = null;
    }
  }

  scope(failure) discard_pixels();

  foreach (immutable int y; 0 .. image_height) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      discard_pixels();
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;
    immutable int src_comps = decoder.num_components;

    if ((req_comps == 1 && src_comps == 1) || (req_comps == 4 && src_comps == 3)) {
      // the decoder's line already has the requested layout
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (src_comps == 1) {
      // grayscale source: replicate luma, plus opaque alpha unless 3 were requested
      immutable bool with_alpha = (req_comps != 3);
      foreach (immutable int x; 0 .. image_width) {
        immutable ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        if (with_alpha) {
          pDst[3] = 255;
          pDst += 4;
        } else {
          pDst += 3;
        }
      }
    } else if (src_comps == 3) {
      // color source: the decoder's line has 4 bytes per pixel
      if (req_comps == 1) {
        // weighted sum of R, G and B in 16.16 fixed point, rounded
        immutable int YR = 19595, YG = 38470, YB = 7471;
        foreach (immutable int x; 0 .. image_width) {
          const(ubyte)* px = pScan_line + x*4;
          immutable int r = px[0];
          immutable int g = px[1];
          immutable int b = px[2];
          *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // drop the fourth byte of every pixel
        foreach (immutable int x; 0 .. image_width) {
          const(ubyte)* px = pScan_line + x*4;
          pDst[0] = px[0];
          pDst[1] = px[1];
          pDst[2] = px[2];
          pDst += 3;
        }
      }
    }
  }

  return idata;
}
3357 
3358 
3359 // ////////////////////////////////////////////////////////////////////////// //
/// Decompresses a JPEG image from the disk file `filename`.
/// `req_comps` is passed through to `decompress_jpeg_image_from_stream`
/// (3 for RGB or 4 for RGBA), or leave it as is to use the image value.
/// Throws: `Exception` when `filename` is empty or the file cannot be opened.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a zero-terminated name: short names are copied to the
  // stack, long ones to a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    buffer[0..filename.length] = filename[];
    buffer[filename.length] = 0;
    m_pFile = fopen(buffer.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    char* tfn = cast(char*)malloc(filename.length+1);
    // test the raw pointer: a slice of a null pointer with a non-zero
    // length does not compare as `null`
    if (tfn !is null) {
      scope(exit) free(tfn);
      // the name has to be copied in before the buffer is handed to fopen()
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      m_pFile = fopen(tfn, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return decompress_jpeg_image_from_stream!useMalloc(
    // read callback: -1 on error, 0 with *pEOF_flag set once the end of the
    // file was reached, otherwise the number of bytes read
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // a short read is either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3408 
3409 
3410 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from memory buffer.
/// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
///
/// The buffer is fed to the stream decoder in chunks; it is only read, never modified.
public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  size_t consumed; // number of bytes of `buf` already handed to the decoder
  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      // nothing left: report end of data
      if (consumed >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never hand out more than what remains in the buffer
      immutable size_t remaining = buf.length-consumed;
      if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;
      const(ubyte)* src = (cast(const(ubyte)*)buf.ptr)+consumed;
      memcpy(pBuf, src, max_bytes_to_read);
      consumed += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps, req_comps);
}
3429 
3430 
3431 // ////////////////////////////////////////////////////////////////////////// //
// compile-time flag: true when the "iv.vfs" module can be imported; the VFile-based API is only declared in that case
enum JpegHasIVVFS = __traits(compiles, { import iv.vfs; });
3434 
static if (JpegHasIVVFS) {
import iv.vfs;

// ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from an `iv.vfs` file object.
/// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        immutable int got = cast(int)fl.rawRead(pBuf[0..max_bytes_to_read]).length;
        // the read may have hit the end of the file
        if (fl.eof) *pEOF_flag = true;
        return got;
      }
      // already at the end before reading
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps, req_comps);
}
// vfs API
}
3457 
3458 
3459 // ////////////////////////////////////////////////////////////////////////// //
// compile-time flag: true when the "arsd.color" module can be imported; the MemoryImage-based API is only declared in that case
enum JpegHasArsd = __traits(compiles, { import arsd.color; });
3462 
3463 
3464 
/// Description of the most recent decoding failure, as recorded by `readJpegFromStream`.
public struct LastJpegError {
	/// Where decoding failed: 1 = creating the decoder, 2 = `begin_decoding`, 3 = decoding a scanline.
	int stage;
	/// Status value reported by the failing decoder call.
	int code;
	/// Extra decoder error code; only filled in for stage 2, zero otherwise.
	int details;
}

/// Error record written by `readJpegFromStream` when it returns `null` after a decoder failure.
public LastJpegError lastJpegError;
3472 
3473 
3474 static if (JpegHasArsd) {
3475 import arsd.color;
3476 static import arsd.core;
3477 
3478 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image, what else?
///
/// Pulls the compressed data through `rfn` and decodes it into a newly created
/// `TrueColorImage` (4 bytes per pixel). When the decoder's
/// `autoRotateBasedOnExifOrientation` flag is set, the decoded image is
/// mirrored/rotated according to `decoder.orientation` before it is returned.
///
/// Returns: the decoded image, or `null` on failure. After a decoder failure
/// `lastJpegError.stage` tells where it happened: 1 = creating the decoder,
/// 2 = `begin_decoding` (or non-positive image dimensions), 3 = decoding a
/// scanline. A `null` `rfn` returns `null` without touching `lastJpegError`.
public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
  import core.stdc.string : memcpy;
  enum req_comps = 4; // output is always 4 bytes per pixel

  if (rfn is null) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) { lastJpegError = LastJpegError(1, decoder.error_code); return null; }
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;

  version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("starting (%dx%d)...\n", image_width, image_height); }}

  auto err = decoder.begin_decoding();
  if (err != JPGD_SUCCESS || image_width < 1 || image_height < 1) {
    lastJpegError = LastJpegError(2, err, decoder.m_error_code);
    return null;
  }

  // bytes per destination row; kept in size_t so that row offsets cannot overflow `int`
  immutable size_t dst_bpl = cast(size_t)image_width*req_comps;
  auto img = new TrueColorImage(image_width, image_height);
  scope(failure) { img.clearInternal(); img = null; }
  ubyte* pImage_data = img.imageData.bytes.ptr;

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    err = decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len);
    if (err != JPGD_SUCCESS) {
      lastJpegError = LastJpegError(3, err);
      img.clearInternal();
      img = null;
      return null;
    }

    ubyte* pDst = pImage_data+cast(size_t)y*dst_bpl;

    // `req_comps` is the constant 4, so only two conversions can ever run;
    // the 1- and 3-component output paths of the generic decoder loop were unreachable here.
    if (decoder.num_components == 3) {
      // colour image: the decoder's scanline is copied verbatim, one full destination row
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // grayscale image: replicate luma into R, G, B and make the pixel opaque
      for (int x = 0; x < image_width; ++x) {
        ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        pDst[3] = 255;
        pDst += 4;
      }
    }
  }

  // Reverses the pixel order in place, which is a 180 degree rotation.
  // Walks inwards from both ends and stops where the indices meet, so every
  // pair is swapped exactly once for both even and odd pixel counts.
  static void rotate180(TrueColorImage img) {
    auto colors = img.imageData.colors;
    if (colors.length < 2)
      return;
    size_t lo = 0;
    size_t hi = colors.length - 1;
    while (lo < hi) {
      auto px = colors[lo];
      colors[lo] = colors[hi];
      colors[hi] = px;
      ++lo;
      --hi;
    }
  }

  // Reverses every row in place (left <-> right).
  static void mirrorHorizontally(TrueColorImage img) {
    if(img.width < 2)
      return;
    foreach(row; 0 .. img.height) {
      auto off1 = row * img.width;
      auto off2 = off1 + img.width - 1;

      while(off1 < off2) {
        auto px = img.imageData.colors[off1];
        img.imageData.colors[off1] = img.imageData.colors[off2];
        img.imageData.colors[off2] = px;

        off1++;
        off2--;
      }
    }
  }

  // Reverses every column in place (top <-> bottom).
  static void mirrorVertically(TrueColorImage img) {
    if(img.height < 2)
      return;
    foreach(column; 0 .. img.width) {
      auto off1 = column;
      auto off2 = img.imageData.colors.length - img.width + off1;

      while(off1 < off2) {
        auto px = img.imageData.colors[off1];
        img.imageData.colors[off1] = img.imageData.colors[off2];
        img.imageData.colors[off2] = px;

        off1 += img.width;
        off2 -= img.width;
      }
    }
  }

  // Returns a new image holding `img` rotated by 90 degrees clockwise.
  // Source pixels are visited in row-major order; each step along a source
  // row moves one row down in the destination, and a wrap past the end
  // moves one column to the left.
  static TrueColorImage rotate90(const TrueColorImage img) {
    auto rotatedImage = new TrueColorImage(img.height, img.width); // swapped due to rotation
    const area = img.imageData.colors.length;
    const rowLength = img.height;
    ptrdiff_t cursor = -1;

    foreach(px; img.imageData.colors) {
      cursor += rowLength;
      if(cursor > area) {
        cursor -= (area + 1);
      }

      rotatedImage.imageData.colors[cursor] = px;
    }

    return rotatedImage;
  }

  if(decoder.autoRotateBasedOnExifOrientation && img.imageData.colors.length)
  switch(decoder.orientation) {
    case 0:
    case 1:
      // no work required
    break;
    case 2:
      // mirror horizontal
      mirrorHorizontally(img);
    break;
    case 3:
      // rotate 180
      rotate180(img);
    break;
    case 4:
      // mirror vertical
      mirrorVertically(img);
    break;
    case 5:
      // mirror horizontal and rotate 270 CW
      mirrorHorizontally(img);
      rotate180(img);
      img = rotate90(img);
    break;
    case 6:
      // rotate 90 CW
      img = rotate90(img);
    break;
    case 7:
      // mirror horizontal and rotate 90 CW
      mirrorHorizontally(img);
      img = rotate90(img);
    break;
    case 8:
      // rotate 270 CW aka 90 CCW
      rotate180(img);
      img = rotate90(img);
    break;

    default:
      // unknown, just leave it alone
    break;
  }

  return img;
}
3679 
3680 
3681 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from disk file.
/// Returns null if decoding failed (see `readJpegFromStream`).
///
/// `filename` does not have to be zero-terminated: a terminated copy is built
/// before calling `fopen` (on the stack for names shorter than 512 bytes, in a
/// `malloc`ed buffer otherwise).
///
/// Throws: `Exception` if `filename` is empty or the file cannot be opened
/// (this includes failure to allocate the temporary name buffer).
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  if (filename.length < 512) {
    // short name: zero-terminated copy lives on the stack
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // long name: heap copy. The raw pointer is tested, because a non-empty
    // slice made from a null pointer does not compare `is null`.
    char* namez = cast(char*)malloc(filename.length+1);
    if (namez !is null) {
      scope(exit) free(namez);
      namez[0..filename.length] = filename[];
      namez[filename.length] = 0;
      m_pFile = fopen(namez, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    // stream callback: returns the number of bytes read, or -1 on error;
    // sets *pEOF_flag once a short read without error has been seen
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3730 
/++
	Writes `img` to the file `filename` as a JPEG, passing its pixel data to the encoder as 4-channel data.

	Throws:
		`Exception` if `compress_image_to_jpeg_file` reports failure.

	History:
		Added January 22, 2021 (release version 9.2)
+/
public void writeJpeg(const(char)[] filename, TrueColorImage img, JpegParams params = JpegParams.init) {
	immutable bool written = compress_image_to_jpeg_file(filename, img.width, img.height, 4, img.imageData.bytes, params);
	if(written)
		return;
	throw new Exception("jpeg write failed"); // FIXME: check errno?
}
3739 
/++
  	Encodes an image as jpeg in memory.

	Returns:
		the complete JPEG byte stream, collected from the chunks produced by the delegate-based overload.

	History:
		Added January 22, 2021 (release version 9.2)
+/
public ubyte[] encodeJpeg(TrueColorImage img, JpegParams params = JpegParams.init) {
	ubyte[] encoded;
	// sink that appends every chunk to the result and always asks the encoder to continue
	scope bool delegate(const scope ubyte[]) sink = (const scope ubyte[] chunk) {
		encoded ~= chunk;
		return true;
	};
	encodeJpeg(sink, img, params);
	return encoded;
}
3755 
/++
	Encodes an image as jpeg, handing the produced bytes to `dg` chunk by chunk.
	The pixel data of `img` is passed to the encoder as 4-channel data.

	Throws:
		`Exception` if `compress_image_to_jpeg_stream` reports failure.

	History:
		Added January 22, 2021 (release version 9.2)
+/
public void encodeJpeg(scope bool delegate(const scope ubyte[]) dg, TrueColorImage img, JpegParams params = JpegParams.init) {
	immutable bool encoded = compress_image_to_jpeg_stream(dg, img.width, img.height, 4, img.imageData.bytes, params);
	if(encoded)
		return;
	throw new Exception("encode");
}
3763 
3764 
3765 // ////////////////////////////////////////////////////////////////////////// //
/// decompress JPEG image from memory buffer.
/// The buffer is only read; the result is whatever `readJpegFromStream` returns for its contents.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  size_t consumed; // number of bytes of `buf` already handed to the decoder
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      // nothing left: report end of data
      if (consumed >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never hand out more than what remains in the buffer
      immutable size_t remaining = buf.length-consumed;
      if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;
      const(ubyte)* src = (cast(const(ubyte)*)buf.ptr)+consumed;
      memcpy(pBuf, src, max_bytes_to_read);
      consumed += max_bytes_to_read;
      return max_bytes_to_read;
    }
  );
}
3783 // done with arsd API
3784 }
3785 
3786 
static if (JpegHasIVVFS) {
// `readJpeg` needs `MemoryImage` and `readJpegFromStream`, which are only
// declared when arsd.color is available, so it is guarded by both flags.
static if (JpegHasArsd) {
/// decompress JPEG image from an `iv.vfs` file object.
/// Returns null if decoding failed (see `readJpegFromStream`).
public MemoryImage readJpeg (VFile fl) {
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (fl.eof) {
        *pEOF_flag = true;
        return 0;
      }
      auto rd = fl.rawRead(pBuf[0..max_bytes_to_read]);
      // the read may have hit the end of the file
      if (fl.eof) *pEOF_flag = true;
      return cast(int)rd.length;
    }
  );
}
}

/// detect JPEG image in an `iv.vfs` file object.
/// Forwards to `detect_jpeg_image_from_stream`, which fills `width`, `height` and `actual_comps`, and returns its result.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (fl.eof) {
        *pEOF_flag = true;
        return 0;
      }
      auto rd = fl.rawRead(pBuf[0..max_bytes_to_read]);
      // the read may have hit the end of the file
      if (fl.eof) *pEOF_flag = true;
      return cast(int)rd.length;
    },
    width, height, actual_comps);
}
// vfs API
}
3819 
3820 
3821 // ////////////////////////////////////////////////////////////////////////// //
version(jpegd_test) {
import arsd.color;
import arsd.png;

/// Manual test driver: decodes a JPEG (first command-line argument, or
/// "image.jpg") once from a file and once from memory, and writes the results
/// as "z00.png" and "z01.png".
void main (string[] args) {
  import std.exception : enforce;
  import std.stdio;
  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;
  {
    // the call is kept outside of `assert`, so it still runs when asserts are compiled out
    immutable bool detected = detect_jpeg_image_from_file(fname, width, height, comps);
    enforce(detected, "JPEG detection from file failed");
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    enforce(img !is null, "JPEG decoding from file failed");
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      file.length = cast(size_t)fl.size;
      fl.rawRead(file[]);
    }
    immutable bool detected = detect_jpeg_image_from_memory(file[], width, height, comps);
    enforce(detected, "JPEG detection from memory failed");
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    enforce(img !is null, "JPEG decoding from memory failed");
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
}
3851 
3852 // jpge.cpp - C++ class for JPEG compression.
3853 // Public domain, Rich Geldreich <richgel99@gmail.com>
3854 // Alex Evans: Added RGBA support, linear memory allocator.
3855 // v1.01, Dec. 18, 2010 - Initial release
3856 // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
3857 // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
3858 //                        Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
3859 // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to null in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
3860 //                       Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
3861 //                       Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
3862 // D translation by Ketmar // Invisible Vector
3863 //
3864 // This is free and unencumbered software released into the public domain.
3865 //
3866 // Anyone is free to copy, modify, publish, use, compile, sell, or
3867 // distribute this software, either in source code form or as a compiled
3868 // binary, for any purpose, commercial or non-commercial, and by any
3869 // means.
3870 //
3871 // In jurisdictions that recognize copyright laws, the author or authors
3872 // of this software dedicate any and all copyright interest in the
3873 // software to the public domain. We make this dedication for the benefit
3874 // of the public at large and to the detriment of our heirs and
3875 // successors. We intend this dedication to be an overt act of
3876 // relinquishment in perpetuity of all present and future rights to this
3877 // software under copyright law.
3878 //
3879 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3880 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3881 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
3882 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
3883 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
3884 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
3885 // OTHER DEALINGS IN THE SOFTWARE.
3886 //
3887 // For more information, please refer to <http://unlicense.org/>
3888 /**
3889  * Writes a JPEG image to a file or stream.
3890  * num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
3891  * note that alpha will not be stored in jpeg file.
3892  */
3893 
3894 public:
3895 // ////////////////////////////////////////////////////////////////////////// //
// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
enum JpegSubsampling {
  Y_ONLY = 0, // grayscale only
  H1V1 = 1,   // YCbCr, no subsampling
  H2V1 = 2,   // YCbCr, H2V1 subsampling
  H2V2 = 3,   // YCbCr, H2V2 subsampling
}
3898 
/// JPEG compression parameters structure.
public struct JpegParams {
  /// Quality: 1-100, higher is better. Typical values are around 50-95.
  int quality = 85;

  /// subsampling:
  /// 0 = Y (grayscale) only
  /// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
  /// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
  /// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
  JpegSubsampling subsampling = JpegSubsampling.H2V2;

  /// Disables CbCr discrimination - only intended for testing.
  /// If true, the Y quantization table is also used for the CbCr channels.
  bool noChromaDiscrimFlag = false;

  /// Enabled by default. Presumably selects two-pass encoding -- confirm against `jpeg_encoder`.
  bool twoPass = true;

  /// Validates the parameters.
  /// Returns: `true` when `quality` is within 1..100 and `subsampling` is not above `JpegSubsampling.H2V2`.
  bool check () const pure nothrow @trusted @nogc {
    immutable bool qualityOk = (quality >= 1 && quality <= 100);
    // compared as unsigned, so a negative value is rejected as well
    immutable bool subsamplingOk = (cast(uint)subsampling <= cast(uint)JpegSubsampling.H2V2);
    return qualityOk && subsamplingOk;
  }
}
3925 
3926 
3927 // ////////////////////////////////////////////////////////////////////////// //
/// Writes JPEG image to a stream (the bytes are handed to `wfn`), using default compression parameters.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  auto defaults = JpegParams();
  return compress_image_to_jpeg_stream(wfn, width, height, num_channels, pImage_data, defaults);
}
3932 
/// Writes JPEG image to a stream (the bytes are handed to `wfn`).
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Returns: `true` on success. `false` when a dimension or the channel count
/// is not positive, when `pImage_data` holds fewer than
/// `width*height*num_channels` bytes, or when the encoder reports a failure.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data, in JpegParams comp_params) {
  // refuse inputs that would make the scanline pointers below leave the buffer
  if (width < 1 || height < 1 || num_channels < 1) return false;
  immutable size_t row_bytes = cast(size_t)width*cast(size_t)num_channels;
  // division instead of multiplication, so the size test itself cannot overflow
  if (pImage_data.length/row_bytes < cast(size_t)height) return false;

  jpeg_encoder dst_image;
  if (!dst_image.setup(wfn, width, height, num_channels, comp_params)) return false;
  for (uint pass_index = 0; pass_index < dst_image.total_passes(); pass_index++) {
    for (int i = 0; i < height; i++) {
      // offset computed in size_t: `i*width*num_channels` could overflow `int` for big images
      const(ubyte)* pBuf = pImage_data.ptr+cast(size_t)i*row_bytes;
      if (!dst_image.process_scanline(pBuf)) return false;
    }
    // a null scanline marks the end of the pass
    if (!dst_image.process_scanline(null)) return false;
  }
  dst_image.deinit();
  //return dst_stream.close();
  return true;
}
3950 
3951 
/// Writes JPEG image to file, using default compression parameters.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_file (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  return compress_image_to_jpeg_file(
    fname, width, height, num_channels, pImage_data,
    JpegParams());
}
3956 
/// Writes JPEG image to file.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Returns: `true` only if the file could be opened, the whole stream was
/// encoded and written, and the file was closed without error.
bool compress_image_to_jpeg_file() (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data, const scope auto ref JpegParams comp_params) {
  import std.internal.cstring;
  import core.stdc.stdio : FILE, fopen, fclose, fwrite;
  FILE* fl = fopen(fname.tempCString, "wb");
  if (fl is null) return false;
  // safety net for the failure path; the success path closes the file itself
  scope(exit) if (fl !is null) fclose(fl);
  immutable bool encoded = compress_image_to_jpeg_stream(
    delegate bool (scope const(ubyte)[] buf) {
      // a short write counts as failure
      return fwrite(buf.ptr, 1, buf.length, fl) == buf.length;
    }, width, height, num_channels, pImage_data, comp_params);
  if (!encoded) return false;
  // close explicitly so that an error on close is reported to the caller
  immutable bool closed = (fclose(fl) == 0);
  fl = null;
  return closed;
}
3977 
3978 
3979 // ////////////////////////////////////////////////////////////////////////// //
3980 private:
3981 nothrow @trusted @nogc {
// returns `a` when it compares less than `b`, otherwise `b`
auto JPGE_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); if (a < b) return a; return b; }
// returns `a` when it compares greater than `b`, otherwise `b`
auto JPGE_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); if (a > b) return a; return b; }
3984 
// thin wrapper over the C `malloc`; returns whatever `malloc` returns for `nSize`
void *jpge_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* block = malloc(nSize);
  return block;
}
// thin wrapper over the C `free`; a null pointer is ignored without calling `free`
void jpge_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
3987 
3988 
// Various JPEG enums and tables.
enum {
  DC_LUM_CODES = 12,      // length of s_dc_lum_val
  AC_LUM_CODES = 256,     // length of s_ac_lum_val
  DC_CHROMA_CODES = 12,   // length of s_dc_chroma_val
  AC_CHROMA_CODES = 256,  // length of s_ac_chroma_val
  MAX_HUFF_SYMBOLS = 257,
  MAX_HUFF_CODESIZE = 32,
}
3991 
// Permutation of the 64 coefficient indices of an 8x8 block (presumably the JPEG zigzag scan order -- confirm against its users).
static immutable ubyte[64] s_zag = [ 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
// Base luminance quantization table, 64 entries (looks like the example table of the JPEG standard, stored in zigzag order -- confirm).
static immutable short[64] s_std_lum_quant = [ 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 ];
// Base chrominance quantization table, 64 entries (same layout assumption as s_std_lum_quant).
static immutable short[64] s_std_croma_quant = [ 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 ];
// Default Huffman tables. Each `*_bits` array has 17 entries (presumably code counts per code length, index 0 unused -- confirm);
// each `*_val` array lists the symbols. The AC `*_val` arrays only spell out their leading entries; the rest of the 256 slots stay zero.
static immutable ubyte[17] s_dc_lum_bits = [ 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 ];
static immutable ubyte[DC_LUM_CODES] s_dc_lum_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_lum_bits = [ 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d ];
static immutable ubyte[AC_LUM_CODES] s_ac_lum_val = [
  0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
  0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
  0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
  0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
  0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
static immutable ubyte[17] s_dc_chroma_bits = [ 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 ];
static immutable ubyte[DC_CHROMA_CODES] s_dc_chroma_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_chroma_bits = [ 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 ];
static immutable ubyte[AC_CHROMA_CODES] s_ac_chroma_val = [
  0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
  0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
  0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
  0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
  0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
4017 
4018 // Low-level helper functions.
4019 //template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
4020 
// RGB -> YCbCr conversion weights in 16.16 fixed point (results are shifted right by 16 after adding 32768 for rounding)
enum int YR = 19595, YG = 38470, YB = 7471;          // luma weights
enum int CB_R = -11059, CB_G = -21709, CB_B = 32768; // Cb weights
enum int CR_R = 32768, CR_G = -27439, CR_B = -5329;  // Cr weights
// Saturates `i` to the 0..255 range: negative values give 0, values above 255 give 255.
ubyte clamp() (int i) {
  pragma(inline, true);
  // one unsigned compare covers both "negative" and "above 255"
  if (cast(uint)i <= 255) return cast(ubyte)i;
  return cast(ubyte)(i < 0 ? 0 : 255);
}
4024 
// Converts `num_pixels` packed RGB pixels (3 bytes each) at `pSrc` into packed Y, Cb, Cr triples at `pDst`.
void RGB_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    // fixed-point weighted sums, rounded; chroma is biased by 128 and saturated
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = 128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16);
    immutable int cr = 128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16);
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(cb);
    pDst[2] = clamp(cr);
    pDst += 3;
    pSrc += 3;
    --num_pixels;
  }
}
4033 
// Converts `num_pixels` packed RGB pixels (3 bytes each) at `pSrc` into one luma byte per pixel at `pDst`.
void RGB_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    // fixed-point weighted sum, rounded
    *pDst = cast(ubyte)((red*YR+green*YG+blue*YB+32768)>>16);
    ++pDst;
    pSrc += 3;
    --num_pixels;
  }
}
4039 
// Converts `num_pixels` packed RGBA pixels (4 bytes each, alpha ignored) at `pSrc` into packed Y, Cb, Cr triples at `pDst`.
void RGBA_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    // fixed-point weighted sums, rounded; chroma is biased by 128 and saturated
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = 128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16);
    immutable int cr = 128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16);
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(cb);
    pDst[2] = clamp(cr);
    pDst += 3;
    pSrc += 4;
    --num_pixels;
  }
}
4048 
// Converts `num_pixels` packed RGBA pixels (4 bytes each, alpha ignored) at `pSrc` into one luma byte per pixel at `pDst`.
void RGBA_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    // fixed-point weighted sum, rounded
    *pDst = cast(ubyte)((red*YR+green*YG+blue*YB+32768)>>16);
    ++pDst;
    pSrc += 4;
    --num_pixels;
  }
}
4054 
// Expands `num_pixels` luma bytes at `pSrc` into packed Y, Cb, Cr triples at `pDst`, with both chroma bytes set to 128.
void Y_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    pDst[0] = *pSrc;
    pDst[1] = 128;
    pDst[2] = 128;
    pDst += 3;
    ++pSrc;
    --num_pixels;
  }
}
4058 
// Forward DCT - DCT derived from jfdctint.
// Number of extra bits kept after the row pass of DCT2D and removed again in its column pass.
enum int ROW_BITS = 2;
//#define DCT_DESCALE(x, n) (((x)+(((int)1)<<((n)-1)))>>(n))
// Shifts `x` right by `n` bits after adding half of the divisor (1 << (n-1)), i.e. a rounding right shift.
int DCT_DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = (cast(int)1)<<(n-1);
  return (x+half)>>n;
}
//#define DCT_MUL(var, c) (cast(short)(var)*cast(int)(c))

//#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7)
// One-dimensional 8-point forward DCT step, kept as source text and pasted in with `mixin(DCT1D)`.
// The mixin site must have eight `int` variables named s0..s7 in scope: they are read as the
// inputs and overwritten with the outputs. All temporaries are declared inside the braces of the
// string, so they do not leak into the surrounding scope.
// Every multiplication follows the DCT_MUL form shown above: the variable operand is truncated
// to `short` before it is multiplied by the integer constant.
// s0 and s4 are plain sums/differences; the other outputs carry the scale of the constants and
// are descaled by the caller.
enum DCT1D = q{{
  int t0 = s0+s7, t7 = s0-s7, t1 = s1+s6, t6 = s1-s6, t2 = s2+s5, t5 = s2-s5, t3 = s3+s4, t4 = s3-s4;
  int t10 = t0+t3, t13 = t0-t3, t11 = t1+t2, t12 = t1-t2;
  int u1 = (cast(short)(t12+t13)*cast(int)(4433));
  s2 = u1+(cast(short)(t13)*cast(int)(6270));
  s6 = u1+(cast(short)(t12)*cast(int)(-15137));
  u1 = t4+t7;
  int u2 = t5+t6, u3 = t4+t6, u4 = t5+t7;
  int z5 = (cast(short)(u3+u4)*cast(int)(9633));
  t4 = (cast(short)(t4)*cast(int)(2446)); t5 = (cast(short)(t5)*cast(int)(16819));
  t6 = (cast(short)(t6)*cast(int)(25172)); t7 = (cast(short)(t7)*cast(int)(12299));
  u1 = (cast(short)(u1)*cast(int)(-7373)); u2 = (cast(short)(u2)*cast(int)(-20995));
  u3 = (cast(short)(u3)*cast(int)(-16069)); u4 = (cast(short)(u4)*cast(int)(-3196));
  u3 += z5; u4 += z5;
  s0 = t10+t11; s1 = t7+u1+u4; s3 = t6+u2+u3; s4 = t10-t11; s5 = t5+u2+u4; s7 = t4+u1+u3;
}};
4082 
// In-place two-dimensional forward DCT of the 8x8 block of ints at `p` (row-major, 64 values):
// DCT1D is applied to each of the 8 rows first, then to each of the 8 columns.
void DCT2D (int* p) {
  // descale amounts for the outputs that carry the constant scale
  immutable int rowShift = CONST_BITS-ROW_BITS;
  immutable int colShift = CONST_BITS+ROW_BITS+3;

  // pass 1: rows; results keep ROW_BITS extra bits
  foreach (row; 0 .. 8) {
    int* q = p+row*8;
    int s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
    mixin(DCT1D);
    q[0] = s0<<ROW_BITS;
    q[1] = DCT_DESCALE(s1, rowShift);
    q[2] = DCT_DESCALE(s2, rowShift);
    q[3] = DCT_DESCALE(s3, rowShift);
    q[4] = s4<<ROW_BITS;
    q[5] = DCT_DESCALE(s5, rowShift);
    q[6] = DCT_DESCALE(s6, rowShift);
    q[7] = DCT_DESCALE(s7, rowShift);
  }

  // pass 2: columns; the extra bits from pass 1 and 3 more bits are removed
  foreach (col; 0 .. 8) {
    int* q = p+col;
    int s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
    mixin(DCT1D);
    q[0*8] = DCT_DESCALE(s0, ROW_BITS+3);
    q[1*8] = DCT_DESCALE(s1, colShift);
    q[2*8] = DCT_DESCALE(s2, colShift);
    q[3*8] = DCT_DESCALE(s3, colShift);
    q[4*8] = DCT_DESCALE(s4, ROW_BITS+3);
    q[5*8] = DCT_DESCALE(s5, colShift);
    q[6*8] = DCT_DESCALE(s6, colShift);
    q[7*8] = DCT_DESCALE(s7, colShift);
  }
}
4101 
// Work item for Huffman table construction.
// m_key starts out as the symbol's frequency (the radix sort key) and is
// overwritten with the code length by calculate_minimum_redundancy();
// m_sym_index identifies the symbol the entry belongs to.
struct sym_freq { uint m_key, m_sym_index; }
4103 
// Sorts num_syms entries by ascending 32-bit m_key using a byte-wise
// least-significant-digit radix sort. pSyms0 holds the input; pSyms1 is a
// scratch buffer of the same size. The two buffers are swapped after each
// pass, so the returned pointer is whichever one ends up holding the result.
sym_freq* radix_sort_syms (uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) {
  enum uint kMaxPasses = 4;
  enum uint kBuckets = 256;

  // One 256-entry histogram per key byte; D zero-initialises the array.
  uint[kBuckets*kMaxPasses] histogram;
  foreach (idx; 0..num_syms) {
    immutable uint key = pSyms0[idx].m_key;
    foreach (digit; 0..kMaxPasses) ++histogram[digit*kBuckets+((key>>(8*digit))&0xFF)];
  }

  // Skip high-order passes in which every key has a zero byte.
  uint passCount = kMaxPasses;
  while (passCount > 1 && histogram[(passCount-1)*kBuckets] == num_syms) --passCount;

  sym_freq* src = pSyms0;
  sym_freq* dst = pSyms1;
  uint[kBuckets] bucketStart;
  foreach (pass; 0..passCount) {
    immutable uint shift = pass*8;
    const(uint)* counts = histogram.ptr+pass*kBuckets;

    // Exclusive prefix sum: first output slot of every bucket.
    uint running = 0;
    foreach (bucket; 0..kBuckets) { bucketStart[bucket] = running; running += counts[bucket]; }

    // Stable scatter into the other buffer.
    foreach (idx; 0..num_syms) {
      immutable uint bucket = (src[idx].m_key>>shift)&0xFF;
      dst[bucketStart[bucket]++] = src[idx];
    }

    sym_freq* swapTmp = src; src = dst; dst = swapTmp;
  }
  return src;
}
4129 
// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
//
// In-place computation of Huffman code lengths.
// On entry A[0..n) must be ordered by ascending m_key (the caller sorts with
// radix_sort_syms first); on return every A[i].m_key has been replaced by a
// code length. m_sym_index is never touched. The whole computation reuses the
// m_key fields as working storage, so the three phases below must run in order.
void calculate_minimum_redundancy (sym_freq* A, int n) {
  int root, leaf, next, avbl, used, dpth;
  // Trivial inputs: nothing to do, or a single symbol that gets a 1-bit code.
  if (n == 0) return;
  if (n == 1) { A[0].m_key = 1; return; }
  // Phase 1: repeatedly combine the two smallest remaining weights (next unused
  // leaf vs. oldest unmerged internal node). Slot `next` receives the combined
  // weight; a consumed internal node's slot is overwritten with its parent index.
  A[0].m_key += A[1].m_key; root = 0; leaf = 2;
  for (next=1; next < n-1; next++)
  {
    if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
    if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
  }
  // Phase 2: turn parent indices into depths; the root (slot n-2) has depth 0
  // and each other internal node is one deeper than its parent.
  A[n-2].m_key = 0;
  for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
  // Phase 3: walk the depths level by level. `avbl` is the number of nodes
  // available at the current depth, `used` the number that are internal; the
  // rest are leaves and get `dpth` written as their code length, filling the
  // array from the back.
  avbl = 1; used = dpth = 0; root = n-2; next = n-1;
  while (avbl>0)
  {
    while (root >= 0 && cast(int)A[root].m_key == dpth) { used++; root--; }
    while (avbl>used) { A[next--].m_key = dpth; avbl--; }
    avbl = 2*used; dpth++; used = 0;
  }
}
4151 
// Restricts a canonical Huffman code-length histogram to max_code_size bits.
// pNum_codes[len] is the number of codes of length len, for len in
// 1..MAX_HUFF_CODESIZE; it is rewritten in place. Tables with at most one
// code are left alone.
void huffman_enforce_max_code_size (int* pNum_codes, int code_list_len, int max_code_size) {
  if (code_list_len <= 1) return;

  // Fold every over-long code into the longest permitted bucket.
  int* longest = pNum_codes+max_code_size;
  for (int len = MAX_HUFF_CODESIZE; len > max_code_size; --len) *longest += pNum_codes[len];

  // Kraft sum scaled by 2^max_code_size; a complete prefix code sums to exactly 2^max_code_size.
  uint kraft = 0;
  for (int len = 1; len <= max_code_size; ++len) kraft += (cast(uint)pNum_codes[len])<<(max_code_size-len);

  // Each round drops one longest code and lengthens the longest shorter code
  // by one bit (turning it into two codes), which lowers the sum by one.
  immutable ulong target = 1UL<<max_code_size;
  for (; kraft != target; --kraft) {
    --*longest;
    int len = max_code_size-1;
    while (len > 0 && pNum_codes[len] == 0) --len;
    if (len > 0) { --pNum_codes[len]; pNum_codes[len+1] += 2; }
  }
}
4166 }
4167 
4168 
4169 // ////////////////////////////////////////////////////////////////////////// //
4170 // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
4171 struct jpeg_encoder {
4172 public:
4173   alias WriteFunc = bool delegate (scope const(ubyte)[] buf);
4174 
4175 nothrow /*@trusted @nogc*/:
4176 private:
  // Element type of the 8x8 sample block handed to DCT2D.
  alias sample_array_t = int;

  // Output sink; every byte of the file goes through this delegate (see put_obj/put_buf).
  WriteFunc m_pStream;
  // Copy of the compression parameters passed to setup().
  JpegParams m_params;
  // 1 for Y-only output, 3 otherwise (set in jpg_open).
  ubyte m_num_components;
  // Per-component horizontal / vertical sampling factors written to the SOF header.
  ubyte[3] m_comp_h_samp;
  ubyte[3] m_comp_v_samp;
  // Image size in pixels, source channel count, and source bytes per line (width*channels).
  int m_image_x, m_image_y, m_image_bpp, m_image_bpl;
  // Image size rounded up to a whole number of MCUs.
  int m_image_x_mcu, m_image_y_mcu;
  // Bytes per converted line: m_image_x*m_num_components and m_image_x_mcu*m_num_components.
  int m_image_bpl_xlt, m_image_bpl_mcu;
  int m_mcus_per_row;
  // MCU size in pixels (8 or 16 in each direction).
  int m_mcu_x, m_mcu_y;
  // Start of each buffered line of the current MCU row. Entry 0 is the pointer
  // returned by jpge_malloc in jpg_open and released in deinit; the others point into it.
  ubyte*[16] m_mcu_lines;
  // Number of lines currently stored in m_mcu_lines.
  ubyte m_mcu_y_ofs;
  // Current 8x8 block: samples before/after the DCT, then quantized coefficients in zigzag order.
  sample_array_t[64] m_sample_array;
  short[64] m_coefficient_array;
  // [0] is used for component 0, [1] for the other components.
  int[64][2] m_quantization_tables;
  // Huffman state; first index: 0 = DC table 0, 1 = DC table 1, 2 = AC table 0, 3 = AC table 1.
  uint[256][4] m_huff_codes;
  ubyte[256][4] m_huff_code_sizes;
  ubyte[17][4] m_huff_bits;
  ubyte[256][4] m_huff_val;
  // Symbol frequencies gathered during pass one (same indexing as above).
  uint[256][4] m_huff_count;
  // Previous DC coefficient per component, for differential DC coding.
  int[3] m_last_dc_val;
  enum JPGE_OUT_BUF_SIZE = 2048;
  // Staging buffer for entropy-coded bytes, its write cursor and the free space left.
  ubyte[JPGE_OUT_BUF_SIZE] m_out_buf;
  ubyte* m_pOut_buf;
  uint m_out_buf_left;
  // Bit accumulator used by put_bits and the number of bits currently held in it.
  uint m_bit_buffer;
  uint m_bits_in;
  // 0 = not set up, 1 = statistics pass, 2 = encoding pass, 3 = finished.
  ubyte m_pass_num;
  // Latched to false by the first failed stream write; no writes are attempted afterwards.
  bool m_all_stream_writes_succeeded = true;
4208 
4209 private:
4210   // Generates an optimized offman table.
4211   void optimize_huffman_table (int table_num, int table_len) {
4212     sym_freq[MAX_HUFF_SYMBOLS] syms0;
4213     sym_freq[MAX_HUFF_SYMBOLS] syms1;
4214     syms0[0].m_key = 1; syms0[0].m_sym_index = 0;  // dummy symbol, assures that no valid code contains all 1's
4215     int num_used_syms = 1;
4216     const uint *pSym_count = &m_huff_count[table_num][0];
4217     for (int i = 0; i < table_len; i++) {
4218       if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i+1; }
4219     }
4220     sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0.ptr, syms1.ptr);
4221     calculate_minimum_redundancy(pSyms, num_used_syms);
4222 
4223     // Count the # of symbols of each code size.
4224     int[1+MAX_HUFF_CODESIZE] num_codes;
4225     //clear_obj(num_codes);
4226     for (int i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
4227 
4228     enum JPGE_CODE_SIZE_LIMIT = 16u; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
4229     huffman_enforce_max_code_size(num_codes.ptr, num_used_syms, JPGE_CODE_SIZE_LIMIT);
4230 
4231     // Compute m_huff_bits array, which contains the # of symbols per code size.
4232     //clear_obj(m_huff_bits[table_num]);
4233     m_huff_bits[table_num][] = 0;
4234     for (int i = 1; i <= cast(int)JPGE_CODE_SIZE_LIMIT; i++) m_huff_bits[table_num][i] = cast(ubyte)(num_codes[i]);
4235 
4236     // Remove the dummy symbol added above, which must be in largest bucket.
4237     for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) {
4238       if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
4239     }
4240 
4241     // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
4242     for (int i = num_used_syms-1; i >= 1; i--) m_huff_val[table_num][num_used_syms-1-i] = cast(ubyte)(pSyms[i].m_sym_index-1);
4243   }
4244 
4245   bool put_obj(T) (T v) {
4246     try {
4247       return (m_pStream !is null && m_pStream((&v)[0..1]));
4248     } catch (Exception) {}
4249     return false;
4250   }
4251 
4252   bool put_buf() (const(void)* v, uint len) {
4253     try {
4254       return (m_pStream !is null && m_pStream((cast(ubyte*)v)[0..len]));
4255     } catch (Exception) {}
4256     return false;
4257   }
4258 
4259   // JPEG marker generation.
4260   void emit_byte (ubyte i) {
4261     m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_obj(i);
4262   }
4263 
4264   void emit_word(uint i) {
4265     emit_byte(cast(ubyte)(i>>8));
4266     emit_byte(cast(ubyte)(i&0xFF));
4267   }
4268 
4269   void emit_marker (int marker) {
4270     emit_byte(cast(ubyte)(0xFF));
4271     emit_byte(cast(ubyte)(marker));
4272   }
4273 
4274   // Emit JFIF marker
4275   void emit_jfif_app0 () {
4276     emit_marker(M_APP0);
4277     emit_word(2+4+1+2+1+2+2+1+1);
4278     emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
4279     emit_byte(0);
4280     emit_byte(1); /* Major version */
4281     emit_byte(1); /* Minor version */
4282     emit_byte(0); /* Density unit */
4283     emit_word(1);
4284     emit_word(1);
4285     emit_byte(0); /* No thumbnail image */
4286     emit_byte(0);
4287   }
4288 
4289   // Emit quantization tables
4290   void emit_dqt () {
4291     for (int i = 0; i < (m_num_components == 3 ? 2 : 1); i++) {
4292       emit_marker(M_DQT);
4293       emit_word(64+1+2);
4294       emit_byte(cast(ubyte)(i));
4295       for (int j = 0; j < 64; j++) emit_byte(cast(ubyte)(m_quantization_tables[i][j]));
4296     }
4297   }
4298 
4299   // Emit start of frame marker
4300   void emit_sof () {
4301     emit_marker(M_SOF0); /* baseline */
4302     emit_word(3*m_num_components+2+5+1);
4303     emit_byte(8); /* precision */
4304     emit_word(m_image_y);
4305     emit_word(m_image_x);
4306     emit_byte(m_num_components);
4307     for (int i = 0; i < m_num_components; i++) {
4308       emit_byte(cast(ubyte)(i+1)); /* component ID */
4309       emit_byte(cast(ubyte)((m_comp_h_samp[i]<<4)+m_comp_v_samp[i])); /* h and v sampling */
4310       emit_byte(i > 0); /* quant. table num */
4311     }
4312   }
4313 
4314   // Emit Huffman table.
4315   void emit_dht (ubyte* bits, ubyte* val, int index, bool ac_flag) {
4316     emit_marker(M_DHT);
4317     int length = 0;
4318     for (int i = 1; i <= 16; i++) length += bits[i];
4319     emit_word(length+2+1+16);
4320     emit_byte(cast(ubyte)(index+(ac_flag<<4)));
4321     for (int i = 1; i <= 16; i++) emit_byte(bits[i]);
4322     for (int i = 0; i < length; i++) emit_byte(val[i]);
4323   }
4324 
4325   // Emit all Huffman tables.
4326   void emit_dhts () {
4327     emit_dht(m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr, 0, false);
4328     emit_dht(m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr, 0, true);
4329     if (m_num_components == 3) {
4330       emit_dht(m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr, 1, false);
4331       emit_dht(m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr, 1, true);
4332     }
4333   }
4334 
4335   // emit start of scan
4336   void emit_sos () {
4337     emit_marker(M_SOS);
4338     emit_word(2*m_num_components+2+1+3);
4339     emit_byte(m_num_components);
4340     for (int i = 0; i < m_num_components; i++) {
4341       emit_byte(cast(ubyte)(i+1));
4342       if (i == 0)
4343         emit_byte((0<<4)+0);
4344       else
4345         emit_byte((1<<4)+1);
4346     }
4347     emit_byte(0); /* spectral selection */
4348     emit_byte(63);
4349     emit_byte(0);
4350   }
4351 
  // Emit all markers at beginning of image file, in file order:
  // SOI, JFIF APP0, quantization tables, frame header, Huffman tables and
  // finally the scan header, after which entropy-coded data follows.
  void emit_markers () {
    emit_marker(M_SOI);
    emit_jfif_app0();
    emit_dqt();
    emit_sof();
    emit_dhts();
    emit_sos();
  }
4361 
4362   // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
4363   void compute_huffman_table (uint* codes, ubyte* code_sizes, ubyte* bits, ubyte* val) {
4364     import core.stdc.string : memset;
4365 
4366     int i, l, last_p, si;
4367     ubyte[257] huff_size;
4368     uint[257] huff_code;
4369     uint code;
4370 
4371     int p = 0;
4372     for (l = 1; l <= 16; l++)
4373       for (i = 1; i <= bits[l]; i++)
4374         huff_size[p++] = cast(ubyte)l;
4375 
4376     huff_size[p] = 0; last_p = p; // write sentinel
4377 
4378     code = 0; si = huff_size[0]; p = 0;
4379 
4380     while (huff_size[p])
4381     {
4382       while (huff_size[p] == si)
4383         huff_code[p++] = code++;
4384       code <<= 1;
4385       si++;
4386     }
4387 
4388     memset(codes, 0, codes[0].sizeof*256);
4389     memset(code_sizes, 0, code_sizes[0].sizeof*256);
4390     for (p = 0; p < last_p; p++)
4391     {
4392       codes[val[p]]      = huff_code[p];
4393       code_sizes[val[p]] = huff_size[p];
4394     }
4395   }
4396 
4397   // Quantization table generation.
4398   void compute_quant_table (int* pDst, const(short)* pSrc) {
4399     int q;
4400     if (m_params.quality < 50)
4401       q = 5000/m_params.quality;
4402     else
4403       q = 200-m_params.quality*2;
4404     for (int i = 0; i < 64; i++) {
4405       int j = *pSrc++; j = (j*q+50L)/100L;
4406       *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
4407     }
4408   }
4409 
4410   // Higher-level methods.
4411   void first_pass_init () {
4412     import core.stdc.string : memset;
4413     m_bit_buffer = 0; m_bits_in = 0;
4414     memset(m_last_dc_val.ptr, 0, 3*m_last_dc_val[0].sizeof);
4415     m_mcu_y_ofs = 0;
4416     m_pass_num = 1;
4417   }
4418 
4419   bool second_pass_init () {
4420     compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr);
4421     compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr);
4422     if (m_num_components > 1)
4423     {
4424       compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr);
4425       compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr);
4426     }
4427     first_pass_init();
4428     emit_markers();
4429     m_pass_num = 2;
4430     return true;
4431   }
4432 
4433   bool jpg_open (int p_x_res, int p_y_res, int src_channels) {
4434     m_num_components = 3;
4435     switch (m_params.subsampling) {
4436       case JpegSubsampling.Y_ONLY:
4437         m_num_components = 1;
4438         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4439         m_mcu_x          = 8; m_mcu_y          = 8;
4440         break;
4441       case JpegSubsampling.H1V1:
4442         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4443         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4444         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4445         m_mcu_x          = 8; m_mcu_y          = 8;
4446         break;
4447       case JpegSubsampling.H2V1:
4448         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
4449         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4450         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4451         m_mcu_x          = 16; m_mcu_y         = 8;
4452         break;
4453       case JpegSubsampling.H2V2:
4454         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
4455         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4456         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4457         m_mcu_x          = 16; m_mcu_y         = 16;
4458         break;
4459       default: assert(0);
4460     }
4461 
4462     m_image_x        = p_x_res; m_image_y = p_y_res;
4463     m_image_bpp      = src_channels;
4464     m_image_bpl      = m_image_x*src_channels;
4465     m_image_x_mcu    = (m_image_x+m_mcu_x-1)&(~(m_mcu_x-1));
4466     m_image_y_mcu    = (m_image_y+m_mcu_y-1)&(~(m_mcu_y-1));
4467     m_image_bpl_xlt  = m_image_x*m_num_components;
4468     m_image_bpl_mcu  = m_image_x_mcu*m_num_components;
4469     m_mcus_per_row   = m_image_x_mcu/m_mcu_x;
4470 
4471     if ((m_mcu_lines[0] = cast(ubyte*)(jpge_malloc(m_image_bpl_mcu*m_mcu_y))) is null) return false;
4472     for (int i = 1; i < m_mcu_y; i++)
4473       m_mcu_lines[i] = m_mcu_lines[i-1]+m_image_bpl_mcu;
4474 
4475     compute_quant_table(m_quantization_tables[0].ptr, s_std_lum_quant.ptr);
4476     compute_quant_table(m_quantization_tables[1].ptr, (m_params.noChromaDiscrimFlag ? s_std_lum_quant.ptr : s_std_croma_quant.ptr));
4477 
4478     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4479     m_pOut_buf = m_out_buf.ptr;
4480 
4481     if (m_params.twoPass)
4482     {
4483       //clear_obj(m_huff_count);
4484       import core.stdc.string : memset;
4485       memset(m_huff_count.ptr, 0, m_huff_count.sizeof);
4486       first_pass_init();
4487     }
4488     else
4489     {
4490       import core.stdc.string : memcpy;
4491       memcpy(m_huff_bits[0+0].ptr, s_dc_lum_bits.ptr, 17);    memcpy(m_huff_val[0+0].ptr, s_dc_lum_val.ptr, DC_LUM_CODES);
4492       memcpy(m_huff_bits[2+0].ptr, s_ac_lum_bits.ptr, 17);    memcpy(m_huff_val[2+0].ptr, s_ac_lum_val.ptr, AC_LUM_CODES);
4493       memcpy(m_huff_bits[0+1].ptr, s_dc_chroma_bits.ptr, 17); memcpy(m_huff_val[0+1].ptr, s_dc_chroma_val.ptr, DC_CHROMA_CODES);
4494       memcpy(m_huff_bits[2+1].ptr, s_ac_chroma_bits.ptr, 17); memcpy(m_huff_val[2+1].ptr, s_ac_chroma_val.ptr, AC_CHROMA_CODES);
4495       if (!second_pass_init()) return false;   // in effect, skip over the first pass
4496     }
4497     return m_all_stream_writes_succeeded;
4498   }
4499 
4500   void load_block_8_8_grey (int x) {
4501     ubyte *pSrc;
4502     sample_array_t *pDst = m_sample_array.ptr;
4503     x <<= 3;
4504     for (int i = 0; i < 8; i++, pDst += 8)
4505     {
4506       pSrc = m_mcu_lines[i]+x;
4507       pDst[0] = pSrc[0]-128; pDst[1] = pSrc[1]-128; pDst[2] = pSrc[2]-128; pDst[3] = pSrc[3]-128;
4508       pDst[4] = pSrc[4]-128; pDst[5] = pSrc[5]-128; pDst[6] = pSrc[6]-128; pDst[7] = pSrc[7]-128;
4509     }
4510   }
4511 
4512   void load_block_8_8 (int x, int y, int c) {
4513     ubyte *pSrc;
4514     sample_array_t *pDst = m_sample_array.ptr;
4515     x = (x*(8*3))+c;
4516     y <<= 3;
4517     for (int i = 0; i < 8; i++, pDst += 8)
4518     {
4519       pSrc = m_mcu_lines[y+i]+x;
4520       pDst[0] = pSrc[0*3]-128; pDst[1] = pSrc[1*3]-128; pDst[2] = pSrc[2*3]-128; pDst[3] = pSrc[3*3]-128;
4521       pDst[4] = pSrc[4*3]-128; pDst[5] = pSrc[5*3]-128; pDst[6] = pSrc[6*3]-128; pDst[7] = pSrc[7*3]-128;
4522     }
4523   }
4524 
4525   void load_block_16_8 (int x, int c) {
4526     ubyte* pSrc1;
4527     ubyte* pSrc2;
4528     sample_array_t *pDst = m_sample_array.ptr;
4529     x = (x*(16*3))+c;
4530     int a = 0, b = 2;
4531     for (int i = 0; i < 16; i += 2, pDst += 8)
4532     {
4533       pSrc1 = m_mcu_lines[i+0]+x;
4534       pSrc2 = m_mcu_lines[i+1]+x;
4535       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3]+pSrc2[ 0*3]+pSrc2[ 1*3]+a)>>2)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3]+pSrc2[ 2*3]+pSrc2[ 3*3]+b)>>2)-128;
4536       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3]+pSrc2[ 4*3]+pSrc2[ 5*3]+a)>>2)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3]+pSrc2[ 6*3]+pSrc2[ 7*3]+b)>>2)-128;
4537       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3]+pSrc2[ 8*3]+pSrc2[ 9*3]+a)>>2)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3]+pSrc2[10*3]+pSrc2[11*3]+b)>>2)-128;
4538       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3]+pSrc2[12*3]+pSrc2[13*3]+a)>>2)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3]+pSrc2[14*3]+pSrc2[15*3]+b)>>2)-128;
4539       int temp = a; a = b; b = temp;
4540     }
4541   }
4542 
4543   void load_block_16_8_8 (int x, int c) {
4544     ubyte *pSrc1;
4545     sample_array_t *pDst = m_sample_array.ptr;
4546     x = (x*(16*3))+c;
4547     for (int i = 0; i < 8; i++, pDst += 8) {
4548       pSrc1 = m_mcu_lines[i+0]+x;
4549       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3])>>1)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3])>>1)-128;
4550       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3])>>1)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3])>>1)-128;
4551       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3])>>1)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3])>>1)-128;
4552       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3])>>1)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3])>>1)-128;
4553     }
4554   }
4555 
4556   void load_quantized_coefficients (int component_num) {
4557     int *q = m_quantization_tables[component_num > 0].ptr;
4558     short *pDst = m_coefficient_array.ptr;
4559     for (int i = 0; i < 64; i++)
4560     {
4561       sample_array_t j = m_sample_array[s_zag[i]];
4562       if (j < 0)
4563       {
4564         if ((j = -j+(*q>>1)) < *q)
4565           *pDst++ = 0;
4566         else
4567           *pDst++ = cast(short)(-(j/ *q));
4568       }
4569       else
4570       {
4571         if ((j = j+(*q>>1)) < *q)
4572           *pDst++ = 0;
4573         else
4574           *pDst++ = cast(short)((j/ *q));
4575       }
4576       q++;
4577     }
4578   }
4579 
4580   void flush_output_buffer () {
4581     if (m_out_buf_left != JPGE_OUT_BUF_SIZE) m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_buf(m_out_buf.ptr, JPGE_OUT_BUF_SIZE-m_out_buf_left);
4582     m_pOut_buf = m_out_buf.ptr;
4583     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4584   }
4585 
4586   void put_bits (uint bits, uint len) {
4587     m_bit_buffer |= (cast(uint)bits<<(24-(m_bits_in += len)));
4588     while (m_bits_in >= 8) {
4589       ubyte c;
4590       //#define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
4591       //JPGE_PUT_BYTE(c = (ubyte)((m_bit_buffer>>16)&0xFF));
4592       //if (c == 0xFF) JPGE_PUT_BYTE(0);
4593       c = cast(ubyte)((m_bit_buffer>>16)&0xFF);
4594       *m_pOut_buf++ = c;
4595       if (--m_out_buf_left == 0) flush_output_buffer();
4596       if (c == 0xFF) {
4597         *m_pOut_buf++ = 0;
4598         if (--m_out_buf_left == 0) flush_output_buffer();
4599       }
4600       m_bit_buffer <<= 8;
4601       m_bits_in -= 8;
4602     }
4603   }
4604 
4605   void code_coefficients_pass_one (int component_num) {
4606     if (component_num >= 3) return; // just to shut up static analysis
4607     int i, run_len, nbits, temp1;
4608     short *src = m_coefficient_array.ptr;
4609     uint *dc_count = (component_num ? m_huff_count[0+1].ptr : m_huff_count[0+0].ptr);
4610     uint *ac_count = (component_num ? m_huff_count[2+1].ptr : m_huff_count[2+0].ptr);
4611 
4612     temp1 = src[0]-m_last_dc_val[component_num];
4613     m_last_dc_val[component_num] = src[0];
4614     if (temp1 < 0) temp1 = -temp1;
4615 
4616     nbits = 0;
4617     while (temp1)
4618     {
4619       nbits++; temp1 >>= 1;
4620     }
4621 
4622     dc_count[nbits]++;
4623     for (run_len = 0, i = 1; i < 64; i++)
4624     {
4625       if ((temp1 = m_coefficient_array[i]) == 0)
4626         run_len++;
4627       else
4628       {
4629         while (run_len >= 16)
4630         {
4631           ac_count[0xF0]++;
4632           run_len -= 16;
4633         }
4634         if (temp1 < 0) temp1 = -temp1;
4635         nbits = 1;
4636         while (temp1 >>= 1) nbits++;
4637         ac_count[(run_len<<4)+nbits]++;
4638         run_len = 0;
4639       }
4640     }
4641     if (run_len) ac_count[0]++;
4642   }
4643 
4644   void code_coefficients_pass_two (int component_num) {
4645     int i, j, run_len, nbits, temp1, temp2;
4646     short *pSrc = m_coefficient_array.ptr;
4647     uint*[2] codes;
4648     ubyte*[2] code_sizes;
4649 
4650     if (component_num == 0)
4651     {
4652       codes[0] = m_huff_codes[0+0].ptr; codes[1] = m_huff_codes[2+0].ptr;
4653       code_sizes[0] = m_huff_code_sizes[0+0].ptr; code_sizes[1] = m_huff_code_sizes[2+0].ptr;
4654     }
4655     else
4656     {
4657       codes[0] = m_huff_codes[0+1].ptr; codes[1] = m_huff_codes[2+1].ptr;
4658       code_sizes[0] = m_huff_code_sizes[0+1].ptr; code_sizes[1] = m_huff_code_sizes[2+1].ptr;
4659     }
4660 
4661     temp1 = temp2 = pSrc[0]-m_last_dc_val[component_num];
4662     m_last_dc_val[component_num] = pSrc[0];
4663 
4664     if (temp1 < 0)
4665     {
4666       temp1 = -temp1; temp2--;
4667     }
4668 
4669     nbits = 0;
4670     while (temp1)
4671     {
4672       nbits++; temp1 >>= 1;
4673     }
4674 
4675     put_bits(codes[0][nbits], code_sizes[0][nbits]);
4676     if (nbits) put_bits(temp2&((1<<nbits)-1), nbits);
4677 
4678     for (run_len = 0, i = 1; i < 64; i++)
4679     {
4680       if ((temp1 = m_coefficient_array[i]) == 0)
4681         run_len++;
4682       else
4683       {
4684         while (run_len >= 16)
4685         {
4686           put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
4687           run_len -= 16;
4688         }
4689         if ((temp2 = temp1) < 0)
4690         {
4691           temp1 = -temp1;
4692           temp2--;
4693         }
4694         nbits = 1;
4695         while (temp1 >>= 1)
4696           nbits++;
4697         j = (run_len<<4)+nbits;
4698         put_bits(codes[1][j], code_sizes[1][j]);
4699         put_bits(temp2&((1<<nbits)-1), nbits);
4700         run_len = 0;
4701       }
4702     }
4703     if (run_len)
4704       put_bits(codes[1][0], code_sizes[1][0]);
4705   }
4706 
4707   void code_block (int component_num) {
4708     DCT2D(m_sample_array.ptr);
4709     load_quantized_coefficients(component_num);
4710     if (m_pass_num == 1)
4711       code_coefficients_pass_one(component_num);
4712     else
4713       code_coefficients_pass_two(component_num);
4714   }
4715 
4716   void process_mcu_row () {
4717     if (m_num_components == 1)
4718     {
4719       for (int i = 0; i < m_mcus_per_row; i++)
4720       {
4721         load_block_8_8_grey(i); code_block(0);
4722       }
4723     }
4724     else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
4725     {
4726       for (int i = 0; i < m_mcus_per_row; i++)
4727       {
4728         load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
4729       }
4730     }
4731     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
4732     {
4733       for (int i = 0; i < m_mcus_per_row; i++)
4734       {
4735         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4736         load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
4737       }
4738     }
4739     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
4740     {
4741       for (int i = 0; i < m_mcus_per_row; i++)
4742       {
4743         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4744         load_block_8_8(i*2+0, 1, 0); code_block(0); load_block_8_8(i*2+1, 1, 0); code_block(0);
4745         load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
4746       }
4747     }
4748   }
4749 
4750   bool terminate_pass_one () {
4751     optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES);
4752     if (m_num_components > 1)
4753     {
4754       optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES);
4755     }
4756     return second_pass_init();
4757   }
4758 
  // Ends the encoding pass. Always returns true; write failures are reported
  // through m_all_stream_writes_succeeded instead.
  bool terminate_pass_two () {
    // Pad with seven 1 bits: any partially filled byte in the bit buffer is
    // completed and pushed out by put_bits.
    put_bits(0x7F, 7);
    // The buffered entropy data must reach the stream before the marker,
    // because emit_marker writes directly and bypasses m_out_buf.
    flush_output_buffer();
    emit_marker(M_EOI);
    m_pass_num++; // purposely bump up m_pass_num, for debugging
    return true;
  }
4766 
4767   bool process_end_of_image () {
4768     if (m_mcu_y_ofs)
4769     {
4770       if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
4771       {
4772         for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) {
4773           import core.stdc.string : memcpy;
4774           memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs-1], m_image_bpl_mcu);
4775         }
4776       }
4777       process_mcu_row();
4778     }
4779 
4780     if (m_pass_num == 1)
4781       return terminate_pass_one();
4782     else
4783       return terminate_pass_two();
4784   }
4785 
4786   void load_mcu (const(void)* pSrc) {
4787     import core.stdc.string : memcpy;
4788     const(ubyte)* Psrc = cast(const(ubyte)*)(pSrc);
4789 
4790     ubyte* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
4791 
4792     if (m_num_components == 1)
4793     {
4794       if (m_image_bpp == 4)
4795         RGBA_to_Y(pDst, Psrc, m_image_x);
4796       else if (m_image_bpp == 3)
4797         RGB_to_Y(pDst, Psrc, m_image_x);
4798       else
4799         memcpy(pDst, Psrc, m_image_x);
4800     }
4801     else
4802     {
4803       if (m_image_bpp == 4)
4804         RGBA_to_YCC(pDst, Psrc, m_image_x);
4805       else if (m_image_bpp == 3)
4806         RGB_to_YCC(pDst, Psrc, m_image_x);
4807       else
4808         Y_to_YCC(pDst, Psrc, m_image_x);
4809     }
4810 
4811     // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
4812     if (m_num_components == 1) {
4813       import core.stdc.string : memset;
4814       memset(m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt, pDst[m_image_bpl_xlt-1], m_image_x_mcu-m_image_x);
4815     } else
4816     {
4817       const ubyte y = pDst[m_image_bpl_xlt-3+0], cb = pDst[m_image_bpl_xlt-3+1], cr = pDst[m_image_bpl_xlt-3+2];
4818       ubyte *q = m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt;
4819       for (int i = m_image_x; i < m_image_x_mcu; i++)
4820       {
4821         *q++ = y; *q++ = cb; *q++ = cr;
4822       }
4823     }
4824 
4825     if (++m_mcu_y_ofs == m_mcu_y)
4826     {
4827       process_mcu_row();
4828       m_mcu_y_ofs = 0;
4829     }
4830   }
4831 
4832   void clear() {
4833     m_mcu_lines[0] = null;
4834     m_pass_num = 0;
4835     m_all_stream_writes_succeeded = true;
4836   }
4837 
4838 
4839 public:
  //this () { clear(); }
  // Destructor: releases the MCU row buffer through deinit().
  ~this () { deinit(); }

  // Copying is disabled. NOTE(review): presumably because a copy would share
  // m_mcu_lines[0] and both destructors would then free it.
  @disable this (this); // no copies
4844 
4845   // Initializes the compressor.
4846   // pStream: The stream object to use for writing compressed data.
4847   // comp_params - Compression parameters structure, defined above.
4848   // width, height  - Image dimensions.
4849   // channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data.
4850   // Returns false on out of memory or if a stream write fails.
4851   bool setup() (WriteFunc pStream, int width, int height, int src_channels, const scope auto ref JpegParams comp_params) {
4852     deinit();
4853     if ((pStream is null || width < 1 || height < 1) || (src_channels != 1 && src_channels != 3 && src_channels != 4) || !comp_params.check()) return false;
4854     m_pStream = pStream;
4855     m_params = comp_params;
4856     return jpg_open(width, height, src_channels);
4857   }
4858 
4859   bool setup() (WriteFunc pStream, int width, int height, int src_channels) { return setup(pStream, width, height, src_channels, JpegParams()); }
4860 
  // Reference to the stored compression parameters (const-ness follows the encoder's).
  @property ref inout(JpegParams) params () return inout pure nothrow @trusted @nogc { pragma(inline, true); return m_params; }
4862 
  // Deinitializes the compressor, freeing any allocated memory. May be called at any time.
  // The only allocation is the MCU row buffer held in m_mcu_lines[0]; clear()
  // then nulls that pointer and resets the pass number, so a second call
  // passes null to jpge_free.
  void deinit () {
    jpge_free(m_mcu_lines[0]);
    clear();
  }
4868 
  // Number of times the image has to be fed in: 2 in two-pass mode, otherwise 1.
  @property uint total_passes () const pure nothrow @trusted @nogc { pragma(inline, true); return (m_params.twoPass ? 2 : 1); }
  // Current value of m_pass_num: 0 before setup, 1 or 2 while a pass is running, 3 after the last pass has been terminated.
  @property uint cur_pass () const pure nothrow @trusted @nogc { pragma(inline, true); return m_pass_num; }
4871 
4872   // Call this method with each source scanline.
4873   // width*src_channels bytes per scanline is expected (RGB or Y format).
4874   // You must call with null after all scanlines are processed to finish compression.
4875   // Returns false on out of memory or if a stream write fails.
4876   bool process_scanline (const(void)* pScanline) {
4877     if (m_pass_num < 1 || m_pass_num > 2) return false;
4878     if (m_all_stream_writes_succeeded) {
4879       if (pScanline is null) {
4880         if (!process_end_of_image()) return false;
4881       } else {
4882         load_mcu(pScanline);
4883       }
4884     }
4885     return m_all_stream_writes_succeeded;
4886   }
4887 }