The OpenD Programming Language

1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  *
40  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
41  * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
42  * Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
43  */
44 module arsd.jpeg;
45 
46 @system:
47 
48 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
49 // This is slower, but results in higher quality on images with highly saturated colors.
50 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
51 
52 /// Input stream interface.
53 /// This delegate is called when the internal input buffer is empty.
54 /// Parameters:
55 ///   pBuf - input buffer
56 ///   max_bytes_to_read - maximum bytes that can be written to pBuf
57 ///   pEOF_flag - set this to true if at end of stream (no more bytes remaining)
58 ///   Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
59 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
60 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
61 
62 
63 // ////////////////////////////////////////////////////////////////////////// //
64 private:
// Allocates `nSize` bytes with the C runtime allocator and returns whatever
// `malloc` returns (so the result may be null).
void* jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : c_malloc = malloc;
  return c_malloc(nSize);
}
// Releases a block through the C runtime `free`; a null pointer is ignored.
void jpgd_free (void* p) {
  import core.stdc.stdlib : c_free = free;
  if (p is null) return;
  c_free(p);
}
67 
68 // Success/failure error codes.
69 alias jpgd_status = int;
70 enum /*jpgd_status*/ {
71   JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
72   JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
73   JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
74   JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
75   JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
76   JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
77   JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
78   JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
79   JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM,
80 }
81 
82 enum {
83   JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
84   JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384,
85 }
86 
87 // DCT coefficients are stored in this sequence.
88 static immutable int[64] g_ZAG = [  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
89 
90 alias JPEG_MARKER = int;
91 enum /*JPEG_MARKER*/ {
92   M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
93   M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
94   M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
95   M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
96   M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0,
97 }
98 
99 alias JPEG_SUBSAMPLING = int;
100 enum /*JPEG_SUBSAMPLING*/ { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }
101 
// Fixed-point configuration used by the IDCT code below.
enum int CONST_BITS = 13; // fractional bits carried by the FIX_* multipliers
enum int PASS1_BITS = 2;  // extra bits kept in the output of the row pass
enum int SCALEDONE  = 1;  // integer 1, shifted to form the rounding term in DESCALE

// FIX(x) constants: each value is x * (1 << CONST_BITS), rounded to the nearest integer.
enum int FIX_0_298631336 = 2446;  /* FIX(0.298631336) */
enum int FIX_0_390180644 = 3196;  /* FIX(0.390180644) */
enum int FIX_0_541196100 = 4433;  /* FIX(0.541196100) */
enum int FIX_0_765366865 = 6270;  /* FIX(0.765366865) */
enum int FIX_0_899976223 = 7373;  /* FIX(0.899976223) */
enum int FIX_1_175875602 = 9633;  /* FIX(1.175875602) */
enum int FIX_1_501321110 = 12299; /* FIX(1.501321110) */
enum int FIX_1_847759065 = 15137; /* FIX(1.847759065) */
enum int FIX_1_961570560 = 16069; /* FIX(1.961570560) */
enum int FIX_2_053119869 = 16819; /* FIX(2.053119869) */
enum int FIX_2_562915447 = 20995; /* FIX(2.562915447) */
enum int FIX_3_072711026 = 25172; /* FIX(3.072711026) */
118 
// Shifts `x` right by `n` bits after adding half of the divisor (SCALEDONE << (n-1)),
// i.e. a rounding right shift. `n` is expected to be at least 1.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  return (x + (SCALEDONE << (n - 1))) >> n;
}
// Same rounding right shift as DESCALE, with an additional bias of 128 (pre-scaled by
// `n` bits) folded in, so the shifted result ends up offset by +128.
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  return (x + (128 << n) + (SCALEDONE << (n - 1))) >> n;
}
// Saturates `i` to the range 0..255: negative values give 0, values above 255 give 255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  return cast(ubyte)(i < 0 ? 0 : (i > 255 ? 255 : i));
}
122 
123 
124 // Compiler creates a fast path 1D IDCT for X non-zero columns
// 1-D inverse DCT over one row (8 coefficients) of a block.
// NONZERO_COLS is the number of leading columns that are read from the source row;
// every column at or beyond it is replaced by the constant 0 at compile time, so each
// instantiation is a specialised fast path.
struct Row(int NONZERO_COLS) {
pure nothrow @trusted @nogc:
  // Coefficient `x` of the row, or 0 when `x` lies outside the non-zero prefix.
  private static int coef(int x) (const(jpeg_decoder.jpgd_block_t)* pSrc) {
    pragma(inline, true);
    static if (x < NONZERO_COLS) return pSrc[x]; else return 0;
  }

  // Transforms the 8 coefficients at `pSrc` into 8 intermediate values at `pTemp`.
  // The outputs keep PASS1_BITS extra bits of precision.
  static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (NONZERO_COLS == 0) {
      // all-zero row: nothing is written
    } else static if (NONZERO_COLS == 1) {
      // only the first coefficient is read: every output is that value, scaled
      immutable int dc = pSrc[0] << PASS1_BITS;
      foreach (immutable k; 0 .. 8) pTemp[k] = dc;
    } else {
      enum int OUT_SHIFT = CONST_BITS - PASS1_BITS;

      // even-indexed coefficients (0, 2, 4, 6)
      immutable int c2 = coef!2(pSrc);
      immutable int c6 = coef!6(pSrc);
      immutable int rot = (c2 + c6) * FIX_0_541196100;
      immutable int e2 = rot + c6 * (-FIX_1_847759065);
      immutable int e3 = rot + c2 * FIX_0_765366865;

      immutable int c0 = coef!0(pSrc);
      immutable int c4 = coef!4(pSrc);
      immutable int e0 = (c0 + c4) << CONST_BITS;
      immutable int e1 = (c0 - c4) << CONST_BITS;

      immutable int even0 = e0 + e3;
      immutable int even1 = e1 + e2;
      immutable int even2 = e1 - e2;
      immutable int even3 = e0 - e3;

      // odd-indexed coefficients (7, 5, 3, 1)
      immutable int c7 = coef!7(pSrc);
      immutable int c5 = coef!5(pSrc);
      immutable int c3 = coef!3(pSrc);
      immutable int c1 = coef!1(pSrc);

      immutable int s71 = c7 + c1;
      immutable int s53 = c5 + c3;
      immutable int s73 = c7 + c3;
      immutable int s51 = c5 + c1;
      immutable int common = (s73 + s51) * FIX_1_175875602;

      immutable int k1 = s71 * (-FIX_0_899976223);
      immutable int k2 = s53 * (-FIX_2_562915447);
      immutable int k3 = s73 * (-FIX_1_961570560) + common;
      immutable int k4 = s51 * (-FIX_0_390180644) + common;

      immutable int odd0 = c7 * FIX_0_298631336 + k1 + k3;
      immutable int odd1 = c5 * FIX_2_053119869 + k2 + k4;
      immutable int odd2 = c3 * FIX_3_072711026 + k2 + k3;
      immutable int odd3 = c1 * FIX_1_501321110 + k1 + k4;

      // outputs k and 7-k share an even term and differ only in the sign of the odd term
      pTemp[0] = DESCALE(even0 + odd3, OUT_SHIFT);
      pTemp[7] = DESCALE(even0 - odd3, OUT_SHIFT);
      pTemp[1] = DESCALE(even1 + odd2, OUT_SHIFT);
      pTemp[6] = DESCALE(even1 - odd2, OUT_SHIFT);
      pTemp[2] = DESCALE(even2 + odd1, OUT_SHIFT);
      pTemp[5] = DESCALE(even2 - odd1, OUT_SHIFT);
      pTemp[3] = DESCALE(even3 + odd0, OUT_SHIFT);
      pTemp[4] = DESCALE(even3 - odd0, OUT_SHIFT);
    }
  }
}
184 
185 
186 // Compiler creates a fast path 1D IDCT for X non-zero rows
// 1-D inverse DCT down one column of the intermediate 8x8 buffer, producing clamped bytes.
// NONZERO_ROWS is the number of leading rows that are read from the buffer; rows at or
// beyond it are replaced by the constant 0 at compile time.
struct Col (int NONZERO_ROWS) {
pure nothrow @trusted @nogc:
  // Value of the column at row `x` (stride 8), or 0 when `x` lies outside the non-zero prefix.
  private static int rowAt(int x) (const(int)* pTemp) {
    pragma(inline, true);
    static if (x < NONZERO_ROWS) return pTemp[x * 8]; else return 0;
  }

  // Reads the column starting at `pTemp` (stride 8) and writes 8 bytes to `pDst_ptr`
  // (also stride 8). Each result is descaled, biased by +128 and clamped to 0..255.
  static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    static assert(NONZERO_ROWS > 0);
    static if (NONZERO_ROWS == 1) {
      // only the first row is read: every output in the column is the same byte
      immutable int dc = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS + 3);
      immutable ubyte flat = cast(ubyte)CLAMP(dc);
      foreach (immutable k; 0 .. 8) pDst_ptr[k * 8] = flat;
    } else {
      enum int OUT_SHIFT = CONST_BITS + PASS1_BITS + 3;

      // even-indexed rows (0, 2, 4, 6)
      immutable int r2 = rowAt!2(pTemp);
      immutable int r6 = rowAt!6(pTemp);
      immutable int rot = (r2 + r6) * FIX_0_541196100;
      immutable int e2 = rot + r6 * (-FIX_1_847759065);
      immutable int e3 = rot + r2 * FIX_0_765366865;

      immutable int r0 = rowAt!0(pTemp);
      immutable int r4 = rowAt!4(pTemp);
      immutable int e0 = (r0 + r4) << CONST_BITS;
      immutable int e1 = (r0 - r4) << CONST_BITS;

      immutable int even0 = e0 + e3;
      immutable int even1 = e1 + e2;
      immutable int even2 = e1 - e2;
      immutable int even3 = e0 - e3;

      // odd-indexed rows (7, 5, 3, 1)
      immutable int r7 = rowAt!7(pTemp);
      immutable int r5 = rowAt!5(pTemp);
      immutable int r3 = rowAt!3(pTemp);
      immutable int r1 = rowAt!1(pTemp);

      immutable int s71 = r7 + r1;
      immutable int s53 = r5 + r3;
      immutable int s73 = r7 + r3;
      immutable int s51 = r5 + r1;
      immutable int common = (s73 + s51) * FIX_1_175875602;

      immutable int k1 = s71 * (-FIX_0_899976223);
      immutable int k2 = s53 * (-FIX_2_562915447);
      immutable int k3 = s73 * (-FIX_1_961570560) + common;
      immutable int k4 = s51 * (-FIX_0_390180644) + common;

      immutable int odd0 = r7 * FIX_0_298631336 + k1 + k3;
      immutable int odd1 = r5 * FIX_2_053119869 + k2 + k4;
      immutable int odd2 = r3 * FIX_3_072711026 + k2 + k3;
      immutable int odd3 = r1 * FIX_1_501321110 + k1 + k4;

      // outputs k and 7-k share an even term and differ only in the sign of the odd term
      pDst_ptr[8*0] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even0 + odd3, OUT_SHIFT));
      pDst_ptr[8*7] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even0 - odd3, OUT_SHIFT));
      pDst_ptr[8*1] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even1 + odd2, OUT_SHIFT));
      pDst_ptr[8*6] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even1 - odd2, OUT_SHIFT));
      pDst_ptr[8*2] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even2 + odd1, OUT_SHIFT));
      pDst_ptr[8*5] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even2 - odd1, OUT_SHIFT));
      pDst_ptr[8*3] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even3 + odd0, OUT_SHIFT));
      pDst_ptr[8*4] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(even3 - odd0, OUT_SHIFT));
    }
  }
}
262 
263 
264 static immutable ubyte[512] s_idct_row_table = [
265   1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
266   4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
267   6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
268   6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
269   8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
270   8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
271   8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
272   8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
273 ];
274 
275 static immutable ubyte[64] s_idct_col_table = [ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ];
276 
// 2-D inverse DCT of one 8x8 coefficient block.
//
// Params:
//   pSrc_ptr      = 64 source coefficients, row-major
//   pDst_ptr      = destination for 64 output bytes, row-major
//   block_max_zag = value in 1..64 used to pick the specialised row/column kernels from
//                   s_idct_row_table / s_idct_col_table (presumably one past the highest
//                   zig-zag position that holds a non-zero coefficient -- confirm at the caller)
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  if (block_max_zag <= 1) {
    // Only the first coefficient is used: the whole block is one flat value.
    // The fill is a byte-wise slice assignment rather than 32-bit stores through a
    // cast pointer, so no alignment of pDst_ptr is assumed.
    immutable ubyte level = CLAMP(((pSrc_ptr[0] + 4) >> 3) + 128);
    pDst_ptr[0 .. 64] = level;
    return;
  }

  // D zero-initialises this buffer; rows handled by Row!(0) are left untouched and stay zero.
  int[64] temp;

  // Row pass: s_idct_row_table holds, for this block_max_zag, the kernel to use for each of the 8 rows.
  immutable int rowTabBase = (block_max_zag - 1) * 8;
  foreach (immutable r; 0 .. 8) {
    const(jpeg_decoder.jpgd_block_t)* pSrc = pSrc_ptr + r * 8;
    int* pTemp = temp.ptr + r * 8;
    switch (s_idct_row_table.ptr[rowTabBase + r]) {
      case 0: Row!(0).idct(pTemp, pSrc); break;
      case 1: Row!(1).idct(pTemp, pSrc); break;
      case 2: Row!(2).idct(pTemp, pSrc); break;
      case 3: Row!(3).idct(pTemp, pSrc); break;
      case 4: Row!(4).idct(pTemp, pSrc); break;
      case 5: Row!(5).idct(pTemp, pSrc); break;
      case 6: Row!(6).idct(pTemp, pSrc); break;
      case 7: Row!(7).idct(pTemp, pSrc); break;
      case 8: Row!(8).idct(pTemp, pSrc); break;
      default: assert(0);
    }
  }

  // Column pass: the same kernel is used for all 8 columns.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (immutable c; 0 .. 8) {
    ubyte* pDst = pDst_ptr + c;
    const(int)* pCol = temp.ptr + c;
    switch (nonzero_rows) {
      case 1: Col!(1).idct(pDst, pCol); break;
      case 2: Col!(2).idct(pDst, pCol); break;
      case 3: Col!(3).idct(pDst, pCol); break;
      case 4: Col!(4).idct(pDst, pCol); break;
      case 5: Col!(5).idct(pDst, pCol); break;
      case 6: Col!(6).idct(pDst, pCol); break;
      case 7: Col!(7).idct(pDst, pCol); break;
      case 8: Col!(8).idct(pDst, pCol); break;
      default: assert(0);
    }
  }
}
346 
// Inverse DCT variant that reads only the top-left 4x4 coefficients of the block at
// `pSrc_ptr` and writes a full 8x8 block of bytes to `pDst_ptr`.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  // zero-initialised by D; only the first four rows are filled by the row pass
  int[64] temp;

  // row pass over the first 4 rows, 4 columns each
  foreach (immutable r; 0 .. 4) {
    Row!(4).idct(temp.ptr + r * 8, pSrc_ptr + r * 8);
  }

  // column pass over all 8 columns, reading 4 rows each
  foreach (immutable c; 0 .. 8) {
    Col!(4).idct(pDst_ptr + c, temp.ptr + c);
  }
}
367 
368 
369 // ////////////////////////////////////////////////////////////////////////// //
370 struct jpeg_decoder {
371 private import core.stdc.string : memcpy, memset;
372 private:
373   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
374   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
375 
376   alias jpgd_quant_t = short;
377   alias jpgd_block_t = short;
378   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
379 
380   static struct huff_tables {
381     bool ac_table;
382     uint[256] look_up;
383     uint[256] look_up2;
384     ubyte[256] code_size;
385     uint[512] tree;
386   }
387 
388   static struct coeff_buf {
389     ubyte* pData;
390     int block_num_x, block_num_y;
391     int block_len_x, block_len_y;
392     int block_size;
393   }
394 
395   static struct mem_block {
396     mem_block* m_pNext;
397     size_t m_used_count;
398     size_t m_size;
399     char[1] m_data;
400   }
401 
402   mem_block* m_pMem_blocks;
403   int m_image_x_size;
404   int m_image_y_size;
405   JpegStreamReadFunc readfn;
406   int m_progressive_flag;
407   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
408   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
409   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
410   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
411   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
412   int m_comps_in_frame;                         // # of components in frame
413   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
414   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
415   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
416   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
417   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
418   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
419   int m_comps_in_scan;                          // # of components in scan
420   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
421   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
422   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
423   int m_spectral_start;                         // spectral selection start
424   int m_spectral_end;                           // spectral selection end
425   int m_successive_low;                         // successive approximation low
426   int m_successive_high;                        // successive approximation high
427   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
428   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
429   int m_blocks_per_mcu;
430   int m_max_blocks_per_row;
431   int m_mcus_per_row, m_mcus_per_col;
432   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
433   int m_total_lines_left;                       // total # lines left in image
434   int m_mcu_lines_left;                         // total # lines left in this MCU
435   int m_real_dest_bytes_per_scan_line;
436   int m_dest_bytes_per_scan_line;               // rounded up
437   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
438   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
439   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
440   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
441   int m_eob_run;
442   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
443   ubyte* m_pIn_buf_ofs;
444   int m_in_buf_left;
445   int m_tem_flag;
446   bool m_eof_flag;
447   ubyte[128] m_in_buf_pad_start;
448   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
449   ubyte[128] m_in_buf_pad_end;
450   int m_bits_left;
451   uint m_bit_buf;
452   int m_restart_interval;
453   int m_restarts_left;
454   int m_next_restart_num;
455   int m_max_mcus_per_row;
456   int m_max_blocks_per_mcu;
457   int m_expanded_blocks_per_mcu;
458   int m_expanded_blocks_per_row;
459   int m_expanded_blocks_per_component;
460   bool m_freq_domain_chroma_upsample;
461   int m_max_mcus_per_col;
462   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
463   jpgd_block_t* m_pMCU_coefficients;
464   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
465   ubyte* m_pSample_buf;
466   int[256] m_crr;
467   int[256] m_cbb;
468   int[256] m_crg;
469   int[256] m_cbg;
470   ubyte* m_pScan_line_0;
471   ubyte* m_pScan_line_1;
472   jpgd_status m_error_code;
473   bool m_ready_flag;
474   int m_total_bytes_read;
475 
476 public:
477   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
478   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
479   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
480 
481   ~this () { free_all_blocks(); }
482 
483   @disable this (this); // no copies
484 
485   // Call this method after constructing the object to begin decompression.
486   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
487   int begin_decoding () {
488     if (m_ready_flag) return JPGD_SUCCESS;
489     if (m_error_code) return JPGD_FAILED;
490     try {
491       decode_start();
492       m_ready_flag = true;
493       return JPGD_SUCCESS;
494     } catch (Exception e) {
495       //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("ERROR: %.*s...\n", cast(int)e.msg.length, e.msg.ptr); }}
496       version(jpegd_test) {{ import std.stdio; stderr.writeln(e.toString); }}
497     }
498     return JPGD_FAILED;
499   }
500 
501   // Returns the next scan line.
502   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
503   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
504   // Returns JPGD_SUCCESS if a scan line has been returned.
505   // Returns JPGD_DONE if all scan lines have been returned.
506   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for more info.
507   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
508     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
509     if (m_total_lines_left == 0) return JPGD_DONE;
510     try {
511       if (m_mcu_lines_left == 0) {
512         if (m_progressive_flag) load_next_row(); else decode_next_row();
513         // Find the EOI marker if that was the last row.
514         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
515         m_mcu_lines_left = m_max_mcu_y_size;
516       }
517       if (m_freq_domain_chroma_upsample) {
518         expanded_convert();
519         *pScan_line = m_pScan_line_0;
520       } else {
521         switch (m_scan_type) {
522           case JPGD_YH2V2:
523             if ((m_mcu_lines_left & 1) == 0) {
524               H2V2Convert();
525               *pScan_line = m_pScan_line_0;
526             } else {
527               *pScan_line = m_pScan_line_1;
528             }
529             break;
530           case JPGD_YH2V1:
531             H2V1Convert();
532             *pScan_line = m_pScan_line_0;
533             break;
534           case JPGD_YH1V2:
535             if ((m_mcu_lines_left & 1) == 0) {
536               H1V2Convert();
537               *pScan_line = m_pScan_line_0;
538             } else {
539               *pScan_line = m_pScan_line_1;
540             }
541             break;
542           case JPGD_YH1V1:
543             H1V1Convert();
544             *pScan_line = m_pScan_line_0;
545             break;
546           case JPGD_GRAYSCALE:
547             gray_convert();
548             *pScan_line = m_pScan_line_0;
549             break;
550           default:
551         }
552       }
553       *pScan_line_len = m_real_dest_bytes_per_scan_line;
554       --m_mcu_lines_left;
555       --m_total_lines_left;
556       return JPGD_SUCCESS;
557     } catch (Exception) {}
558     return JPGD_FAILED;
559   }
560 
561   @property const pure nothrow @trusted @nogc {
562     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
563 
564     int width () { pragma(inline, true); return m_image_x_size; }
565     int height () { pragma(inline, true); return m_image_y_size; }
566 
567     int num_components () { pragma(inline, true); return m_comps_in_frame; }
568 
569     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
570     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
571 
572     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
573     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
574   }
575 
576 private:
577   // Retrieve one character from the input stream.
578   uint get_char () {
579     // Any bytes remaining in buffer?
580     if (!m_in_buf_left) {
581       // Try to get more bytes.
582       prep_in_buffer();
583       // Still nothing to get?
584       if (!m_in_buf_left) {
585         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
586         int t = m_tem_flag;
587         m_tem_flag ^= 1;
588         return (t ? 0xD9 : 0xFF);
589       }
590     }
591     uint c = *m_pIn_buf_ofs++;
592     --m_in_buf_left;
593     return c;
594   }
595 
596   // Same as previous method, except can indicate if the character is a pad character or not.
597   uint get_char (bool* pPadding_flag) {
598     if (!m_in_buf_left) {
599       prep_in_buffer();
600       if (!m_in_buf_left) {
601         *pPadding_flag = true;
602         int t = m_tem_flag;
603         m_tem_flag ^= 1;
604         return (t ? 0xD9 : 0xFF);
605       }
606     }
607     *pPadding_flag = false;
608     uint c = *m_pIn_buf_ofs++;
609     --m_in_buf_left;
610     return c;
611   }
612 
613   // Inserts a previously retrieved character back into the input buffer.
614   void stuff_char (ubyte q) {
615     *(--m_pIn_buf_ofs) = q;
616     m_in_buf_left++;
617   }
618 
619   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
620   ubyte get_octet () {
621     bool padding_flag;
622     int c = get_char(&padding_flag);
623     if (c == 0xFF) {
624       if (padding_flag) return 0xFF;
625       c = get_char(&padding_flag);
626       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
627       if (c == 0x00) return 0xFF;
628       stuff_char(cast(ubyte)(c));
629       stuff_char(0xFF);
630       return 0xFF;
631     }
632     return cast(ubyte)(c);
633   }
634 
635   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
636   uint get_bits (int num_bits) {
637     if (!num_bits) return 0;
638     uint i = m_bit_buf >> (32 - num_bits);
639     if ((m_bits_left -= num_bits) <= 0) {
640       m_bit_buf <<= (num_bits += m_bits_left);
641       uint c1 = get_char();
642       uint c2 = get_char();
643       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
644       m_bit_buf <<= -m_bits_left;
645       m_bits_left += 16;
646       assert(m_bits_left >= 0);
647     } else {
648       m_bit_buf <<= num_bits;
649     }
650     return i;
651   }
652 
653   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
654   uint get_bits_no_markers (int num_bits) {
655     if (!num_bits) return 0;
656     uint i = m_bit_buf >> (32 - num_bits);
657     if ((m_bits_left -= num_bits) <= 0) {
658       m_bit_buf <<= (num_bits += m_bits_left);
659       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
660         uint c1 = get_octet();
661         uint c2 = get_octet();
662         m_bit_buf |= (c1 << 8) | c2;
663       } else {
664         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
665         m_in_buf_left -= 2;
666         m_pIn_buf_ofs += 2;
667       }
668       m_bit_buf <<= -m_bits_left;
669       m_bits_left += 16;
670       assert(m_bits_left >= 0);
671     } else {
672       m_bit_buf <<= num_bits;
673     }
674     return i;
675   }
676 
677   // Decodes a Huffman encoded symbol.
678   int huff_decode (huff_tables *pH) {
679     int symbol;
680     // Check first 8-bits: do we have a complete symbol?
681     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
682       // Decode more bits, use a tree traversal to find symbol.
683       int ofs = 23;
684       do {
685         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
686         --ofs;
687       } while (symbol < 0);
688       get_bits_no_markers(8 + (23 - ofs));
689     } else {
690       get_bits_no_markers(pH.code_size.ptr[symbol]);
691     }
692     return symbol;
693   }
694 
695   // Decodes a Huffman encoded symbol.
696   int huff_decode (huff_tables *pH, ref int extra_bits) {
697     int symbol;
698     // Check first 8-bits: do we have a complete symbol?
699     if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
700       // Use a tree traversal to find symbol.
701       int ofs = 23;
702       do {
703         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
704         --ofs;
705       } while (symbol < 0);
706       get_bits_no_markers(8 + (23 - ofs));
707       extra_bits = get_bits_no_markers(symbol & 0xF);
708     } else {
709       assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
710       if (symbol & 0x8000) {
711         get_bits_no_markers((symbol >> 8) & 31);
712         extra_bits = symbol >> 16;
713       } else {
714         int code_size = (symbol >> 8) & 31;
715         int num_extra_bits = symbol & 0xF;
716         int bits = code_size + num_extra_bits;
717         if (bits <= (m_bits_left + 16)) {
718           extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
719         } else {
720           get_bits_no_markers(code_size);
721           extra_bits = get_bits_no_markers(num_extra_bits);
722         }
723       }
724       symbol &= 0xFF;
725     }
726     return symbol;
727   }
728 
729   // Tables and macro used to fully decode the DPCM differences.
730   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
731   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
732   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
733   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
734   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
735   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
736 
737   // Clamps a value between 0-255.
      // `clamp` is only an alias: the implementation is `CLAMP`, which is declared outside this
      // part of the file (presumably with the same semantics as the commented-out reference
      // version below -- confirm against its definition).
738   //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
739   alias clamp = CLAMP;
740 
741   static struct DCT_Upsample {
742   static:
743     static struct Matrix44 {
744     pure nothrow @trusted @nogc:
745       alias Element_Type = int;
746       enum { NUM_ROWS = 4, NUM_COLS = 4 }
747 
748       Element_Type[NUM_COLS][NUM_ROWS] v;
749 
750       this() (const scope auto ref Matrix44 m) {
751         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
752       }
753 
754       //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
755       //@property int cols () const { pragma(inline, true); return NUM_COLS; }
756 
757       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
758 
759       ref Matrix44 opOpAssign(string op:"+") (const scope auto ref Matrix44 a) {
760         foreach (int r; 0..NUM_ROWS) {
761           at(r, 0) += a.at(r, 0);
762           at(r, 1) += a.at(r, 1);
763           at(r, 2) += a.at(r, 2);
764           at(r, 3) += a.at(r, 3);
765         }
766         return this;
767       }
768 
769       ref Matrix44 opOpAssign(string op:"-") (const scope auto ref Matrix44 a) {
770         foreach (int r; 0..NUM_ROWS) {
771           at(r, 0) -= a.at(r, 0);
772           at(r, 1) -= a.at(r, 1);
773           at(r, 2) -= a.at(r, 2);
774           at(r, 3) -= a.at(r, 3);
775         }
776         return this;
777       }
778 
779       Matrix44 opBinary(string op:"+") (const scope auto ref Matrix44 b) const {
780         alias a = this;
781         Matrix44 ret;
782         foreach (int r; 0..NUM_ROWS) {
783           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
784           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
785           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
786           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
787         }
788         return ret;
789       }
790 
791       Matrix44 opBinary(string op:"-") (const scope auto ref Matrix44 b) const {
792         alias a = this;
793         Matrix44 ret;
794         foreach (int r; 0..NUM_ROWS) {
795           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
796           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
797           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
798           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
799         }
800         return ret;
801       }
802 
803       static void add_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
804         foreach (int r; 0..4) {
805           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
806           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
807           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
808           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
809         }
810       }
811 
812       static void sub_and_store() (jpgd_block_t* pDst, const scope auto ref Matrix44 a, const scope auto ref Matrix44 b) {
813         foreach (int r; 0..4) {
814           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
815           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
816           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
817           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
818         }
819       }
820     }
821 
822     enum FRACT_BITS = 10;
823     enum SCALE = 1 << FRACT_BITS;
824 
825     alias Temp_Type = int;
826     //TODO: convert defines to mixins
827     //#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
828     //#define F(i) ((int)((i) * SCALE + .5f))
829     // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
830     //#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
831 
832     static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
833     enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
834 
835     // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
        // P_Q!(NUM_ROWS, NUM_COLS).calc fills the 4x4 matrices P and Q from the coefficient block
        // at pSrc. The block is addressed as pSrc[c + r*8], i.e. with a row stride of 8 elements.
        // All arithmetic is fixed point: coefficients come from F!(...) and every weighted sum is
        // rounded back to an integer by D(...).
836     static struct P_Q(int NUM_ROWS, int NUM_COLS) {
837       static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
838         //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
            // AT!(c, r) is a compile-time string that is mixed in below: "0" when (c, r) lies outside
            // the NUM_COLS x NUM_ROWS region, otherwise the expression "pSrc[c+r*8]". Entries outside
            // that region are therefore never read from memory.
839         template AT(int c, int r) {
840           static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
841         }
842         // 4x8 = 4x8 times 8x8, matrix 0 is constant
            // Naming: X0ij is row i, column j of the intermediate 4x8 matrix.
            // Rows 0 and 2 are plain copies of the source entries with c = 0 and c = 4;
            // rows 1 and 3 are weighted sums of the entries with c = 1, 3, 5, 7.
843         immutable Temp_Type X000 = mixin(AT!(0, 0));
844         immutable Temp_Type X001 = mixin(AT!(0, 1));
845         immutable Temp_Type X002 = mixin(AT!(0, 2));
846         immutable Temp_Type X003 = mixin(AT!(0, 3));
847         immutable Temp_Type X004 = mixin(AT!(0, 4));
848         immutable Temp_Type X005 = mixin(AT!(0, 5));
849         immutable Temp_Type X006 = mixin(AT!(0, 6));
850         immutable Temp_Type X007 = mixin(AT!(0, 7));
851         immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
852         immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
853         immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
854         immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
855         immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
856         immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
857         immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
858         immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
859         immutable Temp_Type X020 = mixin(AT!(4, 0));
860         immutable Temp_Type X021 = mixin(AT!(4, 1));
861         immutable Temp_Type X022 = mixin(AT!(4, 2));
862         immutable Temp_Type X023 = mixin(AT!(4, 3));
863         immutable Temp_Type X024 = mixin(AT!(4, 4));
864         immutable Temp_Type X025 = mixin(AT!(4, 5));
865         immutable Temp_Type X026 = mixin(AT!(4, 6));
866         immutable Temp_Type X027 = mixin(AT!(4, 7));
867         immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
868         immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
869         immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
870         immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
871         immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
872         immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
873         immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
874         immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));
875 
876         // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // P: columns 0 and 2 copy intermediate columns 0 and 4; columns 1 and 3 are weighted sums
            // of the odd intermediate columns (1, 3, 5, 7), using the same two coefficient sets as the
            // X01j / X03j rows above.
877         P.at(0, 0) = X000;
878         P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
879         P.at(0, 2) = X004;
880         P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
881         P.at(1, 0) = X010;
882         P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
883         P.at(1, 2) = X014;
884         P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
885         P.at(2, 0) = X020;
886         P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
887         P.at(2, 2) = X024;
888         P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
889         P.at(3, 0) = X030;
890         P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
891         P.at(3, 2) = X034;
892         P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
893         // 40 muls 24 adds
894 
895         // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // Q: columns 1 and 3 copy intermediate columns 2 and 6; columns 0 and 2 are weighted sums
            // of the odd intermediate columns with a second pair of coefficient sets.
896         Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
897         Q.at(0, 1) = X002;
898         Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
899         Q.at(0, 3) = X006;
900         Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
901         Q.at(1, 1) = X012;
902         Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
903         Q.at(1, 3) = X016;
904         Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
905         Q.at(2, 1) = X022;
906         Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
907         Q.at(2, 3) = X026;
908         Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
909         Q.at(3, 1) = X032;
910         Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
911         Q.at(3, 3) = X036;
912         // 40 muls 24 adds
913       }
914     }
915 
        // R_S!(NUM_ROWS, NUM_COLS).calc fills the 4x4 matrices R and S from the coefficient block
        // at pSrc (addressed as pSrc[c + r*8]). NUM_ROWS/NUM_COLS bound the region of the block that
        // is actually read; everything outside it is treated as zero.
        // All arithmetic is fixed point: coefficients come from F!(...) and every weighted sum is
        // rounded back to an integer by D(...).
916     static struct R_S(int NUM_ROWS, int NUM_COLS) {
917       static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
918         //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
            // AT!(c, r) is a compile-time string that is mixed in below: "0" when (c, r) lies outside
            // the NUM_COLS x NUM_ROWS region, otherwise the expression "pSrc[c+r*8]".
919         template AT(int c, int r) {
920           static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
921         }
922         // 4x8 = 4x8 times 8x8, matrix 0 is constant
            // Naming: X1ij is row i, column j of the intermediate 4x8 matrix.
            // Rows 0 and 2 are weighted sums of the source entries with c = 1, 3, 5, 7;
            // rows 1 and 3 are plain copies of the entries with c = 2 and c = 6.
923         immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
924         immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
925         immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
926         immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
927         immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
928         immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
929         immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
930         immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
931         immutable Temp_Type X110 = mixin(AT!(2, 0));
932         immutable Temp_Type X111 = mixin(AT!(2, 1));
933         immutable Temp_Type X112 = mixin(AT!(2, 2));
934         immutable Temp_Type X113 = mixin(AT!(2, 3));
935         immutable Temp_Type X114 = mixin(AT!(2, 4));
936         immutable Temp_Type X115 = mixin(AT!(2, 5));
937         immutable Temp_Type X116 = mixin(AT!(2, 6));
938         immutable Temp_Type X117 = mixin(AT!(2, 7));
939         immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
940         immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
941         immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
942         immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
943         immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
944         immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
945         immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
946         immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
947         immutable Temp_Type X130 = mixin(AT!(6, 0));
948         immutable Temp_Type X131 = mixin(AT!(6, 1));
949         immutable Temp_Type X132 = mixin(AT!(6, 2));
950         immutable Temp_Type X133 = mixin(AT!(6, 3));
951         immutable Temp_Type X134 = mixin(AT!(6, 4));
952         immutable Temp_Type X135 = mixin(AT!(6, 5));
953         immutable Temp_Type X136 = mixin(AT!(6, 6));
954         immutable Temp_Type X137 = mixin(AT!(6, 7));
955         // 80 muls 48 adds
956 
957         // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // R: columns 0 and 2 copy intermediate columns 0 and 4; columns 1 and 3 are weighted sums
            // of the odd intermediate columns (1, 3, 5, 7).
958         R.at(0, 0) = X100;
959         R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
960         R.at(0, 2) = X104;
961         R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
962         R.at(1, 0) = X110;
963         R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
964         R.at(1, 2) = X114;
965         R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
966         R.at(2, 0) = X120;
967         R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
968         R.at(2, 2) = X124;
969         R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
970         R.at(3, 0) = X130;
971         R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
972         R.at(3, 2) = X134;
973         R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
974         // 40 muls 24 adds
975         // 4x4 = 4x8 times 8x4, matrix 1 is constant
            // S: columns 1 and 3 copy intermediate columns 2 and 6; columns 0 and 2 are weighted sums
            // of the odd intermediate columns, using the same two coefficient sets as the X10j / X12j
            // rows above.
976         S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
977         S.at(0, 1) = X102;
978         S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
979         S.at(0, 3) = X106;
980         S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
981         S.at(1, 1) = X112;
982         S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
983         S.at(1, 3) = X116;
984         S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
985         S.at(2, 1) = X122;
986         S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
987         S.at(2, 3) = X126;
988         S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
989         S.at(3, 1) = X132;
990         S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
991         S.at(3, 3) = X136;
992         // 40 muls 24 adds
993       }
994     }
995   } // end namespace DCT_Upsample
996 
997   // Unconditionally frees all allocated m_blocks.
998   void free_all_blocks () {
999     //m_pStream = null;
1000     readfn = null;
1001     for (mem_block *b = m_pMem_blocks; b; ) {
1002       mem_block* n = b.m_pNext;
1003       jpgd_free(b);
1004       b = n;
1005     }
1006     m_pMem_blocks = null;
1007   }
1008 
1009   // This method handles all errors. It will never return.
1010   // It could easily be changed to use C++ exceptions.
1011   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status, size_t line=__LINE__) {
1012     m_error_code = status;
1013     free_all_blocks();
1014     //longjmp(m_jmp_state, status);
1015     throw new Exception("jpeg decoding error", __FILE__, line);
1016   }
1017 
1018   void* alloc (size_t nSize, bool zero=false) {
1019     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1020     char *rv = null;
1021     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1022     {
1023       if ((b.m_used_count + nSize) <= b.m_size)
1024       {
1025         rv = b.m_data.ptr + b.m_used_count;
1026         b.m_used_count += nSize;
1027         break;
1028       }
1029     }
1030     if (!rv)
1031     {
1032       size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1033       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1034       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1035       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1036       b.m_used_count = nSize;
1037       b.m_size = capacity;
1038       rv = b.m_data.ptr;
1039     }
1040     if (zero) memset(rv, 0, nSize);
1041     return rv;
1042   }
1043 
1044   void word_clear (void *p, ushort c, uint n) {
1045     ubyte *pD = cast(ubyte*)p;
1046     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1047     while (n)
1048     {
1049       pD[0] = l; pD[1] = h; pD += 2;
1050       n--;
1051     }
1052   }
1053 
1054   // Refill the input buffer.
1055   // This method will sit in a loop until (A) the buffer is full or (B)
1056   // the stream's read() method reports and end of file condition.
1057   void prep_in_buffer () {
1058     m_in_buf_left = 0;
1059     m_pIn_buf_ofs = m_in_buf.ptr;
1060 
1061     if (m_eof_flag)
1062       return;
1063 
1064     do
1065     {
1066       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1067       if (bytes_read == -1)
1068         stop_decoding(JPGD_STREAM_READ);
1069 
1070       m_in_buf_left += bytes_read;
1071     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1072 
1073     m_total_bytes_read += m_in_buf_left;
1074 
1075     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1076     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1077     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1078   }
1079 
1080   // Read a Huffman code table.
1081   void read_dht_marker () {
1082     int i, index, count;
1083     ubyte[17] huff_num;
1084     ubyte[256] huff_val;
1085 
1086     uint num_left = get_bits(16);
1087 
1088     if (num_left < 2)
1089       stop_decoding(JPGD_BAD_DHT_MARKER);
1090 
1091     num_left -= 2;
1092 
1093     while (num_left)
1094     {
1095       index = get_bits(8);
1096 
1097       huff_num.ptr[0] = 0;
1098 
1099       count = 0;
1100 
1101       for (i = 1; i <= 16; i++)
1102       {
1103         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1104         count += huff_num.ptr[i];
1105       }
1106 
1107       if (count > 255)
1108         stop_decoding(JPGD_BAD_DHT_COUNTS);
1109 
1110       for (i = 0; i < count; i++)
1111         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1112 
1113       i = 1 + 16 + count;
1114 
1115       if (num_left < cast(uint)i)
1116         stop_decoding(JPGD_BAD_DHT_MARKER);
1117 
1118       num_left -= i;
1119 
1120       if ((index & 0x10) > 0x10)
1121         stop_decoding(JPGD_BAD_DHT_INDEX);
1122 
1123       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1124 
1125       if (index >= JPGD_MAX_HUFF_TABLES)
1126         stop_decoding(JPGD_BAD_DHT_INDEX);
1127 
1128       if (!m_huff_num.ptr[index])
1129         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1130 
1131       if (!m_huff_val.ptr[index])
1132         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1133 
1134       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1135       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1136       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1137     }
1138   }
1139 
1140   // Read a quantization table.
1141   void read_dqt_marker () {
1142     int n, i, prec;
1143     uint num_left;
1144     uint temp;
1145 
1146     num_left = get_bits(16);
1147 
1148     if (num_left < 2)
1149       stop_decoding(JPGD_BAD_DQT_MARKER);
1150 
1151     num_left -= 2;
1152 
1153     while (num_left)
1154     {
1155       n = get_bits(8);
1156       prec = n >> 4;
1157       n &= 0x0F;
1158 
1159       if (n >= JPGD_MAX_QUANT_TABLES)
1160         stop_decoding(JPGD_BAD_DQT_TABLE);
1161 
1162       if (!m_quant.ptr[n])
1163         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1164 
1165       // read quantization entries, in zag order
1166       for (i = 0; i < 64; i++)
1167       {
1168         temp = get_bits(8);
1169 
1170         if (prec)
1171           temp = (temp << 8) + get_bits(8);
1172 
1173         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1174       }
1175 
1176       i = 64 + 1;
1177 
1178       if (prec)
1179         i += 64;
1180 
1181       if (num_left < cast(uint)i)
1182         stop_decoding(JPGD_BAD_DQT_LENGTH);
1183 
1184       num_left -= i;
1185     }
1186   }
1187 
1188   // Read the start of frame (SOF) marker.
1189   void read_sof_marker () {
1190     int i;
1191     uint num_left;
1192 
1193     num_left = get_bits(16);
1194 
1195     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1196       stop_decoding(JPGD_BAD_PRECISION);
1197 
1198     m_image_y_size = get_bits(16);
1199 
1200     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1201       stop_decoding(JPGD_BAD_HEIGHT);
1202 
1203     m_image_x_size = get_bits(16);
1204 
1205     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1206       stop_decoding(JPGD_BAD_WIDTH);
1207 
1208     m_comps_in_frame = get_bits(8);
1209 
1210     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1211       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1212 
1213     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1214       stop_decoding(JPGD_BAD_SOF_LENGTH);
1215 
1216     for (i = 0; i < m_comps_in_frame; i++)
1217     {
1218       m_comp_ident.ptr[i]  = get_bits(8);
1219       m_comp_h_samp.ptr[i] = get_bits(4);
1220       m_comp_v_samp.ptr[i] = get_bits(4);
1221       m_comp_quant.ptr[i]  = get_bits(8);
1222     }
1223   }
1224 
1225   // Used to skip unrecognized markers.
1226   void skip_variable_marker () {
1227     uint num_left;
1228 
1229     num_left = get_bits(16);
1230 
1231     if (num_left < 2)
1232       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1233 
1234     num_left -= 2;
1235 
1236     while (num_left)
1237     {
1238       get_bits(8);
1239       num_left--;
1240     }
1241   }
1242 
1243   // Read a define restart interval (DRI) marker.
1244   void read_dri_marker () {
1245     if (get_bits(16) != 4)
1246       stop_decoding(JPGD_BAD_DRI_LENGTH);
1247 
1248     m_restart_interval = get_bits(16);
1249   }
1250 
1251   // Read a start of scan (SOS) marker.
1252   void read_sos_marker () {
1253     uint num_left;
1254     int i, ci, n, c, cc;
1255 
1256     num_left = get_bits(16);
1257 
1258     n = get_bits(8);
1259 
1260     m_comps_in_scan = n;
1261 
1262     num_left -= 3;
1263 
1264     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1265       stop_decoding(JPGD_BAD_SOS_LENGTH);
1266 
1267     for (i = 0; i < n; i++)
1268     {
1269       cc = get_bits(8);
1270       c = get_bits(8);
1271       num_left -= 2;
1272 
1273       for (ci = 0; ci < m_comps_in_frame; ci++)
1274         if (cc == m_comp_ident.ptr[ci])
1275           break;
1276 
1277       if (ci >= m_comps_in_frame)
1278         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1279 
1280       m_comp_list.ptr[i]    = ci;
1281       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1282       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1283     }
1284 
1285     m_spectral_start  = get_bits(8);
1286     m_spectral_end    = get_bits(8);
1287     m_successive_high = get_bits(4);
1288     m_successive_low  = get_bits(4);
1289 
1290     if (!m_progressive_flag)
1291     {
1292       m_spectral_start = 0;
1293       m_spectral_end = 63;
1294     }
1295 
1296     num_left -= 3;
1297 
1298     /* read past whatever is num_left */
1299     while (num_left)
1300     {
1301       get_bits(8);
1302       num_left--;
1303     }
1304   }
1305 
1306   // Finds the next marker.
1307   int next_marker () {
1308     uint c, bytes;
1309 
1310     bytes = 0;
1311 
1312     do
1313     {
1314       do
1315       {
1316         bytes++;
1317         c = get_bits(8);
1318       } while (c != 0xFF);
1319 
1320       do
1321       {
1322         c = get_bits(8);
1323       } while (c == 0xFF);
1324 
1325     } while (c == 0);
1326 
1327     // If bytes > 0 here, there where extra bytes before the marker (not good).
1328 
1329     return c;
1330   }
1331 
1332   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1333   // encountered.
1334   int process_markers (bool allow_restarts = false) {
1335     int c;
1336 
1337     for ( ; ; ) {
1338       c = next_marker();
1339 
1340       switch (c)
1341       {
1342         case M_SOF0:
1343         case M_SOF1:
1344         case M_SOF2:
1345         case M_SOF3:
1346         case M_SOF5:
1347         case M_SOF6:
1348         case M_SOF7:
1349         //case M_JPG:
1350         case M_SOF9:
1351         case M_SOF10:
1352         case M_SOF11:
1353         case M_SOF13:
1354         case M_SOF14:
1355         case M_SOF15:
1356         case M_SOI:
1357         case M_EOI:
1358         case M_SOS:
1359           return c;
1360         case M_DHT:
1361           read_dht_marker();
1362           break;
1363         // No arithmitic support - dumb patents!
1364         case M_DAC:
1365           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1366           break;
1367         case M_DQT:
1368           read_dqt_marker();
1369           break;
1370         case M_DRI:
1371           read_dri_marker();
1372           break;
1373         //case M_APP0:  /* no need to read the JFIF marker */
1374 
1375         case M_RST0:    /* no parameters */
1376         case M_RST1:
1377         case M_RST2:
1378         case M_RST3:
1379         case M_RST4:
1380         case M_RST5:
1381         case M_RST6:
1382         case M_RST7:
1383 		if(allow_restarts)
1384 			continue;
1385 		else
1386 			goto case;
1387         case M_JPG:
1388         case M_TEM:
1389           stop_decoding(JPGD_UNEXPECTED_MARKER);
1390           break;
1391         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1392           skip_variable_marker();
1393           break;
1394       }
1395     }
1396 
1397     assert(0);
1398   }
1399 
1400   // Finds the start of image (SOI) marker.
1401   // This code is rather defensive: it only checks the first 512 bytes to avoid
1402   // false positives.
1403   void locate_soi_marker () {
1404     uint lastchar, thischar;
1405     uint bytesleft;
1406 
1407     lastchar = get_bits(8);
1408 
1409     thischar = get_bits(8);
1410 
1411     /* ok if it's a normal JPEG file without a special header */
1412 
1413     if ((lastchar == 0xFF) && (thischar == M_SOI))
1414       return;
1415 
1416     bytesleft = 4096; //512;
1417 
1418     for ( ; ; )
1419     {
1420       if (--bytesleft == 0)
1421         stop_decoding(JPGD_NOT_JPEG);
1422 
1423       lastchar = thischar;
1424 
1425       thischar = get_bits(8);
1426 
1427       if (lastchar == 0xFF)
1428       {
1429         if (thischar == M_SOI)
1430           break;
1431         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1432           stop_decoding(JPGD_NOT_JPEG);
1433       }
1434     }
1435 
1436     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1437     thischar = (m_bit_buf >> 24) & 0xFF;
1438 
1439     if (thischar != 0xFF)
1440       stop_decoding(JPGD_NOT_JPEG);
1441   }
1442 
1443   // Find a start of frame (SOF) marker.
1444   void locate_sof_marker () {
1445     locate_soi_marker();
1446 
1447     int c = process_markers();
1448 
1449     switch (c)
1450     {
1451       case M_SOF2:
1452         m_progressive_flag = true;
1453         goto case;
1454       case M_SOF0:  /* baseline DCT */
1455       case M_SOF1:  /* extended sequential DCT */
1456         read_sof_marker();
1457         break;
1458       case M_SOF9:  /* Arithmitic coding */
1459         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1460         break;
1461       default:
1462         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1463         break;
1464     }
1465   }
1466 
1467   // Find a start of scan (SOS) marker.
1468   int locate_sos_marker () {
1469     int c;
1470 
1471     c = process_markers();
1472 
1473     if (c == M_EOI)
1474       return false;
1475     else if (c != M_SOS)
1476       stop_decoding(JPGD_UNEXPECTED_MARKER);
1477 
1478     read_sos_marker();
1479 
1480     return true;
1481   }
1482 
1483   // Reset everything to default/uninitialized state.
1484   void initit (JpegStreamReadFunc rfn) {
1485     m_pMem_blocks = null;
1486     m_error_code = JPGD_SUCCESS;
1487     m_ready_flag = false;
1488     m_image_x_size = m_image_y_size = 0;
1489     readfn = rfn;
1490     m_progressive_flag = false;
1491 
1492     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1493     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1494     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1495     memset(m_quant.ptr, 0, m_quant.sizeof);
1496 
1497     m_scan_type = 0;
1498     m_comps_in_frame = 0;
1499 
1500     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1501     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1502     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1503     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1504     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1505     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1506 
1507     m_comps_in_scan = 0;
1508     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1509     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1510     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1511 
1512     m_spectral_start = 0;
1513     m_spectral_end = 0;
1514     m_successive_low = 0;
1515     m_successive_high = 0;
1516     m_max_mcu_x_size = 0;
1517     m_max_mcu_y_size = 0;
1518     m_blocks_per_mcu = 0;
1519     m_max_blocks_per_row = 0;
1520     m_mcus_per_row = 0;
1521     m_mcus_per_col = 0;
1522     m_expanded_blocks_per_component = 0;
1523     m_expanded_blocks_per_mcu = 0;
1524     m_expanded_blocks_per_row = 0;
1525     m_freq_domain_chroma_upsample = false;
1526 
1527     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1528 
1529     m_total_lines_left = 0;
1530     m_mcu_lines_left = 0;
1531     m_real_dest_bytes_per_scan_line = 0;
1532     m_dest_bytes_per_scan_line = 0;
1533     m_dest_bytes_per_pixel = 0;
1534 
1535     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1536 
1537     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1538     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1539     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1540 
1541     m_eob_run = 0;
1542 
1543     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1544 
1545     m_pIn_buf_ofs = m_in_buf.ptr;
1546     m_in_buf_left = 0;
1547     m_eof_flag = false;
1548     m_tem_flag = 0;
1549 
1550     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1551     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1552     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1553 
1554     m_restart_interval = 0;
1555     m_restarts_left    = 0;
1556     m_next_restart_num = 0;
1557 
1558     m_max_mcus_per_row = 0;
1559     m_max_blocks_per_mcu = 0;
1560     m_max_mcus_per_col = 0;
1561 
1562     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1563     m_pMCU_coefficients = null;
1564     m_pSample_buf = null;
1565 
1566     m_total_bytes_read = 0;
1567 
1568     m_pScan_line_0 = null;
1569     m_pScan_line_1 = null;
1570 
1571     // Ready the input buffer.
1572     prep_in_buffer();
1573 
1574     // Prime the bit buffer.
1575     m_bits_left = 16;
1576     m_bit_buf = 0;
1577 
1578     get_bits(16);
1579     get_bits(16);
1580 
1581     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1582       m_mcu_block_max_zag.ptr[i] = 64;
1583   }
1584 
1585   enum SCALEBITS = 16;
1586   enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
1587   enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1588 
1589   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1590   void create_look_ups () {
1591     for (int i = 0; i <= 255; i++)
1592     {
1593       int k = i - 128;
1594       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1595       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1596       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1597       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1598     }
1599   }
1600 
1601   // This method throws back into the stream any bytes that where read
1602   // into the bit buffer during initial marker scanning.
1603   void fix_in_buffer () {
1604     // In case any 0xFF's where pulled into the buffer during marker scanning.
1605     assert((m_bits_left & 7) == 0);
1606 
1607     if (m_bits_left == 16)
1608       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1609 
1610     if (m_bits_left >= 8)
1611       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1612 
1613     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1614     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1615 
1616     m_bits_left = 16;
1617     get_bits_no_markers(16);
1618     get_bits_no_markers(16);
1619   }
1620 
1621   void transform_mcu (int mcu_row) {
1622     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1623     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1624 
1625     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1626     {
1627       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1628       pSrc_ptr += 64;
1629       pDst_ptr += 64;
1630     }
1631   }
1632 
1633   static immutable ubyte[64] s_max_rc = [
1634     17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1635     102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1636     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1637     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1638   ];
1639 
1640   void transform_mcu_expand (int mcu_row) {
1641     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1642     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1643 
1644     // Y IDCT
1645     int mcu_block;
1646     for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1647     {
1648       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1649       pSrc_ptr += 64;
1650       pDst_ptr += 64;
1651     }
1652 
1653     // Chroma IDCT, with upsampling
1654     jpgd_block_t[64] temp_block;
1655 
1656     for (int i = 0; i < 2; i++)
1657     {
1658       DCT_Upsample.Matrix44 P, Q, R, S;
1659 
1660       assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
1661       assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);
1662 
1663       int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
1664       if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
1665       switch (s_max_rc.ptr[max_zag])
1666       {
1667       case 1*16+1:
1668         DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
1669         DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
1670         break;
1671       case 1*16+2:
1672         DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
1673         DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
1674         break;
1675       case 2*16+2:
1676         DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
1677         DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
1678         break;
1679       case 3*16+2:
1680         DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
1681         DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
1682         break;
1683       case 3*16+3:
1684         DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
1685         DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
1686         break;
1687       case 3*16+4:
1688         DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
1689         DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
1690         break;
1691       case 4*16+4:
1692         DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
1693         DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
1694         break;
1695       case 5*16+4:
1696         DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
1697         DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
1698         break;
1699       case 5*16+5:
1700         DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
1701         DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
1702         break;
1703       case 5*16+6:
1704         DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
1705         DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
1706         break;
1707       case 6*16+6:
1708         DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
1709         DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
1710         break;
1711       case 7*16+6:
1712         DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
1713         DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
1714         break;
1715       case 7*16+7:
1716         DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
1717         DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
1718         break;
1719       case 7*16+8:
1720         DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
1721         DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
1722         break;
1723       case 8*16+8:
1724         DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
1725         DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
1726         break;
1727       default:
1728         assert(false);
1729       }
1730 
1731       auto a = DCT_Upsample.Matrix44(P + Q);
1732       P -= Q;
1733       DCT_Upsample.Matrix44* b = &P;
1734       auto c = DCT_Upsample.Matrix44(R + S);
1735       R -= S;
1736       DCT_Upsample.Matrix44* d = &R;
1737 
1738       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
1739       idct_4x4(temp_block.ptr, pDst_ptr);
1740       pDst_ptr += 64;
1741 
1742       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
1743       idct_4x4(temp_block.ptr, pDst_ptr);
1744       pDst_ptr += 64;
1745 
1746       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
1747       idct_4x4(temp_block.ptr, pDst_ptr);
1748       pDst_ptr += 64;
1749 
1750       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
1751       idct_4x4(temp_block.ptr, pDst_ptr);
1752       pDst_ptr += 64;
1753 
1754       pSrc_ptr += 64;
1755     }
1756   }
1757 
1758   // Loads and dequantizes the next row of (already decoded) coefficients.
1759   // Progressive images only.
1760   void load_next_row () {
1761     int i;
1762     jpgd_block_t *p;
1763     jpgd_quant_t *q;
1764     int mcu_row, mcu_block, row_block = 0;
1765     int component_num, component_id;
1766     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1767 
1768     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1769 
1770     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1771     {
1772       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1773 
1774       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1775       {
1776         component_id = m_mcu_org.ptr[mcu_block];
1777         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1778 
1779         p = m_pMCU_coefficients + 64 * mcu_block;
1780 
1781         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1782         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1783         p[0] = pDC[0];
1784         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1785 
1786         for (i = 63; i > 0; i--)
1787           if (p[g_ZAG[i]])
1788             break;
1789 
1790         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1791 
1792         for ( ; i >= 0; i--)
1793           if (p[g_ZAG[i]])
1794             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1795 
1796         row_block++;
1797 
1798         if (m_comps_in_scan == 1)
1799           block_x_mcu.ptr[component_id]++;
1800         else
1801         {
1802           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1803           {
1804             block_x_mcu_ofs = 0;
1805 
1806             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1807             {
1808               block_y_mcu_ofs = 0;
1809 
1810               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1811             }
1812           }
1813         }
1814       }
1815 
1816       if (m_freq_domain_chroma_upsample)
1817         transform_mcu_expand(mcu_row);
1818       else
1819         transform_mcu(mcu_row);
1820     }
1821 
1822     if (m_comps_in_scan == 1)
1823       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1824     else
1825     {
1826       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1827       {
1828         component_id = m_comp_list.ptr[component_num];
1829 
1830         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
1831       }
1832     }
1833   }
1834 
1835   // Restart interval processing.
1836   void process_restart () {
1837     int i;
1838     int c = 0;
1839 
1840     // Align to a byte boundry
1841     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1842     //get_bits_no_markers(m_bits_left & 7);
1843 
1844     // Let's scan a little bit to find the marker, but not _too_ far.
1845     // 1536 is a "fudge factor" that determines how much to scan.
1846     for (i = 1536; i > 0; i--)
1847       if (get_char() == 0xFF)
1848         break;
1849 
1850     if (i == 0)
1851       stop_decoding(JPGD_BAD_RESTART_MARKER);
1852 
1853     for ( ; i > 0; i--)
1854       if ((c = get_char()) != 0xFF)
1855         break;
1856 
1857     if (i == 0)
1858       stop_decoding(JPGD_BAD_RESTART_MARKER);
1859 
1860     // Is it the expected marker? If not, something bad happened.
1861     if (c != (m_next_restart_num + M_RST0))
1862       stop_decoding(JPGD_BAD_RESTART_MARKER);
1863 
1864     // Reset each component's DC prediction values.
1865     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
1866 
1867     m_eob_run = 0;
1868 
1869     m_restarts_left = m_restart_interval;
1870 
1871     m_next_restart_num = (m_next_restart_num + 1) & 7;
1872 
1873     // Get the bit buffer going again...
1874 
1875     m_bits_left = 16;
1876     get_bits_no_markers(16);
1877     get_bits_no_markers(16);
1878   }
1879 
1880   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
1881 
1882   // Decodes and dequantizes the next row of coefficients.
1883   void decode_next_row () {
1884     int row_block = 0;
1885 
1886     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1887     {
1888       if ((m_restart_interval) && (m_restarts_left == 0))
1889         process_restart();
1890 
1891       jpgd_block_t* p = m_pMCU_coefficients;
1892       for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1893       {
1894         int component_id = m_mcu_org.ptr[mcu_block];
1895         jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1896 
1897         int r, s;
1898         s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
1899         s = JPGD_HUFF_EXTEND(r, s);
1900 
1901         m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);
1902 
1903         p[0] = cast(jpgd_block_t)(s * q[0]);
1904 
1905         int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];
1906 
1907         huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];
1908 
1909         int k;
1910         for (k = 1; k < 64; k++)
1911         {
1912           int extra_bits;
1913           s = huff_decode(pH, extra_bits);
1914 
1915           r = s >> 4;
1916           s &= 15;
1917 
1918           if (s)
1919           {
1920             if (r)
1921             {
1922               if ((k + r) > 63)
1923                 stop_decoding(JPGD_DECODE_ERROR);
1924 
1925               if (k < prev_num_set)
1926               {
1927                 int n = JPGD_MIN(r, prev_num_set - k);
1928                 int kt = k;
1929                 while (n--)
1930                   p[g_ZAG[kt++]] = 0;
1931               }
1932 
1933               k += r;
1934             }
1935 
1936             s = JPGD_HUFF_EXTEND(extra_bits, s);
1937 
1938             assert(k < 64);
1939 
1940             p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
1941           }
1942           else
1943           {
1944             if (r == 15)
1945             {
1946               if ((k + 16) > 64)
1947                 stop_decoding(JPGD_DECODE_ERROR);
1948 
1949               if (k < prev_num_set)
1950               {
1951                 int n = JPGD_MIN(16, prev_num_set - k);
1952                 int kt = k;
1953                 while (n--)
1954                 {
1955                   assert(kt <= 63);
1956                   p[g_ZAG[kt++]] = 0;
1957                 }
1958               }
1959 
1960               k += 16 - 1; // - 1 because the loop counter is k
1961               assert(p[g_ZAG[k]] == 0);
1962             }
1963             else
1964               break;
1965           }
1966         }
1967 
1968         if (k < prev_num_set)
1969         {
1970           int kt = k;
1971           while (kt < prev_num_set)
1972             p[g_ZAG[kt++]] = 0;
1973         }
1974 
1975         m_mcu_block_max_zag.ptr[mcu_block] = k;
1976 
1977         row_block++;
1978       }
1979 
1980       if (m_freq_domain_chroma_upsample)
1981         transform_mcu_expand(mcu_row);
1982       else
1983         transform_mcu(mcu_row);
1984 
1985       m_restarts_left--;
1986     }
1987   }
1988 
1989   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
1990   void H1V1Convert () {
1991     int row = m_max_mcu_y_size - m_mcu_lines_left;
1992     ubyte *d = m_pScan_line_0;
1993     ubyte *s = m_pSample_buf + row * 8;
1994 
1995     for (int i = m_max_mcus_per_row; i > 0; i--)
1996     {
1997       for (int j = 0; j < 8; j++)
1998       {
1999         int y = s[j];
2000         int cb = s[64+j];
2001         int cr = s[128+j];
2002 
2003         d[0] = clamp(y + m_crr.ptr[cr]);
2004         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2005         d[2] = clamp(y + m_cbb.ptr[cb]);
2006         d[3] = 255;
2007 
2008         d += 4;
2009       }
2010 
2011       s += 64*3;
2012     }
2013   }
2014 
2015   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2016   void H2V1Convert () {
2017     int row = m_max_mcu_y_size - m_mcu_lines_left;
2018     ubyte *d0 = m_pScan_line_0;
2019     ubyte *y = m_pSample_buf + row * 8;
2020     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2021 
2022     for (int i = m_max_mcus_per_row; i > 0; i--)
2023     {
2024       for (int l = 0; l < 2; l++)
2025       {
2026         for (int j = 0; j < 4; j++)
2027         {
2028           int cb = c[0];
2029           int cr = c[64];
2030 
2031           int rc = m_crr.ptr[cr];
2032           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2033           int bc = m_cbb.ptr[cb];
2034 
2035           int yy = y[j<<1];
2036           d0[0] = clamp(yy+rc);
2037           d0[1] = clamp(yy+gc);
2038           d0[2] = clamp(yy+bc);
2039           d0[3] = 255;
2040 
2041           yy = y[(j<<1)+1];
2042           d0[4] = clamp(yy+rc);
2043           d0[5] = clamp(yy+gc);
2044           d0[6] = clamp(yy+bc);
2045           d0[7] = 255;
2046 
2047           d0 += 8;
2048 
2049           c++;
2050         }
2051         y += 64;
2052       }
2053 
2054       y += 64*4 - 64*2;
2055       c += 64*4 - 8;
2056     }
2057   }
2058 
2059   // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2060   void H1V2Convert () {
2061     int row = m_max_mcu_y_size - m_mcu_lines_left;
2062     ubyte *d0 = m_pScan_line_0;
2063     ubyte *d1 = m_pScan_line_1;
2064     ubyte *y;
2065     ubyte *c;
2066 
2067     if (row < 8)
2068       y = m_pSample_buf + row * 8;
2069     else
2070       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2071 
2072     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2073 
2074     for (int i = m_max_mcus_per_row; i > 0; i--)
2075     {
2076       for (int j = 0; j < 8; j++)
2077       {
2078         int cb = c[0+j];
2079         int cr = c[64+j];
2080 
2081         int rc = m_crr.ptr[cr];
2082         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2083         int bc = m_cbb.ptr[cb];
2084 
2085         int yy = y[j];
2086         d0[0] = clamp(yy+rc);
2087         d0[1] = clamp(yy+gc);
2088         d0[2] = clamp(yy+bc);
2089         d0[3] = 255;
2090 
2091         yy = y[8+j];
2092         d1[0] = clamp(yy+rc);
2093         d1[1] = clamp(yy+gc);
2094         d1[2] = clamp(yy+bc);
2095         d1[3] = 255;
2096 
2097         d0 += 4;
2098         d1 += 4;
2099       }
2100 
2101       y += 64*4;
2102       c += 64*4;
2103     }
2104   }
2105 
2106   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2107   void H2V2Convert () {
2108     int row = m_max_mcu_y_size - m_mcu_lines_left;
2109     ubyte *d0 = m_pScan_line_0;
2110     ubyte *d1 = m_pScan_line_1;
2111     ubyte *y;
2112     ubyte *c;
2113 
2114     if (row < 8)
2115       y = m_pSample_buf + row * 8;
2116     else
2117       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2118 
2119     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2120 
2121     for (int i = m_max_mcus_per_row; i > 0; i--)
2122     {
2123       for (int l = 0; l < 2; l++)
2124       {
2125         for (int j = 0; j < 8; j += 2)
2126         {
2127           int cb = c[0];
2128           int cr = c[64];
2129 
2130           int rc = m_crr.ptr[cr];
2131           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2132           int bc = m_cbb.ptr[cb];
2133 
2134           int yy = y[j];
2135           d0[0] = clamp(yy+rc);
2136           d0[1] = clamp(yy+gc);
2137           d0[2] = clamp(yy+bc);
2138           d0[3] = 255;
2139 
2140           yy = y[j+1];
2141           d0[4] = clamp(yy+rc);
2142           d0[5] = clamp(yy+gc);
2143           d0[6] = clamp(yy+bc);
2144           d0[7] = 255;
2145 
2146           yy = y[j+8];
2147           d1[0] = clamp(yy+rc);
2148           d1[1] = clamp(yy+gc);
2149           d1[2] = clamp(yy+bc);
2150           d1[3] = 255;
2151 
2152           yy = y[j+8+1];
2153           d1[4] = clamp(yy+rc);
2154           d1[5] = clamp(yy+gc);
2155           d1[6] = clamp(yy+bc);
2156           d1[7] = 255;
2157 
2158           d0 += 8;
2159           d1 += 8;
2160 
2161           c++;
2162         }
2163         y += 64;
2164       }
2165 
2166       y += 64*6 - 64*2;
2167       c += 64*6 - 8;
2168     }
2169   }
2170 
2171   // Y (1 block per MCU) to 8-bit grayscale
2172   void gray_convert () {
2173     int row = m_max_mcu_y_size - m_mcu_lines_left;
2174     ubyte *d = m_pScan_line_0;
2175     ubyte *s = m_pSample_buf + row * 8;
2176 
2177     for (int i = m_max_mcus_per_row; i > 0; i--)
2178     {
2179       *cast(uint*)d = *cast(uint*)s;
2180       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2181 
2182       s += 64;
2183       d += 8;
2184     }
2185   }
2186 
2187   void expanded_convert () {
2188     int row = m_max_mcu_y_size - m_mcu_lines_left;
2189 
2190     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2191 
2192     ubyte* d = m_pScan_line_0;
2193 
2194     for (int i = m_max_mcus_per_row; i > 0; i--)
2195     {
2196       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2197       {
2198         immutable int Y_ofs = k * 8;
2199         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2200         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2201         for (int j = 0; j < 8; j++)
2202         {
2203           int y = Py[Y_ofs + j];
2204           int cb = Py[Cb_ofs + j];
2205           int cr = Py[Cr_ofs + j];
2206 
2207           d[0] = clamp(y + m_crr.ptr[cr]);
2208           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2209           d[2] = clamp(y + m_cbb.ptr[cb]);
2210           d[3] = 255;
2211 
2212           d += 4;
2213         }
2214       }
2215 
2216       Py += 64 * m_expanded_blocks_per_mcu;
2217     }
2218   }
2219 
2220   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2221   void find_eoi () {
2222     if (!m_progressive_flag)
2223     {
2224       // Attempt to read the EOI marker.
2225       //get_bits_no_markers(m_bits_left & 7);
2226 
2227       // Prime the bit buffer
2228       m_bits_left = 16;
2229       get_bits(16);
2230       get_bits(16);
2231 
2232       // The next marker _should_ be EOI
2233       process_markers(true); // but restarts are allowed as we can harmlessly skip them at the end of the stream
2234     }
2235 
2236     m_total_bytes_read -= m_in_buf_left;
2237   }
2238 
2239   // Creates the tables needed for efficient Huffman decoding.
2240   void make_huff_table (int index, huff_tables *pH) {
2241     int p, i, l, si;
2242     ubyte[257] huffsize;
2243     uint[257] huffcode;
2244     uint code;
2245     uint subtree;
2246     int code_size;
2247     int lastp;
2248     int nextfreeentry;
2249     int currententry;
2250 
2251     pH.ac_table = m_huff_ac.ptr[index] != 0;
2252 
2253     p = 0;
2254 
2255     for (l = 1; l <= 16; l++)
2256     {
2257       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2258         huffsize.ptr[p++] = cast(ubyte)(l);
2259     }
2260 
2261     huffsize.ptr[p] = 0;
2262 
2263     lastp = p;
2264 
2265     code = 0;
2266     si = huffsize.ptr[0];
2267     p = 0;
2268 
2269     while (huffsize.ptr[p])
2270     {
2271       while (huffsize.ptr[p] == si)
2272       {
2273         huffcode.ptr[p++] = code;
2274         code++;
2275       }
2276 
2277       code <<= 1;
2278       si++;
2279     }
2280 
2281     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2282     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2283     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2284     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2285 
2286     nextfreeentry = -1;
2287 
2288     p = 0;
2289 
2290     while (p < lastp)
2291     {
2292       i = m_huff_val.ptr[index][p];
2293       code = huffcode.ptr[p];
2294       code_size = huffsize.ptr[p];
2295 
2296       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2297 
2298       if (code_size <= 8)
2299       {
2300         code <<= (8 - code_size);
2301 
2302         for (l = 1 << (8 - code_size); l > 0; l--)
2303         {
2304           assert(i < 256);
2305 
2306           pH.look_up.ptr[code] = i;
2307 
2308           bool has_extrabits = false;
2309           int extra_bits = 0;
2310           int num_extra_bits = i & 15;
2311 
2312           int bits_to_fetch = code_size;
2313           if (num_extra_bits)
2314           {
2315             int total_codesize = code_size + num_extra_bits;
2316             if (total_codesize <= 8)
2317             {
2318               has_extrabits = true;
2319               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2320               assert(extra_bits <= 0x7FFF);
2321               bits_to_fetch += num_extra_bits;
2322             }
2323           }
2324 
2325           if (!has_extrabits)
2326             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2327           else
2328             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2329 
2330           code++;
2331         }
2332       }
2333       else
2334       {
2335         subtree = (code >> (code_size - 8)) & 0xFF;
2336 
2337         currententry = pH.look_up.ptr[subtree];
2338 
2339         if (currententry == 0)
2340         {
2341           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2342           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2343 
2344           nextfreeentry -= 2;
2345         }
2346 
2347         code <<= (16 - (code_size - 8));
2348 
2349         for (l = code_size; l > 9; l--)
2350         {
2351           if ((code & 0x8000) == 0)
2352             currententry--;
2353 
2354           if (pH.tree.ptr[-currententry - 1] == 0)
2355           {
2356             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2357 
2358             currententry = nextfreeentry;
2359 
2360             nextfreeentry -= 2;
2361           }
2362           else
2363             currententry = pH.tree.ptr[-currententry - 1];
2364 
2365           code <<= 1;
2366         }
2367 
2368         if ((code & 0x8000) == 0)
2369           currententry--;
2370 
2371         pH.tree.ptr[-currententry - 1] = i;
2372       }
2373 
2374       p++;
2375     }
2376   }
2377 
2378   // Verifies the quantization tables needed for this scan are available.
2379   void check_quant_tables () {
2380     for (int i = 0; i < m_comps_in_scan; i++)
2381       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2382         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2383   }
2384 
2385   // Verifies that all the Huffman tables needed for this scan are available.
2386   void check_huff_tables () {
2387     for (int i = 0; i < m_comps_in_scan; i++)
2388     {
2389       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2390         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2391 
2392       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2393         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2394     }
2395 
2396     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2397       if (m_huff_num.ptr[i])
2398       {
2399         if (!m_pHuff_tabs.ptr[i])
2400           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2401 
2402         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2403       }
2404   }
2405 
2406   // Determines the component order inside each MCU.
2407   // Also calcs how many MCU's are on each row, etc.
2408   void calc_mcu_block_order () {
2409     int component_num, component_id;
2410     int max_h_samp = 0, max_v_samp = 0;
2411 
2412     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2413     {
2414       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2415         max_h_samp = m_comp_h_samp.ptr[component_id];
2416 
2417       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2418         max_v_samp = m_comp_v_samp.ptr[component_id];
2419     }
2420 
2421     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2422     {
2423       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2424       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2425     }
2426 
2427     if (m_comps_in_scan == 1)
2428     {
2429       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2430       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2431     }
2432     else
2433     {
2434       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2435       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2436     }
2437 
2438     if (m_comps_in_scan == 1)
2439     {
2440       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2441 
2442       m_blocks_per_mcu = 1;
2443     }
2444     else
2445     {
2446       m_blocks_per_mcu = 0;
2447 
2448       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2449       {
2450         int num_blocks;
2451 
2452         component_id = m_comp_list.ptr[component_num];
2453 
2454         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2455 
2456         while (num_blocks--)
2457           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2458       }
2459     }
2460   }
2461 
2462   // Starts a new scan.
2463   int init_scan () {
2464     if (!locate_sos_marker())
2465       return false;
2466 
2467     calc_mcu_block_order();
2468 
2469     check_huff_tables();
2470 
2471     check_quant_tables();
2472 
2473     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2474 
2475     m_eob_run = 0;
2476 
2477     if (m_restart_interval)
2478     {
2479       m_restarts_left = m_restart_interval;
2480       m_next_restart_num = 0;
2481     }
2482 
2483     fix_in_buffer();
2484 
2485     return true;
2486   }
2487 
2488   // Starts a frame. Determines if the number of components or sampling factors
2489   // are supported.
2490   void init_frame () {
2491     int i;
2492 
2493     if (m_comps_in_frame == 1)
2494     {
2495       version(jpegd_test) {{ import std.stdio; stderr.writeln("m_comp_h_samp=", m_comp_h_samp.ptr[0], "; m_comp_v_samp=", m_comp_v_samp.ptr[0]); }}
2496 
2497       //if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
2498       //  stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2499 
2500       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2501       {
2502         m_scan_type = JPGD_GRAYSCALE;
2503         m_max_blocks_per_mcu = 1;
2504         m_max_mcu_x_size = 8;
2505         m_max_mcu_y_size = 8;
2506       }
2507       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2508       {
2509         //k8: i added this, and i absolutely don't know what it means; but it decoded two sample images i found
2510         m_scan_type = JPGD_GRAYSCALE;
2511         m_max_blocks_per_mcu = 4;
2512         m_max_mcu_x_size = 8;
2513         m_max_mcu_y_size = 8;
2514       }
2515       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2516       {
2517       	// adr added this. idk if it is right seems wrong since it the same as above but..... meh ship it.
2518         m_scan_type = JPGD_GRAYSCALE;
2519         m_max_blocks_per_mcu = 4;
2520         m_max_mcu_x_size = 8;
2521         m_max_mcu_y_size = 8;
2522       }
2523       else {
2524       // code -231 brings us here
2525       //import std.conv;
2526       //assert(0, to!string(m_comp_h_samp) ~ to!string(m_comp_v_samp));
2527         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2528       }
2529     }
2530     else if (m_comps_in_frame == 3)
2531     {
2532       if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
2533            ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
2534         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2535 
2536       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2537       {
2538         m_scan_type = JPGD_YH1V1;
2539 
2540         m_max_blocks_per_mcu = 3;
2541         m_max_mcu_x_size = 8;
2542         m_max_mcu_y_size = 8;
2543       }
2544       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2545       {
2546         m_scan_type = JPGD_YH2V1;
2547         m_max_blocks_per_mcu = 4;
2548         m_max_mcu_x_size = 16;
2549         m_max_mcu_y_size = 8;
2550       }
2551       else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
2552       {
2553         m_scan_type = JPGD_YH1V2;
2554         m_max_blocks_per_mcu = 4;
2555         m_max_mcu_x_size = 8;
2556         m_max_mcu_y_size = 16;
2557       }
2558       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2559       {
2560         m_scan_type = JPGD_YH2V2;
2561         m_max_blocks_per_mcu = 6;
2562         m_max_mcu_x_size = 16;
2563         m_max_mcu_y_size = 16;
2564       }
2565       else
2566         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2567     }
2568     else
2569       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2570 
2571     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2572     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2573 
2574     // These values are for the *destination* pixels: after conversion.
2575     if (m_scan_type == JPGD_GRAYSCALE)
2576       m_dest_bytes_per_pixel = 1;
2577     else
2578       m_dest_bytes_per_pixel = 4;
2579 
2580     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2581 
2582     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2583 
2584     // Initialize two scan line buffers.
2585     m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2586     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2587       m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2588 
2589     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2590 
2591     // Should never happen
2592     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2593       stop_decoding(JPGD_ASSERTION_ERROR);
2594 
2595     // Allocate the coefficient buffer, enough for one MCU
2596     m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);
2597 
2598     for (i = 0; i < m_max_blocks_per_mcu; i++)
2599       m_mcu_block_max_zag.ptr[i] = 64;
2600 
2601     m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
2602     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2603     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2604     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2605     m_freq_domain_chroma_upsample = false;
2606     version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
2607       m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2608     }
2609 
2610     if (m_freq_domain_chroma_upsample)
2611       m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
2612     else
2613       m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);
2614 
2615     m_total_lines_left = m_image_y_size;
2616 
2617     m_mcu_lines_left = 0;
2618 
2619     create_look_ups();
2620   }
2621 
2622   // The coeff_buf series of methods originally stored the coefficients
2623   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2624   // was used to make this process more efficient. Now, we can store the entire
2625   // thing in RAM.
2626   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2627     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2628 
2629     cb.block_num_x = block_num_x;
2630     cb.block_num_y = block_num_y;
2631     cb.block_len_x = block_len_x;
2632     cb.block_len_y = block_len_y;
2633     cb.block_size = cast(int)((block_len_x * block_len_y) * jpgd_block_t.sizeof);
2634     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2635     return cb;
2636   }
2637 
2638   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2639     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2640     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2641   }
2642 
2643   // The following methods decode the various types of m_blocks encountered
2644   // in progressively encoded images.
2645   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2646     int s, r;
2647     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2648 
2649     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2650     {
2651       r = pD.get_bits_no_markers(s);
2652       s = JPGD_HUFF_EXTEND(r, s);
2653     }
2654 
2655     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2656 
2657     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2658   }
2659 
2660   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2661     if (pD.get_bits_no_markers(1))
2662     {
2663       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2664 
2665       p[0] |= (1 << pD.m_successive_low);
2666     }
2667   }
2668 
2669   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2670     int k, s, r;
2671 
2672     if (pD.m_eob_run)
2673     {
2674       pD.m_eob_run--;
2675       return;
2676     }
2677 
2678     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2679 
2680     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2681     {
2682       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2683 
2684       r = s >> 4;
2685       s &= 15;
2686 
2687       if (s)
2688       {
2689         if ((k += r) > 63)
2690           pD.stop_decoding(JPGD_DECODE_ERROR);
2691 
2692         r = pD.get_bits_no_markers(s);
2693         s = JPGD_HUFF_EXTEND(r, s);
2694 
2695         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2696       }
2697       else
2698       {
2699         if (r == 15)
2700         {
2701           if ((k += 15) > 63)
2702             pD.stop_decoding(JPGD_DECODE_ERROR);
2703         }
2704         else
2705         {
2706           pD.m_eob_run = 1 << r;
2707 
2708           if (r)
2709             pD.m_eob_run += pD.get_bits_no_markers(r);
2710 
2711           pD.m_eob_run--;
2712 
2713           break;
2714         }
2715       }
2716     }
2717   }
2718 
2719   static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2720     int s, k, r;
2721     int p1 = 1 << pD.m_successive_low;
2722     int m1 = (-1) << pD.m_successive_low;
2723     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2724 
2725     assert(pD.m_spectral_end <= 63);
2726 
2727     k = pD.m_spectral_start;
2728 
2729     if (pD.m_eob_run == 0)
2730     {
2731       for ( ; k <= pD.m_spectral_end; k++)
2732       {
2733         s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2734 
2735         r = s >> 4;
2736         s &= 15;
2737 
2738         if (s)
2739         {
2740           if (s != 1)
2741             pD.stop_decoding(JPGD_DECODE_ERROR);
2742 
2743           if (pD.get_bits_no_markers(1))
2744             s = p1;
2745           else
2746             s = m1;
2747         }
2748         else
2749         {
2750           if (r != 15)
2751           {
2752             pD.m_eob_run = 1 << r;
2753 
2754             if (r)
2755               pD.m_eob_run += pD.get_bits_no_markers(r);
2756 
2757             break;
2758           }
2759         }
2760 
2761         do
2762         {
2763           jpgd_block_t *this_coef = p + g_ZAG[k & 63];
2764 
2765           if (*this_coef != 0)
2766           {
2767             if (pD.get_bits_no_markers(1))
2768             {
2769               if ((*this_coef & p1) == 0)
2770               {
2771                 if (*this_coef >= 0)
2772                   *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2773                 else
2774                   *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2775               }
2776             }
2777           }
2778           else
2779           {
2780             if (--r < 0)
2781               break;
2782           }
2783 
2784           k++;
2785 
2786         } while (k <= pD.m_spectral_end);
2787 
2788         if ((s) && (k < 64))
2789         {
2790           p[g_ZAG[k]] = cast(jpgd_block_t)(s);
2791         }
2792       }
2793     }
2794 
2795     if (pD.m_eob_run > 0)
2796     {
2797       for ( ; k <= pD.m_spectral_end; k++)
2798       {
2799         jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
2800 
2801         if (*this_coef != 0)
2802         {
2803           if (pD.get_bits_no_markers(1))
2804           {
2805             if ((*this_coef & p1) == 0)
2806             {
2807               if (*this_coef >= 0)
2808                 *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2809               else
2810                 *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2811             }
2812           }
2813         }
2814       }
2815 
2816       pD.m_eob_run--;
2817     }
2818   }
2819 
2820   // Decode a scan in a progressively encoded image.
2821   void decode_scan (pDecode_block_func decode_block_func) {
2822     int mcu_row, mcu_col, mcu_block;
2823     int[JPGD_MAX_COMPONENTS] block_x_mcu;
2824     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
2825 
2826     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
2827 
2828     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2829     {
2830       int component_num, component_id;
2831 
2832       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
2833 
2834       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2835       {
2836         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2837 
2838         if ((m_restart_interval) && (m_restarts_left == 0))
2839           process_restart();
2840 
2841         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2842         {
2843           component_id = m_mcu_org.ptr[mcu_block];
2844 
2845           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
2846 
2847           if (m_comps_in_scan == 1)
2848             block_x_mcu.ptr[component_id]++;
2849           else
2850           {
2851             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
2852             {
2853               block_x_mcu_ofs = 0;
2854 
2855               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
2856               {
2857                 block_y_mcu_ofs = 0;
2858                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
2859               }
2860             }
2861           }
2862         }
2863 
2864         m_restarts_left--;
2865       }
2866 
2867       if (m_comps_in_scan == 1)
2868         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
2869       else
2870       {
2871         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2872         {
2873           component_id = m_comp_list.ptr[component_num];
2874           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
2875         }
2876       }
2877     }
2878   }
2879 
2880   // Decode a progressively encoded image.
2881   void init_progressive () {
2882     int i;
2883 
2884     if (m_comps_in_frame == 4)
2885       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2886 
2887     // Allocate the coefficient buffers.
2888     for (i = 0; i < m_comps_in_frame; i++)
2889     {
2890       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
2891       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
2892     }
2893 
2894     for ( ; ; )
2895     {
2896       int dc_only_scan, refinement_scan;
2897       pDecode_block_func decode_block_func;
2898 
2899       if (!init_scan())
2900         break;
2901 
2902       dc_only_scan = (m_spectral_start == 0);
2903       refinement_scan = (m_successive_high != 0);
2904 
2905       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2906         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2907 
2908       if (dc_only_scan)
2909       {
2910         if (m_spectral_end)
2911           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2912       }
2913       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2914         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2915 
2916       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2917         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2918 
2919       if (dc_only_scan)
2920       {
2921         if (refinement_scan)
2922           decode_block_func = &decode_block_dc_refine;
2923         else
2924           decode_block_func = &decode_block_dc_first;
2925       }
2926       else
2927       {
2928         if (refinement_scan)
2929           decode_block_func = &decode_block_ac_refine;
2930         else
2931           decode_block_func = &decode_block_ac_first;
2932       }
2933 
2934       decode_scan(decode_block_func);
2935 
2936       m_bits_left = 16;
2937       get_bits(16);
2938       get_bits(16);
2939     }
2940 
2941     m_comps_in_scan = m_comps_in_frame;
2942 
2943     for (i = 0; i < m_comps_in_frame; i++)
2944       m_comp_list.ptr[i] = i;
2945 
2946     calc_mcu_block_order();
2947   }
2948 
2949   void init_sequential () {
2950     if (!init_scan())
2951       stop_decoding(JPGD_UNEXPECTED_MARKER);
2952   }
2953 
2954   void decode_start () {
2955     init_frame();
2956 
2957     if (m_progressive_flag)
2958       init_progressive();
2959     else
2960       init_sequential();
2961   }
2962 
2963   void decode_init (JpegStreamReadFunc rfn) {
2964     initit(rfn);
2965     locate_sof_marker();
2966   }
2967 }
2968 
2969 
2970 // ////////////////////////////////////////////////////////////////////////// //
2971 /// read JPEG image header, determine dimensions and number of components.
2972 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
  // Without a read callback there is nothing to inspect.
  if (rfn is null) return false;

  // Constructing the decoder is what parses the header.
  auto decoder = jpeg_decoder(rfn);
  version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable bool header_ok = (decoder.error_code == JPGD_SUCCESS);
  if (header_ok) {
    // The `out` parameters keep their zero defaults when parsing failed.
    actual_comps = decoder.num_components;
    height = decoder.height;
    width = decoder.width;
  }
  return header_ok;
}
2983 
2984 
2985 // ////////////////////////////////////////////////////////////////////////// //
2986 /// read JPEG image header, determine dimensions and number of components.
2987 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
  // Opens `filename` with C stdio and passes a reading delegate to
  // detect_jpeg_image_from_stream().
  // Throws: Exception when the name is empty or the file cannot be opened.
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a zero-terminated name: short names are copied into a
  // stack buffer, long ones into a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // Test the raw pointer: a slice of a null pointer with non-zero length
    // does not compare equal to `null`, so checking the slice would miss an
    // allocation failure.
    char* raw = cast(char*)malloc(filename.length+1);
    if (raw !is null) {
      scope(exit) free(raw);
      // The name must actually be copied in before fopen() reads it.
      raw[0..filename.length] = filename[];
      raw[filename.length] = 0;
      m_pFile = fopen(raw, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // Returns the number of bytes read, or -1 on a read error; sets
      // *pEOF_flag once the end of the file has been reached.
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // A short read is either an I/O error or the end of the file.
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps);
}
3034 
3035 
3036 // ////////////////////////////////////////////////////////////////////////// //
3037 /// read JPEG image header, determine dimensions and number of components.
3038 /// return `false` if image is not JPEG (i hope).
public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
  // Feeds `buf` to detect_jpeg_image_from_stream() through a delegate that
  // hands out consecutive pieces of the buffer.
  size_t bufpos; // number of bytes already handed to the decoder

  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;

      // Everything consumed: report end of data.
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }

      // Never hand out more than what is left in the buffer.
      immutable size_t bytes_left = buf.length-bufpos;
      if (bytes_left < max_bytes_to_read) max_bytes_to_read = cast(int)bytes_left;

      const(ubyte)* src = (cast(const(ubyte)*)buf.ptr)+bufpos;
      memcpy(pBuf, src, max_bytes_to_read);
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps);
}
3055 
3056 
3057 // ////////////////////////////////////////////////////////////////////////// //
3058 /// decompress JPEG image, what else?
3059 /// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Decodes the whole image into one tightly packed buffer with `req_comps`
  // bytes per pixel.  Returns null on any failure.  With `useMalloc` the
  // buffer comes from jpgd_malloc(), otherwise it is a GC array.
  import core.stdc.string : memcpy;

  if (rfn is null) return null;

  // Accepted requests: -1 (use the image's own count), 1, 3 or 4.
  switch (req_comps) {
    case -1, 1, 3, 4: break;
    default: return null;
  }

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  immutable int dst_bpl = image_width*req_comps; // bytes per output row

  static if (useMalloc) {
    ubyte* pImage_data = cast(ubyte*)jpgd_malloc(dst_bpl*image_height);
    if (pImage_data is null) return null;
    auto idata = pImage_data[0..dst_bpl*image_height];
  } else {
    auto idata = new ubyte[](dst_bpl*image_height);
    auto pImage_data = idata.ptr;
  }

  // Gives the output buffer back to its allocator; shared by the exception
  // path and the decode-error path.
  void release_image () {
    static if (useMalloc) {
      jpgd_free(pImage_data);
    } else {
      import core.memory : GC;
      GC.free(idata.ptr);
      idata = null;
    }
  }

  scope(failure) release_image();

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      release_image();
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;
    immutable int src_comps = decoder.num_components;

    if ((req_comps == 1 && src_comps == 1) || (req_comps == 4 && src_comps == 3)) {
      // Decoder output is copied as-is (1 byte/pixel gray, 4 bytes/pixel color).
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (src_comps == 1) {
      // Gray source: replicate luma into three channels, plus an opaque
      // alpha byte unless exactly 3 components were requested.
      immutable bool add_alpha = (req_comps != 3);
      for (int x = 0; x < image_width; ++x) {
        immutable ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        if (add_alpha) {
          pDst[3] = 255;
          pDst += 4;
        } else {
          pDst += 3;
        }
      }
    } else if (src_comps == 3) {
      // Color source: decoder pixels are 4 bytes wide.
      if (req_comps == 1) {
        // Weighted sum to gray in 16.16 fixed point with rounding.
        enum int YR = 19595, YG = 38470, YB = 7471;
        for (int x = 0; x < image_width; ++x) {
          const(ubyte)* src = pScan_line + x*4;
          immutable int r = src[0];
          immutable int g = src[1];
          immutable int b = src[2];
          *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // Drop the fourth byte of every source pixel.
        for (int x = 0; x < image_width; ++x) {
          const(ubyte)* src = pScan_line + x*4;
          pDst[0] = src[0];
          pDst[1] = src[1];
          pDst[2] = src[2];
          pDst += 3;
        }
      }
    }
  }

  return idata;
}
3160 
3161 
3162 // ////////////////////////////////////////////////////////////////////////// //
3163 /// decompress JPEG image from disk file.
3164 /// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Opens `filename` with C stdio and passes a reading delegate to
  // decompress_jpeg_image_from_stream().
  // Throws: Exception when the name is empty or the file cannot be opened.
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");

  // fopen() needs a zero-terminated name: short names are copied into a
  // stack buffer, long ones into a temporary malloc'ed buffer.
  if (filename.length < 512) {
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // Test the raw pointer: a slice of a null pointer with non-zero length
    // does not compare equal to `null`, so checking the slice would miss an
    // allocation failure.
    char* raw = cast(char*)malloc(filename.length+1);
    if (raw !is null) {
      scope(exit) free(raw);
      // The name must actually be copied in before fopen() reads it.
      raw[0..filename.length] = filename[];
      raw[filename.length] = 0;
      m_pFile = fopen(raw, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // Returns the number of bytes read, or -1 on a read error; sets
      // *pEOF_flag once the end of the file has been reached.
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // A short read is either an I/O error or the end of the file.
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3211 
3212 
3213 // ////////////////////////////////////////////////////////////////////////// //
3214 /// decompress JPEG image from memory buffer.
3215 /// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Feeds `buf` to decompress_jpeg_image_from_stream() through a delegate
  // that hands out consecutive pieces of the buffer.
  size_t bufpos; // number of bytes already handed to the decoder

  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;

      // Everything consumed: report end of data.
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }

      // Never hand out more than what is left in the buffer.
      immutable size_t bytes_left = buf.length-bufpos;
      if (bytes_left < max_bytes_to_read) max_bytes_to_read = cast(int)bytes_left;

      const(ubyte)* src = (cast(const(ubyte)*)buf.ptr)+bufpos;
      memcpy(pBuf, src, max_bytes_to_read);
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    },
    width, height, actual_comps, req_comps);
}
3232 
3233 
3234 // ////////////////////////////////////////////////////////////////////////// //
3235 // if we have access "iv.vfs", add some handy API
// True when the `iv.vfs` module can be imported at compile time.
enum bool JpegHasIVVFS = __traits(compiles, { import iv.vfs; });
3237 
3238 static if (JpegHasIVVFS) {
3239 import iv.vfs;
3240 
3241 // ////////////////////////////////////////////////////////////////////////// //
3242 /// decompress JPEG image from disk file.
3243 /// you can specify required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use image value.
public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
  // Same as the stream variant, reading the data from an already opened VFile.
  return decompress_jpeg_image_from_stream!useMalloc(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // A closed file is reported as a read error.
      if (!fl.isOpen) return -1;

      // Already at the end: nothing more to deliver.
      if (fl.eof) {
        *pEOF_flag = true;
        return 0;
      }

      immutable int bytes_read = cast(int)fl.rawRead(pBuf[0..max_bytes_to_read]).length;

      // Tell the decoder right away when this read reached the end.
      if (fl.eof) *pEOF_flag = true;
      return bytes_read;
    },
    width, height, actual_comps, req_comps);
}
3258 // vfs API
3259 }
3260 
3261 
3262 // ////////////////////////////////////////////////////////////////////////// //
3263 // if we have access "arsd.color", add some handy API
// True when the `arsd.color` module can be imported at compile time.
enum bool JpegHasArsd = __traits(compiles, { import arsd.color; });
3265 
3266 
3267 
/// Describes the most recent failure recorded by `readJpegFromStream`.
public struct LastJpegError {
	/// Step that failed: 1 = constructing the decoder, 2 = `begin_decoding`,
	/// 3 = decoding a scan line.
	int stage;

	/// Error/status code reported by the failing step.
	int code;

	/// Extra information; stage 2 stores the decoder's `m_error_code` here,
	/// the other stages leave it at 0.
	int details;
}

/// Filled in by `readJpegFromStream` right before it returns null.
public LastJpegError lastJpegError;
3275 
3276 
3277 static if (JpegHasArsd) {
3278 import arsd.color;
3279 
3280 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image supplied through the read callback `rfn` into a 32-bit RGBA image.
///
/// Params:
///   rfn = callback the decoder pulls the compressed bytes from
///
/// Returns:
///   the decoded image, or `null` on failure. Every failure except a null `rfn` is recorded in
///   `lastJpegError` (stage 1: decoder creation, stage 2: begin_decoding or an image without
///   pixels, stage 3: scanline decoding).
public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
  import core.stdc.string : memcpy;
  enum req_comps = 4; // the result is always RGBA

  if (rfn is null) return null;

  auto decoder = jpeg_decoder(rfn);
  if (decoder.error_code != JPGD_SUCCESS) { lastJpegError = LastJpegError(1, decoder.error_code); return null; }
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;

  version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("starting (%dx%d)...\n", image_width, image_height); }}

  auto err = decoder.begin_decoding();
  if (err != JPGD_SUCCESS || image_width < 1 || image_height < 1) {
    lastJpegError = LastJpegError(2, err, decoder.m_error_code);
    return null;
  }

  // The row stride and the row offset are kept in size_t: computed in int, `y*dst_bpl`
  // overflows as soon as the image holds more than 2GB of RGBA data.
  immutable size_t dst_bpl = cast(size_t)image_width*req_comps;
  auto img = new TrueColorImage(image_width, image_height);
  scope(failure) { img.clearInternal(); img = null; }
  ubyte* pImage_data = img.imageData.bytes.ptr;

  foreach (immutable size_t y; 0..cast(size_t)image_height) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    err = decoder.decode(cast(void**)&pScan_line, &scan_line_len);
    if (err != JPGD_SUCCESS) {
      lastJpegError = LastJpegError(3, err);
      img.clearInternal();
      img = null;
      return null;
    }

    ubyte* pDst = pImage_data+y*dst_bpl;

    // req_comps is fixed at 4, so only two conversions can ever run
    // (the former branches for 1 and 3 output components were unreachable).
    if (decoder.num_components == 3) {
      // three-component scanlines are copied straight into the RGBA row
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      // grayscale: replicate luma into R, G and B and make the pixel opaque
      foreach (immutable x; 0..image_width) {
        immutable ubyte luma = pScan_line[x];
        pDst[0] = luma;
        pDst[1] = luma;
        pDst[2] = luma;
        pDst[3] = 255;
        pDst += 4;
      }
    }
    // any other component count leaves the row as TrueColorImage initialised it
  }

  return img;
}
3370 
3371 
3372 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image from a disk file.
///
/// Returns: the decoded image, or null if decoding failed (readJpegFromStream records the reason).
/// Throws: Exception if `filename` is empty or the file cannot be opened.
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  if (filename.length < 512) {
    // short names: build the NUL-terminated copy on the stack
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    // Test the raw pointer, not a slice of it: a slice with a null pointer but non-zero
    // length is not `is null`, so the old check could never catch a failed malloc.
    auto mem = cast(char*)malloc(filename.length+1);
    if (mem !is null) {
      scope(exit) free(mem);
      // the name has to be copied and terminated before fopen sees it
      mem[0..filename.length] = filename[];
      mem[filename.length] = 0;
      m_pFile = fopen(mem, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // a short read is either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3421 
/++
	Writes `img` to the file `filename` as a JPEG, handing the image's bytes to the encoder as 4-channel data.

	Throws:
		Exception if the encoder reports failure.

	History:
		Added January 22, 2021 (release version 9.2)
+/
public void writeJpeg(const(char)[] filename, TrueColorImage img, JpegParams params = JpegParams.init) {
	immutable bool written = compress_image_to_jpeg_file(filename, img.width, img.height, 4, img.imageData.bytes, params);
	if(written) return;
	throw new Exception("jpeg write failed"); // FIXME: check errno?
}
3430 
/++
	Encodes an image as jpeg in memory.

	The delegate overload is used with a sink that appends every chunk to a growing array.

	Returns:
		the encoded bytes.

	History:
		Added January 22, 2021 (release version 9.2)
+/
public ubyte[] encodeJpeg(TrueColorImage img, JpegParams params = JpegParams.init) {
	ubyte[] encoded;

	// sink for the streaming overload: keep everything, never ask the encoder to stop
	bool collect(const scope ubyte[] chunk) {
		encoded ~= chunk;
		return true;
	}

	encodeJpeg(&collect, img, params);
	return encoded;
}
3446 
3447 /// ditto
public void encodeJpeg(scope bool delegate(const scope ubyte[]) dg, TrueColorImage img, JpegParams params = JpegParams.init) {
	// the image bytes are passed on as 4-channel data; each encoded chunk is delivered to `dg`
	immutable bool ok = compress_image_to_jpeg_stream(dg, img.width, img.height, 4, img.imageData.bytes, params);
	if(!ok) throw new Exception("encode");
}
3454 
3455 
3456 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image that is held completely in `buf`.
/// The bytes are served to readJpegFromStream front to back; end-of-data is signalled
/// on the first read request made after the buffer has been used up.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  size_t consumed; // how many bytes of `buf` were already handed out
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (consumed >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never hand out more than what is left
      immutable size_t remaining = buf.length-consumed;
      if (remaining < max_bytes_to_read) max_bytes_to_read = cast(int)remaining;
      auto src = cast(const(ubyte)*)buf.ptr;
      memcpy(pBuf, src+consumed, max_bytes_to_read);
      consumed += max_bytes_to_read;
      return max_bytes_to_read;
    }
  );
}
3474 // done with arsd API
3475 }
3476 
3477 
3478 static if (JpegHasIVVFS) {
// MemoryImage and readJpegFromStream are only declared when arsd.color is available,
// so this overload must not be compiled without it.
static if (JpegHasArsd) {
/// Decompress a JPEG image from an open VFile; the result is whatever readJpegFromStream returns.
public MemoryImage readJpeg (VFile fl) {
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (!fl.isOpen) return -1;
      if (fl.eof) {
        *pEOF_flag = true;
        return 0;
      }
      auto rd = fl.rawRead(pBuf[0..max_bytes_to_read]);
      // report end-of-file together with the last chunk
      if (fl.eof) *pEOF_flag = true;
      return cast(int)rd.length;
    }
  );
}
}
3493 
/// Runs detect_jpeg_image_from_stream on an open VFile and passes its result and the three out values through.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (!fl.isOpen) return -1; // closed file: report a read error
      if (!fl.eof) {
        auto chunk = fl.rawRead(pBuf[0..max_bytes_to_read]);
        // flag end-of-file together with the last chunk
        if (fl.eof) *pEOF_flag = true;
        return cast(int)chunk.length;
      }
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps);
}
3508 // vfs API
3509 }
3510 
3511 
3512 // ////////////////////////////////////////////////////////////////////////// //
3513 version(jpegd_test) {
3514 import arsd.color;
3515 import arsd.png;
3516 
/// Test driver: decodes args[1] (default "image.jpg") once from disk and once from memory,
/// printing the detected and decoded dimensions and writing z00.png / z01.png.
void main (string[] args) {
  import std.exception : enforce;
  import std.stdio;
  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;
  {
    // enforce instead of assert: asserts are removed by -release, and the detection call
    // (which fills width/height/comps) would disappear together with them
    enforce(detect_jpeg_image_from_file(fname, width, height, comps), "cannot detect JPEG in '"~fname~"'");
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    enforce(img !is null, "cannot decode '"~fname~"'"); // readJpeg returns null on decode errors
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      file.length = cast(size_t)fl.size; // size_t: do not truncate the size through int
      fl.rawRead(file[]);
    }
    enforce(detect_jpeg_image_from_memory(file[], width, height, comps), "cannot detect JPEG in memory copy of '"~fname~"'");
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    enforce(img !is null, "cannot decode memory copy of '"~fname~"'");
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
3541 }
3542 
3543 // jpge.cpp - C++ class for JPEG compression.
3544 // Public domain, Rich Geldreich <richgel99@gmail.com>
3545 // Alex Evans: Added RGBA support, linear memory allocator.
3546 // v1.01, Dec. 18, 2010 - Initial release
3547 // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
3548 // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
3549 //                        Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
3550 // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to null in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
3551 //                       Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
3552 //                       Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
3553 // D translation by Ketmar // Invisible Vector
3554 //
3555 // This is free and unencumbered software released into the public domain.
3556 //
3557 // Anyone is free to copy, modify, publish, use, compile, sell, or
3558 // distribute this software, either in source code form or as a compiled
3559 // binary, for any purpose, commercial or non-commercial, and by any
3560 // means.
3561 //
3562 // In jurisdictions that recognize copyright laws, the author or authors
3563 // of this software dedicate any and all copyright interest in the
3564 // software to the public domain. We make this dedication for the benefit
3565 // of the public at large and to the detriment of our heirs and
3566 // successors. We intend this dedication to be an overt act of
3567 // relinquishment in perpetuity of all present and future rights to this
3568 // software under copyright law.
3569 //
3570 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3571 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3572 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
3573 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
3574 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
3575 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
3576 // OTHER DEALINGS IN THE SOFTWARE.
3577 //
3578 // For more information, please refer to <http://unlicense.org/>
3579 /**
3580  * Writes a JPEG image to a file or stream.
3581  * num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
3582  * note that alpha will not be stored in jpeg file.
3583  */
3584 
3585 public:
3586 // ////////////////////////////////////////////////////////////////////////// //
/// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
enum JpegSubsampling {
  Y_ONLY = 0, /// Y (grayscale) only
  H1V1 = 1, /// YCbCr, no subsampling
  H2V1 = 2, /// YCbCr, H2V1 subsampling
  H2V2 = 3, /// YCbCr, H2V2 subsampling
}
3589 
/// JPEG compression parameters structure.
public struct JpegParams {
  /// Quality: 1-100, higher is better. Typical values are around 50-95.
  int quality = 85;

  /// Chroma subsampling mode:
  /// 0 = Y (grayscale) only
  /// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
  /// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
  /// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
  JpegSubsampling subsampling = JpegSubsampling.H2V2;

  /// Disables CbCr discrimination - only intended for testing.
  /// If true, the Y quantization table is also used for the CbCr channels.
  bool noChromaDiscrimFlag = false;

  /// NOTE(review): presumably makes the encoder run a second pass over the image
  /// (see `total_passes` in compress_image_to_jpeg_stream) -- confirm against jpeg_encoder.
  bool twoPass = true;

  /// Returns true when `quality` lies in 1..100 and `subsampling` is one of the known modes.
  bool check () const pure nothrow @trusted @nogc {
    immutable bool qualityOk = (quality >= 1 && quality <= 100);
    // the unsigned comparison also rejects negative values cast into the enum
    immutable bool subsamplingOk = (cast(uint)subsampling <= cast(uint)JpegSubsampling.H2V2);
    return (qualityOk && subsamplingOk);
  }
}
3616 
3617 
3618 // ////////////////////////////////////////////////////////////////////////// //
/// Writes JPEG image to a stream (the `wfn` callback), using default compression parameters.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  return compress_image_to_jpeg_stream(wfn, width, height, num_channels, pImage_data, JpegParams.init);
}
3623 
/// Writes JPEG image to a stream: every piece of encoded output is passed to `wfn`.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Returns: true on success; false if the encoder rejects the parameters, a scanline cannot be
/// processed, or `pImage_data` holds fewer than width*height*num_channels bytes.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data, in JpegParams comp_params) {
  // Rows are fetched through a raw pointer below, which bypasses the slice bounds; make sure
  // the buffer really contains `height` full rows. (Non-positive dimensions are left for
  // setup() to judge.) The division form cannot overflow.
  if (width > 0 && height > 0 && num_channels > 0) {
    immutable ulong rowLen = cast(ulong)width*num_channels;
    if (pImage_data.length/rowLen < cast(ulong)height) return false;
  }
  jpeg_encoder dst_image;
  if (!dst_image.setup(wfn, width, height, num_channels, comp_params)) return false;
  // row stride in size_t, so the row offset does not overflow int on very large images
  immutable size_t row_bytes = cast(size_t)width*num_channels;
  for (uint pass_index = 0; pass_index < dst_image.total_passes(); pass_index++) {
    for (int i = 0; i < height; i++) {
      const(ubyte)* pBuf = pImage_data.ptr+i*row_bytes;
      if (!dst_image.process_scanline(pBuf)) return false;
    }
    // a null scanline tells the encoder that this pass is complete
    if (!dst_image.process_scanline(null)) return false;
  }
  dst_image.deinit();
  return true;
}
3641 
3642 
/// Writes JPEG image to file, using default compression parameters.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_file (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  return compress_image_to_jpeg_file(fname, width, height, num_channels, pImage_data, JpegParams.init);
}
3647 
/// Writes JPEG image to file.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Returns: false if the file cannot be created, encoding fails, a write comes up short,
/// or closing the file reports an error.
bool compress_image_to_jpeg_file() (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data, const scope auto ref JpegParams comp_params) {
  import std.internal.cstring;
  import core.stdc.stdio : FILE, fopen, fclose, fwrite;
  FILE* fp = fopen(fname.tempCString, "wb");
  if (fp is null) return false;
  // safety net for the failure path; the success path closes the file itself and clears `fp`
  scope(exit) if (fp !is null) fclose(fp);

  // stream sink: a short write counts as failure
  bool sink (scope const(ubyte)[] chunk) {
    return (fwrite(chunk.ptr, 1, chunk.length, fp) == chunk.length);
  }

  if (!compress_image_to_jpeg_stream(&sink, width, height, num_channels, pImage_data, comp_params)) return false;
  // close explicitly so that an error while flushing the file is reported to the caller
  immutable bool closedOk = (fclose(fp) == 0);
  fp = null;
  return closedOk;
}
3668 
3669 
3670 // ////////////////////////////////////////////////////////////////////////// //
3671 private:
3672 nothrow @trusted @nogc {
// Returns the smaller of `a` and `b` (`b` when they compare equal).
auto JPGE_MIN(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a < b) return a;
  return b;
}
// Returns the larger of `a` and `b` (`b` when they compare equal).
auto JPGE_MAX(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a > b) return a;
  return b;
}
3675 
// Allocates `nSize` bytes with C malloc and returns the pointer exactly as malloc gave it.
void *jpge_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  void* mem = malloc(nSize);
  return mem;
}
// Releases memory with C free; a null pointer is ignored.
void jpge_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
3678 
3679 
3680 // Various JPEG enums and tables.
// Sizes used by the tables and scratch buffers of the encoder:
// the *_CODES values dimension the s_*_val tables below, MAX_HUFF_SYMBOLS dimensions the
// sym_freq buffers in optimize_huffman_table, and MAX_HUFF_CODESIZE is the longest code
// length counted before huffman_enforce_max_code_size shortens the codes.
3681 enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 }
3682 
// Zig-zag walk over the 64 positions of an 8x8 block: entry k is the row-major index
// (row*8+column) of the k-th position visited. Its users are outside this part of the file.
3683 static immutable ubyte[64] s_zag = [ 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
// Base quantization table for luminance.
// NOTE(review): presumably the example table from the JPEG specification, stored in zig-zag order and scaled by the quality setting -- confirm at the point of use.
3684 static immutable short[64] s_std_lum_quant = [ 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 ];
// Base quantization table for chrominance (same caveats as for the luminance table).
3685 static immutable short[64] s_std_croma_quant = [ 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 ];
// Default Huffman tables for luminance.
// The *_bits arrays have 17 entries in the layout emit_dht reads: entries 1..16 hold the number
// of codes of each bit length and entry 0 is not used. The matching *_val array lists the
// symbols; the bit counts add up to the number of symbols listed (12 for DC, 162 for AC).
// NOTE(review): presumably these are the defaults used when Huffman tables are not optimized -- confirm at the point of use.
3686 static immutable ubyte[17] s_dc_lum_bits = [ 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 ];
3687 static immutable ubyte[DC_LUM_CODES] s_dc_lum_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
3688 static immutable ubyte[17] s_ac_lum_bits = [ 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d ];
3689 static immutable ubyte[AC_LUM_CODES] s_ac_lum_val = [
3690   0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
3691   0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
3692   0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
3693   0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
3694   0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
3695   0xf9,0xfa
3696 ];
// Default Huffman tables for chrominance, in the same layout as the luminance tables:
// *_bits entries 1..16 count the codes of each bit length (entry 0 is not used) and *_val lists
// the symbols; the bit counts add up to the number of symbols listed (12 for DC, 162 for AC).
3697 static immutable ubyte[17] s_dc_chroma_bits = [ 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 ];
3698 static immutable ubyte[DC_CHROMA_CODES] s_dc_chroma_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
3699 static immutable ubyte[17] s_ac_chroma_bits = [ 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 ];
3700 static immutable ubyte[AC_CHROMA_CODES] s_ac_chroma_val = [
3701   0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
3702   0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
3703   0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
3704   0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
3705   0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
3706   0xf9,0xfa
3707 ];
3708 
3709 // Low-level helper functions.
3710 //template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
3711 
// RGB -> YCbCr weights in fixed point, scaled by 65536: the converters below add 32768
// (one half) for rounding and shift the sum right by 16. The Y weights add up to 65536;
// the Cb weights and the Cr weights each add up to 0.
3712 enum YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; // int
3713 //ubyte clamp (int i) { if (cast(uint)(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return cast(ubyte)(i); }
// Saturates `i` to the range 0..255: negative values give 0, values above 255 give 255.
ubyte clamp() (int i) {
  pragma(inline, true);
  // a single unsigned comparison detects both "negative" and "too large"
  if (cast(uint)i <= 255) return cast(ubyte)i;
  return cast(ubyte)(i < 0 ? 0 : 255);
}
3715 
// Converts `num_pixels` packed RGB pixels (3 bytes each) at `pSrc` into packed YCbCr
// (3 bytes each) at `pDst`. Cb and Cr are offset by 128 and saturated to 0..255.
void RGB_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels-- != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = 128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16);
    immutable int cr = 128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16);
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(cb);
    pDst[2] = clamp(cr);
    pSrc += 3;
    pDst += 3;
  }
}
3724 
// Converts `num_pixels` packed RGB pixels (3 bytes each) at `pSrc` into one luma byte per pixel at `pDst`.
void RGB_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels-- != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    *pDst = cast(ubyte)((red*YR+green*YG+blue*YB+32768)>>16);
    pSrc += 3;
    ++pDst;
  }
}
3730 
// Converts `num_pixels` packed RGBA pixels (4 bytes each) at `pSrc` into packed YCbCr
// (3 bytes each) at `pDst`; the fourth source byte of every pixel is skipped.
void RGBA_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels-- != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = 128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16);
    immutable int cr = 128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16);
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(cb);
    pDst[2] = clamp(cr);
    pSrc += 4;
    pDst += 3;
  }
}
3739 
// Converts `num_pixels` packed RGBA pixels (4 bytes each) at `pSrc` into one luma byte per
// pixel at `pDst`; the fourth source byte of every pixel is skipped.
void RGBA_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels-- != 0) {
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    *pDst = cast(ubyte)((red*YR+green*YG+blue*YB+32768)>>16);
    pSrc += 4;
    ++pDst;
  }
}
3745 
// Expands `num_pixels` luma bytes at `pSrc` into packed YCbCr triples at `pDst`,
// with both chroma bytes set to the neutral value 128.
void Y_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels-- != 0) {
    pDst[0] = *pSrc;
    pDst[1] = 128;
    pDst[2] = 128;
    pDst += 3;
    ++pSrc;
  }
}
3749 
3750 // Forward DCT - DCT derived from jfdctint.
// Number of extra fractional bits the row pass of DCT2D leaves in its results
// (it scales by 1<<ROW_BITS); the column pass shifts them out again.
3751 enum { ROW_BITS = 2 }
3752 //#define DCT_DESCALE(x, n) (((x)+(((int)1)<<((n)-1)))>>(n))
// Rounding right shift: adds half of 2^n to `x` and then shifts right by `n` bits (`n` must be at least 1).
int DCT_DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = (cast(int)1)<<(n-1);
  return (x+half)>>n;
}
3754 //#define DCT_MUL(var, c) (cast(short)(var)*cast(int)(c))
3755 
3756 //#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7)
// Source text of one 8-point forward DCT step; DCT2D pastes it in with mixin(DCT1D).
// The code expects eight int variables s0..s7 in the enclosing scope and replaces their values
// with the transformed ones. All temporaries (t*, u*, z5) are declared inside the inner braces,
// so they do not leak into the caller. s0 and s4 are plain sums/differences; every other output
// is a sum of products with integer constants, which the caller scales back with DCT_DESCALE.
// The operands of the multiplications are truncated to short first.
// Keep comments out of the q{...} body: everything in there is part of the string.
3757 enum DCT1D = q{{
3758   int t0 = s0+s7, t7 = s0-s7, t1 = s1+s6, t6 = s1-s6, t2 = s2+s5, t5 = s2-s5, t3 = s3+s4, t4 = s3-s4;
3759   int t10 = t0+t3, t13 = t0-t3, t11 = t1+t2, t12 = t1-t2;
3760   int u1 = (cast(short)(t12+t13)*cast(int)(4433));
3761   s2 = u1+(cast(short)(t13)*cast(int)(6270));
3762   s6 = u1+(cast(short)(t12)*cast(int)(-15137));
3763   u1 = t4+t7;
3764   int u2 = t5+t6, u3 = t4+t6, u4 = t5+t7;
3765   int z5 = (cast(short)(u3+u4)*cast(int)(9633));
3766   t4 = (cast(short)(t4)*cast(int)(2446)); t5 = (cast(short)(t5)*cast(int)(16819));
3767   t6 = (cast(short)(t6)*cast(int)(25172)); t7 = (cast(short)(t7)*cast(int)(12299));
3768   u1 = (cast(short)(u1)*cast(int)(-7373)); u2 = (cast(short)(u2)*cast(int)(-20995));
3769   u3 = (cast(short)(u3)*cast(int)(-16069)); u4 = (cast(short)(u4)*cast(int)(-3196));
3770   u3 += z5; u4 += z5;
3771   s0 = t10+t11; s1 = t7+u1+u4; s3 = t6+u2+u3; s4 = t10-t11; s5 = t5+u2+u4; s7 = t4+u1+u3;
3772 }};
3773 
// In-place forward DCT of the 8x8 block of ints at `p` (64 values, row after row):
// the 1-D transform from DCT1D is applied to every row, then to every column.
void DCT2D (int* p) {
  // pass 1: rows. Results keep ROW_BITS extra bits of precision.
  int* q = p;
  foreach (immutable row; 0..8) {
    int s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
    mixin(DCT1D); // rewrites s0..s7
    q[0] = s0<<ROW_BITS;
    q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS);
    q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS);
    q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
    q[4] = s4<<ROW_BITS;
    q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS);
    q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS);
    q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
    q += 8;
  }
  // pass 2: columns. The extra ROW_BITS and three more bits are shifted out.
  q = p;
  foreach (immutable col; 0..8) {
    int s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
    mixin(DCT1D); // rewrites s0..s7
    q[0*8] = DCT_DESCALE(s0, ROW_BITS+3);
    q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3);
    q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3);
    q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
    q[4*8] = DCT_DESCALE(s4, ROW_BITS+3);
    q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3);
    q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3);
    q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
    ++q;
  }
}
3792 
// One entry of the Huffman construction work list: `m_key` is the value the entries are sorted
// by (a symbol count at first, later reused for the code length) and `m_sym_index` names the symbol.
struct sym_freq {
  uint m_key;
  uint m_sym_index;
}
3794 
// Radix sorts the `num_syms` entries of pSyms0 by their 32-bit key m_key, one byte per pass
// starting with the lowest byte. pSyms1 is a work buffer of the same size; the two buffers
// swap roles after every pass. Returns a pointer to whichever buffer holds the sorted entries.
sym_freq* radix_sort_syms (uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) {
  enum uint MaxPasses = 4;
  // one 256-bin histogram per key byte (zero-initialised by D)
  uint[256*MaxPasses] histogram;
  foreach (immutable idx; 0..num_syms) {
    immutable uint key = pSyms0[idx].m_key;
    foreach (immutable uint digit; 0..MaxPasses) ++histogram[digit*256+((key>>(digit*8))&0xFF)];
  }
  // a high byte that is zero in every key needs no pass of its own
  uint passCount = MaxPasses;
  while (passCount > 1 && histogram[(passCount-1)*256] == num_syms) --passCount;

  sym_freq* src = pSyms0;
  sym_freq* dst = pSyms1;
  uint[256] bucketStart;
  foreach (immutable uint pass; 0..passCount) {
    immutable uint shift = pass*8;
    const(uint)* counts = histogram.ptr+(pass<<8);
    // turn the bin counts into start offsets
    uint running = 0;
    foreach (immutable bin; 0..256) {
      bucketStart[bin] = running;
      running += counts[bin];
    }
    // distribute the entries, keeping their relative order inside each bin
    foreach (immutable idx; 0..num_syms) {
      immutable uint digit = (src[idx].m_key>>shift)&0xFF;
      dst[bucketStart[digit]++] = src[idx];
    }
    sym_freq* swapped = src;
    src = dst;
    dst = swapped;
  }
  return src;
}
3820 
3821 // calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
// Works in place on A[0..n]: the m_key fields are read as symbol counts and are replaced by
// the code length of each entry (optimize_huffman_table reads them as code sizes afterwards).
// The caller passes the entries sorted by m_key (see radix_sort_syms).
3822 void calculate_minimum_redundancy (sym_freq* A, int n) {
3823   int root, leaf, next, avbl, used, dpth;
  // trivial inputs: nothing to do, or a single symbol that gets a code of length 1
3824   if (n == 0) return;
3825   if (n == 1) { A[0].m_key = 1; return; }
  // phase 1: combine the two smallest remaining weights (unused leaves start at `leaf`,
  // finished internal nodes at `root`); A[next] receives the combined weight and each
  // consumed internal node is overwritten with the index of its parent
3826   A[0].m_key += A[1].m_key; root = 0; leaf = 2;
3827   for (next=1; next < n-1; next++)
3828   {
3829     if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
3830     if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
3831   }
  // phase 2: turn the parent indices into depths; A[n-2] is the top node with depth 0
3832   A[n-2].m_key = 0;
3833   for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
  // phase 3: walk down level by level; `avbl` is the number of nodes on the current level,
  // `used` how many of them are internal, and the remaining ones become codes of length `dpth`
  // that are written from the end of the array towards the front
3834   avbl = 1; used = dpth = 0; root = n-2; next = n-1;
3835   while (avbl>0)
3836   {
3837     while (root >= 0 && cast(int)A[root].m_key == dpth) { used++; root--; }
3838     while (avbl>used) { A[next--].m_key = dpth; avbl--; }
3839     avbl = 2*used; dpth++; used = 0;
3840   }
3841 }
3842 
// Limits canonical Huffman code table's max code size to max_code_size.
// pNum_codes[len] is the number of codes of length `len` (lengths 1..MAX_HUFF_CODESIZE); the
// counts are adjusted in place. Tables with at most one code are left untouched.
void huffman_enforce_max_code_size (int* pNum_codes, int code_list_len, int max_code_size) {
  if (code_list_len <= 1) return;
  // move every code that is too long into the longest permitted length
  foreach (immutable len; max_code_size+1..MAX_HUFF_CODESIZE+1) pNum_codes[max_code_size] += pNum_codes[len];
  // code space in use, in units of one code of the maximum length
  uint total = 0;
  foreach (immutable len; 1..max_code_size+1) total += (cast(uint)pNum_codes[len])<<(max_code_size-len);
  immutable ulong target = 1UL<<max_code_size;
  // give back one unit per round until exactly the available code space is used
  while (total != target) {
    --pNum_codes[max_code_size];
    foreach_reverse (immutable len; 1..max_code_size) {
      if (pNum_codes[len] == 0) continue;
      // replace one shorter code by two codes that are one bit longer
      --pNum_codes[len];
      pNum_codes[len+1] += 2;
      break;
    }
    --total;
  }
}
3857 }
3858 
3859 
3860 // ////////////////////////////////////////////////////////////////////////// //
3861 // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
3862 struct jpeg_encoder {
3863 public:
3864   alias WriteFunc = bool delegate (scope const(ubyte)[] buf);
3865 
3866 nothrow /*@trusted @nogc*/:
3867 private:
3868   alias sample_array_t = int;
3869 
3870   WriteFunc m_pStream;
3871   JpegParams m_params;
3872   ubyte m_num_components;
3873   ubyte[3] m_comp_h_samp;
3874   ubyte[3] m_comp_v_samp;
3875   int m_image_x, m_image_y, m_image_bpp, m_image_bpl;
3876   int m_image_x_mcu, m_image_y_mcu;
3877   int m_image_bpl_xlt, m_image_bpl_mcu;
3878   int m_mcus_per_row;
3879   int m_mcu_x, m_mcu_y;
3880   ubyte*[16] m_mcu_lines;
3881   ubyte m_mcu_y_ofs;
3882   sample_array_t[64] m_sample_array;
3883   short[64] m_coefficient_array;
3884   int[64][2] m_quantization_tables;
3885   uint[256][4] m_huff_codes;
3886   ubyte[256][4] m_huff_code_sizes;
3887   ubyte[17][4] m_huff_bits;
3888   ubyte[256][4] m_huff_val;
3889   uint[256][4] m_huff_count;
3890   int[3] m_last_dc_val;
3891   enum JPGE_OUT_BUF_SIZE = 2048;
3892   ubyte[JPGE_OUT_BUF_SIZE] m_out_buf;
3893   ubyte* m_pOut_buf;
3894   uint m_out_buf_left;
3895   uint m_bit_buffer;
3896   uint m_bits_in;
3897   ubyte m_pass_num;
3898   bool m_all_stream_writes_succeeded = true;
3899 
3900 private:
3901   // Generates an optimized Huffman table.
3902   void optimize_huffman_table (int table_num, int table_len) {
3903     sym_freq[MAX_HUFF_SYMBOLS] syms0;
3904     sym_freq[MAX_HUFF_SYMBOLS] syms1;
3905     syms0[0].m_key = 1; syms0[0].m_sym_index = 0;  // dummy symbol, assures that no valid code contains all 1's
3906     int num_used_syms = 1;
3907     const uint *pSym_count = &m_huff_count[table_num][0];
3908     for (int i = 0; i < table_len; i++) {
3909       if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i+1; }
3910     }
3911     sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0.ptr, syms1.ptr);
3912     calculate_minimum_redundancy(pSyms, num_used_syms);
3913 
3914     // Count the # of symbols of each code size.
3915     int[1+MAX_HUFF_CODESIZE] num_codes;
3916     //clear_obj(num_codes);
3917     for (int i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;
3918 
3919     enum JPGE_CODE_SIZE_LIMIT = 16u; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
3920     huffman_enforce_max_code_size(num_codes.ptr, num_used_syms, JPGE_CODE_SIZE_LIMIT);
3921 
3922     // Compute m_huff_bits array, which contains the # of symbols per code size.
3923     //clear_obj(m_huff_bits[table_num]);
3924     m_huff_bits[table_num][] = 0;
3925     for (int i = 1; i <= cast(int)JPGE_CODE_SIZE_LIMIT; i++) m_huff_bits[table_num][i] = cast(ubyte)(num_codes[i]);
3926 
3927     // Remove the dummy symbol added above, which must be in largest bucket.
3928     for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) {
3929       if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
3930     }
3931 
3932     // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
3933     for (int i = num_used_syms-1; i >= 1; i--) m_huff_val[table_num][num_used_syms-1-i] = cast(ubyte)(pSyms[i].m_sym_index-1);
3934   }
3935 
3936   bool put_obj(T) (T v) {
3937     try {
3938       return (m_pStream !is null && m_pStream((&v)[0..1]));
3939     } catch (Exception) {}
3940     return false;
3941   }
3942 
3943   bool put_buf() (const(void)* v, uint len) {
3944     try {
3945       return (m_pStream !is null && m_pStream((cast(ubyte*)v)[0..len]));
3946     } catch (Exception) {}
3947     return false;
3948   }
3949 
3950   // JPEG marker generation.
3951   void emit_byte (ubyte i) {
3952     m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_obj(i);
3953   }
3954 
3955   void emit_word(uint i) {
3956     emit_byte(cast(ubyte)(i>>8));
3957     emit_byte(cast(ubyte)(i&0xFF));
3958   }
3959 
3960   void emit_marker (int marker) {
3961     emit_byte(cast(ubyte)(0xFF));
3962     emit_byte(cast(ubyte)(marker));
3963   }
3964 
3965   // Emit JFIF marker
3966   void emit_jfif_app0 () {
3967     emit_marker(M_APP0);
3968     emit_word(2+4+1+2+1+2+2+1+1);
3969     emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
3970     emit_byte(0);
3971     emit_byte(1); /* Major version */
3972     emit_byte(1); /* Minor version */
3973     emit_byte(0); /* Density unit */
3974     emit_word(1);
3975     emit_word(1);
3976     emit_byte(0); /* No thumbnail image */
3977     emit_byte(0);
3978   }
3979 
3980   // Emit quantization tables
3981   void emit_dqt () {
3982     for (int i = 0; i < (m_num_components == 3 ? 2 : 1); i++) {
3983       emit_marker(M_DQT);
3984       emit_word(64+1+2);
3985       emit_byte(cast(ubyte)(i));
3986       for (int j = 0; j < 64; j++) emit_byte(cast(ubyte)(m_quantization_tables[i][j]));
3987     }
3988   }
3989 
3990   // Emit start of frame marker
3991   void emit_sof () {
3992     emit_marker(M_SOF0); /* baseline */
3993     emit_word(3*m_num_components+2+5+1);
3994     emit_byte(8); /* precision */
3995     emit_word(m_image_y);
3996     emit_word(m_image_x);
3997     emit_byte(m_num_components);
3998     for (int i = 0; i < m_num_components; i++) {
3999       emit_byte(cast(ubyte)(i+1)); /* component ID */
4000       emit_byte(cast(ubyte)((m_comp_h_samp[i]<<4)+m_comp_v_samp[i])); /* h and v sampling */
4001       emit_byte(i > 0); /* quant. table num */
4002     }
4003   }
4004 
4005   // Emit Huffman table.
4006   void emit_dht (ubyte* bits, ubyte* val, int index, bool ac_flag) {
4007     emit_marker(M_DHT);
4008     int length = 0;
4009     for (int i = 1; i <= 16; i++) length += bits[i];
4010     emit_word(length+2+1+16);
4011     emit_byte(cast(ubyte)(index+(ac_flag<<4)));
4012     for (int i = 1; i <= 16; i++) emit_byte(bits[i]);
4013     for (int i = 0; i < length; i++) emit_byte(val[i]);
4014   }
4015 
4016   // Emit all Huffman tables.
4017   void emit_dhts () {
4018     emit_dht(m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr, 0, false);
4019     emit_dht(m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr, 0, true);
4020     if (m_num_components == 3) {
4021       emit_dht(m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr, 1, false);
4022       emit_dht(m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr, 1, true);
4023     }
4024   }
4025 
4026   // emit start of scan
4027   void emit_sos () {
4028     emit_marker(M_SOS);
4029     emit_word(2*m_num_components+2+1+3);
4030     emit_byte(m_num_components);
4031     for (int i = 0; i < m_num_components; i++) {
4032       emit_byte(cast(ubyte)(i+1));
4033       if (i == 0)
4034         emit_byte((0<<4)+0);
4035       else
4036         emit_byte((1<<4)+1);
4037     }
4038     emit_byte(0); /* spectral selection */
4039     emit_byte(63);
4040     emit_byte(0);
4041   }
4042 
4043   // Emit all markers at beginning of image file.
4044   void emit_markers () {
4045     emit_marker(M_SOI);
4046     emit_jfif_app0();
4047     emit_dqt();
4048     emit_sof();
4049     emit_dhts();
4050     emit_sos();
4051   }
4052 
4053   // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
4054   void compute_huffman_table (uint* codes, ubyte* code_sizes, ubyte* bits, ubyte* val) {
4055     import core.stdc.string : memset;
4056 
4057     int i, l, last_p, si;
4058     ubyte[257] huff_size;
4059     uint[257] huff_code;
4060     uint code;
4061 
4062     int p = 0;
4063     for (l = 1; l <= 16; l++)
4064       for (i = 1; i <= bits[l]; i++)
4065         huff_size[p++] = cast(ubyte)l;
4066 
4067     huff_size[p] = 0; last_p = p; // write sentinel
4068 
4069     code = 0; si = huff_size[0]; p = 0;
4070 
4071     while (huff_size[p])
4072     {
4073       while (huff_size[p] == si)
4074         huff_code[p++] = code++;
4075       code <<= 1;
4076       si++;
4077     }
4078 
4079     memset(codes, 0, codes[0].sizeof*256);
4080     memset(code_sizes, 0, code_sizes[0].sizeof*256);
4081     for (p = 0; p < last_p; p++)
4082     {
4083       codes[val[p]]      = huff_code[p];
4084       code_sizes[val[p]] = huff_size[p];
4085     }
4086   }
4087 
4088   // Quantization table generation.
4089   void compute_quant_table (int* pDst, const(short)* pSrc) {
4090     int q;
4091     if (m_params.quality < 50)
4092       q = 5000/m_params.quality;
4093     else
4094       q = 200-m_params.quality*2;
4095     for (int i = 0; i < 64; i++) {
4096       int j = *pSrc++; j = (j*q+50L)/100L;
4097       *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
4098     }
4099   }
4100 
4101   // Higher-level methods.
4102   void first_pass_init () {
4103     import core.stdc.string : memset;
4104     m_bit_buffer = 0; m_bits_in = 0;
4105     memset(m_last_dc_val.ptr, 0, 3*m_last_dc_val[0].sizeof);
4106     m_mcu_y_ofs = 0;
4107     m_pass_num = 1;
4108   }
4109 
4110   bool second_pass_init () {
4111     compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr);
4112     compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr);
4113     if (m_num_components > 1)
4114     {
4115       compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr);
4116       compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr);
4117     }
4118     first_pass_init();
4119     emit_markers();
4120     m_pass_num = 2;
4121     return true;
4122   }
4123 
4124   bool jpg_open (int p_x_res, int p_y_res, int src_channels) {
4125     m_num_components = 3;
4126     switch (m_params.subsampling) {
4127       case JpegSubsampling.Y_ONLY:
4128         m_num_components = 1;
4129         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4130         m_mcu_x          = 8; m_mcu_y          = 8;
4131         break;
4132       case JpegSubsampling.H1V1:
4133         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4134         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4135         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4136         m_mcu_x          = 8; m_mcu_y          = 8;
4137         break;
4138       case JpegSubsampling.H2V1:
4139         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
4140         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4141         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4142         m_mcu_x          = 16; m_mcu_y         = 8;
4143         break;
4144       case JpegSubsampling.H2V2:
4145         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
4146         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4147         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4148         m_mcu_x          = 16; m_mcu_y         = 16;
4149         break;
4150       default: assert(0);
4151     }
4152 
4153     m_image_x        = p_x_res; m_image_y = p_y_res;
4154     m_image_bpp      = src_channels;
4155     m_image_bpl      = m_image_x*src_channels;
4156     m_image_x_mcu    = (m_image_x+m_mcu_x-1)&(~(m_mcu_x-1));
4157     m_image_y_mcu    = (m_image_y+m_mcu_y-1)&(~(m_mcu_y-1));
4158     m_image_bpl_xlt  = m_image_x*m_num_components;
4159     m_image_bpl_mcu  = m_image_x_mcu*m_num_components;
4160     m_mcus_per_row   = m_image_x_mcu/m_mcu_x;
4161 
4162     if ((m_mcu_lines[0] = cast(ubyte*)(jpge_malloc(m_image_bpl_mcu*m_mcu_y))) is null) return false;
4163     for (int i = 1; i < m_mcu_y; i++)
4164       m_mcu_lines[i] = m_mcu_lines[i-1]+m_image_bpl_mcu;
4165 
4166     compute_quant_table(m_quantization_tables[0].ptr, s_std_lum_quant.ptr);
4167     compute_quant_table(m_quantization_tables[1].ptr, (m_params.noChromaDiscrimFlag ? s_std_lum_quant.ptr : s_std_croma_quant.ptr));
4168 
4169     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4170     m_pOut_buf = m_out_buf.ptr;
4171 
4172     if (m_params.twoPass)
4173     {
4174       //clear_obj(m_huff_count);
4175       import core.stdc.string : memset;
4176       memset(m_huff_count.ptr, 0, m_huff_count.sizeof);
4177       first_pass_init();
4178     }
4179     else
4180     {
4181       import core.stdc.string : memcpy;
4182       memcpy(m_huff_bits[0+0].ptr, s_dc_lum_bits.ptr, 17);    memcpy(m_huff_val[0+0].ptr, s_dc_lum_val.ptr, DC_LUM_CODES);
4183       memcpy(m_huff_bits[2+0].ptr, s_ac_lum_bits.ptr, 17);    memcpy(m_huff_val[2+0].ptr, s_ac_lum_val.ptr, AC_LUM_CODES);
4184       memcpy(m_huff_bits[0+1].ptr, s_dc_chroma_bits.ptr, 17); memcpy(m_huff_val[0+1].ptr, s_dc_chroma_val.ptr, DC_CHROMA_CODES);
4185       memcpy(m_huff_bits[2+1].ptr, s_ac_chroma_bits.ptr, 17); memcpy(m_huff_val[2+1].ptr, s_ac_chroma_val.ptr, AC_CHROMA_CODES);
4186       if (!second_pass_init()) return false;   // in effect, skip over the first pass
4187     }
4188     return m_all_stream_writes_succeeded;
4189   }
4190 
4191   void load_block_8_8_grey (int x) {
4192     ubyte *pSrc;
4193     sample_array_t *pDst = m_sample_array.ptr;
4194     x <<= 3;
4195     for (int i = 0; i < 8; i++, pDst += 8)
4196     {
4197       pSrc = m_mcu_lines[i]+x;
4198       pDst[0] = pSrc[0]-128; pDst[1] = pSrc[1]-128; pDst[2] = pSrc[2]-128; pDst[3] = pSrc[3]-128;
4199       pDst[4] = pSrc[4]-128; pDst[5] = pSrc[5]-128; pDst[6] = pSrc[6]-128; pDst[7] = pSrc[7]-128;
4200     }
4201   }
4202 
4203   void load_block_8_8 (int x, int y, int c) {
4204     ubyte *pSrc;
4205     sample_array_t *pDst = m_sample_array.ptr;
4206     x = (x*(8*3))+c;
4207     y <<= 3;
4208     for (int i = 0; i < 8; i++, pDst += 8)
4209     {
4210       pSrc = m_mcu_lines[y+i]+x;
4211       pDst[0] = pSrc[0*3]-128; pDst[1] = pSrc[1*3]-128; pDst[2] = pSrc[2*3]-128; pDst[3] = pSrc[3*3]-128;
4212       pDst[4] = pSrc[4*3]-128; pDst[5] = pSrc[5*3]-128; pDst[6] = pSrc[6*3]-128; pDst[7] = pSrc[7*3]-128;
4213     }
4214   }
4215 
4216   void load_block_16_8 (int x, int c) {
4217     ubyte* pSrc1;
4218     ubyte* pSrc2;
4219     sample_array_t *pDst = m_sample_array.ptr;
4220     x = (x*(16*3))+c;
4221     int a = 0, b = 2;
4222     for (int i = 0; i < 16; i += 2, pDst += 8)
4223     {
4224       pSrc1 = m_mcu_lines[i+0]+x;
4225       pSrc2 = m_mcu_lines[i+1]+x;
4226       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3]+pSrc2[ 0*3]+pSrc2[ 1*3]+a)>>2)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3]+pSrc2[ 2*3]+pSrc2[ 3*3]+b)>>2)-128;
4227       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3]+pSrc2[ 4*3]+pSrc2[ 5*3]+a)>>2)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3]+pSrc2[ 6*3]+pSrc2[ 7*3]+b)>>2)-128;
4228       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3]+pSrc2[ 8*3]+pSrc2[ 9*3]+a)>>2)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3]+pSrc2[10*3]+pSrc2[11*3]+b)>>2)-128;
4229       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3]+pSrc2[12*3]+pSrc2[13*3]+a)>>2)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3]+pSrc2[14*3]+pSrc2[15*3]+b)>>2)-128;
4230       int temp = a; a = b; b = temp;
4231     }
4232   }
4233 
4234   void load_block_16_8_8 (int x, int c) {
4235     ubyte *pSrc1;
4236     sample_array_t *pDst = m_sample_array.ptr;
4237     x = (x*(16*3))+c;
4238     for (int i = 0; i < 8; i++, pDst += 8) {
4239       pSrc1 = m_mcu_lines[i+0]+x;
4240       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3])>>1)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3])>>1)-128;
4241       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3])>>1)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3])>>1)-128;
4242       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3])>>1)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3])>>1)-128;
4243       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3])>>1)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3])>>1)-128;
4244     }
4245   }
4246 
4247   void load_quantized_coefficients (int component_num) {
4248     int *q = m_quantization_tables[component_num > 0].ptr;
4249     short *pDst = m_coefficient_array.ptr;
4250     for (int i = 0; i < 64; i++)
4251     {
4252       sample_array_t j = m_sample_array[s_zag[i]];
4253       if (j < 0)
4254       {
4255         if ((j = -j+(*q>>1)) < *q)
4256           *pDst++ = 0;
4257         else
4258           *pDst++ = cast(short)(-(j/ *q));
4259       }
4260       else
4261       {
4262         if ((j = j+(*q>>1)) < *q)
4263           *pDst++ = 0;
4264         else
4265           *pDst++ = cast(short)((j/ *q));
4266       }
4267       q++;
4268     }
4269   }
4270 
4271   void flush_output_buffer () {
4272     if (m_out_buf_left != JPGE_OUT_BUF_SIZE) m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_buf(m_out_buf.ptr, JPGE_OUT_BUF_SIZE-m_out_buf_left);
4273     m_pOut_buf = m_out_buf.ptr;
4274     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4275   }
4276 
4277   void put_bits (uint bits, uint len) {
4278     m_bit_buffer |= (cast(uint)bits<<(24-(m_bits_in += len)));
4279     while (m_bits_in >= 8) {
4280       ubyte c;
4281       //#define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
4282       //JPGE_PUT_BYTE(c = (ubyte)((m_bit_buffer>>16)&0xFF));
4283       //if (c == 0xFF) JPGE_PUT_BYTE(0);
4284       c = cast(ubyte)((m_bit_buffer>>16)&0xFF);
4285       *m_pOut_buf++ = c;
4286       if (--m_out_buf_left == 0) flush_output_buffer();
4287       if (c == 0xFF) {
4288         *m_pOut_buf++ = 0;
4289         if (--m_out_buf_left == 0) flush_output_buffer();
4290       }
4291       m_bit_buffer <<= 8;
4292       m_bits_in -= 8;
4293     }
4294   }
4295 
4296   void code_coefficients_pass_one (int component_num) {
4297     if (component_num >= 3) return; // just to shut up static analysis
4298     int i, run_len, nbits, temp1;
4299     short *src = m_coefficient_array.ptr;
4300     uint *dc_count = (component_num ? m_huff_count[0+1].ptr : m_huff_count[0+0].ptr);
4301     uint *ac_count = (component_num ? m_huff_count[2+1].ptr : m_huff_count[2+0].ptr);
4302 
4303     temp1 = src[0]-m_last_dc_val[component_num];
4304     m_last_dc_val[component_num] = src[0];
4305     if (temp1 < 0) temp1 = -temp1;
4306 
4307     nbits = 0;
4308     while (temp1)
4309     {
4310       nbits++; temp1 >>= 1;
4311     }
4312 
4313     dc_count[nbits]++;
4314     for (run_len = 0, i = 1; i < 64; i++)
4315     {
4316       if ((temp1 = m_coefficient_array[i]) == 0)
4317         run_len++;
4318       else
4319       {
4320         while (run_len >= 16)
4321         {
4322           ac_count[0xF0]++;
4323           run_len -= 16;
4324         }
4325         if (temp1 < 0) temp1 = -temp1;
4326         nbits = 1;
4327         while (temp1 >>= 1) nbits++;
4328         ac_count[(run_len<<4)+nbits]++;
4329         run_len = 0;
4330       }
4331     }
4332     if (run_len) ac_count[0]++;
4333   }
4334 
4335   void code_coefficients_pass_two (int component_num) {
4336     int i, j, run_len, nbits, temp1, temp2;
4337     short *pSrc = m_coefficient_array.ptr;
4338     uint*[2] codes;
4339     ubyte*[2] code_sizes;
4340 
4341     if (component_num == 0)
4342     {
4343       codes[0] = m_huff_codes[0+0].ptr; codes[1] = m_huff_codes[2+0].ptr;
4344       code_sizes[0] = m_huff_code_sizes[0+0].ptr; code_sizes[1] = m_huff_code_sizes[2+0].ptr;
4345     }
4346     else
4347     {
4348       codes[0] = m_huff_codes[0+1].ptr; codes[1] = m_huff_codes[2+1].ptr;
4349       code_sizes[0] = m_huff_code_sizes[0+1].ptr; code_sizes[1] = m_huff_code_sizes[2+1].ptr;
4350     }
4351 
4352     temp1 = temp2 = pSrc[0]-m_last_dc_val[component_num];
4353     m_last_dc_val[component_num] = pSrc[0];
4354 
4355     if (temp1 < 0)
4356     {
4357       temp1 = -temp1; temp2--;
4358     }
4359 
4360     nbits = 0;
4361     while (temp1)
4362     {
4363       nbits++; temp1 >>= 1;
4364     }
4365 
4366     put_bits(codes[0][nbits], code_sizes[0][nbits]);
4367     if (nbits) put_bits(temp2&((1<<nbits)-1), nbits);
4368 
4369     for (run_len = 0, i = 1; i < 64; i++)
4370     {
4371       if ((temp1 = m_coefficient_array[i]) == 0)
4372         run_len++;
4373       else
4374       {
4375         while (run_len >= 16)
4376         {
4377           put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
4378           run_len -= 16;
4379         }
4380         if ((temp2 = temp1) < 0)
4381         {
4382           temp1 = -temp1;
4383           temp2--;
4384         }
4385         nbits = 1;
4386         while (temp1 >>= 1)
4387           nbits++;
4388         j = (run_len<<4)+nbits;
4389         put_bits(codes[1][j], code_sizes[1][j]);
4390         put_bits(temp2&((1<<nbits)-1), nbits);
4391         run_len = 0;
4392       }
4393     }
4394     if (run_len)
4395       put_bits(codes[1][0], code_sizes[1][0]);
4396   }
4397 
4398   void code_block (int component_num) {
4399     DCT2D(m_sample_array.ptr);
4400     load_quantized_coefficients(component_num);
4401     if (m_pass_num == 1)
4402       code_coefficients_pass_one(component_num);
4403     else
4404       code_coefficients_pass_two(component_num);
4405   }
4406 
4407   void process_mcu_row () {
4408     if (m_num_components == 1)
4409     {
4410       for (int i = 0; i < m_mcus_per_row; i++)
4411       {
4412         load_block_8_8_grey(i); code_block(0);
4413       }
4414     }
4415     else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
4416     {
4417       for (int i = 0; i < m_mcus_per_row; i++)
4418       {
4419         load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
4420       }
4421     }
4422     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
4423     {
4424       for (int i = 0; i < m_mcus_per_row; i++)
4425       {
4426         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4427         load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
4428       }
4429     }
4430     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
4431     {
4432       for (int i = 0; i < m_mcus_per_row; i++)
4433       {
4434         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4435         load_block_8_8(i*2+0, 1, 0); code_block(0); load_block_8_8(i*2+1, 1, 0); code_block(0);
4436         load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
4437       }
4438     }
4439   }
4440 
4441   bool terminate_pass_one () {
4442     optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES);
4443     if (m_num_components > 1)
4444     {
4445       optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES);
4446     }
4447     return second_pass_init();
4448   }
4449 
4450   bool terminate_pass_two () {
4451     put_bits(0x7F, 7);
4452     flush_output_buffer();
4453     emit_marker(M_EOI);
4454     m_pass_num++; // purposely bump up m_pass_num, for debugging
4455     return true;
4456   }
4457 
4458   bool process_end_of_image () {
4459     if (m_mcu_y_ofs)
4460     {
4461       if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
4462       {
4463         for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) {
4464           import core.stdc.string : memcpy;
4465           memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs-1], m_image_bpl_mcu);
4466         }
4467       }
4468       process_mcu_row();
4469     }
4470 
4471     if (m_pass_num == 1)
4472       return terminate_pass_one();
4473     else
4474       return terminate_pass_two();
4475   }
4476 
4477   void load_mcu (const(void)* pSrc) {
4478     import core.stdc.string : memcpy;
4479     const(ubyte)* Psrc = cast(const(ubyte)*)(pSrc);
4480 
4481     ubyte* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
4482 
4483     if (m_num_components == 1)
4484     {
4485       if (m_image_bpp == 4)
4486         RGBA_to_Y(pDst, Psrc, m_image_x);
4487       else if (m_image_bpp == 3)
4488         RGB_to_Y(pDst, Psrc, m_image_x);
4489       else
4490         memcpy(pDst, Psrc, m_image_x);
4491     }
4492     else
4493     {
4494       if (m_image_bpp == 4)
4495         RGBA_to_YCC(pDst, Psrc, m_image_x);
4496       else if (m_image_bpp == 3)
4497         RGB_to_YCC(pDst, Psrc, m_image_x);
4498       else
4499         Y_to_YCC(pDst, Psrc, m_image_x);
4500     }
4501 
4502     // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
4503     if (m_num_components == 1) {
4504       import core.stdc.string : memset;
4505       memset(m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt, pDst[m_image_bpl_xlt-1], m_image_x_mcu-m_image_x);
4506     } else
4507     {
4508       const ubyte y = pDst[m_image_bpl_xlt-3+0], cb = pDst[m_image_bpl_xlt-3+1], cr = pDst[m_image_bpl_xlt-3+2];
4509       ubyte *q = m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt;
4510       for (int i = m_image_x; i < m_image_x_mcu; i++)
4511       {
4512         *q++ = y; *q++ = cb; *q++ = cr;
4513       }
4514     }
4515 
4516     if (++m_mcu_y_ofs == m_mcu_y)
4517     {
4518       process_mcu_row();
4519       m_mcu_y_ofs = 0;
4520     }
4521   }
4522 
4523   void clear() {
4524     m_mcu_lines[0] = null;
4525     m_pass_num = 0;
4526     m_all_stream_writes_succeeded = true;
4527   }
4528 
4529 
4530 public:
4531   //this () { clear(); }
4532   ~this () { deinit(); }
4533 
4534   @disable this (this); // no copies
4535 
4536   // Initializes the compressor.
4537   // pStream: The stream object to use for writing compressed data.
4538   // comp_params - Compression parameters structure, defined above.
4539   // width, height  - Image dimensions.
  // src_channels - May be 1, 3, or 4. 1 indicates grayscale, 3 indicates RGB and 4 indicates RGBA source data.
  // Returns false on invalid arguments, on out of memory, or if a stream write fails.
4542   bool setup() (WriteFunc pStream, int width, int height, int src_channels, const scope auto ref JpegParams comp_params) {
4543     deinit();
4544     if ((pStream is null || width < 1 || height < 1) || (src_channels != 1 && src_channels != 3 && src_channels != 4) || !comp_params.check()) return false;
4545     m_pStream = pStream;
4546     m_params = comp_params;
4547     return jpg_open(width, height, src_channels);
4548   }
4549 
4550   bool setup() (WriteFunc pStream, int width, int height, int src_channels) { return setup(pStream, width, height, src_channels, JpegParams()); }
4551 
4552   @property ref inout(JpegParams) params () return inout pure nothrow @trusted @nogc { pragma(inline, true); return m_params; }
4553 
4554   // Deinitializes the compressor, freeing any allocated memory. May be called at any time.
4555   void deinit () {
4556     jpge_free(m_mcu_lines[0]);
4557     clear();
4558   }
4559 
4560   @property uint total_passes () const pure nothrow @trusted @nogc { pragma(inline, true); return (m_params.twoPass ? 2 : 1); }
4561   @property uint cur_pass () const pure nothrow @trusted @nogc { pragma(inline, true); return m_pass_num; }
4562 
4563   // Call this method with each source scanline.
  // width*src_channels bytes per scanline are expected (Y, RGB, or RGBA format, matching src_channels).
4565   // You must call with null after all scanlines are processed to finish compression.
4566   // Returns false on out of memory or if a stream write fails.
4567   bool process_scanline (const(void)* pScanline) {
4568     if (m_pass_num < 1 || m_pass_num > 2) return false;
4569     if (m_all_stream_writes_succeeded) {
4570       if (pScanline is null) {
4571         if (!process_end_of_image()) return false;
4572       } else {
4573         load_mcu(pScanline);
4574       }
4575     }
4576     return m_all_stream_writes_succeeded;
4577   }
4578 }