The OpenD Programming Language

1 module gamut.codecs.msf_gif;
2 
3 version(encodeGIF):
4 
5 /*
6 HOW TO USE:
7 
8     In exactly one translation unit (.c or .cpp file), #define MSF_GIF_IMPL before including the header, like so:
9 
10     #define MSF_GIF_IMPL
11     #include "msf_gif.h"
12 
13     Everywhere else, just include the header like normal.
14 
15 
16 USAGE EXAMPLE:
17 
18     int width = 480, height = 320, centisecondsPerFrame = 5, bitDepth = 16;
19     MsfGifState gifState = {};
20     // msf_gif_bgra_flag = true; //optionally, set this flag if your pixels are in BGRA format instead of RGBA
21     // msf_gif_alpha_threshold = 128; //optionally, enable transparency (see function documentation below for details)
22     msf_gif_begin(&gifState, width, height);
23     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 1
24     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 2
25     msf_gif_frame(&gifState, ..., centisecondsPerFrame, bitDepth, width * 4); //frame 3, etc...
26     MsfGifResult result = msf_gif_end(&gifState);
27     if (result.data) {
28         FILE * fp = fopen("MyGif.gif", "wb");
29         fwrite(result.data, result.dataSize, 1, fp);
30         fclose(fp);
31     }
32     msf_gif_free(result);
33 
34 Detailed function documentation can be found in the header section below.
35 
36 
37 ERROR HANDLING:
38 
39     If memory allocation fails, the functions will signal the error via their return values.
40     If one function call fails, the library will free all of its allocations,
41     and all subsequent calls will safely no-op and return 0 until the next call to `msf_gif_begin()`.
42     Therefore, it's safe to check only the return value of `msf_gif_end()`.
43 
44 
45 REPLACING MALLOC:
46 
47     This library uses malloc+realloc+free internally for memory allocation.
48     To facilitate integration with custom memory allocators, these calls go through macros, which can be redefined.
49     The expected function signature equivalents of the macros are as follows:
50 
51     void * MSF_GIF_MALLOC(void * context, size_t newSize)
52     void * MSF_GIF_REALLOC(void * context, void * oldMemory, size_t oldSize, size_t newSize)
53     void MSF_GIF_FREE(void * context, void * oldMemory, size_t oldSize)
54 
55     If your allocator needs a context pointer, you can set the `customAllocatorContext` field of the MsfGifState struct
56     before calling msf_gif_begin(), and it will be passed to all subsequent allocator macro calls.
57 
58     The maximum number of bytes the library will allocate to encode a single gif is bounded by the following formula:
59     `(2 * 1024 * 1024) + (width * height * 8) + ((1024 + width * height * 1.5) * 3 * frameCount)`
60     The peak heap memory usage in bytes, if using a general-purpose heap allocator, is bounded by the following formula:
    `(2 * 1024 * 1024) + (width * height * 9.5) + 1024 + (16 * frameCount) + (2 * sizeOfResultingGif)`
62 
63 
64 See end of file for license information.
65 */
66 
67 import core.stdc.stdint;
68 import core.stdc.stdlib: malloc, free, realloc;
69 import core.stdc.string: memset, memcpy;
70 import inteli.emmintrin;
71 
72 nothrow @nogc:
73 
74 //version 2.2
75 
76 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
77 /// HEADER                                                                                                           ///
78 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
79 
/// Value returned by `msf_gif_end()`: the assembled gif file bytes plus the
/// bookkeeping `msf_gif_free()` needs to release them.
struct MsfGifResult
{
    // Pointer to the gif file data; null when the handle was invalid or the final allocation failed.
    void * data;
    // Number of bytes computed for the file (frame buffers plus the 1-byte trailer).
    // NOTE: msf_gif_end() still fills this in when `data` is null because the allocation failed.
    size_t dataSize;
    size_t allocSize; //internal use: size passed back to the free hook as `oldSize`
    void * contextPointer; //internal use: allocator context passed back to the free hook
}
87 
/// A quantized ("cooked") frame as produced by `msf_cook_frame()`. Internal use.
/// Field order matters: the struct is filled with a positional initializer.
struct MsfCookedFrame //internal use
{
    /// One packed color index per pixel, `width * height` entries.
    uint32_t* pixels = null;
    /// Bit depth that was finally used to quantize the frame.
    int depth;
    /// Number of distinct non-transparent color indices used by the frame.
    int count;
    /// Bits kept for the red channel.
    int rbits;
    /// Bits kept for the green channel.
    int gbits;
    /// Bits kept for the blue channel.
    int bbits;
}
93 
/// Node of the singly linked list of encoded chunks (file header, then one node per frame).
/// Nodes are allocated as `MsfGifBuffer.data.offsetof + size` bytes, so `data` is only the
/// first byte of a variable-length payload that directly follows the header fields.
struct MsfGifBuffer 
{
    MsfGifBuffer * next; // next chunk in file order, or null for the last one
    size_t size;         // number of payload bytes stored in `data`
    uint8_t[1] data;     // start of the payload; access it through `data.ptr`
}
100 
// C-linkage callback type stored in `MsfGifState.fileWriteFunc`.
// NOTE(review): the parameter list mirrors C's fwrite(buffer, size, count, stream);
// nothing in this part of the file calls it, so the exact contract should be confirmed.
extern(C)
{
    alias MsfGifFileWriteFunc = size_t function(const(void)* buffer, size_t size, size_t count, void* stream);
}
105 
/// Encoder state shared by msf_gif_begin(), msf_gif_frame() and msf_gif_end().
/// A null `listHead` marks the handle as invalid until the next msf_gif_begin() call.
struct MsfGifState
{
    // NOTE(review): the two file-writing fields are not used by the to-memory API in this part of the file.
    MsfGifFileWriteFunc fileWriteFunc;
    void * fileWriteData;
    MsfCookedFrame previousFrame; // last submitted frame, used to detect unchanged pixels
    MsfCookedFrame currentFrame;  // scratch frame being cooked; swapped with previousFrame after each frame
    int16_t * lzwMem;             // LZW code table, `lzwAllocSize` bytes
    MsfGifBuffer * listHead;      // first chunk (file header); null means "handle invalid"
    MsfGifBuffer * listTail;      // most recently appended chunk
    int width, height;            // frame dimensions in pixels, as given to msf_gif_begin()
    void * customAllocatorContext; // passed to every MSF_GIF_MALLOC/REALLOC/FREE call; preserved by msf_gif_begin()
    int framesSubmitted; //needed for transparency to work correctly (because we reach into the previous frame)

    //The gif format only supports 1-bit transparency, meaning a pixel will either be fully transparent or fully opaque.
    //Pixels with an alpha value less than the alpha threshold will be treated as transparent.
    //To enable exporting transparent gifs, set it to a value between 1 and 255 (inclusive) before calling msf_gif_frame().
    //Setting it to 0 causes the alpha channel to be ignored. In this port its initial value is 10 (not 0).
    int msf_gif_alpha_threshold = 10; // GP: tuned grossly
}
125 
126 
/**
 * Finishes the gif: concatenates the header and all submitted frames into one block,
 * appends the file trailer byte, and frees every allocation owned by the handle.
 * The handle stays invalid until the next `msf_gif_begin()` call.
 *
 * @return                     A block of memory containing the gif file data, or NULL on error.
 *                             You are responsible for freeing this via `msf_gif_free()`.
 */
MsfGifResult msf_gif_end(MsfGifState * handle);
132 
/**
 * Releases the memory of a result returned by `msf_gif_end()`.
 * Does nothing when `result.data` is null.
 *
 * @param result                The MsfGifResult struct, verbatim as it was returned from `msf_gif_end()`.
 */
void msf_gif_free(MsfGifResult result);
137 
138 
139 
/// Default allocation hook: forwards to C `malloc`. `contextPointer` is ignored.
/// Returns null when the allocation fails.
void* MSF_GIF_MALLOC(void* contextPointer, size_t newSize) 
{
    void* block = malloc(newSize);
    return block;
}
144 
/// Default reallocation hook: forwards to C `realloc`.
/// `contextPointer` and `oldSize` are ignored by this default implementation.
void* MSF_GIF_REALLOC(void* contextPointer, void* oldMemory, size_t oldSize, size_t newSize)
{
    void* resized = realloc(oldMemory, newSize);
    return resized;
}
149 
/// Default deallocation hook: forwards to C `free`.
/// `contextPointer` and `oldSize` are ignored by this default implementation.
void MSF_GIF_FREE(void* contextPointer, void* oldMemory, size_t oldSize)
{
    void* doomed = oldMemory;
    free(doomed);
}
154 
// PERF: the original C file uses the compiler's bit-scan intrinsics here.
/// Returns the number of bits needed to represent `i`, i.e. floor(log2(i)) + 1
/// for positive `i`. For `i == 0` the result is 1.
/// Implemented with a De Bruijn multiply-and-lookup instead of a bit-scan instruction.
int msf_bit_log(int i) 
{
    __gshared static immutable int[32] deBruijnLog2 = 
    [
        0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
        8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31,
    ];

    // Smear the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16).
    int smeared = i;
    for (int shift = 1; shift <= 16; shift <<= 1)
    {
        smeared |= smeared >> shift;
    }

    // The top 5 bits of the product select the table slot holding floor(log2).
    const uint slot = cast(uint)(smeared * 0x07C4ACDDU) >> 27;
    return deBruijnLog2[slot] + 1;
}
170 
171 
/// Returns the smaller of the two integers.
int msf_imin(int a, int b)
{
    if (a < b)
        return a;
    return b;
}
/// Returns the larger of the two integers.
int msf_imax(int a, int b)
{
    if (b < a)
        return a;
    return b;
}
174 
175 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
176 /// Frame Cooking                                                                                                    ///
177 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
178 
// Compile-time switch (0 in this port). When non-zero, msf_compress_frame() swaps the
// red and blue components of every palette entry it writes.
enum int msf_gif_bgra_flag = 0;
180 
/**
 * Quantizes ("cooks") one raw frame into packed color indices with 4x4 ordered dithering.
 *
 * Each pixel is reduced to `rbits + gbits + bbits` bits (red in the lowest bits, then green,
 * then blue). Pixels whose alpha byte is below `msf_gif_alpha_threshold` get the reserved
 * index `paletteSize - 1` instead. If the frame ends up using 256 or more distinct colors,
 * the whole frame is re-quantized with one bit of depth less, until it fits or depth hits 0.
 *
 * Params:
 *   frame  = in: `frame.pixels` must point to `width * height` writable uint32_t entries;
 *            out: overwritten with the cooked result (pixels, final depth, color count, channel bits).
 *   raw    = source pixels, 4 bytes per pixel (bytes 0, 1, 2 are read as r, g, b and byte 3 as alpha).
 *   used   = scratch table, one byte per possible index; needs `(1 << 16) + 1` entries when depth is 16.
 *            On return `used[i]` is 1 for every index that occurs in the frame.
 *   width, height = frame size in pixels.
 *   pitch  = byte distance between the starts of consecutive rows in `raw`.
 *   depth  = starting bit depth; indexes the 17-entry depth tables, so it must be in 0..16.
 *   msf_gif_alpha_threshold = alpha values strictly below this are treated as transparent.
 */
void msf_cook_frame(MsfCookedFrame * frame, const(uint8_t)* raw, uint8_t * used,
                    int width, int height, int pitch, int depth, int msf_gif_alpha_threshold)
{
    //bit depth for each channel
    static immutable int[17] rdepthsArray = [ 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5 ];
    static immutable int[17] gdepthsArray = [ 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 ];
    static immutable int[17] bdepthsArray = [ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5 ];
    //this extra level of indirection looks unnecessary but we need to explicitly decay the arrays to pointers
    //in order to be able to swap them because of C's annoying not-quite-pointers, not-quite-value-types stack arrays.
    const(int)* rdepths = rdepthsArray.ptr;
    const(int)* gdepths = gdepthsArray.ptr;
    const(int)* bdepths = bdepthsArray.ptr;

    //4x4 ordered-dither offsets, pre-scaled by 2^12; row is selected by (y & 3), column by (x & 3)
    static immutable int[16] ditherKernel = 
    [
         0 << 12,  8 << 12,  2 << 12, 10 << 12,
        12 << 12,  4 << 12, 14 << 12,  6 << 12,
         3 << 12, 11 << 12,  1 << 12,  9 << 12,
        15 << 12,  7 << 12, 13 << 12,  5 << 12,
    ];

    uint32_t * cooked = frame.pixels;
    int count = 0;
    do 
    {
        int rbits = rdepths[depth], gbits = gdepths[depth], bbits = bdepths[depth];
        //the "+ 1" reserves the last index for the transparent color
        int paletteSize = (1 << (rbits + gbits + bbits)) + 1;
        memset(used, 0, paletteSize * uint8_t.sizeof);

        //TODO: document what this math does and why it's correct
        int rdiff = (1 << (8 - rbits)) - 1;
        int gdiff = (1 << (8 - gbits)) - 1;
        int bdiff = (1 << (8 - bbits)) - 1;
        short rmul = cast(short) ((255.0f - rdiff) / 255.0f * 257);
        short gmul = cast(short) ((255.0f - gdiff) / 255.0f * 257);
        short bmul = cast(short) ((255.0f - bdiff) / 255.0f * 257);

        //masks selecting the green and blue fields of the packed index (red occupies the low rbits)
        int gmask = ((1 << gbits) - 1) << rbits;
        int bmask = ((1 << bbits) - 1) << rbits << gbits;

        for (int y = 0; y < height; ++y) 
        {
            int x = 0;
            
            //k holds the four dither offsets of this row; k2 packs the red offset (low 16 bits)
            //and the blue offset (high 16 bits) of each lane so both can be added in one op
            __m128i k = _mm_loadu_si128(cast(__m128i *) &ditherKernel[(y & 3) * 4]);
            __m128i k2 = _mm_or_si128(_mm_srli_epi32(k, rbits), _mm_slli_epi32(_mm_srli_epi32(k, bbits), 16));
            //SIMD path: 4 pixels (16 source bytes) per iteration
            for (; x < width - 3; x += 4) 
            {
                const(uint8_t)* pixels = &raw[y * pitch + x * 4];
                __m128i p = _mm_loadu_si128(cast(__m128i *) pixels);

                //red and blue are handled together as two 16-bit halves of each 32-bit lane
                __m128i rb = _mm_and_si128(p, _mm_set1_epi32(0x00FF00FF));
                __m128i rb1 = _mm_mullo_epi16(rb, _mm_set_epi16(bmul, rmul, bmul, rmul, bmul, rmul, bmul, rmul));
                __m128i rb2 = _mm_adds_epu16(rb1, k2);
                __m128i r3 = _mm_srli_epi32(_mm_and_si128(rb2, _mm_set1_epi32(0x0000FFFF)), 16 - rbits);
                __m128i b3 = _mm_and_si128(_mm_srli_epi32(rb2, 32 - rbits - gbits - bbits), _mm_set1_epi32(bmask));

                __m128i g = _mm_and_si128(_mm_srli_epi32(p, 8), _mm_set1_epi32(0x000000FF));
                __m128i g1 = _mm_mullo_epi16(g, _mm_set1_epi32(gmul));
                __m128i g2 = _mm_adds_epu16(g1, _mm_srli_epi32(k, gbits));
                __m128i g3 = _mm_and_si128(_mm_srli_epi32(g2, 16 - rbits - gbits), _mm_set1_epi32(gmask));

                __m128i out_ = _mm_or_si128(_mm_or_si128(r3, g3), b3);

                //mask in transparency based on threshold
                //NOTE: we can theoretically do a sub instead of srli by doing an unsigned compare via bias
                //      to maybe save a TINY amount of throughput? but lol who cares maybe I'll do it later -m
                __m128i invAlphaMask = _mm_cmplt_epi32(_mm_srli_epi32(p, 24), _mm_set1_epi32(msf_gif_alpha_threshold));
                out_ = _mm_or_si128(_mm_and_si128(invAlphaMask, _mm_set1_epi32(paletteSize - 1)), _mm_andnot_si128(invAlphaMask, out_));

                //TODO: does storing this as a __m128i then reading it back as a uint32_t violate strict aliasing?
                uint32_t * c = &cooked[y * width + x];
                _mm_storeu_si128(cast(__m128i *) c, out_);
            }

            //scalar cleanup loop
            //handles the last (width % 4) pixels of the row with the same math as the SIMD path
            for (; x < width; ++x) 
            {
                const(uint8_t)* p = &raw[y * pitch + x * 4];

                //transparent pixel if alpha is low
                if (p[3] < msf_gif_alpha_threshold) {
                    cooked[y * width + x] = paletteSize - 1;
                    continue;
                }

                int dx = x & 3, dy = y & 3;
                int k3 = ditherKernel[dy * 4 + dx];
                cooked[y * width + x] =
                    (msf_imin(65535, p[2] * bmul + (k3 >> bbits)) >> (16 - rbits - gbits - bbits) & bmask) |
                    (msf_imin(65535, p[1] * gmul + (k3 >> gbits)) >> (16 - rbits - gbits        ) & gmask) |
                     msf_imin(65535, p[0] * rmul + (k3 >> rbits)) >> (16 - rbits                );
            }
        }

        //mark every index that occurs in the cooked frame
        count = 0;
        for (int i = 0; i < width * height; ++i) 
        {
            used[cooked[i]] = 1;
        }

        //count used colors, transparent is ignored
        for (int j = 0; j < paletteSize - 1; ++j) 
        {
            count += used[j];
        }
    //too many colors for a 256-entry table: retry with one bit less, unless depth has reached 0
    } while (count >= 256 && --depth);

    MsfCookedFrame ret = { cooked, depth, count, rdepths[depth], gdepths[depth], bdepths[depth] };
    *frame = ret;
}
292 
293 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
294 /// Frame Compression                                                                                                ///
295 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
296 
/// Appends one LZW code of `len` bits to the current gif data sub-block.
///
/// `*writeHead` points at the sub-block's length byte and `*blockBits` is the bit offset
/// of the next free bit relative to it. The code is OR-ed into up to three consecutive
/// bytes, so the target bytes must already be zero. When the sub-block reaches 255 payload
/// bytes, `*writeHead` advances by 256, the spilled bytes are carried over, and the
/// following 256 bytes are cleared for the next sub-block.
void msf_put_code(uint8_t * * writeHead, uint32_t * blockBits, int len, uint32_t code) 
{
    uint8_t* block = *writeHead;
    const uint32_t bitPos = *blockBits;
    const int byteIdx = bitPos / 8;
    const int bitIdx = bitPos % 8;

    //insert new code into block buffer
    block[byteIdx]     |= code << bitIdx;
    block[byteIdx + 1] |= code >> (8 - bitIdx);
    block[byteIdx + 2] |= code >> (16 - bitIdx);
    *blockBits = bitPos + len;

    if (*blockBits < 256 * 8)
    {
        return;
    }

    //current block is full: start the next one right after its 255 payload bytes
    *blockBits -= 255 * 8;
    block += 256;
    *writeHead = block;
    //bytes 0 and 1 of the new block hold bits that spilled over; shift them past the length byte
    block[2] = block[1];
    block[1] = block[0];
    block[0] = 255;
    memset(block + 4, 0, 256);
}
317 
/// The LZW code table viewed as rows of `stride` int16_t entries:
/// row = current code, column = next color index, value = combined code (negative when unset).
struct MsfStridedList
{
    int16_t * data; // backing storage, reset to all 0xFF bytes by msf_lzw_reset()
    int len;        // number of codes currently defined
    int stride;     // entries per row
}
324 
/// Clears the LZW table: fills all 4096 rows of `stride` entries with 0xFF bytes
/// (every entry reads back as -1), then records the stride and sets the code count
/// to `tableSize + 2`.
void msf_lzw_reset(MsfStridedList* lzw, int tableSize, int stride) 
{
    const size_t tableBytes = 4096 * stride * int16_t.sizeof;
    memset(lzw.data, 0xFF, tableBytes);
    lzw.stride = stride;
    lzw.len = tableSize + 2;
}
331 
/**
 * Encodes one cooked frame into a freshly allocated chunk: graphic control extension,
 * image descriptor, local color table and LZW-compressed pixel data.
 *
 * Pixels are mapped to palette indices through a lookup table; index 0 is the transparent
 * color. When the frame uses the same channel bit layout as the previous frame and has no
 * transparent pixels, pixels identical to the previous frame are emitted as index 0.
 *
 * Params:
 *   allocContext = context pointer forwarded to the allocator hooks.
 *   width, height = frame size in pixels.
 *   centiSeconds = frame delay; only its low 16 bits are written.
 *   frame        = cooked frame to encode.
 *   handle       = encoder state; `previousFrame`, `framesSubmitted` and `listTail` are read, and the
 *                  previous chunk's disposal byte is patched when this frame has transparent pixels.
 *   used         = per-index usage table filled by msf_cook_frame().
 *   lzwMem       = storage for the LZW code table.
 * Returns: the chunk (shrunk to fit), or null if an allocation fails.
 */
MsfGifBuffer * msf_compress_frame(void * allocContext, int width, int height, int centiSeconds, 
                                  MsfCookedFrame frame, MsfGifState * handle, uint8_t * used, int16_t * lzwMem)
{
    //NOTE: we reserve enough memory for theoretical the worst case upfront because it's a reasonable amount,
    //      and prevents us from ever having to check size or realloc during compression
    int maxGIFSize = 0;
    {
        maxGIFSize += 32; // headers
        maxGIFSize += 256*3;
        int maxData = width * height * 3 / 2;
        int maxFramingOverhead = 256;
        maxGIFSize += maxData;
        maxGIFSize += maxFramingOverhead; // issue #63, else small GIF might exceed buffer.
    }
    int maxBufSize = cast(int)(MsfGifBuffer.data.offsetof) + maxGIFSize;
    MsfGifBuffer * buffer = cast(MsfGifBuffer *) MSF_GIF_MALLOC(allocContext, maxBufSize);
    if (!buffer) { return null; }
    uint8_t * writeHead = buffer.data.ptr;
    MsfStridedList lzw = { lzwMem };

    //allocate tlb
    //tlb maps a packed color index from the cooked frame to its slot in the local color table
    int totalBits = frame.rbits + frame.gbits + frame.bbits;
    int tlbSize = (1 << totalBits) + 1;

    //only 64k, so stack allocating is fine
    // GP: I don't think that's fine.
    uint8_t[(1 << 16) + 1] tlb; 

    //generate palette
    static struct Color3 
    { 
        uint8_t r = 0, g = 0, b = 0; 
    } 
    static assert(Color3.sizeof == 3);
    Color3[256] table;

    int tableIdx = 1; //we start counting at 1 because 0 is the transparent color
    //transparent is always last in the table
    tlb[tlbSize-1] = 0;
    for (int i = 0; i < tlbSize-1; ++i) 
    {
        if (used[i]) 
        {
            tlb[i] = cast(ubyte)tableIdx;
            int rmask = (1 << frame.rbits) - 1;
            int gmask = (1 << frame.gbits) - 1;
            //isolate components
            int r = i & rmask;
            int g = i >> frame.rbits & gmask;
            int b = i >> (frame.rbits + frame.gbits);
            //shift into highest bits
            r <<= 8 - frame.rbits;
            g <<= 8 - frame.gbits;
            b <<= 8 - frame.bbits;
            //replicate the kept bits downwards so the channel spans the full 8-bit range
            table[tableIdx].r = cast(ubyte)(r | r >> frame.rbits | r >> (frame.rbits * 2) | r >> (frame.rbits * 3));
            table[tableIdx].g = cast(ubyte)(g | g >> frame.gbits | g >> (frame.gbits * 2) | g >> (frame.gbits * 3));
            table[tableIdx].b = cast(ubyte)(b | b >> frame.bbits | b >> (frame.bbits * 2) | b >> (frame.bbits * 3));
            if (msf_gif_bgra_flag) {
                uint8_t temp = table[tableIdx].r;
                table[tableIdx].r = table[tableIdx].b;
                table[tableIdx].b = temp;
            }
            ++tableIdx;
        }
    }
    int hasTransparentPixels = used[tlbSize-1];

    //SPEC: "Because of some algorithmic constraints however, black & white images which have one color bit
    //       must be indicated as having a code size of 2."
    int tableBits = msf_imax(2, msf_bit_log(tableIdx - 1));
    int tableSize = 1 << tableBits;
    //NOTE: we don't just compare `depth` field here because it will be wrong for the first frame and we will segfault
    MsfCookedFrame previous = handle.previousFrame;
    int hasSamePal = frame.rbits == previous.rbits && frame.gbits == previous.gbits && frame.bbits == previous.bbits;
    int framesCompatible = hasSamePal && !hasTransparentPixels;

    //18 header bytes: graphic control extension (bytes 0-7) followed by the image descriptor (bytes 8-17).
    //Bytes 4-5 receive the delay, bytes 13-16 the width and height, byte 17 the local color table size.
    char[19] headerBytes = "\x21\xF9\x04\x05\0\0\0\0\x2C\0\0\0\0\0\0\0\0\x80";
    //NOTE: we need to check the frame number because if we reach into the buffer prior to the first frame,
    //      we'll just clobber the file header instead, which is a bug
    if (hasTransparentPixels && handle.framesSubmitted > 0) {
        handle.listTail.data.ptr[3] = 0x09; //set the previous frame's disposal to background, so transparency is possible
    }
    memcpy(&headerBytes[4], &centiSeconds, 2); // Note: Only works on LittleEndian!
    memcpy(&headerBytes[13], &width, 2); // Note: Only works on LittleEndian!
    memcpy(&headerBytes[15], &height, 2); // Note: Only works on LittleEndian!
    headerBytes[17] |= tableBits - 1;
    memcpy(writeHead, headerBytes.ptr, 18);
    writeHead += 18;

    //local color table
    memcpy(writeHead, table.ptr, tableSize * Color3.sizeof);
    writeHead += tableSize * Color3.sizeof;
    *writeHead++ = cast(ubyte)tableBits;

    //prep block
    //byte 0 is the sub-block length (assumed full = 255 until the final flush), data bits start at bit 8
    memset(writeHead, 0, 260);
    writeHead[0] = 255;
    uint32_t blockBits = 8; //relative to block.head

    //SPEC: "Encoders should output a Clear code as the first code of each image data stream."
    msf_lzw_reset(&lzw, tableSize, tableIdx);
    msf_put_code(&writeHead, &blockBits, msf_bit_log(lzw.len - 1), tableSize);

    int lastCode = framesCompatible && frame.pixels[0] == previous.pixels[0]? 0 : tlb[frame.pixels[0]];
    for (int i = 1; i < width * height; ++i) 
    {
        //PERF: branching vs. branchless version of this line is observed to have no discernable impact on speed
        int color = framesCompatible && frame.pixels[i] == previous.pixels[i]? 0 : tlb[frame.pixels[i]];
        //look up the code for "lastCode followed by color"; negative means the sequence is not in the table yet
        int code = (&lzw.data[lastCode * lzw.stride])[color];
        if (code < 0) {
            //write to code stream
            int codeBits = msf_bit_log(lzw.len - 1);
            msf_put_code(&writeHead, &blockBits, codeBits, lastCode);

            if (lzw.len > 4095) {
                //reset buffer code table
                msf_put_code(&writeHead, &blockBits, codeBits, tableSize);
                msf_lzw_reset(&lzw, tableSize, tableIdx);
            } else {
                (&lzw.data[lastCode * lzw.stride])[color] = cast(short)lzw.len;
                ++lzw.len;
            }

            lastCode = color;
        } else {
            lastCode = code;
        }
    }

    //write code for leftover index buffer contents, then the end code
    msf_put_code(&writeHead, &blockBits, msf_imin(12, msf_bit_log(lzw.len - 1)), lastCode);
    msf_put_code(&writeHead, &blockBits, msf_imin(12, msf_bit_log(lzw.len)), tableSize + 1);

    //flush remaining data
    if (blockBits > 8) {
        int bytes = (blockBits + 7) / 8; //round up
        writeHead[0] = cast(ubyte)(bytes - 1);
        writeHead += bytes;
    }
    *writeHead++ = 0; //terminating block

    //fill in buffer header and shrink buffer to fit data
    buffer.next = null;
    buffer.size = writeHead - buffer.data.ptr;
    MsfGifBuffer * moved =
        cast(MsfGifBuffer *) MSF_GIF_REALLOC(allocContext, buffer, maxBufSize, MsfGifBuffer.data.offsetof + buffer.size);
    if (!moved) 
    {
        MSF_GIF_FREE(allocContext, buffer, maxBufSize); 
        return null; 
    }
    return moved;
}
485 
486 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
487 /// To-memory API                                                                                                    ///
488 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
489 
// Bytes allocated for the LZW code table: 4096 rows x 256 columns x 2 bytes per int16_t entry (2 MiB).
enum int lzwAllocSize = 4096 * 256 * 2;
491 
//NOTE: by C standard library conventions, freeing NULL should be a no-op,
//      but just in case the user's custom free doesn't follow that rule, we do null checks on our end as well.
/// Frees every allocation owned by `handle` (both frame buffers, the LZW table and the
/// whole chunk list) and marks the handle invalid by clearing `listHead`.
/// Every freed pointer is reset to null, so calling this again on the same handle is a
/// harmless no-op instead of a double free.
static void msf_free_gif_state(MsfGifState * handle) {
    void * ctx = handle.customAllocatorContext;
    const size_t frameBytes = handle.width * handle.height * uint32_t.sizeof;

    if (handle.previousFrame.pixels) {
        MSF_GIF_FREE(ctx, handle.previousFrame.pixels, frameBytes);
        handle.previousFrame.pixels = null;
    }
    if (handle.currentFrame.pixels) {
        MSF_GIF_FREE(ctx, handle.currentFrame.pixels, frameBytes);
        handle.currentFrame.pixels = null;
    }
    if (handle.lzwMem) {
        MSF_GIF_FREE(ctx, handle.lzwMem, lzwAllocSize);
        handle.lzwMem = null;
    }
    for (MsfGifBuffer * node = handle.listHead; node;) {
        MsfGifBuffer * next = node.next; //NOTE: we have to copy the `next` pointer BEFORE freeing the node holding it
        MSF_GIF_FREE(ctx, node, MsfGifBuffer.data.offsetof + node.size);
        node = next;
    }
    handle.listHead = null; //this implicitly marks the handle as invalid until the next msf_gif_begin() call
    handle.listTail = null; //the tail pointed into the list that was just freed
}
507 
/**
 * Starts a new gif: resets the handle, allocates the working buffers and stores the
 * 32-byte file header as the first chunk. `customAllocatorContext` is preserved.
 *
 * @param width                Image width in pixels. Must be between 1 and 65535,
 *                             because the GIF format stores dimensions as 16-bit values.
 * @param height               Image height in pixels. Same range as `width`.
 * @return                     Non-zero on success, 0 on error (invalid dimensions or
 *                             allocation failure). On error the handle is left invalid,
 *                             so later calls no-op until the next successful `msf_gif_begin()`.
 */
int msf_gif_begin(MsfGifState * handle, int width, int height) 
{
    //NOTE: we cannot stomp the entire struct to zero because we must preserve `customAllocatorContext`.
    MsfCookedFrame empty;
    handle.previousFrame = empty;
    handle.currentFrame = empty;
    handle.width = width;
    handle.height = height;
    handle.framesSubmitted = 0;

    //Reject sizes the encoder cannot represent:
    // - the header only has 2 bytes per dimension, so larger values would be silently truncated;
    // - the frame code does its pixel indexing and size math in `int` (up to 4 bytes per pixel),
    //   so the pixel count times 4 must not overflow an int.
    enum long maxPixels = int.max / 4;
    if (width <= 0 || height <= 0 || width > 0xFFFF || height > 0xFFFF
        || cast(long) width * height > maxPixels)
    {
        handle.lzwMem = null;
        handle.listHead = null; //marks the handle as invalid
        handle.listTail = null;
        return 0;
    }

    //computed in size_t so the multiplication cannot wrap in `int`
    const size_t frameBytes = cast(size_t) width * height * uint32_t.sizeof;

    //allocate memory for LZW buffer
    //NOTE: Unfortunately we can't just use stack memory for the LZW table because it's 2MB,
    //      which is more stack space than most operating systems give by default,
    //      and we can't realistically expect users to be willing to override that just to use our library,
    //      so we have to allocate this on the heap.
    handle.lzwMem = cast(int16_t *) MSF_GIF_MALLOC(handle.customAllocatorContext, lzwAllocSize);
    handle.previousFrame.pixels =
        cast(uint32_t *) MSF_GIF_MALLOC(handle.customAllocatorContext, frameBytes);
    handle.currentFrame.pixels =
        cast(uint32_t *) MSF_GIF_MALLOC(handle.customAllocatorContext, frameBytes);

    //setup header buffer header (lol)
    handle.listHead = cast(MsfGifBuffer *) MSF_GIF_MALLOC(handle.customAllocatorContext, MsfGifBuffer.data.offsetof + 32);
    handle.listTail = handle.listHead;
    if (handle.listHead) {
        //The link fields must be valid BEFORE the failure check below: msf_free_gif_state() walks
        //the list through `next`, and freshly malloc'ed memory would otherwise hold garbage there.
        handle.listHead.next = null;
        handle.listHead.size = 32;
    }
    if (!handle.listHead || !handle.lzwMem || !handle.previousFrame.pixels || !handle.currentFrame.pixels) {
        msf_free_gif_state(handle);
        return 0;
    }

    //NOTE: because __attribute__((__packed__)) is annoyingly compiler-specific, we do this unreadable weirdness
    char[33] headerBytes = "GIF89a\0\0\0\0\x70\0\0\x21\xFF\x0BNETSCAPE2.0\x03\x01\0\0\0";
    memcpy(&headerBytes[6], &width, 2); // same, only works in little-endian
    memcpy(&headerBytes[8], &height, 2); // same, only works in little-endian
    memcpy(handle.listHead.data.ptr, headerBytes.ptr, 32);
    return 1;
}
551 
/**
* Quantizes and compresses one frame and appends it to the gif being built.
*
* @param pixelData            Pointer to raw framebuffer data, 4 bytes per pixel in RGBA8 order.
*                             (BGRA handling is governed by the compile-time constant
*                             `msf_gif_bgra_flag`, which is 0 in this module.)
*                             Note: This function does NOT free `pixelData`. You must free it yourself afterwards.
* @param centiSecondsPerFame  How many hundredths of a second this frame should be displayed for.
*                             Note: This being specified in centiseconds is a limitation of the GIF format.
* @param maxBitDepth          Upper limit on the bits per pixel used when quantizing the frame.
*                             The depth actually chosen may be lower, depending on how many
*                             colors the frame uses. The value is clamped to the range 1..16;
*                             16 is the recommended default. Lower values can give faster
*                             exports and smaller gifs at the cost of quality.
* @param pitchInBytes         The number of bytes from the beginning of one row of pixels to the beginning of the next.
*                             If you want to flip the image, just pass in a negative pitch.
* @return                     Non-zero on success, 0 on error. On error all encoder state is freed
*                             and the handle stays invalid until the next `msf_gif_begin()`.
*/
int msf_gif_frame(MsfGifState * handle, const(uint8_t)* pixelData, int centiSecondsPerFame, int maxBitDepth, int pitchInBytes)
{
    //an invalid handle (never begun, or already failed) turns every call into a no-op
    if (handle.listHead is null)
    {
        return 0;
    }

    const int depthLimit = msf_imax(1, msf_imin(16, maxBitDepth));

    //only 64k, so stack allocating is fine
    //GP: again argh
    uint8_t[(1 << 16) + 1] used; 

    //start from the previous frame's depth, plus a bonus that grows as that frame used fewer colors
    const int previousColors = msf_imax(1, handle.previousFrame.count);
    const int startDepth = msf_imin(depthLimit, handle.previousFrame.depth + 160 / previousColors);
    msf_cook_frame(&handle.currentFrame, pixelData, used.ptr, handle.width, handle.height, pitchInBytes,
                   startDepth, handle.msf_gif_alpha_threshold);

    MsfGifBuffer * encoded = msf_compress_frame(handle.customAllocatorContext, handle.width, handle.height,
        centiSecondsPerFame, handle.currentFrame, handle, used.ptr, handle.lzwMem);
    if (encoded is null)
    {
        msf_free_gif_state(handle);
        return 0;
    }

    //append the new chunk to the list
    handle.listTail.next = encoded;
    handle.listTail = encoded;

    //the frame just cooked becomes "previous"; the old previous buffer is reused as scratch
    MsfCookedFrame justCooked = handle.currentFrame;
    handle.currentFrame = handle.previousFrame;
    handle.previousFrame = justCooked;

    ++handle.framesSubmitted;
    return 1;
}
596 
/// Assembles the final gif file from the chunk list, appends the trailer byte 0x3B,
/// then frees all encoder state. Returns a default (all-zero) result when the handle
/// is invalid. If the output allocation fails, `data` is null while the size fields
/// still hold the computed total.
MsfGifResult msf_gif_end(MsfGifState * handle) 
{
    if (handle.listHead is null) 
    { 
        return MsfGifResult.init; 
    }

    //first pass: determine total size
    size_t total = 1; //1 byte for trailing marker
    MsfGifBuffer * node = handle.listHead;
    while (node !is null)
    {
        total += node.size;
        node = node.next;
    }

    //second pass: write data
    uint8_t * buffer = cast(uint8_t *) MSF_GIF_MALLOC(handle.customAllocatorContext, total);
    if (buffer !is null) 
    {
        size_t offset = 0;
        for (node = handle.listHead; node !is null; node = node.next) 
        {
            memcpy(buffer + offset, node.data.ptr, node.size);
            offset += node.size;
        }
        buffer[offset] = 0x3B;
    }

    //third pass: free buffers
    msf_free_gif_state(handle);

    MsfGifResult ret;
    ret.data = buffer;
    ret.dataSize = total;
    ret.allocSize = total;
    ret.contextPointer = handle.customAllocatorContext;
    return ret;
}
628 
/// Frees the file data held by a result from `msf_gif_end()`, using the allocator
/// context and size recorded in the result. A result with null `data` is ignored.
void msf_gif_free(MsfGifResult result) 
{
    if (result.data is null)
    {
        return;
    }
    MSF_GIF_FREE(result.contextPointer, result.data, result.allocSize);
}
636 
637 /*
638 ------------------------------------------------------------------------------
639 This software is available under 2 licenses -- choose whichever you prefer.
640 ------------------------------------------------------------------------------
641 ALTERNATIVE A - MIT License
642 Copyright (c) 2021 Miles Fogle
643 Permission is hereby granted, free of charge, to any person obtaining a copy of
644 this software and associated documentation files (the "Software"), to deal in
645 the Software without restriction, including without limitation the rights to
646 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
647 of the Software, and to permit persons to whom the Software is furnished to do
648 so, subject to the following conditions:
649 The above copyright notice and this permission notice shall be included in all
650 copies or substantial portions of the Software.
651 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
652 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
653 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
654 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
655 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
656 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
657 SOFTWARE.
658 ------------------------------------------------------------------------------
659 ALTERNATIVE B - Public Domain (www.unlicense.org)
660 This is free and unencumbered software released into the public domain.
661 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
662 software, either in source code form or as a compiled binary, for any purpose,
663 commercial or non-commercial, and by any means.
664 In jurisdictions that recognize copyright laws, the author or authors of this
665 software dedicate any and all copyright interest in the software to the public
666 domain. We make this dedication for the benefit of the public at large and to
667 the detriment of our heirs and successors. We intend this dedication to be an
668 overt act of relinquishment in perpetuity of all present and future rights to
669 this software under copyright law.
670 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
671 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
672 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
673 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
674 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
675 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
676 ------------------------------------------------------------------------------
677 */