The OpenD Programming Language

1 /**
2 OpenType/FreeType parsing.
3 
4 Copyright: Guillaume Piolat 2018.
5 License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6 */
7 module printed.font.opentype;
8 
9 import std.stdio;
10 import std.conv;
11 import std.string;
12 import std.uni;
13 import std.algorithm.searching;
14 
15 import binrange;
16 
17 
/// A POD-type to represent a range of Unicode characters.
struct CharRange
{
    /// First code point of the range.
    dchar start;

    /// End code point of the range.
    /// NOTE(review): `parseCMAP` stores the raw 'cmap' segment end code here; whether
    /// consumers treat it as inclusive cannot be seen from this file - confirm.
    dchar stop;
}
24 
/// Font weight.
/// The numeric values are multiples of 100; the `OpenTypeFont` constructor obtains them
/// by rounding the 'OS/2' `usWeightClass` field, or falls back to other heuristics.
enum OpenTypeFontWeight : int
{
    thinest = 0, // Note: thinest doesn't exist in PostScript
    thin = 100,
    extraLight = 200,
    light = 300,
    normal = 400,
    medium = 500,
    semiBold = 600,
    bold = 700,
    extraBold = 800,
    black = 900
}
39 
/// Font style.
/// Filled in by the `OpenTypeFont` constructor from the 'OS/2' `fsSelection` flags,
/// the 'head' `macStyle` flags, or the sub-family name.
enum OpenTypeFontStyle
{
    normal,  // neither italic nor oblique was detected
    italic,
    oblique
}
47 
/// Reference line used to position text vertically.
/// Should match printed.canvas `TextBaseline`
/// See `OpenTypeFont.getBaselineOffset` for how each value is converted to an offset.
enum FontBaseline
{
    top,        // top of the em square
    hanging,    // currently resolved to the font ascent
    middle,     // middle of the em square
    alphabetic, // the default baseline, offset 0
    bottom      // bottom of the em square
}
57 
/// Horizontal measurements of a single line of text, as returned by
/// `OpenTypeFont.measureText`. All values are in glyph units.
struct OpenTypeTextMetrics
{
    /// The text's advance width, in glyph units.
    float horzAdvance;

    /// The text leftmost extent, in glyph units.
    float xmin;

    /// The text rightmost extent, in glyph units.
    float xmax;
}
69 
/// OpenType 1.8 file parser, for the purpose of finding all fonts in a file, their family name, their weight, etc.
/// This OpenType file might either be:
/// - a single font
/// - a collection file starting with a TTC Header
/// You may find the specification here: www.microsoft.com/typography/otspec/
class OpenTypeFile
{
    /// Reads the file header and, for a collection, the offset of each contained font.
    ///
    /// Params:
    ///     wholeFileContents = the complete content of the font file. The slice is kept
    ///                         (not copied), so it must outlive this object.
    /// Throws: `Exception` if the leading tag is not a known font or collection tag, or
    ///         if the TTC header announces more fonts than the file can hold.
    ///         NOTE(review): `popBE` presumably also throws on truncated input - confirm
    ///         against binrange.
    this(const(ubyte[]) wholeFileContents)
    {
        _wholeFileData = wholeFileContents;
        const(ubyte)[] file = wholeFileContents[];

        // Read first tag
        uint firstTag = popBE!uint(file);

        if (firstTag == 0x00010000 || firstTag == 0x4F54544F /* 'OTTO' */)
        {
            _isCollection = false;
            _numberOfFonts = 1;
        }
        else if (firstTag == 0x74746366 /* 'ttcf' */)
        {
            _isCollection = true;
            uint version_ = popBE!uint(file); // ignored for now
            uint numFonts = popBE!uint(file); // unsigned in the file format

            // The count comes straight from the file. Each font needs one 4-byte offset
            // in the remaining data, so a larger count can only be a corrupt header;
            // reject it before using it as an allocation size.
            if (numFonts > int.max || numFonts > file.length / 4)
                throw new Exception("Invalid number of fonts in TTC header");

            _numberOfFonts = cast(int) numFonts;
            offsetToOffsetTable.length = numFonts;
            foreach(i; 0..numFonts)
                offsetToOffsetTable[i] = popBE!uint(file);
        }
        else
            throw new Exception("Couldn't recognize the font file type");
    }

    /// Number of fonts in this OpenType file
    int numberOfFonts()
    {
        return _numberOfFonts;
    }

private:
    const(ubyte)[] _wholeFileData;

    // For collections: offset of each font's Offset Table, from the start of the file.
    uint[] offsetToOffsetTable;

    int _numberOfFonts;
    bool _isCollection; // It is a TTC or single font?

    /// Returns: the offset, from the start of the file, of the Offset Table of the font
    ///          at `index`. Always 0 for a single-font file.
    uint offsetForFont(int index)
    {
        assert(index >= 0 && index < numberOfFonts());

        if (_isCollection)
            return offsetToOffsetTable[index];
        else
            return 0;
    }
}
127 
128 
129 /// Parses a font from a font file (which could contain data for several of them).
130 class OpenTypeFont
131 {
132 public:
133 
134     this(OpenTypeFile file, int index)
135     {
136         _file = file;
137         _fontIndex = index;
138         _wholeFileData = file._wholeFileData;
139 
140         // Figure out font weight, style, and type immediately, as it is useful for font matching
141 
142         _isMonospaced = false;
143 
144         const(ubyte)[] os2Table = findTable(0x4F532F32 /* 'OS/2' */);
145         if (os2Table !is null)
146         {
147             // Parse weight and style from the 'OS/2' table
148             // Note: Apple documentation says that "Many fonts have inaccurate information in their 'OS/2' table."
149             // Some fonts don't have this table: https://github.com/princjef/font-finder/issues/5
150 
151             skipBytes(os2Table, 4); // table version and xAvgCharWidth
152             int usWeightClass = popBE!ushort(os2Table);
153             _weight = cast(OpenTypeFontWeight)( 100 * ( (usWeightClass + 50) / 100 ) ); // round to multiple of 100
154             skipBytes(os2Table, 2 /*usWidthClass*/  + 2 /*fsType*/ + 10 * 2 /*yXXXXXXX*/ + 2 /*sFamilyClass*/);
155 
156             ubyte[10] panose;
157             foreach(b; 0..10)
158                 panose[b] = popBE!ubyte(os2Table);
159 
160             _isMonospaced = (panose[0] == 2) && (panose[3] == 9);
161 
162             skipBytes(os2Table, 4*4 /*ulUnicodeRangeN*/ + 4/*achVendID*/);
163             uint fsSelection = popBE!ushort(os2Table);
164             _style = OpenTypeFontStyle.normal;
165             if (fsSelection & 1)
166                 _style = OpenTypeFontStyle.italic;
167             if (fsSelection & 512)
168                 _style = OpenTypeFontStyle.oblique;
169         }
170         else
171         {
172             // No OS/2 table? parse 'head' instead (some Mac fonts). For monospace, parse 'post' table.
173             const(ubyte)[] postTable = findTable(0x706F7374 /* 'post' */);
174             if (postTable)
175             {
176                 skipBytes(postTable, 4 /*version*/ + 4 /*italicAngle*/ + 2 /*underlinePosition*/ + 2 /*underlineThickness*/);
177                 _isMonospaced = popBE!uint(postTable) /*isFixedPitch*/ != 0;
178             }
179 
180             const(ubyte)[] headTable = findTable(0x68656164 /* 'head' */);
181             if (headTable !is null)
182             {
183                 skipBytes(headTable, 4 /*version*/ + 4 /*fontRevision*/ + 4 /*checkSumAdjustment*/ + 4 /*magicNumber*/ 
184                                      + 2 /*flags*/ + 2 /*_unitsPerEm*/ + 16 /*created+modified*/+4*2/*bounding box*/ );
185                 ushort macStyle = popBE!ushort(headTable);
186 
187                 _weight = OpenTypeFontWeight.normal;
188                 if (macStyle & 1) _weight = OpenTypeFontWeight.bold;
189                 _style = OpenTypeFontStyle.normal;
190                 if (macStyle & 2) _style = OpenTypeFontStyle.italic;
191             }
192             else
193             {
194                 // Last chance heuristics.
195                 // Font weight heuristic based on family names
196                 string subFamily = subFamilyName().toLower;
197                 if (subFamily.canFind("thin"))
198                     _weight = OpenTypeFontWeight.thin;
199                 else if (subFamily.canFind("ultra light"))
200                     _weight = OpenTypeFontWeight.thinest;
201                 else if (subFamily.canFind("ultraLight"))
202                     _weight = OpenTypeFontWeight.thinest;
203                 else if (subFamily.canFind("hairline"))
204                     _weight = OpenTypeFontWeight.thinest;
205                 else if (subFamily.canFind("extralight"))
206                     _weight = OpenTypeFontWeight.extraLight;
207                 else if (subFamily.canFind("light"))
208                     _weight = OpenTypeFontWeight.light;
209                 else if (subFamily.canFind("demi bold"))
210                     _weight = OpenTypeFontWeight.semiBold;
211                 else if (subFamily.canFind("semibold"))
212                     _weight = OpenTypeFontWeight.semiBold;
213                 else if (subFamily.canFind("extrabold"))
214                     _weight = OpenTypeFontWeight.extraBold;
215                 else if (subFamily.canFind("bold"))
216                     _weight = OpenTypeFontWeight.bold;
217                 else if (subFamily.canFind("heavy"))
218                     _weight = OpenTypeFontWeight.bold;
219                 else if (subFamily.canFind("medium"))
220                     _weight = OpenTypeFontWeight.medium;
221                 else if (subFamily.canFind("black"))
222                     _weight = OpenTypeFontWeight.black;
223                 else if (subFamily.canFind("negreta"))
224                     _weight = OpenTypeFontWeight.black;
225                 else if (subFamily.canFind("regular"))
226                     _weight = OpenTypeFontWeight.normal;
227                 else if (subFamily == "italic")
228                     _weight = OpenTypeFontWeight.normal;
229                 else
230                     _weight = OpenTypeFontWeight.normal;
231 
232                 // Font style heuristic based on family names
233                 if (subFamily.canFind("italic"))
234                     _style = OpenTypeFontStyle.italic;
235                 else if (subFamily.canFind("oblique"))
236                     _style = OpenTypeFontStyle.oblique;
237                 else
238                     _style = OpenTypeFontStyle.normal;
239             }
240         }        
241     }
242 
243     /// Returns: a typographics family name suitable for grouping fonts per family in menus
244     string familyName()
245     {
246         string family = getName(NameID.preferredFamily);
247         if (family is null)
248             return getName(NameID.fontFamily);
249         else
250             return family;
251     }
252 
253     /// Returns: a typographics sub-family name suitable for grouping fonts per family in menus
254     string subFamilyName()
255     {
256         string family = getName(NameID.preferredSubFamily);
257         if (family is null)
258             return getName(NameID.fontSubFamily);
259         else
260             return family;
261     }
262 
263     /// Returns: the PostScript name of that font, if available.
264     string postScriptName()
265     {
266         return getName(NameID.postscriptName);
267     }
268 
269     /// Returns: the "full" font name, if available.
270     string fullFontName()
271     {
272         return getName(NameID.fullFontName);
273     }
274 
    /// Returns: `true` if the font is monospaced, as determined by the constructor.
    bool isMonospaced()
    {
        return _isMonospaced;
    }
280 
    /// Returns: Font weight, as determined by the constructor.
    OpenTypeFontWeight weight()
    {
        return _weight;
    }
286 
    /// Returns: Font style, as determined by the constructor.
    OpenTypeFontStyle style()
    {
        return _style;
    }
292 
    /// Returns: The whole OpenType file where this font is located.
    ///          For a collection this is the full file, not just this font's data.
    const(ubyte)[] fileData()
    {
        return _wholeFileData;
    }
298 
    /// Returns: the font bounding box read from the 'head' table,
    ///          as [xmin, ymin, xmax, ymax].
    /// Triggers the lazy parsing of font metrics on first use.
    int[4] boundingBox()
    {
        computeFontMetrics();
        return _boundingBox;
    }
304 
305     /// Returns: Baseline offset above the normal "alphabetical" baseline.
306     ///          In glyph units.
307     float getBaselineOffset(FontBaseline baseline)
308     {
309         computeFontMetrics();
310         final switch(baseline) with (FontBaseline)
311         {
312             case top:
313                 // ascent - descent should give the em square, but if it doesn't rescale to have top of em square
314                 float actualUnits = _ascender - _descender;
315                 return _ascender * _unitsPerEm / actualUnits;
316 
317             case hanging:
318                 return ascent(); // TODO: correct?
319 
320             case middle:
321                 // middle of em square
322                 float actualUnits = _ascender - _descender;
323                 return 0.5f * (_ascender + _descender) * _unitsPerEm / actualUnits;
324 
325             case alphabetic: 
326                 return 0; // the default "baseline"
327 
328             case bottom:
329                 // ascent - descent should give the em square, but if it doesn't rescale to have bottom of em square
330                 float actualUnits = _ascender - _descender;
331                 return _descender * _unitsPerEm / actualUnits;
332         }
333     }
334 
    /// Returns: Maximum height above the baseline reached by glyphs in this font
    ///          (the `ascender` field of the 'hhea' table).
    ///          In glyph units.
    /// Triggers the lazy parsing of font metrics on first use.
    int ascent()
    {
        computeFontMetrics();
        return _ascender;
    }
342 
    /// Returns: Maximum depth below the baseline reached by glyphs in this font
    ///          (the `descender` field of the 'hhea' table).
    ///          Should be negative.
    ///          In glyph units.
    /// Triggers the lazy parsing of font metrics on first use.
    int descent()
    {
        computeFontMetrics();
        return _descender;
    }
351 
352     /// Returns: The spacing between baselines of consecutive lines of text.
353     ///          In glyph units.
354     ///          Also called "leading".
355     int lineGap()
356     {
357         computeFontMetrics();
358         return _ascender - _descender + _lineGap;
359     }
360 
    /// Returns: 'A' height. Currently approximated by the font ascender.
    /// TODO: eventually extract from OS/2 table
    int capHeight()
    {
        computeFontMetrics();
        return _ascender; // looks like ascent, but perhaps not
    }
368 
369     /// Returns: Italic angle in counter-clockwise degrees from the vertical. 
370     /// Zero for upright text, negative for text that leans to the right (forward).
371     float postScriptItalicAngle()
372     {
373         computeFontMetrics();
374         return _italicAngle / 65536.0f;
375     }
376 
377     /// Does this font has a glyph for this codepoint?
378     bool hasGlyphFor(dchar ch)
379     {
380         computeFontMetrics();
381         ushort* index = ch in _charToGlyphMapping;
382         return index !is null;
383     }
384 
385     ushort glyphIndexFor(dchar ch)
386     {
387         computeFontMetrics();
388         ushort* index = ch in _charToGlyphMapping;
389         if (index)
390             return *index;
391         else
392             return 0; // special value for non available characters
393     }
394 
395     /// Return the glyph used when a char is requested that the font doesn't provide.
396     GlyphDesc glyphForChar(dchar ch)
397     {
398         ushort index = glyphIndexFor(ch);
399         if (index != 0)
400             return _glyphs[index];
401 
402         // Unicode has two symbols for unknown characters:
403         // U+25A1 and U+FFFD
404         // Browsers seem to use U+FFFD.
405         
406         index = glyphIndexFor('\uFFFD');
407         if (index != 0)
408             return _glyphs[index];
409 
410         // try 0x7f character (Webdings...)
411         index = glyphIndexFor('\u007f');
412         if (index != 0)
413             return _glyphs[index];
414 
415         // try ? character
416         index = glyphIndexFor('\u003F');
417         if (index != 0)
418             return _glyphs[index];
419 
420         // try space character
421         index = glyphIndexFor(' ');
422         if (index != 0)
423             return _glyphs[index];
424 
425         // Return first glyph.
426         if (_glyphs.length > 0)
427             return _glyphs[0];
428 
429         // give up, this font has no suitable replacement character
430         assert(false);
431     }
432 
433     /// Returns: left side bearing for this character.
434     int leftSideBearing(dchar ch)
435     {
436         computeFontMetrics();
437         return glyphForChar(ch).leftSideBearing;
438     }
439 
440     /// Returns: horizontal advance for this character.
441     int horizontalAdvance(dchar ch)
442     {
443         computeFontMetrics();
444         return glyphForChar(ch).horzAdvance;
445     }
446 
    /// Returns: number of glyphs in the font, as read from the 'maxp' table.
    /// Triggers the lazy parsing of font metrics on first use.
    int numGlyphs()
    {
        computeFontMetrics();
        return cast(int)(_glyphs.length);
    }
453 
454     /// Returns: horizontal advance for a glyph.
455     /// In glyph units.
456     int horizontalAdvanceForGlyph(int glyphIndex)
457     {
458         computeFontMetrics();
459         return _glyphs[glyphIndex].horzAdvance;
460     }
461 
    /// Returns: the maximum Unicode char available in this font, i.e. the highest
    ///          code point recorded while parsing the 'cmap' table.
    dchar maxAvailableChar()
    {
        computeFontMetrics();
        return _maxCodepoint;
    }
468 
469     const(CharRange)[] charRanges()
470     {
471         return _charRanges;
472     }
473 
474     // The number of internal units for 1em
475     float UPM()
476     {
477         return _unitsPerEm;
478     }
479 
480     // A scale factpr to convert from glyph units to em
481     float invUPM()
482     {
483         return 1.0f / _unitsPerEm;
484     }
485 
486     /// Returns text metrics for this piece of text (single line assumed), in glyph units.
487     OpenTypeTextMetrics measureText(const(char)[] text)
488     {
489         OpenTypeTextMetrics result;
490         float adv = 0;
491         float advButLastChar = 0;
492         foreach(dchar ch; text) // Note: auto-decoding there
493         {
494             advButLastChar = adv;
495             adv += horizontalAdvance(ch);
496         }
497 
498         // TODO: only works if the font is left to right
499         result.horzAdvance = adv;
500         result.xmin = _boundingBox[0]; // TODO: compute from each glyph bounding box
501         result.xmax = advButLastChar + _boundingBox[2]; // TODO: compute from each glyph bounding box
502         return result;
503     }
504 
505 private:
506     // need whole file since some data may be shared across fonts
507     // And also table offsets are relative to the whole file.
508     const(ubyte)[] _wholeFileData;
509 
510     OpenTypeFile _file;
511     int _fontIndex;
512 
513     // Computed in constructor
514     OpenTypeFontWeight _weight;
515     OpenTypeFontStyle _style;
516     bool _isMonospaced;
517 
518     // <parsed-by-computeFontMetrics>
519 
520     bool metricsParsed = false;
521 
522     // xmin ymin xmax ymax
523     int[4] _boundingBox;
524 
525     int _unitsPerEm;
526 
527     short _ascender, _descender, _lineGap;
528     int _italicAngle; // fixed 16.16 format
529 
    /// Horizontal metrics of one glyph, as read from the 'hmtx' table.
    static struct GlyphDesc
    {
        ushort horzAdvance;    // advance width
        short leftSideBearing; // left side bearing
    }
535     GlyphDesc[] _glyphs;
536 
537     /// Unicode char to glyph mapping, parsed from 'cmap' table
538     /// Note: it's not sure at all if parsing the 'cmap' table each time is more costly.
539     /// Also this could be an array sorted by dchar.
540     ushort[dchar] _charToGlyphMapping;
541 
542     CharRange[] _charRanges;
543 
544     dchar _maxCodepoint;
545 
546     // </parsed-by-computeFontMetrics>
547 
548     /// Returns: A bounding box for each glyph, in glyph space.
549     void computeFontMetrics()
550     {
551         if (metricsParsed)
552             return;
553         metricsParsed = true;
554 
555         const(ubyte)[] headTable = getTable(0x68656164 /* 'head' */);
556 
557         skipBytes(headTable, 4); // Table version number
558         skipBytes(headTable, 4); // fontRevision
559         skipBytes(headTable, 4); // checkSumAdjustment
560         uint magicNumber = popBE!uint(headTable);
561         if (magicNumber != 0x5F0F3CF5)
562             throw new Exception("Invalid magicNumber in 'head' table.");
563         skipBytes(headTable, 2); // flags
564         _unitsPerEm = popBE!ushort(headTable);
565         skipBytes(headTable, 8); // created
566         skipBytes(headTable, 8); // modified
567         _boundingBox[0] = popBE!short(headTable);
568         _boundingBox[1] = popBE!short(headTable);
569         _boundingBox[2] = popBE!short(headTable);
570         _boundingBox[3] = popBE!short(headTable);
571         skipBytes(headTable, 2); // macStyle
572         skipBytes(headTable, 2); // lowestRecPPEM
573         skipBytes(headTable, 2); // fontDirectionHint
574         skipBytes(headTable, 2); // indexToLocFormat
575         skipBytes(headTable, 2); // glyphDataFormat
576 
577         const(ubyte)[] hheaTable = getTable(0x68686561 /* 'hhea' */);
578         skipBytes(hheaTable, 4); // Table version number
579         _ascender = popBE!short(hheaTable);
580         _descender = popBE!short(hheaTable);
581         _lineGap = popBE!short(hheaTable);
582         skipBytes(hheaTable, 2); // advanceWidthMax
583         skipBytes(hheaTable, 2); // minLeftSideBearing
584         skipBytes(hheaTable, 2); // minRightSideBearing
585         skipBytes(hheaTable, 2); // xMaxExtent
586         skipBytes(hheaTable, 2); // caretSlopeRise
587         skipBytes(hheaTable, 2); // caretSlopeRun
588         skipBytes(hheaTable, 2); // caretOffset
589         skipBytes(hheaTable, 8); // reserved
590         short metricDataFormat = popBE!short(hheaTable);
591         if (metricDataFormat != 0)
592             throw new Exception("Unsupported metricDataFormat in 'hhea' table");
593 
594         int numberOfHMetrics = popBE!ushort(hheaTable);
595 
596         const(ubyte)[] maxpTable = getTable(0x6D617870 /* 'maxp' */);
597         skipBytes(maxpTable, 4); // version
598         int numGlyphs = popBE!ushort(maxpTable);
599 
600         _glyphs.length = numGlyphs;
601 
602         const(ubyte)[] hmtxTable = getTable(0x686D7478 /* 'hmtx' */);
603 
604         ushort lastAdvance = 0;
605         foreach(g; 0..numberOfHMetrics)
606         {
607             lastAdvance = popBE!ushort(hmtxTable);
608             _glyphs[g].horzAdvance = lastAdvance;
609             _glyphs[g].leftSideBearing = popBE!short(hmtxTable);
610         }
611         foreach(g; numberOfHMetrics.._glyphs.length)
612         {
613             _glyphs[g].horzAdvance = lastAdvance;
614             _glyphs[g].leftSideBearing = popBE!short(hmtxTable);
615         }
616 
617         // Parse italicAngle
618         const(ubyte)[] postTable = getTable(0x706F7374 /* 'post' */);
619         skipBytes(postTable, 4); // version
620         _italicAngle = popBE!int(postTable);
621 
622         parseCMAP();
623     }
624 
625     /// Parses all codepoints-to-glyph mappings, fills the hashmap `_charToGlyphMapping`
626     void parseCMAP()
627     {
628         const(ubyte)[] cmapTableFull = getTable(0x636d6170 /* 'cmap' */);
629         const(ubyte)[] cmapTable = cmapTableFull;
630 
631         skipBytes(cmapTable, 2); // version
632         int numTables = popBE!ushort(cmapTable);
633 
634         // Looking for a BMP Unicode 'cmap' only
635         for(int table = 0; table < numTables; ++table)
636         {
637             ushort platformID = popBE!ushort(cmapTable);
638             ushort encodingID = popBE!ushort(cmapTable);
639             uint offset = popBE!uint(cmapTable);
640 
641             // in stb_truetype, only case supported, seems to be common
642             // Unfortunately documentation is scarce about these table formats.
643             if (platformID == 3 && (encodingID == 0 /* Unicode 1.0 */
644                                  || encodingID == 1 /* Unicode UCS-2 */
645                                  || encodingID == 4 /* Unicode UCS-4 */))
646             {
647                 const(ubyte)[] subTable = cmapTableFull[offset..$];
648                 ushort format = popBE!ushort(subTable);
649 
650                 // TODO: support other format because this only works within the BMP
651                 if (format == 4)
652                 {
653                     ushort len = popBE!ushort(subTable);
654                     skipBytes(subTable, 2); // language, not useful here
655                     int segCountX2 = popBE!ushort(subTable);
656                     if ((segCountX2 % 2) != 0)
657                         throw new Exception("segCountX2 is not an even number");
658                     int segCount = segCountX2/2;
659                     int searchRange = popBE!ushort(subTable);
660                     int entrySelector = popBE!ushort(subTable);
661                     int rangeShift = popBE!ushort(subTable);
662 
663                     int[] endCount = new int[segCount];
664                     int[] startCount = new int[segCount];
665                     short[] idDelta = new short[segCount];
666 
667                     int[] idRangeOffset = new int[segCount];
668 
669                     foreach(seg; 0..segCount)
670                         endCount[seg] = popBE!ushort(subTable);
671                     skipBytes(subTable, 2); // reserved, should be zero
672 
673                     foreach(seg; 0..segCount)
674                         startCount[seg] = popBE!ushort(subTable);
675 
676                     foreach(seg; 0..segCount)
677                         idDelta[seg] = popBE!short(subTable);
678 
679                     const(ubyte)[] idRangeOffsetArray = subTable;
680 
681                     foreach(seg; 0..segCount)
682                         idRangeOffset[seg] = popBE!ushort(subTable);
683 
684                     foreach(seg; 0..segCount)
685                     {
686                         _charRanges ~= CharRange(startCount[seg], endCount[seg]);
687 
688                         foreach(dchar ch; startCount[seg]..endCount[seg])
689                         {
690                             ushort glyphIndex;
691 
692                             if (idRangeOffset[seg] == 0)
693                             {
694                                 glyphIndex = cast(ushort)( cast(ushort)ch + idDelta[seg] );
695                             }
696                             else
697                             {
698                                 if ((idRangeOffset[seg] % 2) != 0)
699                                     throw new Exception("idRangeOffset[i] is not an even number");
700 
701                                 // Yes, this is what the spec says to do
702                                 ushort* p = cast(ushort*)(idRangeOffsetArray.ptr);
703                                 p = p + seg;
704                                 p = p + (ch - startCount[seg]);
705                                 p = p + (idRangeOffset[seg]/2);    
706                                 ubyte[] pslice = cast(ubyte[])(p[0..1]);
707                                 glyphIndex = popBE!ushort(pslice);
708 
709                                 if (glyphIndex == 0) // missing glyph
710                                     continue;
711                                 glyphIndex += idDelta[seg];
712                             }
713 
714                             if (glyphIndex >= _glyphs.length)
715                             {
716                                 throw new Exception("Non existing glyph index");
717                             }
718                             _charToGlyphMapping[ch] = glyphIndex;
719 
720                             if (ch > _maxCodepoint) 
721                                 _maxCodepoint = ch;
722                         }
723                     }
724                 }
725                 else
726                     throw new Exception("Unsupported 'cmap' format");
727                 break;
728             }
729         }
730     }
731 
732     /// Returns: an index in the file, where that table start for this particular font.
733     const(ubyte)[] findTable(uint fourCC)
734     {
735         int offsetToOffsetTable = _file.offsetForFont(_fontIndex);
736         const(ubyte)[] offsetTable = _wholeFileData[offsetToOffsetTable..$];
737 
738         uint firstTag = popBE!uint(offsetTable);
739 
740         if (firstTag != 0x00010000 && firstTag != 0x4F54544F /* 'OTTO' */)
741             throw new Exception("Unrecognized tag in Offset Table");
742 
743         int numTables = popBE!ushort(offsetTable);
744         skipBytes(offsetTable, 6);
745 
746         const(uint)[] tableRecordEntries = cast(uint[])(offsetTable[0..16*numTables]);
747 
748         // Binary search following
749         // https://en.wikipedia.org/wiki/Binary_search_algorithm#Algorithm
750         int L = 0;
751         int R = numTables - 1;
752         while (L <= R)
753         {
754             int m = (L+R)/2;
755             uint tag = forceBigEndian(tableRecordEntries[m * 4]);
756             if (tag < fourCC)
757                 L = m + 1;
758             else if (tag > fourCC)
759                 R = m - 1;
760             else
761             {
762                 // found
763                 assert (tag == fourCC);
764                 uint checkSum = forceBigEndian(tableRecordEntries[m*4+1]);
765                 uint offset = forceBigEndian(tableRecordEntries[m*4+2]);
766                 uint len = forceBigEndian(tableRecordEntries[m*4+3]);
767                 return _wholeFileData[offset..offset+len];
768             }
769         }
770         return null; // not found
771     }
772 
773     /// Ditto, but throw if not found.
774     const(ubyte)[] getTable(uint fourCC)
775     {
776         const(ubyte)[] result = findTable(fourCC);
777         if (result is null)
778             throw new Exception(format("Table not found: %s", fourCC));
779         return result;
780     }
781 
782     /// Returns: that "name" information, in UTF-8
783     string getName(NameID requestedNameID)
784     {
785         const(ubyte)[] nameTable = getTable(0x6e616d65 /* 'name' */);
786         const(ubyte)[] nameTableParsed = nameTable;
787 
788         ushort format = popBE!ushort(nameTableParsed);
789         if (format > 1)
790             throw new Exception("Unrecognized format in 'name' table");
791 
792         ushort numNameRecords = popBE!ushort(nameTableParsed);
793         ushort stringOffset = popBE!ushort(nameTableParsed);
794 
795         const(ubyte)[] stringDataStorage = nameTable[stringOffset..$];
796 
797         foreach(i; 0..numNameRecords)
798         {
799             PlatformID platformID = cast(PlatformID) popBE!ushort(nameTableParsed);
800             ushort encodingID = popBE!ushort(nameTableParsed);
801             ushort languageID = popBE!ushort(nameTableParsed);
802             ushort nameID = popBE!ushort(nameTableParsed);
803             ushort length = popBE!ushort(nameTableParsed);
804             ushort offset = popBE!ushort(nameTableParsed); // String offset from start of storage area (in bytes)
805             if (nameID == requestedNameID)
806             {
807                 // found
808                 const(ubyte)[] stringSlice = stringDataStorage[offset..offset+length];
809                 string name;
810 
811                 if (platformID == PlatformID.macintosh && encodingID == 0)
812                 {
813                     // MacRoman encoding
814                     name = decodeMacRoman(stringSlice);
815                 }
816                 else
817                 {
818                     // Most of the time it's UTF16-BE
819                     name = decodeUTF16BE(stringSlice);
820                 }
821                 return name;
822             }
823         }
824 
825         return null; // not found
826     }
827 
    /// Platform identifier of a 'name' table record.
    /// `getName` uses it, together with the encoding ID, to pick a string decoder.
    enum PlatformID : ushort
    {
        unicode,   // 0
        macintosh, // 1, decoded as MacRoman when the encoding ID is 0
        iso,       // 2
        windows,   // 3
        custom,    // 4
    }
836 
    /// Identifiers of the 'name' table records that `getName` can be asked for.
    /// The values are the name IDs as stored in the font file.
    enum NameID : ushort
    {
        copyrightNotice      = 0,
        fontFamily           = 1,
        fontSubFamily        = 2,
        uniqueFontIdentifier = 3,
        fullFontName         = 4,
        versionString        = 5,
        postscriptName       = 6,
        trademark            = 7,
        manufacturer         = 8,
        designer             = 9,
        description          = 10,
        preferredFamily      = 16, // used first by familyName()
        preferredSubFamily   = 17, // used first by subFamilyName()
    }
853 }
854 
855 
856 private:
857 
/// Converts between a 32-bit value stored in big-endian byte order and the native
/// representation.
/// Returns: `x` unchanged on big-endian targets, `x` with its bytes swapped otherwise.
uint forceBigEndian(ref const(uint) x) pure nothrow @nogc
{
    version(LittleEndian)
    {
        import core.bitop: bswap;
        return bswap(x);
    }
    else
    {
        return x;
    }
}
868 
/// Decodes a Mac OS Roman encoded string.
///
/// Params:
///     input = the encoded bytes, one character per byte.
/// Returns: the same text, in UTF-8. Bytes below 128 are ASCII and kept as is.
string decodeMacRoman(const(ubyte)[] input) pure
{
    // Unicode code point for each byte from 0x80 to 0xFF.
    // Written with escapes so that the table doesn't depend on the encoding of this
    // source file; in particular 0xDE and 0xDF are the single-character ligatures
    // U+FB01 and U+FB02, not the letter pairs "fi" and "fl".
    static immutable dchar[128] CONVERT_TABLE  =
    [
        // 0x80 .. 0x8F
        '\u00C4', '\u00C5', '\u00C7', '\u00C9', '\u00D1', '\u00D6', '\u00DC', '\u00E1',
        '\u00E0', '\u00E2', '\u00E4', '\u00E3', '\u00E5', '\u00E7', '\u00E9', '\u00E8',
        // 0x90 .. 0x9F
        '\u00EA', '\u00EB', '\u00ED', '\u00EC', '\u00EE', '\u00EF', '\u00F1', '\u00F3',
        '\u00F2', '\u00F4', '\u00F6', '\u00F5', '\u00FA', '\u00F9', '\u00FB', '\u00FC',
        // 0xA0 .. 0xAF
        '\u2020', '\u00B0', '\u00A2', '\u00A3', '\u00A7', '\u2022', '\u00B6', '\u00DF',
        '\u00AE', '\u00A9', '\u2122', '\u00B4', '\u00A8', '\u2260', '\u00C6', '\u00D8',
        // 0xB0 .. 0xBF
        '\u221E', '\u00B1', '\u2264', '\u2265', '\u00A5', '\u00B5', '\u2202', '\u2211',
        '\u220F', '\u03C0', '\u222B', '\u00AA', '\u00BA', '\u03A9', '\u00E6', '\u00F8',
        // 0xC0 .. 0xCF
        '\u00BF', '\u00A1', '\u00AC', '\u221A', '\u0192', '\u2248', '\u2206', '\u00AB',
        '\u00BB', '\u2026', '\u00A0', '\u00C0', '\u00C3', '\u00D5', '\u0152', '\u0153',
        // 0xD0 .. 0xDF
        '\u2013', '\u2014', '\u201C', '\u201D', '\u2018', '\u2019', '\u00F7', '\u25CA',
        '\u00FF', '\u0178', '\u2044', '\u20AC', '\u2039', '\u203A', '\uFB01', '\uFB02',
        // 0xE0 .. 0xEF
        '\u2021', '\u00B7', '\u201A', '\u201E', '\u2030', '\u00C2', '\u00CA', '\u00C1',
        '\u00CB', '\u00C8', '\u00CD', '\u00CE', '\u00CF', '\u00CC', '\u00D3', '\u00D4',
        // 0xF0 .. 0xFF (0xF0 is replaced by a question mark)
        '?',      '\u00D2', '\u00DA', '\u00DB', '\u00D9', '\u0131', '\u02C6', '\u02DC',
        '\u00AF', '\u02D8', '\u02D9', '\u02DA', '\u00B8', '\u02DD', '\u02DB', '\u02C7',
    ];

    string textTranslated = "";
    foreach(ubyte b; input)
    {
        dchar ch = (b < 128) ? cast(dchar) b : CONVERT_TABLE[b - 128];
        textTranslated ~= ch; // appends the UTF-8 encoding of ch
    }
    return textTranslated;
}
896 
/// Decodes a big-endian UTF-16 string.
///
/// Params:
///     input = the encoded bytes, two per UTF-16 code unit.
/// Returns: the same text, in UTF-8.
/// Throws: `Exception` if `input` has an odd number of bytes.
string decodeUTF16BE(const(ubyte)[] input) pure
{
    if ((input.length % 2) != 0)
        throw new Exception("Couldn't decode UTF-16 string");

    // Gather the 16-bit code units first, then transcode them all at once.
    wstring codeUnits = "";
    const int unitCount = cast(int)(input.length)/2;
    foreach (_; 0 .. unitCount)
    {
        const wchar unit = popBE!ushort(input);
        codeUnits ~= unit;
    }

    return to!string(codeUnits);
}