1 /* 2 * MD4C: Markdown parser for C 3 * (http://github.com/mity/md4c) 4 * 5 * Copyright (c) 2016-2019 Martin Mitas 6 * Copyright (c) 2019 Guillaume Piolat (D translation) 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in 16 * all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 * IN THE SOFTWARE. 
*/
module commonmarkd.md4c;

import core.stdc.string;
import core.stdc.stdio;
import core.stdc.stdlib: malloc, free;

nothrow:
@nogc:
@system:

// Compatibility with older DMDFE
static if (__VERSION__ < 2079)
{
    import core.stdc.stdlib: _compare_fp_t;
    // Provide @nogc nothrow bsearch and qsort for older compilers
    extern (C):
    @system:
    inout(void)* bsearch(scope const void* key, scope inout(void)* base, size_t nmemb, size_t size, _compare_fp_t compar);
    void qsort(scope void* base, size_t nmemb, size_t size, _compare_fp_t compar);
}
else
{
    import core.stdc.stdlib: qsort, bsearch;
}

/* Basic character, size and offset types used by the whole API.
 * Sizes and offsets are 32-bit unsigned values. */
alias MD_CHAR = char;
alias MD_SIZE = uint;
alias MD_OFFSET = uint;

/* Block represents a part of document hierarchy structure like a paragraph
 * or list item.
 */
alias MD_BLOCKTYPE = int;
enum : MD_BLOCKTYPE
{
    /* <body>...</body> */
    MD_BLOCK_DOC = 0,

    /* <blockquote>...</blockquote> */
    MD_BLOCK_QUOTE,

    /* <ul>...</ul>
     * Detail: Structure MD_BLOCK_UL_DETAIL. */
    MD_BLOCK_UL,

    /* <ol>...</ol>
     * Detail: Structure MD_BLOCK_OL_DETAIL. */
    MD_BLOCK_OL,

    /* <li>...</li>
     * Detail: Structure MD_BLOCK_LI_DETAIL. */
    MD_BLOCK_LI,

    /* <hr> */
    MD_BLOCK_HR,

    /* <h1>...</h1> (for levels up to 6)
     * Detail: Structure MD_BLOCK_H_DETAIL. */
    MD_BLOCK_H,

    /* <pre><code>...</code></pre>
     * Note the text lines within code blocks are terminated with '\n'
     * instead of explicit MD_TEXT_BR. */
    MD_BLOCK_CODE,

    /* Raw HTML block. This itself does not correspond to any particular HTML
     * tag. The contents of it _is_ raw HTML source intended to be put
     * in verbatim form to the HTML output. */
    MD_BLOCK_HTML,

    /* <p>...</p> */
    MD_BLOCK_P,

    /* <table>...</table> and its contents.
     * Detail: Structure MD_BLOCK_TD_DETAIL (used with MD_BLOCK_TH and MD_BLOCK_TD)
     * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
    MD_BLOCK_TABLE,
    MD_BLOCK_THEAD,
    MD_BLOCK_TBODY,
    MD_BLOCK_TR,
    MD_BLOCK_TH,
    MD_BLOCK_TD
}

/* Span represents an in-line piece of a document which should be rendered with
 * the same font, color and other attributes. A sequence of spans forms a block
 * like paragraph or list item. */
alias MD_SPANTYPE = int;
enum : MD_SPANTYPE
{
    /* <em>...</em> */
    MD_SPAN_EM,

    /* <strong>...</strong> */
    MD_SPAN_STRONG,

    /* <a href="xxx">...</a>
     * Detail: Structure MD_SPAN_A_DETAIL. */
    MD_SPAN_A,

    /* <img src="xxx">
     * Detail: Structure MD_SPAN_IMG_DETAIL.
     * Note: Image text can contain nested spans and even nested images.
     * If rendered into the ALT attribute of an HTML <IMG> tag, it is the
     * responsibility of the renderer to deal with it.
     */
    MD_SPAN_IMG,

    /* <code>...</code> */
    MD_SPAN_CODE,

    /* <del>...</del>
     * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
     */
    MD_SPAN_DEL,

    /* For recognizing inline ($) and display ($$) equations
     * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
     */
    MD_SPAN_LATEXMATH,
    MD_SPAN_LATEXMATH_DISPLAY
}

/* Text is the actual textual contents of span. */
alias MD_TEXTTYPE = int;
enum : MD_TEXTTYPE
{
    /* Normal text. */
    MD_TEXT_NORMAL = 0,

    /* null character. CommonMark requires replacing null character with
     * the replacement char U+FFFD, so this allows caller to do that easily. */
    MD_TEXT_NULLCHAR,

    /* Line breaks.
     * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
     * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself.
     */
    MD_TEXT_BR,         /* <br> (hard break) */
    MD_TEXT_SOFTBR,     /* '\n' in source text where it is not semantically meaningful (soft break) */

    /* Entity.
     * (a) Named entity, e.g. &nbsp;
     *     (Note MD4C does not have a list of known entities.
     *     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
     *     treated as a named entity.)
     * (b) Numerical entity, e.g. &#1234;
     * (c) Hexadecimal entity, e.g. &#x12AB;
     *
     * As MD4C is mostly encoding agnostic, application gets the verbatim
     * entity text into the MD_PARSER::text callback. */
    MD_TEXT_ENTITY,

    /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
     * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
     * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
     * kind of text. */
    MD_TEXT_CODE,

    /* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
     * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
     * The text contains verbatim '\n' for the new lines. */
    MD_TEXT_HTML,

    /* Text is inside an equation. This is processed the same way as inlined code
     * spans (`code`). */
    MD_TEXT_LATEXMATH
}


/* Alignment enumeration. */

alias MD_ALIGN = int;
enum : MD_ALIGN
{
    MD_ALIGN_DEFAULT = 0,   /* When unspecified. */
    MD_ALIGN_LEFT,
    MD_ALIGN_CENTER,
    MD_ALIGN_RIGHT
}


/* String attribute.
 *
 * This wraps strings which are outside of a normal text flow and which are
 * propagated within various detailed structures, but which still may contain
 * string portions of different types like e.g. entities.
 *
 * So, for example, let's consider an image has a title attribute string
 * set to "foo &quot; bar". (Note the string size is 14.)
*
 * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
 *  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
 *  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
 *  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
 *  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
 *
 * Note that these conditions are guaranteed:
 *  -- substr_offsets[0] == 0
 *  -- substr_offsets[LAST+1] == size
 *  -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear.
 */
struct MD_ATTRIBUTE
{
    const (MD_CHAR)* text;              /* Attribute text; use together with 'size'. */
    MD_SIZE size;                       /* Length of 'text'. */
    const (MD_TEXTTYPE)* substr_types;  /* Type of each substring (see the example above). */
    const (MD_OFFSET)* substr_offsets;  /* Start offset of each substring, plus one final entry equal to 'size'. */
}


/* Detailed info for MD_BLOCK_UL. */
struct MD_BLOCK_UL_DETAIL
{
    int is_tight;           /* Non-zero if tight list, zero if loose. */
    MD_CHAR mark;           /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
}

/* Detailed info for MD_BLOCK_OL. */
struct MD_BLOCK_OL_DETAIL
{
    uint start;             /* Start index of the ordered list. */
    int is_tight;           /* Non-zero if tight list, zero if loose. */
    MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
}

/* Detailed info for MD_BLOCK_LI. */
struct MD_BLOCK_LI_DETAIL
{
    int is_task;                /* Can be non-zero only with MD_FLAG_TASKLISTS */
    MD_CHAR task_mark;          /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
    MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */
}

/* Detailed info for MD_BLOCK_H. */
struct MD_BLOCK_H_DETAIL
{
    uint level;         /* Header level (1 - 6) */
}

/* Detailed info for MD_BLOCK_CODE.
*/
struct MD_BLOCK_CODE_DETAIL
{
    MD_ATTRIBUTE info;
    MD_ATTRIBUTE lang;
    MD_CHAR fence_char;     /* The character used for fenced code block; or zero for indented code block. */
}

/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
struct MD_BLOCK_TD_DETAIL
{
    MD_ALIGN align_;
}

/* Detailed info for MD_SPAN_A. */
struct MD_SPAN_A_DETAIL
{
    MD_ATTRIBUTE href;
    MD_ATTRIBUTE title;
}

/* Detailed info for MD_SPAN_IMG. */
struct MD_SPAN_IMG_DETAIL
{
    MD_ATTRIBUTE src;
    MD_ATTRIBUTE title;
}


/* Flags specifying extensions/deviations from CommonMark specification.
 *
 * By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
 * The following flags may allow some extensions or deviations from it.
 */
enum
{
    MD_FLAG_COLLAPSEWHITESPACE = 0x0001,        /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
    MD_FLAG_PERMISSIVEATXHEADERS = 0x0002,      /* Do not require space in ATX headers ( ###header ) */
    MD_FLAG_PERMISSIVEURLAUTOLINKS = 0x0004,    /* Recognize URLs as autolinks even without '<', '>' */
    MD_FLAG_PERMISSIVEEMAILAUTOLINKS = 0x0008,  /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
    MD_FLAG_NOINDENTEDCODEBLOCKS = 0x0010,      /* Disable indented code blocks. (Only fenced code works.) */
    MD_FLAG_NOHTMLBLOCKS = 0x0020,              /* Disable raw HTML blocks. */
    MD_FLAG_NOHTMLSPANS = 0x0040,               /* Disable raw HTML (inline). */
    MD_FLAG_TABLES = 0x0100,                    /* Enable tables extension. */
    MD_FLAG_STRIKETHROUGH = 0x0200,             /* Enable strikethrough extension. */
    MD_FLAG_PERMISSIVEWWWAUTOLINKS = 0x0400,    /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
    MD_FLAG_TASKLISTS = 0x0800,                 /* Enable task list extension. */
    MD_FLAG_LATEXMATHSPANS = 0x1000,            /* Enable $ and $$ containing LaTeX equations. */

    /* Combined masks built from the single-bit flags above. */
    MD_FLAG_PERMISSIVEAUTOLINKS = MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS,
    MD_FLAG_NOHTML = MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS,

    /* Convenient sets of flags corresponding to well-known Markdown dialects.
     *
     * Note we may only support subset of features of the referred dialect.
     * The constant just enables those extensions which bring us as close as
     * possible given what features we implement.
     *
     * ABI compatibility note: Meaning of these can change in time as new
     * extensions, bringing the dialect closer to the original, are implemented.
     */
    MD_DIALECT_COMMONMARK = 0,
    MD_DIALECT_GITHUB = (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS),
}

/* Parser structure: dialect flags plus the callbacks the parser invokes.
 */
struct MD_PARSER
{
nothrow:
@nogc:
    /* Reserved. Set to zero.
     */
    uint abi_version;

    /* Dialect options. Bitmask of MD_FLAG_xxxx values.
     */
    uint flags;

    /* Caller-provided rendering callbacks.
     *
     * For some block/span types, more detailed information is provided in a
     * type-specific structure pointed by the argument 'detail'.
     *
     * The last argument of all callbacks, 'userdata', is just propagated from
     * md_parse() and is available for any use by the application.
     *
     * Note any strings provided to the callbacks as their arguments or as
     * members of any detail structure are generally not zero-terminated.
     * Application has to take the respective size information into account.
     *
     * Callbacks may abort further parsing of the document by returning non-zero.
     */
    int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_block;
    int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_block;

    int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_span;
    int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_span;

    int function(MD_TEXTTYPE /*type*/, const(MD_CHAR)* /*text*/, MD_SIZE /*size*/, void* /*userdata*/) text;

    /* Debug callback. Optional (may be null).
     *
     * If provided and something goes wrong, this function gets called.
     * This is intended for debugging and problem diagnosis for developers;
     * it is not intended to provide any errors suitable for displaying to an
     * end user.
     */
    void function(const(char)* /*msg*/, void* /*userdata*/) debug_log;

    /* Reserved. Set to null.
     */
    void function() syntax;
}


/*****************************
 ***  Miscellaneous Stuff  ***
 *****************************/


/* Misc. macros. */

enum TRUE = 1;
enum FALSE = 0;


/************************
 ***  Internal Types  ***
 ************************/

/* These are omnipresent so let's save some typing. */
alias CHAR = MD_CHAR;
alias SZ = MD_SIZE;
alias OFF = MD_OFFSET;

/* During analysis of inline marks, we need to manage some "mark chains",
 * of (yet unresolved) openers. This structure holds start/end of the chain.
 * The chain internals are then realized through MD_MARK::prev and ::next.
 */
struct MD_MARKCHAIN
{
    int head;   /* Index of first mark in the chain, or -1 if empty. */
    int tail;   /* Index of last mark in the chain, or -1 if empty. */
}

/* First and last index, within MD_CTX::mark_chains, of the opener chains. */
enum OPENERS_CHAIN_FIRST = 2;
enum OPENERS_CHAIN_LAST = 11;

/* Context propagated through all the parsing.
*/
struct MD_CTX
{
nothrow:
@nogc:

    /* Immutable stuff (parameters of md_parse()). */
    const(CHAR)* text;
    SZ size;
    MD_PARSER parser;
    void* userdata;

    /* When this is true, it allows some optimizations. */
    int doc_ends_with_newline;

    /* Helper temporary growing buffer. */
    CHAR* buffer;
    uint alloc_buffer;

    /* Reference definitions. */
    MD_REF_DEF* ref_defs;
    int n_ref_defs;
    int alloc_ref_defs;
    void** ref_def_hashtable;
    int ref_def_hashtable_size;

    /* Stack of inline/span markers.
     * This is only used for parsing a single block contents but by storing it
     * here we may reuse the stack for subsequent blocks; i.e. we have fewer
     * (re)allocations. */
    MD_MARK* marks;
    int n_marks;
    int alloc_marks;

    ubyte[256] mark_char_map;
    /* For resolving of inline spans. */
    MD_MARKCHAIN[12] mark_chains;

    /* Named accessors for the fixed slots of mark_chains. Slots 2..11 are
     * the opener chains (see OPENERS_CHAIN_FIRST / OPENERS_CHAIN_LAST). */
    MD_MARKCHAIN* PTR_CHAIN() return                         { return &mark_chains[0]; }
    MD_MARKCHAIN* TABLECELLBOUNDARIES() return               { return &mark_chains[1]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_0() return { return &mark_chains[2]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_1() return { return &mark_chains[3]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_2() return { return &mark_chains[4]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_0() return { return &mark_chains[5]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_1() return { return &mark_chains[6]; }
    MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_2() return { return &mark_chains[7]; }
    MD_MARKCHAIN* UNDERSCORE_OPENERS() return                { return &mark_chains[8]; }
    MD_MARKCHAIN* TILDE_OPENERS() return                     { return &mark_chains[9]; }
    MD_MARKCHAIN* BRACKET_OPENERS() return                   { return &mark_chains[10]; }
    MD_MARKCHAIN* DOLLAR_OPENERS() return                    { return &mark_chains[11]; }

    int n_table_cell_boundaries;

    /* For resolving links. */
    int unresolved_link_head;
    int unresolved_link_tail;

    /* For resolving raw HTML. */
    OFF html_comment_horizon;
    OFF html_proc_instr_horizon;
    OFF html_decl_horizon;
    OFF html_cdata_horizon;

    /* For block analysis.
     * Notes:
     *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
     *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
     *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
     *      instead of MD_LINE(s).
     */
    void* block_bytes;
    MD_BLOCK* current_block;
    int n_block_bytes;
    int alloc_block_bytes;

    /* For container block analysis. */
    MD_CONTAINER* containers;
    int n_containers;
    int alloc_containers;

    /* Minimal indentation to call the block "indented code block". */
    uint code_indent_offset;

    /* Contextual info for line analysis. */
    SZ code_fence_length;   /* For checking closing fence length. */
    int html_block_type;    /* For checking closing raw HTML condition. */
    int last_line_has_list_loosening_effect;
    int last_list_item_starts_with_two_blank_lines;

    /* Forwards msg to the optional parser.debug_log callback, passing
     * userdata along; does nothing when no callback is installed. */
    void MD_LOG(const(char)* msg)
    {
        if(parser.debug_log != null)
            parser.debug_log(msg, userdata);
    }

    /* Character accessors.
     * CH() and STR() index the raw 'text' pointer directly, so no bounds
     * checking is performed; the caller must pass a valid offset. */
    CHAR CH(OFF off)
    {
        return text[off];
    }

    const(CHAR)* STR(OFF off)
    {
        return text + off;
    }

    /* Offset-based wrappers: each applies the same-named '_' character
     * predicate to the input character at offset 'off'. */
    bool ISANYOF(OFF off, const(CHAR)* palette) { return ISANYOF_(CH(off), palette); }
    bool ISANYOF2(OFF off, CHAR ch1, CHAR ch2)  { return ISANYOF2_(CH(off), ch1, ch2); }
    bool ISANYOF3(OFF off, CHAR ch1, CHAR ch2, CHAR ch3) { return ISANYOF3_(CH(off), ch1, ch2, ch3); }
    bool ISASCII(OFF off)                 { return ISASCII_(CH(off)); }
    bool ISBLANK(OFF off)                 { return ISBLANK_(CH(off)); }
    bool ISNEWLINE(OFF off)               { return ISNEWLINE_(CH(off)); }
    bool ISWHITESPACE(OFF off)            { return ISWHITESPACE_(CH(off)); }
    bool ISCNTRL(OFF off)                 { return ISCNTRL_(CH(off)); }
    bool ISPUNCT(OFF off)                 { return ISPUNCT_(CH(off)); }
    bool ISUPPER(OFF off)                 { return ISUPPER_(CH(off)); }
    bool ISLOWER(OFF off)                 { return ISLOWER_(CH(off)); }
    bool ISALPHA(OFF off)                 { return ISALPHA_(CH(off)); }
    bool ISDIGIT(OFF off)                 { return ISDIGIT_(CH(off)); }
    bool ISXDIGIT(OFF off)                { return ISXDIGIT_(CH(off)); }
    bool ISALNUM(OFF off)                 { return ISALNUM_(CH(off)); }
}

/* Kinds of lines distinguished by the line analysis. */
alias MD_LINETYPE = int;
enum : MD_LINETYPE
{
    MD_LINE_BLANK,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
}

/* Result of analysing one line. The line type and its extra data are kept
 * in 16 bits each; the setters below narrow their argument with a cast. */
struct MD_LINE_ANALYSIS
{
nothrow:
@nogc:
    short type_;
    ushort data_;

    MD_LINETYPE type() const
    {
        return type_;
    }

    void type(MD_LINETYPE value)
    {
        type_ = cast(short)value;
    }

    int data() const
    {
        return data_;
    }

    /* Values above 0xFFFF are truncated by the cast. */
    void data(uint value)
    {
        data_ = cast(ushort)value;
    }

    OFF beg;
    OFF end;
    uint indent;        /* Indentation level.
*/
}

/* A line, described by a begin/end pair of offsets. */
struct MD_LINE
{
    OFF beg;
    OFF end;
}

/* Like MD_LINE, with an additional indentation value. */
struct MD_VERBATIMLINE
{
    OFF beg;
    OFF end;
    OFF indent;
}


/*****************
 ***  Helpers  ***
 *****************/

pure
{
    /* Character classification.
     * Note we assume ASCII compatibility of code points < 128 here. */

    /* True when ch lies in the inclusive range [ch_min, ch_max]. */
    bool ISIN_(CHAR ch, CHAR ch_min, CHAR ch_max)
    {
        return (ch_min <= cast(uint)(ch) && cast(uint)(ch) <= ch_max);
    }

    /* True when ch occurs in the zero-terminated string 'palette'. */
    bool ISANYOF_(CHAR ch, const(CHAR)* palette)
    {
        return md_strchr(palette, ch) != null;
    }

    bool ISANYOF2_(CHAR ch, CHAR ch1, CHAR ch2)
    {
        return (ch == ch1) || (ch == ch2);
    }

    bool ISANYOF3_(CHAR ch, CHAR ch1, CHAR ch2, CHAR ch3)
    {
        return (ch == ch1) || (ch == ch2) || (ch == ch3);
    }

    bool ISASCII_(CHAR ch)
    {
        return (cast(uint)ch) <= 127;
    }

    /* Space or horizontal tab. */
    bool ISBLANK_(CHAR ch)
    {
        return ISANYOF2_(ch, ' ', '\t');
    }

    /* Carriage return or line feed. */
    bool ISNEWLINE_(CHAR ch)
    {
        return ISANYOF2_(ch, '\r', '\n');
    }

    /* Blank, vertical tab or form feed. Note that '\r' and '\n' are
     * not included; use ISNEWLINE_() for those. */
    bool ISWHITESPACE_(CHAR ch)
    {
        return ISBLANK_(ch) || ISANYOF2_(ch, '\v', '\f');
    }

    /* Code points 0..31 and 127. */
    bool ISCNTRL_(CHAR ch)
    {
        return (cast(uint)(ch) <= 31 || cast(uint)(ch) == 127);
    }

    /* ASCII punctuation: 33..47, 58..64, 91..96 and 123..126. */
    bool ISPUNCT_(CHAR ch)
    {
        return ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126);
    }

    bool ISUPPER_(CHAR ch)
    {
        return ISIN_(ch, 'A', 'Z');
    }

    bool ISLOWER_(CHAR ch)
    {
        return ISIN_(ch, 'a', 'z');
    }

    bool ISALPHA_(CHAR ch)
    {
        return ISUPPER_(ch) || ISLOWER_(ch);
    }

    bool ISDIGIT_(CHAR ch)
    {
        return ISIN_(ch, '0', '9');
    }

    bool ISXDIGIT_(CHAR ch)
    {
        return ISDIGIT_(ch) || ISIN_(ch, 'A', 'F') || ISIN_(ch, 'a', 'f');
    }

    bool ISALNUM_(CHAR ch)
    {
        return ISALPHA_(ch) || ISDIGIT_(ch);
    }
}

/* Returns a pointer to the first occurrence of ch in the zero-terminated
 * string str, or null when it does not occur. The terminator itself is
 * never matched, so searching for '\0' yields null. */
const(CHAR)* md_strchr(const(CHAR)* str, CHAR ch) pure
{
    OFF i;
    for(i = 0; str[i] != '\0'; i++) {
        if(ch == str[i])
            return (str + i);
    }
    return null;
}

/* Case insensitive check of string equality.
 * Compares the first n characters of s1 and s2, treating ASCII 'a'..'z' as
 * equal to 'A'..'Z'. Returns TRUE when they match, FALSE otherwise. */
int md_ascii_case_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
    OFF i;
    for(i = 0; i < n; i++) {
        CHAR ch1 = s1[i];
        CHAR ch2 = s2[i];

        /* Fold ASCII lower case to upper case before comparing. */
        if(ISLOWER_(ch1))
            ch1 += ('A'-'a');
        if(ISLOWER_(ch2))
            ch2 += ('A'-'a');
        if(ch1 != ch2)
            return FALSE;
    }
    return TRUE;
}

/* Exact comparison of the first n characters of s1 and s2.
 * Returns non-zero when they are equal. */
int md_ascii_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
    return memcmp(s1, s2, n * CHAR.sizeof) == 0;
}

/* Sends str[0 .. size] to the text callback, reporting every NUL character
 * separately as MD_TEXT_NULLCHAR and everything between NULs as 'type'.
 *
 * The NUL bytes themselves are never part of a 'type' chunk: after a NUL is
 * reported, the scan restarts at the character following it.
 *
 * Returns 0 on success, or the first non-zero value returned by the
 * callback (in which case processing stops immediately). */
int md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const(CHAR)* str, SZ size)
{
    int ret = 0;

    while(size > 0) {
        /* Length of the NUL-free run at the start of the remaining text. */
        OFF run = 0;
        while(run < size && str[run] != '\0')
            run++;

        if(run > 0) {
            ret = ctx.parser.text(type, str, run, ctx.userdata);
            if(ret != 0)
                return ret;

            str += run;
            size -= run;
        }

        if(size == 0)
            break;

        /* str[0] is a NUL here: report it on its own ... */
        ret = ctx.parser.text(MD_TEXT_NULLCHAR, "", 1, ctx.userdata);
        if(ret != 0)
            return ret;

        /* ... and step over it, so that the raw NUL is not handed to the
         * callback again as the first character of the next run. */
        str++;
        size--;
    }

    return 0;
}

/* Makes sure ctx.buffer can hold at least sz characters, growing it when
 * needed (by about 50% plus slack, rounded down to a multiple of 128).
 *
 * Returns 0 on success and -1 on failure; on failure ctx.buffer and
 * ctx.alloc_buffer are left unchanged. */
int MD_TEMP_BUFFER(MD_CTX* ctx, SZ sz)
{
    if(sz > ctx.alloc_buffer)
    {
        CHAR* new_buffer;
        SZ new_size;

        /* The size computation below is done in SZ (32-bit unsigned)
         * arithmetic; refuse requests for which it would wrap around and
         * produce a buffer smaller than requested. */
        if(sz > SZ.max - sz / 2 - 128)
        {
            ctx.MD_LOG("Temporary buffer size overflow.");
            return -1;
        }

        new_size = ((sz) + (sz) / 2 + 128) & ~127;
        new_buffer = cast(CHAR*) realloc_safe(ctx.buffer, new_size);
        if (new_buffer == null)
        {
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }
        ctx.buffer = new_buffer;
        ctx.alloc_buffer = new_size;
    }
    return 0;
}

/* The wrappers below invoke the corresponding parser callback. A non-zero
 * callback result is logged through MD_LOG and returned to the caller;
 * otherwise 0 is returned. */

int MD_ENTER_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
    int ret = ctx.parser.enter_block(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from enter_block() callback.");
        return ret;
    }
    return 0;
}

int MD_LEAVE_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
    int ret = ctx.parser.leave_block(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from leave_block() callback.");
        return ret;
    }
    return 0;
}

int MD_ENTER_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
    int ret = ctx.parser.enter_span(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from enter_span() callback.");
        return ret;
    }
    return 0;
}

int MD_LEAVE_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
    int ret = ctx.parser.leave_span(type, arg, ctx.userdata);
    if(ret != 0)
    {
        ctx.MD_LOG("Aborted from leave_span() callback.");
        return ret;
    }
    return 0;
}

/* Sends non-empty text to the text callback as is; empty text is skipped. */
int MD_TEXT(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
    if(size > 0)
    {
        int ret = ctx.parser.text((type), (str), (size), ctx.userdata);
        if (ret != 0)
        {
            ctx.MD_LOG("Aborted from text() callback.");
            return ret;
        }
    }
    return 0;
}

/* Like MD_TEXT(), but for text which may contain NUL characters: those are
 * reported separately through md_text_with_null_replacement(). */
int MD_TEXT_INSECURE(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
    if(size > 0)
    {
        int ret = md_text_with_null_replacement(ctx, type, str, size);
        if(ret != 0)
        {
            ctx.MD_LOG("Aborted from text() callback.");
            return ret;
        }
    }
    return 0;
}

/*************************
 ***  Unicode Support  ***
 *************************/

/* Up to three codepoints, together with the number of them in use. */
struct MD_UNICODE_FOLD_INFO
{
    uint[3] codepoints;
    int n_codepoints;
};



/* Binary search over sorted "map" of codepoints. Consecutive sequences
 * of codepoints may be encoded in the map by just using the
 * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000).
 *
 * Returns index of the found record in the map (in the case of ranges,
 * the minimal value is used); or -1 on failure.
*/
int md_unicode_bsearch__(uint codepoint, const(uint)* map, size_t map_size)
{
    enum uint RANGE_MIN_FLAG = 0x40000000;  /* Entry opens a range; its end is the next entry. */
    enum uint RANGE_MAX_FLAG = 0x80000000;  /* Entry closes a range; its start is the previous entry. */
    enum uint CODEPOINT_MASK = 0x00ffffff;  /* Strips the flag bits from an entry. */

    int lo = 0;
    int hi = cast(int) map_size - 1;

    while(lo <= hi)
    {
        const int mid = (lo + hi) / 2;
        const uint entry = map[mid];

        /* The record under the midpoint may be a two-entry range rather
         * than a single value; widen [first, last] to cover all of it. */
        int first = mid;
        int last = mid;
        if((entry & RANGE_MIN_FLAG) != 0)
            last = mid + 1;
        if((entry & RANGE_MAX_FLAG) != 0)
            first = mid - 1;

        if(codepoint < (map[first] & CODEPOINT_MASK))
        {
            hi = first - 1;
            continue;
        }
        if(codepoint > (map[last] & CODEPOINT_MASK))
        {
            lo = last + 1;
            continue;
        }

        /* The codepoint lies within this record. */
        return first;
    }

    return -1;
}

/* Tells whether the codepoint counts as whitespace: the ASCII range is
 * decided by ISWHITESPACE_(), everything above it by a lookup in the table
 * of Unicode "Zs" codepoints. */
bool md_is_unicode_whitespace__(uint codepoint)
{
    /* Unicode "Zs" category.
     * (generated by scripts/build_whitespace_map.py) */
    static immutable uint[] ZS_TABLE =
    [
        0x0020, 0x00a0, 0x1680, 0x2000| 0x40000000, 0x200a | 0x80000000, 0x202f, 0x205f, 0x3000
    ];

    const bool beyondAscii = (codepoint > 0x7f);
    if(beyondAscii)
        return (md_unicode_bsearch__(codepoint, ZS_TABLE.ptr, ZS_TABLE.length) >= 0);

    /* The ASCII ones are the most frequently used ones, also CommonMark
     * specification requests few more in this range. */
    return ISWHITESPACE_(cast(CHAR)codepoint);
}

bool md_is_unicode_punct__(uint codepoint)
{
    /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
904 * (generated by scripts/build_punct_map.py) */ 905 static immutable uint[] PUNCT_MAP = 906 [ 907 0x0021 | 0x40000000,0x0023 | 0x80000000, 0x0025 | 0x40000000,0x002a | 0x80000000, 0x002c | 0x40000000,0x002f | 0x80000000, 0x003a | 0x40000000,0x003b | 0x80000000, 0x003f | 0x40000000,0x0040 | 0x80000000, 908 0x005b | 0x40000000,0x005d | 0x80000000, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00a7, 0x00ab, 0x00b6 | 0x40000000,0x00b7 | 0x80000000, 909 0x00bb, 0x00bf, 0x037e, 0x0387, 0x055a | 0x40000000,0x055f | 0x80000000, 0x0589 | 0x40000000,0x058a | 0x80000000, 0x05be, 0x05c0, 910 0x05c3, 0x05c6, 0x05f3 | 0x40000000,0x05f4 | 0x80000000, 0x0609 | 0x40000000,0x060a | 0x80000000, 0x060c | 0x40000000,0x060d | 0x80000000, 0x061b, 0x061e | 0x40000000,0x061f | 0x80000000, 911 0x066a | 0x40000000,0x066d | 0x80000000, 0x06d4, 0x0700 | 0x40000000,0x070d | 0x80000000, 0x07f7 | 0x40000000,0x07f9 | 0x80000000, 0x0830 | 0x40000000,0x083e | 0x80000000, 0x085e, 912 0x0964 | 0x40000000,0x0965 | 0x80000000, 0x0970, 0x09fd, 0x0a76, 0x0af0, 0x0c77, 0x0c84, 0x0df4, 0x0e4f, 913 0x0e5a | 0x40000000,0x0e5b | 0x80000000, 0x0f04 | 0x40000000,0x0f12 | 0x80000000, 0x0f14, 0x0f3a | 0x40000000,0x0f3d | 0x80000000, 0x0f85, 0x0fd0 | 0x40000000,0x0fd4 | 0x80000000, 914 0x0fd9 | 0x40000000,0x0fda | 0x80000000, 0x104a | 0x40000000,0x104f | 0x80000000, 0x10fb, 0x1360 | 0x40000000,0x1368 | 0x80000000, 0x1400, 0x166e, 0x169b | 0x40000000,0x169c | 0x80000000, 915 0x16eb | 0x40000000,0x16ed | 0x80000000, 0x1735 | 0x40000000,0x1736 | 0x80000000, 0x17d4 | 0x40000000,0x17d6 | 0x80000000, 0x17d8 | 0x40000000,0x17da | 0x80000000, 0x1800 | 0x40000000,0x180a | 0x80000000, 916 0x1944 | 0x40000000,0x1945 | 0x80000000, 0x1a1e | 0x40000000,0x1a1f | 0x80000000, 0x1aa0 | 0x40000000,0x1aa6 | 0x80000000, 0x1aa8 | 0x40000000,0x1aad | 0x80000000, 0x1b5a | 0x40000000,0x1b60 | 0x80000000, 917 0x1bfc | 0x40000000,0x1bff | 0x80000000, 0x1c3b | 0x40000000,0x1c3f | 0x80000000, 0x1c7e | 0x40000000,0x1c7f | 0x80000000, 0x1cc0 | 
0x40000000,0x1cc7 | 0x80000000, 0x1cd3, 0x2010 | 0x40000000,0x2027 | 0x80000000, 918 0x2030 | 0x40000000,0x2043 | 0x80000000, 0x2045 | 0x40000000,0x2051 | 0x80000000, 0x2053 | 0x40000000,0x205e | 0x80000000, 0x207d | 0x40000000,0x207e | 0x80000000, 0x208d | 0x40000000,0x208e | 0x80000000, 919 0x2308 | 0x40000000,0x230b | 0x80000000, 0x2329 | 0x40000000,0x232a | 0x80000000, 0x2768 | 0x40000000,0x2775 | 0x80000000, 0x27c5 | 0x40000000,0x27c6 | 0x80000000, 0x27e6 | 0x40000000,0x27ef | 0x80000000, 920 0x2983 | 0x40000000,0x2998 | 0x80000000, 0x29d8 | 0x40000000,0x29db | 0x80000000, 0x29fc | 0x40000000,0x29fd | 0x80000000, 0x2cf9 | 0x40000000,0x2cfc | 0x80000000, 0x2cfe | 0x40000000,0x2cff | 0x80000000, 0x2d70, 921 0x2e00 | 0x40000000,0x2e2e | 0x80000000, 0x2e30 | 0x40000000,0x2e4f | 0x80000000, 0x3001 | 0x40000000,0x3003 | 0x80000000, 0x3008 | 0x40000000,0x3011 | 0x80000000, 0x3014 | 0x40000000,0x301f | 0x80000000, 0x3030, 922 0x303d, 0x30a0, 0x30fb, 0xa4fe | 0x40000000,0xa4ff | 0x80000000, 0xa60d | 0x40000000,0xa60f | 0x80000000, 0xa673, 0xa67e, 923 0xa6f2 | 0x40000000,0xa6f7 | 0x80000000, 0xa874 | 0x40000000,0xa877 | 0x80000000, 0xa8ce | 0x40000000,0xa8cf | 0x80000000, 0xa8f8 | 0x40000000,0xa8fa | 0x80000000, 0xa8fc, 0xa92e | 0x40000000,0xa92f | 0x80000000, 924 0xa95f, 0xa9c1 | 0x40000000,0xa9cd | 0x80000000, 0xa9de | 0x40000000,0xa9df | 0x80000000, 0xaa5c | 0x40000000,0xaa5f | 0x80000000, 0xaade | 0x40000000,0xaadf | 0x80000000, 0xaaf0 | 0x40000000,0xaaf1 | 0x80000000, 925 0xabeb, 0xfd3e | 0x40000000,0xfd3f | 0x80000000, 0xfe10 | 0x40000000,0xfe19 | 0x80000000, 0xfe30 | 0x40000000,0xfe52 | 0x80000000, 0xfe54 | 0x40000000,0xfe61 | 0x80000000, 0xfe63, 0xfe68, 926 0xfe6a | 0x40000000,0xfe6b | 0x80000000, 0xff01 | 0x40000000,0xff03 | 0x80000000, 0xff05 | 0x40000000,0xff0a | 0x80000000, 0xff0c | 0x40000000,0xff0f | 0x80000000, 0xff1a | 0x40000000,0xff1b | 0x80000000, 927 0xff1f | 0x40000000,0xff20 | 0x80000000, 0xff3b | 0x40000000,0xff3d | 0x80000000, 0xff3f, 0xff5b, 
0xff5d, 0xff5f | 0x40000000,0xff65 | 0x80000000, 0x10100 | 0x40000000,0x10102 | 0x80000000, 928 0x1039f, 0x103d0, 0x1056f, 0x10857, 0x1091f, 0x1093f, 0x10a50 | 0x40000000,0x10a58 | 0x80000000, 0x10a7f, 929 0x10af0 | 0x40000000,0x10af6 | 0x80000000, 0x10b39 | 0x40000000,0x10b3f | 0x80000000, 0x10b99 | 0x40000000,0x10b9c | 0x80000000, 0x10f55 | 0x40000000,0x10f59 | 0x80000000, 0x11047 | 0x40000000,0x1104d | 0x80000000, 930 0x110bb | 0x40000000,0x110bc | 0x80000000, 0x110be | 0x40000000,0x110c1 | 0x80000000, 0x11140 | 0x40000000,0x11143 | 0x80000000, 0x11174 | 0x40000000,0x11175 | 0x80000000, 0x111c5 | 0x40000000,0x111c8 | 0x80000000, 931 0x111cd, 0x111db, 0x111dd | 0x40000000,0x111df | 0x80000000, 0x11238 | 0x40000000,0x1123d | 0x80000000, 0x112a9, 0x1144b | 0x40000000,0x1144f | 0x80000000, 932 0x1145b, 0x1145d, 0x114c6, 0x115c1 | 0x40000000,0x115d7 | 0x80000000, 0x11641 | 0x40000000,0x11643 | 0x80000000, 0x11660 | 0x40000000,0x1166c | 0x80000000, 933 0x1173c | 0x40000000,0x1173e | 0x80000000, 0x1183b, 0x119e2, 0x11a3f | 0x40000000,0x11a46 | 0x80000000, 0x11a9a | 0x40000000,0x11a9c | 0x80000000, 0x11a9e | 0x40000000,0x11aa2 | 0x80000000, 934 0x11c41 | 0x40000000,0x11c45 | 0x80000000, 0x11c70 | 0x40000000,0x11c71 | 0x80000000, 0x11ef7 | 0x40000000,0x11ef8 | 0x80000000, 0x11fff, 0x12470 | 0x40000000,0x12474 | 0x80000000, 935 0x16a6e | 0x40000000,0x16a6f | 0x80000000, 0x16af5, 0x16b37 | 0x40000000,0x16b3b | 0x80000000, 0x16b44, 0x16e97 | 0x40000000,0x16e9a | 0x80000000, 0x16fe2, 936 0x1bc9f, 0x1da87 | 0x40000000,0x1da8b | 0x80000000, 0x1e95e | 0x40000000,0x1e95f | 0x80000000 937 ]; 938 939 /* The ASCII ones are the most frequently used ones, also CommonMark 940 * specification requests few more in this range. 
*/
    if(codepoint <= 0x7f)
        return ISPUNCT_(cast(CHAR)codepoint);

    return (md_unicode_bsearch__(codepoint, PUNCT_MAP.ptr, PUNCT_MAP.length) >= 0);
}

/* Compute the case folding of a single codepoint.
 *
 * On return, info.codepoints[0 .. info.n_codepoints] holds the folded
 * sequence (one to three codepoints). A codepoint which is not found in any
 * of the tables below folds to itself.
 */
void md_get_unicode_fold_info(uint codepoint, MD_UNICODE_FOLD_INFO* info)
{
    /* Case folding tables.
     *
     * FOLD_MAP_<n> lists the codepoints whose folding is <n> codepoints long;
     * FOLD_MAP_<n>_DATA stores <n> folded codepoints per FOLD_MAP_<n> entry,
     * in the same order.
     *
     * Entries OR-ed with 0x40000000 / 0x80000000 come in pairs and appear to
     * mark the first / last codepoint of a contiguous range (see the range
     * handling at the end of this function). The flags are interpreted by
     * md_unicode_bsearch__(), which is defined elsewhere -- confirm there. */
    static immutable uint[] FOLD_MAP_1 =
    [
        0x0041 | 0x40000000, 0x005a | 0x80000000, 0x00b5, 0x00c0 | 0x40000000, 0x00d6 | 0x80000000, 0x00d8 | 0x40000000, 0x00de | 0x80000000, 0x0100 | 0x40000000, 0x012e | 0x80000000, 0x0132 | 0x40000000, 0x0136 | 0x80000000,
        0x0139 | 0x40000000, 0x0147 | 0x80000000, 0x014a | 0x40000000, 0x0176 | 0x80000000, 0x0178, 0x0179 | 0x40000000, 0x017d | 0x80000000, 0x017f, 0x0181, 0x0182,
        0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x018f, 0x0190, 0x0191, 0x0193,
        0x0194, 0x0196, 0x0197, 0x0198, 0x019c, 0x019d, 0x019f, 0x01a0 | 0x40000000, 0x01a4 | 0x80000000, 0x01a6,
        0x01a7, 0x01a9, 0x01ac, 0x01ae, 0x01af, 0x01b1, 0x01b3, 0x01b7, 0x01b8,
        0x01bc, 0x01c4, 0x01c5, 0x01c7, 0x01c8, 0x01ca, 0x01cb | 0x40000000, 0x01db | 0x80000000, 0x01de | 0x40000000, 0x01ee | 0x80000000,
        0x01f1, 0x01f2, 0x01f6, 0x01f7, 0x01f8 | 0x40000000, 0x021e | 0x80000000, 0x0220, 0x0222 | 0x40000000, 0x0232 | 0x80000000, 0x023a,
        0x023b, 0x023d, 0x023e, 0x0241, 0x0243, 0x0244, 0x0245, 0x0246 | 0x40000000, 0x024e | 0x80000000, 0x0345,
        0x0370, 0x0376, 0x037f, 0x0386, 0x0388 | 0x40000000, 0x038a | 0x80000000, 0x038c, 0x038e, 0x0391 | 0x40000000, 0x03a1 | 0x80000000,
        0x03a3 | 0x40000000, 0x03ab | 0x80000000, 0x03c2, 0x03cf, 0x03d0, 0x03d1, 0x03d5, 0x03d6, 0x03d8 | 0x40000000, 0x03ee | 0x80000000,
        0x03f0, 0x03f1, 0x03f4, 0x03f5, 0x03f7, 0x03f9, 0x03fa, 0x03fd | 0x40000000, 0x03ff | 0x80000000,
        0x0400 | 0x40000000, 0x040f | 0x80000000, 0x0410 | 0x40000000, 0x042f | 0x80000000, 0x0460 | 0x40000000, 0x0480 | 0x80000000, 0x048a | 0x40000000, 0x04be | 0x80000000, 0x04c0, 0x04c1 | 0x40000000, 0x04cd | 0x80000000,
        0x04d0 | 0x40000000, 0x052e | 0x80000000, 0x0531 | 0x40000000, 0x0556 | 0x80000000, 0x10a0 | 0x40000000, 0x10c5 | 0x80000000, 0x10c7, 0x10cd, 0x13f8 | 0x40000000, 0x13fd | 0x80000000, 0x1c80,
        0x1c81, 0x1c82, 0x1c83, 0x1c85, 0x1c86, 0x1c87, 0x1c88, 0x1c90 | 0x40000000, 0x1cba | 0x80000000,
        0x1cbd | 0x40000000, 0x1cbf | 0x80000000, 0x1e00 | 0x40000000, 0x1e94 | 0x80000000, 0x1e9b, 0x1ea0 | 0x40000000, 0x1efe | 0x80000000, 0x1f08 | 0x40000000, 0x1f0f | 0x80000000, 0x1f18 | 0x40000000, 0x1f1d | 0x80000000,
        0x1f28 | 0x40000000, 0x1f2f | 0x80000000, 0x1f38 | 0x40000000, 0x1f3f | 0x80000000, 0x1f48 | 0x40000000, 0x1f4d | 0x80000000, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f,
        0x1f68 | 0x40000000, 0x1f6f | 0x80000000, 0x1fb8, 0x1fba, 0x1fbe, 0x1fc8 | 0x40000000, 0x1fcb | 0x80000000, 0x1fd8, 0x1fda, 0x1fe8,
        0x1fea, 0x1fec, 0x1ff8, 0x1ffa, 0x2126, 0x212a, 0x212b, 0x2132, 0x2160 | 0x40000000, 0x216f | 0x80000000,
        0x2183, 0x24b6 | 0x40000000, 0x24cf | 0x80000000, 0x2c00 | 0x40000000, 0x2c2e | 0x80000000, 0x2c60, 0x2c62, 0x2c63, 0x2c64,
        0x2c67 | 0x40000000, 0x2c6b | 0x80000000, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, 0x2c72, 0x2c75, 0x2c7e,
        0x2c80 | 0x40000000, 0x2ce2 | 0x80000000, 0x2ceb, 0x2cf2, 0xa640 | 0x40000000, 0xa66c | 0x80000000, 0xa680 | 0x40000000, 0xa69a | 0x80000000, 0xa722 | 0x40000000, 0xa72e | 0x80000000,
        0xa732 | 0x40000000, 0xa76e | 0x80000000, 0xa779, 0xa77d, 0xa77e | 0x40000000, 0xa786 | 0x80000000, 0xa78b, 0xa78d, 0xa790,
        0xa796 | 0x40000000, 0xa7a8 | 0x80000000, 0xa7aa, 0xa7ab, 0xa7ac, 0xa7ad, 0xa7ae, 0xa7b0, 0xa7b1, 0xa7b2,
        0xa7b3, 0xa7b4 | 0x40000000, 0xa7be | 0x80000000, 0xa7c2, 0xa7c4, 0xa7c5, 0xa7c6, 0xab70 | 0x40000000, 0xabbf | 0x80000000,
        0xff21 | 0x40000000, 0xff3a | 0x80000000, 0x10400 | 0x40000000, 0x10427 | 0x80000000, 0x104b0 | 0x40000000, 0x104d3 | 0x80000000, 0x10c80 | 0x40000000, 0x10cb2 | 0x80000000, 0x118a0 | 0x40000000, 0x118bf | 0x80000000,
        0x16e40 | 0x40000000, 0x16e5f | 0x80000000, 0x1e900 | 0x40000000, 0x1e921 | 0x80000000
    ];

    static immutable uint[] FOLD_MAP_1_DATA =
    [
        0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
        0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0254, 0x0188, 0x0256, 0x018c, 0x01dd,
        0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, 0x01a1, 0x01a5,
        0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x01b4, 0x0292, 0x01b9, 0x01bd, 0x01c6, 0x01c6,
        0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, 0x0195, 0x01bf, 0x01f9, 0x021f,
        0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, 0x0180, 0x0289, 0x028c, 0x0247, 0x024f,
        0x03b9, 0x0371, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, 0x03cc, 0x03cd, 0x03b1, 0x03c1, 0x03c3, 0x03cb,
        0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8,
        0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf,
        0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432,
        0x0434, 0x043e, 0x0441, 0x0442, 0x044a, 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95,
        0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45,
        0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f67, 0x1fb0, 0x1f70, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1f76,
        0x1fe0, 0x1f7a, 0x1fe5, 0x1f78, 0x1f7c, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, 0x217f, 0x2184, 0x24d0,
        0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, 0x0271, 0x0250, 0x0252,
        0x2c73, 0x2c76, 0x023f, 0x2c81, 0x2ce3, 0x2cec, 0x2cf3, 0xa641, 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f,
        0xa733, 0xa76f, 0xa77a, 0x1d79, 0xa77f, 0xa787, 0xa78c, 0x0265, 0xa791, 0xa797, 0xa7a9, 0x0266, 0x025c,
        0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e,
        0x13a0, 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df,
        0x16e60, 0x16e7f, 0x1e922, 0x1e943
    ];

    static immutable uint[] FOLD_MAP_2 =
    [
        0x00df, 0x0130, 0x0149, 0x01f0, 0x0587, 0x1e96, 0x1e97, 0x1e98, 0x1e99,
        0x1e9a, 0x1e9e, 0x1f50, 0x1f80 | 0x40000000, 0x1f87 | 0x80000000, 0x1f88 | 0x40000000, 0x1f8f | 0x80000000, 0x1f90 | 0x40000000, 0x1f97 | 0x80000000, 0x1f98 | 0x40000000, 0x1f9f | 0x80000000,
        0x1fa0 | 0x40000000, 0x1fa7 | 0x80000000, 0x1fa8 | 0x40000000, 0x1faf | 0x80000000, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fbc, 0x1fc2,
        0x1fc3, 0x1fc4, 0x1fc6, 0x1fcc, 0x1fd6, 0x1fe4, 0x1fe6, 0x1ff2, 0x1ff3,
        0x1ff4, 0x1ff6, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb05, 0xfb06, 0xfb13,
        0xfb14, 0xfb15, 0xfb16, 0xfb17
    ];

    static immutable uint[] FOLD_MAP_2_DATA =
    [
        0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
        0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
        0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
        0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
        0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
        0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
        0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
        0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
    ];

    static immutable uint[] FOLD_MAP_3 =
    [
        0x0390, 0x03b0, 0x1f52, 0x1f54, 0x1f56, 0x1fb7, 0x1fc7, 0x1fd2, 0x1fd3,
        0x1fd7, 0x1fe2, 0x1fe3, 0x1fe7, 0x1ff7, 0xfb03, 0xfb04
    ];

    static immutable uint[] FOLD_MAP_3_DATA =
    [
        0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
        0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
        0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
        0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
    ];

    /* One lookup table together with its payload. */
    static struct FOLD_MAP
    {
        const(uint)* map;       /* Searched codepoints (FOLD_MAP_<n>). */
        const(uint)* data;      /* Folded codepoints (FOLD_MAP_<n>_DATA). */
        size_t map_size;        /* Number of entries in 'map'. */
        int n_codepoints;       /* Folded codepoints per 'map' entry. */
    }

    /*static immutable*/ FOLD_MAP[3] FOLD_MAP_LIST =
    [
        FOLD_MAP(FOLD_MAP_1.ptr, FOLD_MAP_1_DATA.ptr, FOLD_MAP_1.length, 1),
        FOLD_MAP(FOLD_MAP_2.ptr, FOLD_MAP_2_DATA.ptr, FOLD_MAP_2.length, 2),
        FOLD_MAP(FOLD_MAP_3.ptr, FOLD_MAP_3_DATA.ptr, FOLD_MAP_3.length, 3),
    ];

    /* ASCII never needs the tables: only upper case letters change. */
    if(codepoint <= 0x7f) {
        info.codepoints[0] = codepoint;
        if(ISUPPER_(cast(CHAR)codepoint))
            info.codepoints[0] += 'a' - 'A';
        info.n_codepoints = 1;
        return;
    }

    /* Probe the tables in order; the first one which knows the codepoint wins. */
    foreach(ref fold; FOLD_MAP_LIST) {
        int index = md_unicode_bsearch__(codepoint, fold.map, fold.map_size);
        if(index < 0)
            continue;

        const uint* folded = fold.data + (index * fold.n_codepoints);

        memcpy(info.codepoints.ptr, folded, uint.sizeof * fold.n_codepoints);
        info.n_codepoints = fold.n_codepoints;

        if(fold.map[index] != codepoint) {
            /* The hit is an entry carrying range flags, i.e. it stands for
             * a whole range of codepoints and info.codepoints[0] has to be
             * adjusted for the position of 'codepoint' within it. */
            const uint range_entry = fold.map[index] & 0x00ffffff;

            if(range_entry + 1 == folded[0]) {
                /* Alternating range: each codepoint folds to its direct
                 * successor, the successor folds to itself. */
                info.codepoints[0] = codepoint + ((codepoint & 0x1) == (fold.map[index] & 0x1) ? 1 : 0);
            } else {
                /* Range-to-range mapping: keep the distance from the entry. */
                info.codepoints[0] += (codepoint - range_entry);
            }
        }

        return;
    }

    /* Not listed anywhere: the codepoint folds to itself. */
    info.codepoints[0] = codepoint;
    info.n_codepoints = 1;
}


/* True for a byte which forms a complete one-byte (ASCII) sequence. */
bool IS_UTF8_LEAD1(CHAR ch)
{
    const ubyte b = cast(ubyte) ch;
    return b <= 0x7f;
}

/* True for a byte of the form 110xxxxx (lead of a two-byte sequence). */
bool IS_UTF8_LEAD2(CHAR ch)
{
    const ubyte b = cast(ubyte) ch;
    return (b & 0xe0) == 0xc0;
}

/* True for a byte of the form 1110xxxx (lead of a three-byte sequence). */
bool IS_UTF8_LEAD3(CHAR ch)
{
    const ubyte b = cast(ubyte) ch;
    return (b & 0xf0) == 0xe0;
}

/* True for a byte of the form 11110xxx (lead of a four-byte sequence). */
bool IS_UTF8_LEAD4(CHAR ch)
{
    const ubyte b = cast(ubyte) ch;
    return (b & 0xf8) == 0xf0;
}

/* True for a byte of the form 10xxxxxx (continuation byte). */
bool IS_UTF8_TAIL(CHAR ch)
{
    const ubyte b = cast(ubyte) ch;
    return (b & 0xc0) == 0x80;
}

/* Decode the UTF-8 sequence starting at str[0].
 *
 * str_size is the number of bytes available; continuation bytes are only
 * inspected when they lie within that limit, but str[0] itself is read
 * unconditionally. If p_size is not null, it receives the number of bytes
 * consumed.
 *
 * When str[0] does not start a complete 2-, 3- or 4-byte sequence, the byte
 * itself is returned as the codepoint and the consumed size is 1. Only the
 * bit patterns of the bytes are checked.
 */
uint md_decode_utf8__(const(CHAR)* str, SZ str_size, SZ* p_size)
{
    const CHAR lead = str[0];
    SZ size = 1;
    uint codepoint = cast(uint) lead;

    if(IS_UTF8_LEAD2(lead)) {
        if(1 < str_size && IS_UTF8_TAIL(str[1])) {
            size = 2;
            codepoint = ((cast(uint)str[0] & 0x1f) << 6) |
                        ((cast(uint)str[1] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD3(lead)) {
        if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
            size = 3;
            codepoint = ((cast(uint)str[0] & 0x0f) << 12) |
                        ((cast(uint)str[1] & 0x3f) << 6) |
                        ((cast(uint)str[2] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD4(lead)) {
        if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
            size = 4;
            codepoint = ((cast(uint)str[0] & 0x07) << 18) |
                        ((cast(uint)str[1] & 0x3f) << 12) |
                        ((cast(uint)str[2] & 0x3f) << 6) |
                        ((cast(uint)str[3] & 0x3f) << 0);
        }
    }

    if(p_size != null)
        *p_size = size;
    return codepoint;
}

/* Decode the UTF-8 sequence which ends right before offset 'off' of the
 * document text.
 *
 * The byte at off-1 is read unconditionally; longer sequences are considered
 * only if enough bytes precede 'off'. If the preceding bytes do not form
 * a complete multi-byte sequence, the single byte at off-1 is returned.
 */
uint md_decode_utf8_before__(MD_CTX* ctx, OFF off)
{
    const last = ctx.CH(off-1);

    if(IS_UTF8_LEAD1(last))
        return cast(uint) last;

    if(off > 1 && IS_UTF8_LEAD2(ctx.CH(off-2)) && IS_UTF8_TAIL(last))
        return ((cast(uint)ctx.CH(off-2) & 0x1f) << 6) |
               ((cast(uint)last & 0x3f) << 0);

    if(off > 2 && IS_UTF8_LEAD3(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)) && IS_UTF8_TAIL(last))
        return ((cast(uint)ctx.CH(off-3) & 0x0f) << 12) |
               ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
               ((cast(uint)last & 0x3f) << 0);

    if(off > 3 && IS_UTF8_LEAD4(ctx.CH(off-4)) && IS_UTF8_TAIL(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)) && IS_UTF8_TAIL(last))
        return ((cast(uint)ctx.CH(off-4) & 0x07) << 18) |
               ((cast(uint)ctx.CH(off-3) & 0x3f) << 12) |
               ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
               ((cast(uint)last & 0x3f) << 0);

    return cast(uint) last;
}

/* Whitespace test for an already decoded codepoint. */
bool ISUNICODEWHITESPACE_(uint codepoint)
{
    return md_is_unicode_whitespace__(codepoint);
}

/* Whitespace test for the character starting at offset 'off'. */
bool ISUNICODEWHITESPACE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_whitespace__(codepoint);
}

/* Whitespace test for the character ending right before offset 'off'. */
bool ISUNICODEWHITESPACEBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_whitespace__(codepoint);
}

/* Punctuation test for the character starting at offset 'off'. */
bool ISUNICODEPUNCT(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_punct__(codepoint);
}

/* Punctuation test for the character ending right before offset 'off'. */
bool ISUNICODEPUNCTBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_punct__(codepoint);
}

/* Decode the character at str[off] of a string which is str_size bytes long.
 * If p_char_size is not null, it receives the byte length of the character. */
uint md_decode_unicode(const(CHAR)* str, OFF off, SZ str_size, SZ* p_char_size)
{
    const(CHAR)* cursor = str + off;
    const SZ remaining = str_size - off;

    return md_decode_utf8__(cursor, remaining, p_char_size);
}

/*************************************
 ***  Helper string manipulations  ***
 *************************************/

/* Fill
buffer with a copy of the text between 'beg' and 'end', replacing every
 * line break between consecutive lines with the given replacement character.
 *
 * The number of characters written is stored in *p_size.
 *
 * NOTE: Caller is responsible to make sure the buffer is large enough.
 * (Each gap between two lines is replaced with a single character, so the
 * output is not longer than the input as long as consecutive lines are
 * separated by at least one character; (end - beg) is then a good size
 * for the caller to allocate.)
 *
 * n_lines is not consulted here: the walk over lines[] stops only when
 * 'end' is reached.
 */
void md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines,
                    CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
{
    CHAR* out_ptr = buffer;
    OFF pos = beg;

    for(int idx = 0; ; idx++) {
        /* Copy the part of the current line which lies before 'end'. */
        OFF stop = lines[idx].end;
        if(end < stop)
            stop = end;

        for(; pos < stop; pos++, out_ptr++)
            *out_ptr = ctx.CH(pos);

        if(pos >= end)
            break;

        /* The line break itself collapses into the replacement character
         * and copying resumes where the next line begins. */
        *out_ptr = line_break_replacement_char;
        out_ptr++;
        pos = lines[idx + 1].beg;
    }

    *p_size = cast(uint)(out_ptr - buffer);
}

/* Wrapper of md_merge_lines() which allocates new buffer for the output string.
*/
/* Returns 0 after storing the malloc()-ed buffer in *p_str and the merged
 * length in *p_size, or -1 when malloc() returns null. The buffer is sized
 * (end - beg) and is not released by this function.
 */
int md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines,
                    CHAR line_break_replacement_char, const(CHAR)** p_str, SZ* p_size)
{
    CHAR* buffer = cast(CHAR*) malloc(CHAR.sizeof * (end - beg));

    if(buffer == null) {
        ctx.MD_LOG("malloc() failed.");
        return -1;
    }

    md_merge_lines(ctx, beg, end, lines, n_lines,
                line_break_replacement_char, buffer, p_size);

    *p_str = buffer;
    return 0;
}

/* Starting at 'off', skip every character of label[0 .. size] which is
 * Unicode whitespace or a new line. Returns the offset of the first
 * character which is neither, or an offset >= size if there is none.
 */
OFF md_skip_unicode_whitespace(const(CHAR)* label, OFF off, SZ size)
{
    while(off < size) {
        SZ char_size;
        const uint codepoint = md_decode_unicode(label, off, size, &char_size);
        const bool skippable = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);

        if(!skippable)
            break;
        off += char_size;
    }

    return off;
}


/******************************
 ***  Recognizing raw HTML  ***
 ******************************/

/* md_is_html_tag() may be called when processing inlines (inline raw HTML)
 * or when breaking document to blocks (checking for start of HTML block type 7).
 *
 * When breaking document to blocks, we do not yet know line boundaries, but
 * in that case the whole tag has to live on a single line. We distinguish this
 * by n_lines == 0.
 *
 * 'beg' must be the offset of the opening '<'. On success, TRUE is returned
 * and *p_end is set to the offset right after the closing '>'. The closing
 * '>' has to lie before max_end.
 */
int md_is_html_tag(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    int attr_state;
    OFF off = beg;
    OFF line_end = (n_lines > 0) ? lines[0].end : ctx.size;
    int line_index = 0;

    assert(ctx.CH(beg) == '<');

    if(off + 1 >= line_end)
        return FALSE;
    off++;

    /* For parsing attributes, we need a little state automaton below.
     * State -1: no attributes are allowed.
     * State 0: attribute could follow after some whitespace.
     * State 1: after a whitespace (attribute name may follow).
     * State 2: after attribute name ('=' MAY follow).
     * State 3: after '=' (value specification MUST follow).
     * State 41: in middle of unquoted attribute value.
     * State 42: in middle of single-quoted attribute value.
     * State 43: in middle of double-quoted attribute value.
     */
    attr_state = 0;

    if(ctx.CH(off) == '/') {
        /* Closer tag "</ ... >". No attributes may be present. */
        attr_state = -1;
        off++;
    }

    /* Tag name: a letter followed by letters, digits or '-'. */
    if(off >= line_end || !ctx.ISALPHA(off))
        return FALSE;
    off++;
    while(off < line_end && (ctx.ISALNUM(off) || ctx.CH(off) == '-'))
        off++;

    /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
     * and final '>'. */
    while(1) {
        while(off < line_end && !ctx.ISNEWLINE(off)) {
            if(attr_state > 40) {
                /* Inside an attribute value: look only for its end. */
                if(attr_state == 41 && (ctx.ISBLANK(off) || ctx.ISANYOF(off, "\"'=<>`"))) {
                    attr_state = 0;
                    off--;  /* Put the char back for re-inspection in the new state. */
                } else if(attr_state == 42 && ctx.CH(off) == '\'') {
                    attr_state = 0;
                } else if(attr_state == 43 && ctx.CH(off) == '"') {
                    attr_state = 0;
                }
                off++;
            } else if(ctx.ISWHITESPACE(off)) {
                if(attr_state == 0)
                    attr_state = 1;
                off++;
            } else if(attr_state <= 2 && ctx.CH(off) == '>') {
                /* End. */
                goto done;
            } else if(attr_state <= 2 && ctx.CH(off) == '/' && off+1 < line_end && ctx.CH(off+1) == '>') {
                /* End with digraph '/>' */
                off++;
                goto done;
            } else if((attr_state == 1 || attr_state == 2) && (ctx.ISALPHA(off) || ctx.CH(off) == '_' || ctx.CH(off) == ':')) {
                off++;
                /* Attribute name */
                while(off < line_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, "_.:-")))
                    off++;
                attr_state = 2;
            } else if(attr_state == 2 && ctx.CH(off) == '=') {
                /* Attribute assignment sign */
                off++;
                attr_state = 3;
            } else if(attr_state == 3) {
                /* Expecting start of attribute value. */
                if(ctx.CH(off) == '"')
                    attr_state = 43;
                else if(ctx.CH(off) == '\'')
                    attr_state = 42;
                else if(!ctx.ISANYOF(off, "\"'=<>`") && !ctx.ISNEWLINE(off))
                    attr_state = 41;
                else
                    return FALSE;
                off++;
            } else {
                /* Anything unexpected. */
                return FALSE;
            }
        }

        /* We have to be on a single line. See definition of start condition
         * of HTML block, type 7. */
        if(n_lines == 0)
            return FALSE;

        line_index++;
        if(line_index >= n_lines)
            return FALSE;

        off = lines[line_index].beg;
        line_end = lines[line_index].end;

        /* The line break counts as whitespace and ends an unquoted value. */
        if(attr_state == 0 || attr_state == 41)
            attr_state = 1;

        if(off >= max_end)
            return FALSE;
    }

done:
    if(off >= max_end)
        return FALSE;

    *p_end = off+1;
    return TRUE;
}

/* Search for the 'len' bytes long string 'str', starting at 'beg' and going
 * through the given lines. A match has to fit into a single line and has to
 * end at or before max_end.
 *
 * On success, TRUE is returned and *p_end is set right after the match.
 * On failure, the offset where scanning stopped is stored in *p_scan_horizon;
 * a later call starting before that horizon returns FALSE without scanning
 * when the horizon already reaches max_end - len.
 */
static int
md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
                        const MD_LINE* lines, int n_lines,
                        OFF beg, OFF max_end, OFF* p_end,
                        OFF* p_scan_horizon)
{
    OFF off = beg;
    int line_index = 0;

    if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) {
        /* We have already scanned the range up to the max_end so we know
         * there is nothing to see. */
        return FALSE;
    }

    while(TRUE) {
        while(off + len <= lines[line_index].end && off + len <= max_end) {
            if(md_ascii_eq(ctx.STR(off), str, len)) {
                /* Success. */
                *p_end = off + len;
                return TRUE;
            }
            off++;
        }

        line_index++;
        if(off >= max_end || line_index >= n_lines) {
            /* Failure. */
            *p_scan_horizon = off;
            return FALSE;
        }

        off = lines[line_index].beg;
    }
}

/* Check whether an HTML comment starts at 'beg' (the offset of '<').
 * On success, TRUE is returned and *p_end is set right after the closing
 * "-->".
 */
static int
md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    assert(ctx.CH(beg) == '<');

    if(off + 4 >= lines[0].end)
        return FALSE;
    if(ctx.CH(off+1) != '!' || ctx.CH(off+2) != '-' || ctx.CH(off+3) != '-')
        return FALSE;
    off += 4;

    /* ">" and "->" must not follow the opening. */
    if(off < lines[0].end && ctx.CH(off) == '>')
        return FALSE;
    if(off+1 < lines[0].end && ctx.CH(off) == '-' && ctx.CH(off+1) == '>')
        return FALSE;

    /* HTML comment must not contain "--", so we scan just for "--" instead
     * of "-->" and verify manually that '>' follows. */
    if(md_scan_for_html_closer(ctx, "--", 2,
                lines, n_lines, off, max_end, p_end, &ctx.html_comment_horizon))
    {
        if(*p_end < max_end && ctx.CH(*p_end) == '>') {
            *p_end = *p_end + 1;
            return TRUE;
        }
    }

    return FALSE;
}

/* Check whether a processing instruction ("<?" ... "?>") starts at 'beg'.
 * On success, TRUE is returned and *p_end is set right after "?>".
 */
static int
md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    if(off + 2 >= lines[0].end || ctx.CH(off+1) != '?')
        return FALSE;
    off += 2;

    return md_scan_for_html_closer(ctx, "?>", 2,
                lines, n_lines, off, max_end, p_end, &ctx.html_proc_instr_horizon);
}

/* Check whether a declaration ("<!" + letters ... ">") starts at 'beg'.
 * On success, TRUE is returned and *p_end is set right after the '>'.
 */
static int
md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    if(off + 2 >= lines[0].end || ctx.CH(off+1) != '!')
        return FALSE;
    off += 2;

    /* Declaration name: one or more letters. If anything follows the name
     * on the first line, it has to be whitespace. */
    if(off >= lines[0].end || !ctx.ISALPHA(off))
        return FALSE;
    off++;
    while(off < lines[0].end && ctx.ISALPHA(off))
        off++;
    if(off < lines[0].end && !ctx.ISWHITESPACE(off))
        return FALSE;

    return md_scan_for_html_closer(ctx, ">", 1,
                lines, n_lines, off, max_end, p_end, &ctx.html_decl_horizon);
}

/* Check whether a CDATA section ("<![CDATA[" ... "]]>") starts at 'beg'.
 * On success, TRUE is returned and *p_end is set right after "]]>".
 */
static int
md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    string open_str = "<![CDATA[";

    OFF off = beg;

    if(off + open_str.length >= lines[0].end)
        return FALSE;
    if(memcmp(ctx.STR(off), open_str.ptr, open_str.length) != 0)
        return FALSE;
    off += open_str.length;

    /* NOTE(review): when the last line ends before max_end, the limit is
     * pulled back to two characters before that line end. The reason for
     * the "- 2" is not visible here -- confirm before touching it. */
    if(lines[n_lines-1].end < max_end)
        max_end = lines[n_lines-1].end - 2;

    return md_scan_for_html_closer(ctx, "]]>", 3,
                lines, n_lines, off, max_end, p_end, &ctx.html_cdata_horizon);
}

/* Try all the raw HTML constructs above, in the order tag, comment,
 * processing instruction, declaration, CDATA. 'beg' must be the offset
 * of '<'. On success, *p_end is set by the recognizer which matched.
 */
static int
md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    assert(ctx.CH(beg) == '<');
    return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) ||
            md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) ||
            md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) ||
            md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) ||
            md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
}


/****************************
 ***  Recognizing Entity  ***
 ****************************/

/* Accept 1 to 6 hexadecimal digits starting at text[beg]. On success,
 * *p_end is set right after the last digit. 'ctx' is not used.
 */
static int
md_is_hex_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8)
        off++;

    const OFF n_digits = off - beg;
    if(n_digits < 1 || n_digits > 6)
        return FALSE;

    *p_end = off;
    return TRUE;
}

/* Accept 1 to 7 decimal digits starting at text[beg]. On success,
 * *p_end is set right after the last digit. 'ctx' is not used.
 */
static int
md_is_dec_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8)
        off++;

    const OFF n_digits = off - beg;
    if(n_digits < 1 || n_digits > 7)
        return FALSE;

    *p_end = off;
    return TRUE;
}

/* Accept an entity name starting at text[beg]: a letter followed by letters
 * or digits, 2 to 48 characters in total. On success, *p_end is set right
 * after the name. 'ctx' is not used.
 */
static int
md_is_named_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    if(off >= max_end || !ISALPHA_(text[off]))
        return FALSE;
    off++;

    while(off < max_end && ISALNUM_(text[off]) && off - beg <= 48)
        off++;

    const OFF name_len = off - beg;
    if(name_len < 2 || name_len > 48)
        return FALSE;

    *p_end = off;
    return TRUE;
}

/* Check whether an entity ("&#x...;", "&#...;" or "&name;") starts at
 * text[beg], which must be '&'. On success, TRUE is returned and *p_end is
 * set right after the terminating ';'.
 */
static int
md_is_entity_str(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    int is_contents;
    OFF off = beg;

    assert(text[off] == '&');
    off++;

    /* The contents recognizers advance 'off' only when they succeed. */
    if(off+2 < max_end && text[off] == '#' && (text[off+1] == 'x' || text[off+1] == 'X'))
        is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
    else if(off+1 < max_end && text[off] == '#')
        is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
    else
        is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);

    if(is_contents && off < max_end && text[off] == ';') {
        *p_end = off+1;
        return TRUE;
    }

    return FALSE;
}

/* The same as md_is_entity_str(), applied to the document text. */
static int
md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    return md_is_entity_str(ctx, ctx.text, beg, max_end, p_end);
}


/******************************
 ***  Attribute Management  ***
 ******************************/

/* Working storage for md_build_attribute(). */
struct MD_ATTRIBUTE_BUILD
{
    CHAR* text = null;
    MD_TEXTTYPE* substr_types = null;
    OFF* substr_offsets = null;
    int substr_count = 0;
    /* Capacity of substr_types[]. md_free_attribute() also uses
     * substr_alloc > 0 as the sign that the buffers above are heap-allocated. */
    int substr_alloc = 0;
    /* Storage used instead of the heap for trivial attributes. */
    MD_TEXTTYPE[1] trivial_types = [0];
    OFF[2] trivial_offsets = [0, 0];
}


/* Flag for md_build_attribute(): do not process backslash escapes. */
enum MD_BUILD_ATTR_NO_ESCAPES = 0x0001;

/* realloc() wrapper with a defined result for newSize == 0: the block is
 * freed and null is returned. */
void* realloc_safe(void* ptr, size_t newSize)
{
    import core.stdc.stdlib : free, realloc;

    if (newSize == 0)
    {
        free(ptr);
        return null;
    }

    return realloc(ptr, newSize);
}


/* Append a substring record (its type and its starting offset within
 * build.text) to the build, growing the two arrays when they are full.
 * Returns 0 on success, -1 if memory cannot be obtained.
 *
 * After a failure, the pointers stored in 'build' still refer to valid
 * (or null) blocks, so md_free_attribute() can release them safely.
 */
int md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
                                MD_TEXTTYPE type, OFF off)
{
    if(build.substr_count >= build.substr_alloc) {
        MD_TEXTTYPE* new_substr_types;
        OFF* new_substr_offsets;

        /* md_free_attribute() releases the buffers only when
         * substr_alloc > 0. Raising it before allocating keeps the abort
         * path of md_build_attribute() freeing build.text even when the
         * very first growth fails. */
        build.substr_alloc = (build.substr_alloc == 0 ? 8 : build.substr_alloc * 2);

        new_substr_types = cast(MD_TEXTTYPE*) realloc_safe(build.substr_types,
                                    build.substr_alloc * MD_TEXTTYPE.sizeof);
        if(new_substr_types == null) {
            /* realloc() failure leaves the old block untouched. */
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }
        /* Store the new block right away. A successful realloc() invalidates
         * the old pointer, so build.substr_types must never be left pointing
         * at it, not even if the allocation below fails. */
        build.substr_types = new_substr_types;

        /* Note +1 to reserve space for final offset (== raw_size). */
        new_substr_offsets = cast(OFF*) realloc_safe(build.substr_offsets,
                                    (build.substr_alloc+1) * OFF.sizeof);
        if(new_substr_offsets == null) {
            /* Both arrays stay owned by 'build' and are released by
             * md_free_attribute(); freeing anything here would lead to
             * a double free there. */
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }
        build.substr_offsets = new_substr_offsets;
    }

    build.substr_types[build.substr_count] = type;
    build.substr_offsets[build.substr_count] = off;
    build.substr_count++;
    return 0;
}

/* Release the buffers of a build. Nothing is freed when substr_alloc is 0,
 * which is what md_build_attribute() sets for trivial attributes (their
 * pointers refer to the raw text and to the build structure itself).
 * 'ctx' is not used.
 */
void md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
{
    if(build.substr_alloc > 0) {
        free(build.text);
        free(build.substr_types);
        free(build.substr_offsets);
    }
}

/* Build an attribute from raw_text[0 .. raw_size].
 *
 * The text is split into substrings of types MD_TEXT_NORMAL, MD_TEXT_ENTITY
 * and MD_TEXT_NULLCHAR. Unless MD_BUILD_ATTR_NO_ESCAPES is set in 'flags',
 * a backslash followed by punctuation or by a new line is dropped from
 * the output.
 *
 * On success, 0 is returned and 'attr' refers to data held by 'build'.
 * For raw text without any backslash, ampersand or NUL character, nothing
 * is allocated: attr.text aliases raw_text and the substring arrays live
 * inside *build. On failure, -1 is returned after md_free_attribute() has
 * been called on 'build'.
 */
int md_build_attribute(MD_CTX* ctx, const(CHAR)* raw_text, SZ raw_size,
                       uint flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
{
    OFF raw_off, off;
    int is_trivial;

    memset(build, 0, MD_ATTRIBUTE_BUILD.sizeof);

    /* If there is no backslash and no ampersand, build trivial attribute
     * without any malloc(). */
    is_trivial = TRUE;
    for(raw_off = 0; raw_off < raw_size; raw_off++) {
        if(ISANYOF3_(raw_text[raw_off], '\\', '&', '\0')) {
            is_trivial = FALSE;
            break;
        }
    }

    if(is_trivial) {
        build.text = cast(CHAR*) (raw_size ? raw_text : null);
        build.substr_types = build.trivial_types.ptr;
        build.substr_offsets = build.trivial_offsets.ptr;
        build.substr_count = 1;
        build.substr_alloc = 0;
        build.trivial_types[0] = MD_TEXT_NORMAL;
        build.trivial_offsets[0] = 0;
        build.trivial_offsets[1] = raw_size;
        off = raw_size;
    } else {
        /* The output is never longer than the input: every step below
         * copies at most as many characters as it consumes. */
        build.text = cast(CHAR*) malloc(raw_size * CHAR.sizeof);
        if(build.text == null) {
            ctx.MD_LOG("malloc() failed.");
            goto abort;
        }

        raw_off = 0;
        off = 0;

        while(raw_off < raw_size) {
            if(raw_text[raw_off] == '\0') {
                /* A NUL character forms a substring of its own. */
                if(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off) < 0)
                    goto abort;
                memcpy(build.text + off, raw_text + raw_off, 1);
                off++;
                raw_off++;
                continue;
            }

            if(raw_text[raw_off] == '&') {
                OFF ent_end;

                if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
                    /* An entity is copied verbatim as its own substring. */
                    if(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off) < 0)
                        goto abort;
                    memcpy(build.text + off, raw_text + raw_off, ent_end - raw_off);
                    off += ent_end - raw_off;
                    raw_off = ent_end;
                    continue;
                }
            }

            /* Normal text: open a new substring unless one is already open. */
            if(build.substr_count == 0 || build.substr_types[build.substr_count-1] != MD_TEXT_NORMAL)
            {
                if(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off) < 0)
                    goto abort;
            }

            /* Skip the backslash of an escape; the escaped char is copied. */
            if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
               raw_text[raw_off] == '\\' && raw_off+1 < raw_size &&
               (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
                raw_off++;

            build.text[off++] = raw_text[raw_off++];
        }
        /* Final offset, for which append_substr() reserved the extra slot. */
        build.substr_offsets[build.substr_count] = off;
    }

    attr.text = build.text;
    attr.size = off;
    attr.substr_offsets = build.substr_offsets;
    attr.substr_types = build.substr_types;
    return 0;

abort:
    md_free_attribute(ctx, build);
    return -1;
}


/*********************************************
 ***  Dictionary of Reference Definitions  ***
 *********************************************/

/* Seed and multiplier used by md_fnv1a(). */
enum MD_FNV1A_BASE = 2166136261;
enum MD_FNV1A_PRIME = 16777619;

/* Folds `n` bytes starting at `data` into the running hash `base`
 * (xor the byte in, then multiply by MD_FNV1A_PRIME) and returns the
 * updated hash. Passing the result back as `base` chains several buffers. */
uint md_fnv1a(uint base, const(void)* data, size_t n)
{
    const(ubyte)* buf = cast(const(ubyte)*) data;
    uint hash = base;

    foreach(i; 0 .. n) {
        hash ^= buf[i];
        hash *= MD_FNV1A_PRIME;
    }

    return hash;
}


/* One link reference definition, e.g. `[label]: dest "title"`. */
struct MD_REF_DEF
{
    const(CHAR)* label;
    const(CHAR)* title;
    uint hash;              /* md_link_label_hash() of the label. */
    SZ label_size;
    bool label_needs_free;  /* label was malloc'ed and is owned by this def. */
    bool title_needs_free;  /* title was malloc'ed and is owned by this def. */
    SZ title_size;
    OFF dest_beg;
    OFF dest_end;
}

/* Label equivalence is quite complicated with regards to whitespace and case
 * folding. This complicates computing a hash of it as well as direct comparison
 * of two labels. */

/* Computes the hash of a link label. Leading whitespace is skipped, every
 * run of whitespace/newlines is hashed as a single ' ', and every other
 * codepoint is hashed through its Unicode case folding. */
uint md_link_label_hash(const(CHAR)* label, SZ size)
{
    uint hash = MD_FNV1A_BASE;
    OFF off;
    uint codepoint;
    int is_whitespace = FALSE;

    off = md_skip_unicode_whitespace(label, 0, size);
    while(off < size) {
        SZ char_size;

        codepoint = md_decode_unicode(label, off, size, &char_size);
        is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);

        if(is_whitespace) {
            codepoint = ' ';
            hash = md_fnv1a(hash, &codepoint, uint.sizeof);
            off = md_skip_unicode_whitespace(label, off, size);
        } else {
            MD_UNICODE_FOLD_INFO fold_info;

            md_get_unicode_fold_info(codepoint, &fold_info);
            hash = md_fnv1a(hash, fold_info.codepoints.ptr, fold_info.n_codepoints * uint.sizeof);
            off += char_size;
        }
    }

    return hash;
}

/* Loads the fold info of the character at label[off] into *fold_info and
 * returns the offset just behind that character. End of label, a newline
 * and any Unicode whitespace are all reported as a single ' '. When called
 * with off >= size the offset is returned unchanged. */
OFF md_link_label_cmp_load_fold_info(const(CHAR)* label, OFF off, SZ size,
                                     MD_UNICODE_FOLD_INFO* fold_info)
{
    uint codepoint;
    SZ char_size;

    if(off >= size) {
        /* Treat end of link label as a whitespace. */
        goto whitespace;
    }

    if(ISNEWLINE_(label[off])) {
        /* Treat new lines as a whitespace. */
        off++;
        goto whitespace;
    }

    codepoint = md_decode_unicode(label, off, size, &char_size);
    off += char_size;
    if(ISUNICODEWHITESPACE_(codepoint)) {
        /* Treat all whitespace as equivalent */
        goto whitespace;
    }

    /* Get real folding info. */
    md_get_unicode_fold_info(codepoint, fold_info);
    return off;

whitespace:
    fold_info.codepoints[0] = ' ';
    fold_info.n_codepoints = 1;
    return off;
}

/* Compares two link labels codepoint by codepoint after case folding.
 * Returns 0 when the labels are equivalent, non-zero otherwise (the sign
 * gives an ordering usable by qsort()/bsearch()).
 *
 * The comparison runs until BOTH labels and both pending fold buffers are
 * exhausted. Stopping as soon as one label ends would make a label compare
 * equal to any longer label it is a prefix of (e.g. "ab" and "abc"); callers
 * which compare labels sharing only a hashtable bucket, not a full hash,
 * depend on this being exact. A label which ran out is padded with ' ' by
 * md_link_label_cmp_load_fold_info(). */
static int
md_link_label_cmp(const(CHAR)* a_label, SZ a_size, const(CHAR)* b_label, SZ b_size)
{
    OFF a_off;
    OFF b_off;
    MD_UNICODE_FOLD_INFO a_fi = { 0 };
    MD_UNICODE_FOLD_INFO b_fi = { 0 };
    OFF a_fi_off = 0;
    OFF b_fi_off = 0;
    int cmp;

    a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
    b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
    while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
          b_off < b_size || b_fi_off < b_fi.n_codepoints)
    {
        /* If needed, load fold info for next char. */
        if(a_fi_off >= a_fi.n_codepoints) {
            a_fi_off = 0;
            a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
        }
        if(b_fi_off >= b_fi.n_codepoints) {
            b_fi_off = 0;
            b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
        }

        cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
        if(cmp != 0)
            return cmp;

        a_fi_off++;
        b_fi_off++;
    }

    return 0;
}

/* Hashtable bucket holding several ref. defs. It is a header followed
 * directly by an array of MD_REF_DEF pointers; `ref_defs_space` is the
 * first slot of that array. */
struct MD_REF_DEF_LIST
{
nothrow:
@nogc:

    int n_ref_defs;
    int alloc_ref_defs;

    /* Valid items always point into ctx.ref_defs[] */
    MD_REF_DEF* ref_defs_space; // Starting here, a list of pointer at the end of the struct

    // To allocate a MD_REF_DEF_LIST
    // (8 covers the two int counters preceding the pointer array.)
    static size_t SIZEOF(int numDefRefs)
    {
        return 8 + (MD_REF_DEF*).sizeof * numDefRefs;
    }

    // Returns: a slice of ref defs embedded at the end of the struct
    static MD_REF_DEF*[] refDefs(MD_REF_DEF_LIST* list)
    {
        return (&(list.ref_defs_space))[0..list.n_ref_defs];
    }

    // Returns: a reference to the index-th pointer slot (no bounds check).
    ref MD_REF_DEF* ref_defs_nth(size_t index)
    {
        MD_REF_DEF** base = &ref_defs_space;
        return base[index];
    }
}

/* qsort()/bsearch() comparator over MD_REF_DEF* elements: orders by hash
 * first and falls back to md_link_label_cmp() when hashes are equal. */
extern(C) int md_ref_def_cmp(scope const(void)* a, scope const void* b)
{
    const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a;
    const(MD_REF_DEF)* b_ref = *cast(const(MD_REF_DEF*)*)b;

    if(a_ref.hash < b_ref.hash)
        return -1;
    else if(a_ref.hash > b_ref.hash)
        return +1;
    else
        return md_link_label_cmp(a_ref.label, a_ref.label_size, b_ref.label, b_ref.label_size);
}

/* Same as md_ref_def_cmp(), but equivalent defs are further ordered by
 * their address so that sorting keeps them in a deterministic order. */
extern(C) int md_ref_def_cmp_stable(scope const(void)* a, scope const(void)* b)
{
    int cmp;

    cmp = md_ref_def_cmp(a, b);

    /* Ensure stability of the sorting. */
    if(cmp == 0) {
        const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a;
        const(MD_REF_DEF)* b_ref = *cast(const(MD_REF_DEF*)*)b;

        if(a_ref < b_ref)
            cmp = -1;
        else if(a_ref > b_ref)
            cmp = +1;
        else
            cmp = 0;
    }

    return cmp;
}

/* Tells whether a non-null hashtable bucket points directly at one
 * MD_REF_DEF inside ctx.ref_defs[]; any other non-null bucket is treated
 * by the callers as a malloc'ed MD_REF_DEF_LIST. */
private bool md_ref_def_bucket_is_def(MD_CTX* ctx, const(void)* bucket)
{
    const(MD_REF_DEF)* def = cast(const(MD_REF_DEF)*) bucket;
    return ctx.ref_defs <= def && def < ctx.ref_defs + ctx.n_ref_defs;
}

/* Builds ctx.ref_def_hashtable from ctx.ref_defs[0 .. n_ref_defs].
 * Returns 0 on success (also when there are no ref. defs at all) and -1
 * when an allocation fails; nothing is released here on failure. */
int md_build_ref_def_hashtable(MD_CTX* ctx)
{
    int i, j;

    if(ctx.n_ref_defs == 0)
        return 0;

    ctx.ref_def_hashtable_size = (ctx.n_ref_defs * 5) / 4;
    ctx.ref_def_hashtable = cast(void**) malloc(ctx.ref_def_hashtable_size * (void*).sizeof);
    if(ctx.ref_def_hashtable == null) {
        ctx.MD_LOG("malloc() failed.");
        goto abort;
    }
    memset(ctx.ref_def_hashtable, 0, ctx.ref_def_hashtable_size * (void*).sizeof);

    /* Each member of ctx.ref_def_hashtable[] can be:
     *  -- null,
     *  -- pointer to the MD_REF_DEF in ctx.ref_defs[], or
     *  -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
     *     such MD_REF_DEFs.
     */
    for(i = 0; i < ctx.n_ref_defs; i++) {
        MD_REF_DEF* def = &ctx.ref_defs[i];
        void* bucket;
        MD_REF_DEF_LIST* list;

        def.hash = md_link_label_hash(def.label, def.label_size);
        auto slot = def.hash % ctx.ref_def_hashtable_size;
        bucket = ctx.ref_def_hashtable[slot];

        if(bucket == null) {
            ctx.ref_def_hashtable[slot] = def;
            continue;
        }

        if(md_ref_def_bucket_is_def(ctx, bucket)) {
            /* The bucket already contains one ref. def. Lets see whether it
             * is the same label (ref. def. duplicate) or different one
             * (hash conflict). */
            MD_REF_DEF* old_def = cast(MD_REF_DEF*) bucket;

            if(md_link_label_cmp(def.label, def.label_size, old_def.label, old_def.label_size) == 0) {
                /* Ignore this ref. def. */
                continue;
            }

            /* Make the bucket capable of holding more ref. defs. */
            list = cast(MD_REF_DEF_LIST*) malloc(MD_REF_DEF_LIST.SIZEOF(4));
            if(list == null) {
                ctx.MD_LOG("malloc() failed.");
                goto abort;
            }
            list.ref_defs_nth(0) = old_def;
            list.ref_defs_nth(1) = def;
            list.n_ref_defs = 2;
            list.alloc_ref_defs = 4;
            ctx.ref_def_hashtable[slot] = list;
            continue;
        }

        /* Append the def to the bucket list. */
        list = cast(MD_REF_DEF_LIST*) bucket;
        if(list.n_ref_defs >= list.alloc_ref_defs) {
            MD_REF_DEF_LIST* list_tmp = cast(MD_REF_DEF_LIST*) realloc_safe(list, MD_REF_DEF_LIST.SIZEOF( 2 * list.alloc_ref_defs ));
            if(list_tmp == null) {
                ctx.MD_LOG("realloc() failed.");
                goto abort;
            }
            list = list_tmp;
            list.alloc_ref_defs *= 2;
            ctx.ref_def_hashtable[slot] = list;
        }

        list.ref_defs_nth(list.n_ref_defs) = def;
        list.n_ref_defs++;
    }

    /* Sort the complex buckets so we can use bsearch() with them. */
    for(i = 0; i < ctx.ref_def_hashtable_size; i++) {
        void* bucket = ctx.ref_def_hashtable[i];
        MD_REF_DEF_LIST* list;

        if(bucket == null)
            continue;
        if(md_ref_def_bucket_is_def(ctx, bucket))
            continue;

        list = cast(MD_REF_DEF_LIST*) bucket;
        qsort(MD_REF_DEF_LIST.refDefs(list).ptr, list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp_stable);

        /* Disable duplicates: every later duplicate is overwritten with the
         * pointer of the entry preceding it. */
        for(j = 1; j < list.n_ref_defs; j++) {
            if(md_ref_def_cmp(&list.ref_defs_nth(j-1), &list.ref_defs_nth(j)) == 0)
                list.ref_defs_nth(j) = list.ref_defs_nth(j-1);
        }
    }

    return 0;

abort:
    return -1;
}

/* Releases the hashtable and every MD_REF_DEF_LIST bucket in it. Buckets
 * pointing into ctx.ref_defs[] are not owned by the table and are left
 * alone. */
static void
md_free_ref_def_hashtable(MD_CTX* ctx)
{
    if(ctx.ref_def_hashtable != null) {
        int i;

        for(i = 0; i < ctx.ref_def_hashtable_size; i++) {
            void* bucket = ctx.ref_def_hashtable[i];
            if(bucket == null)
                continue;
            if(md_ref_def_bucket_is_def(ctx, bucket))
                continue;
            free(bucket);
        }

        free(ctx.ref_def_hashtable);
    }
}

/* Looks up the reference definition matching the given label.
 * Returns null when the hashtable is empty or no equivalent label exists. */
const(MD_REF_DEF)* md_lookup_ref_def(MD_CTX* ctx, const(CHAR)* label, SZ label_size)
{
    uint hash;
    void* bucket;

    if(ctx.ref_def_hashtable_size == 0)
        return null;

    hash = md_link_label_hash(label, label_size);
    bucket = ctx.ref_def_hashtable[hash % ctx.ref_def_hashtable_size];

    if(bucket == null) {
        return null;
    } else if(md_ref_def_bucket_is_def(ctx, bucket)) {
        const MD_REF_DEF* def = cast(MD_REF_DEF*) bucket;

        if(md_link_label_cmp(def.label, def.label_size, label, label_size) == 0)
            return def;
        else
            return null;
    } else {
        MD_REF_DEF_LIST* list = cast(MD_REF_DEF_LIST*) bucket;
        MD_REF_DEF key_buf;
        const MD_REF_DEF* key = &key_buf;
        const(MD_REF_DEF*)* ret;

        /* The list elements are MD_REF_DEF pointers, so the search key has
         * to be a pointer to a (temporary) MD_REF_DEF too. */
        key_buf.label = cast(CHAR*) label;
        key_buf.label_size = label_size;
        key_buf.hash = hash;    /* Already computed above for the same label. */

        ret = cast(const(MD_REF_DEF*)*) bsearch(&key, MD_REF_DEF_LIST.refDefs(list).ptr,
                    list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp);
        if(ret != null)
            return *ret;
        else
            return null;
    }
}


/*************************** 2187 *** Recognizing Links *** 2188 ***************************/ 2189 2190 /* Note this code is partially shared between processing inlines and blocks 2191 * as reference definitions and links share some helper parser functions. 2192 */ 2193 2194 struct MD_LINK_ATTR 2195 { 2196 OFF dest_beg; 2197 OFF dest_end; 2198 2199 const(CHAR)* title; 2200 SZ title_size; 2201 bool title_needs_free; 2202 } 2203 2204 2205 static int 2206 md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, 2207 OFF* p_end, int* p_beg_line_index, int* p_end_line_index, 2208 OFF* p_contents_beg, OFF* p_contents_end) 2209 { 2210 OFF off = beg; 2211 OFF contents_beg = 0; 2212 OFF contents_end = 0; 2213 int line_index = 0; 2214 int len = 0; 2215 2216 if(ctx.CH(off) != '[') 2217 return FALSE; 2218 off++; 2219 2220 while(1) { 2221 OFF line_end = lines[line_index].end; 2222 2223 while(off < line_end) { 2224 if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { 2225 if(contents_end == 0) { 2226 contents_beg = off; 2227 *p_beg_line_index = line_index; 2228 } 2229 contents_end = off + 2; 2230 off += 2; 2231 } else if(ctx.CH(off) == '[') { 2232 return FALSE; 2233 } else if(ctx.CH(off) == ']') { 2234 if(contents_beg < contents_end) { 2235 /* Success. */ 2236 *p_contents_beg = contents_beg; 2237 *p_contents_end = contents_end; 2238 *p_end = off+1; 2239 *p_end_line_index = line_index; 2240 return TRUE; 2241 } else { 2242 /* Link label must have some non-whitespace contents. 
*/ 2243 return FALSE; 2244 } 2245 } else { 2246 uint codepoint; 2247 SZ char_size; 2248 2249 codepoint = md_decode_unicode(ctx.text, off, ctx.size, &char_size); 2250 if(!ISUNICODEWHITESPACE_(codepoint)) { 2251 if(contents_end == 0) { 2252 contents_beg = off; 2253 *p_beg_line_index = line_index; 2254 } 2255 contents_end = off + char_size; 2256 } 2257 2258 off += char_size; 2259 } 2260 2261 len++; 2262 if(len > 999) 2263 return FALSE; 2264 } 2265 2266 line_index++; 2267 len++; 2268 if(line_index < n_lines) 2269 off = lines[line_index].beg; 2270 else 2271 break; 2272 } 2273 2274 return FALSE; 2275 } 2276 2277 static int 2278 md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, 2279 OFF* p_contents_beg, OFF* p_contents_end) 2280 { 2281 OFF off = beg; 2282 2283 if(off >= max_end || ctx.CH(off) != '<') 2284 return FALSE; 2285 off++; 2286 2287 while(off < max_end) { 2288 if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) { 2289 off += 2; 2290 continue; 2291 } 2292 2293 if(ctx.ISNEWLINE(off) || ctx.CH(off) == '<') 2294 return FALSE; 2295 2296 if(ctx.CH(off) == '>') { 2297 /* Success. */ 2298 *p_contents_beg = beg+1; 2299 *p_contents_end = off; 2300 *p_end = off+1; 2301 return TRUE; 2302 } 2303 2304 off++; 2305 } 2306 2307 return FALSE; 2308 } 2309 2310 static int 2311 md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, 2312 OFF* p_contents_beg, OFF* p_contents_end) 2313 { 2314 OFF off = beg; 2315 int parenthesis_level = 0; 2316 2317 while(off < max_end) { 2318 if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) { 2319 off += 2; 2320 continue; 2321 } 2322 2323 if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off)) 2324 break; 2325 2326 /* Link destination may include balanced pairs of unescaped '(' ')'. 
2327 * Note we limit the maximal nesting level by 32 to protect us from 2328 * https://github.com/jgm/cmark/issues/214 */ 2329 if(ctx.CH(off) == '(') { 2330 parenthesis_level++; 2331 if(parenthesis_level > 32) 2332 return FALSE; 2333 } else if(ctx.CH(off) == ')') { 2334 if(parenthesis_level == 0) 2335 break; 2336 parenthesis_level--; 2337 } 2338 2339 off++; 2340 } 2341 2342 if(parenthesis_level != 0 || off == beg) 2343 return FALSE; 2344 2345 /* Success. */ 2346 *p_contents_beg = beg; 2347 *p_contents_end = off; 2348 *p_end = off; 2349 return TRUE; 2350 } 2351 2352 static int 2353 md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, 2354 OFF* p_contents_beg, OFF* p_contents_end) 2355 { 2356 if(ctx.CH(beg) == '<') 2357 return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); 2358 else 2359 return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); 2360 } 2361 2362 static int 2363 md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, 2364 OFF* p_end, int* p_beg_line_index, int* p_end_line_index, 2365 OFF* p_contents_beg, OFF* p_contents_end) 2366 { 2367 OFF off = beg; 2368 CHAR closer_char; 2369 int line_index = 0; 2370 2371 /* White space with up to one line break. */ 2372 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2373 off++; 2374 if(off >= lines[line_index].end) { 2375 line_index++; 2376 if(line_index >= n_lines) 2377 return FALSE; 2378 off = lines[line_index].beg; 2379 } 2380 if(off == beg) 2381 return FALSE; 2382 2383 *p_beg_line_index = line_index; 2384 2385 /* First char determines how to detect end of it. 
*/ 2386 switch(ctx.CH(off)) { 2387 case '"': closer_char = '"'; break; 2388 case '\'': closer_char = '\''; break; 2389 case '(': closer_char = ')'; break; 2390 default: return FALSE; 2391 } 2392 off++; 2393 2394 *p_contents_beg = off; 2395 2396 while(line_index < n_lines) { 2397 OFF line_end = lines[line_index].end; 2398 2399 while(off < line_end) { 2400 if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { 2401 off++; 2402 } else if(ctx.CH(off) == closer_char) { 2403 /* Success. */ 2404 *p_contents_end = off; 2405 *p_end = off+1; 2406 *p_end_line_index = line_index; 2407 return TRUE; 2408 } else if(closer_char == ')' && ctx.CH(off) == '(') { 2409 /* ()-style title cannot contain (unescaped '(')) */ 2410 return FALSE; 2411 } 2412 2413 off++; 2414 } 2415 2416 line_index++; 2417 } 2418 2419 return FALSE; 2420 } 2421 2422 /* Returns 0 if it is not a reference definition. 2423 * 2424 * Returns N > 0 if it is a reference definition. N then corresponds to the 2425 * number of lines forming it). In this case the definition is stored for 2426 * resolving any links referring to it. 2427 * 2428 * Returns -1 in case of an error (out of memory). 2429 */ 2430 int md_is_link_reference_definition(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines) 2431 { 2432 OFF label_contents_beg; 2433 OFF label_contents_end; 2434 int label_contents_line_index = -1; 2435 int label_is_multiline; 2436 const(CHAR)* label; 2437 SZ label_size; 2438 bool label_needs_free = false; 2439 OFF dest_contents_beg; 2440 OFF dest_contents_end; 2441 OFF title_contents_beg; 2442 OFF title_contents_end; 2443 int title_contents_line_index; 2444 int title_is_multiline; 2445 OFF off; 2446 int line_index = 0; 2447 int tmp_line_index; 2448 MD_REF_DEF* def; 2449 int ret; 2450 2451 /* Link label. 
*/ 2452 if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg, 2453 &off, &label_contents_line_index, &line_index, 2454 &label_contents_beg, &label_contents_end)) 2455 return FALSE; 2456 label_is_multiline = (label_contents_line_index != line_index); 2457 2458 /* Colon. */ 2459 if(off >= lines[line_index].end || ctx.CH(off) != ':') 2460 return FALSE; 2461 off++; 2462 2463 /* Optional white space with up to one line break. */ 2464 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2465 off++; 2466 if(off >= lines[line_index].end) { 2467 line_index++; 2468 if(line_index >= n_lines) 2469 return FALSE; 2470 off = lines[line_index].beg; 2471 } 2472 2473 /* Link destination. */ 2474 if(!md_is_link_destination(ctx, off, lines[line_index].end, 2475 &off, &dest_contents_beg, &dest_contents_end)) 2476 return FALSE; 2477 2478 /* (Optional) title. Note we interpret it as an title only if nothing 2479 * more follows on its last line. */ 2480 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, 2481 &off, &title_contents_line_index, &tmp_line_index, 2482 &title_contents_beg, &title_contents_end) 2483 && off >= lines[line_index + tmp_line_index].end) 2484 { 2485 title_is_multiline = (tmp_line_index != title_contents_line_index); 2486 title_contents_line_index += line_index; 2487 line_index += tmp_line_index; 2488 } else { 2489 /* Not a title. */ 2490 title_is_multiline = FALSE; 2491 title_contents_beg = off; 2492 title_contents_end = off; 2493 title_contents_line_index = 0; 2494 } 2495 2496 /* Nothing more can follow on the last line. */ 2497 if(off < lines[line_index].end) 2498 return FALSE; 2499 2500 /* Construct label. 
*/ 2501 if(!label_is_multiline) { 2502 label = cast(CHAR*) ctx.STR(label_contents_beg); 2503 label_size = label_contents_end - label_contents_beg; 2504 label_needs_free = false; 2505 } else { 2506 ret = (md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, 2507 lines + label_contents_line_index, n_lines - label_contents_line_index, 2508 ' ', &label, &label_size)); 2509 if (ret < 0) goto abort; 2510 label_needs_free = true; 2511 } 2512 2513 /* Store the reference definition. */ 2514 if(ctx.n_ref_defs >= ctx.alloc_ref_defs) { 2515 MD_REF_DEF* new_defs; 2516 2517 ctx.alloc_ref_defs = (ctx.alloc_ref_defs > 0 ? ctx.alloc_ref_defs * 2 : 16); 2518 new_defs = cast(MD_REF_DEF*) realloc_safe(ctx.ref_defs, ctx.alloc_ref_defs * MD_REF_DEF.sizeof); 2519 if(new_defs == null) { 2520 ctx.MD_LOG("realloc() failed."); 2521 ret = -1; 2522 goto abort; 2523 } 2524 2525 ctx.ref_defs = new_defs; 2526 } 2527 2528 def = &ctx.ref_defs[ctx.n_ref_defs]; 2529 memset(def, 0, MD_REF_DEF.sizeof); 2530 2531 def.label = label; 2532 def.label_size = label_size; 2533 def.label_needs_free = label_needs_free; 2534 2535 def.dest_beg = dest_contents_beg; 2536 def.dest_end = dest_contents_end; 2537 2538 if(title_contents_beg >= title_contents_end) { 2539 def.title = null; 2540 def.title_size = 0; 2541 } else if(!title_is_multiline) { 2542 def.title = cast(CHAR*) ctx.STR(title_contents_beg); 2543 def.title_size = title_contents_end - title_contents_beg; 2544 } else { 2545 ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, 2546 lines + title_contents_line_index, n_lines - title_contents_line_index, 2547 '\n', &def.title, &def.title_size)); 2548 if (ret < 0) goto abort; 2549 def.title_needs_free = true; 2550 } 2551 2552 /* Success. */ 2553 ctx.n_ref_defs++; 2554 return line_index + 1; 2555 2556 abort: 2557 /* Failure. 
*/ 2558 if(label_needs_free) 2559 free(cast(void*)label); // Note: const_cast here 2560 return -1; 2561 } 2562 2563 static int 2564 md_is_link_reference(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, 2565 OFF beg, OFF end, MD_LINK_ATTR* attr) 2566 { 2567 const(MD_REF_DEF)* def; 2568 const(MD_LINE)* beg_line; 2569 const(MD_LINE)* end_line; 2570 const(CHAR)* label; 2571 SZ label_size; 2572 int ret; 2573 2574 assert(ctx.CH(beg) == '[' || ctx.CH(beg) == '!'); 2575 assert(ctx.CH(end-1) == ']'); 2576 2577 beg += (ctx.CH(beg) == '!' ? 2 : 1); 2578 end--; 2579 2580 /* Find lines corresponding to the beg and end positions. */ 2581 assert(lines[0].beg <= beg); 2582 beg_line = lines; 2583 while(beg >= beg_line.end) 2584 beg_line++; 2585 2586 assert(end <= lines[n_lines-1].end); 2587 end_line = beg_line; 2588 while(end >= end_line.end) 2589 end_line++; 2590 2591 if(beg_line != end_line) { 2592 ret = (md_merge_lines_alloc(ctx, beg, end, beg_line, 2593 cast(int)(n_lines - (beg_line - lines)), ' ', &label, &label_size)); 2594 if (ret < 0) goto abort; 2595 } else { 2596 label = cast(CHAR*) ctx.STR(beg); 2597 label_size = end - beg; 2598 } 2599 2600 def = md_lookup_ref_def(ctx, label, label_size); 2601 if(def != null) { 2602 attr.dest_beg = def.dest_beg; 2603 attr.dest_end = def.dest_end; 2604 attr.title = def.title; 2605 attr.title_size = def.title_size; 2606 attr.title_needs_free = false; 2607 } 2608 2609 if(beg_line != end_line) 2610 free(cast(void*)label); // Note: const_cast here 2611 2612 ret = (def != null); 2613 2614 abort: 2615 return ret; 2616 } 2617 2618 static int 2619 md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, 2620 OFF beg, OFF* p_end, MD_LINK_ATTR* attr) 2621 { 2622 int line_index = 0; 2623 int tmp_line_index; 2624 OFF title_contents_beg; 2625 OFF title_contents_end; 2626 int title_contents_line_index; 2627 int title_is_multiline; 2628 OFF off = beg; 2629 int ret = FALSE; 2630 2631 while(off >= lines[line_index].end) 2632 line_index++; 
2633 2634 assert(ctx.CH(off) == '('); 2635 off++; 2636 2637 /* Optional white space with up to one line break. */ 2638 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2639 off++; 2640 if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { 2641 line_index++; 2642 if(line_index >= n_lines) 2643 return FALSE; 2644 off = lines[line_index].beg; 2645 } 2646 2647 /* Link destination may be omitted, but only when not also having a title. */ 2648 if(off < ctx.size && ctx.CH(off) == ')') { 2649 attr.dest_beg = off; 2650 attr.dest_end = off; 2651 attr.title = null; 2652 attr.title_size = 0; 2653 attr.title_needs_free = false; 2654 off++; 2655 *p_end = off; 2656 return TRUE; 2657 } 2658 2659 /* Link destination. */ 2660 if(!md_is_link_destination(ctx, off, lines[line_index].end, 2661 &off, &attr.dest_beg, &attr.dest_end)) 2662 return FALSE; 2663 2664 /* (Optional) title. */ 2665 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, 2666 &off, &title_contents_line_index, &tmp_line_index, 2667 &title_contents_beg, &title_contents_end)) 2668 { 2669 title_is_multiline = (tmp_line_index != title_contents_line_index); 2670 title_contents_line_index += line_index; 2671 line_index += tmp_line_index; 2672 } else { 2673 /* Not a title. */ 2674 title_is_multiline = FALSE; 2675 title_contents_beg = off; 2676 title_contents_end = off; 2677 title_contents_line_index = 0; 2678 } 2679 2680 /* Optional whitespace followed with final ')'. 
*/ 2681 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2682 off++; 2683 if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { 2684 line_index++; 2685 if(line_index >= n_lines) 2686 return FALSE; 2687 off = lines[line_index].beg; 2688 } 2689 if(ctx.CH(off) != ')') 2690 goto abort; 2691 off++; 2692 2693 if(title_contents_beg >= title_contents_end) { 2694 attr.title = null; 2695 attr.title_size = 0; 2696 attr.title_needs_free = false; 2697 } else if(!title_is_multiline) { 2698 attr.title = cast(CHAR*) ctx.STR(title_contents_beg); // Note: const_cast here! 2699 attr.title_size = title_contents_end - title_contents_beg; 2700 attr.title_needs_free = false; 2701 } else { 2702 ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, 2703 lines + title_contents_line_index, n_lines - title_contents_line_index, 2704 '\n', &attr.title, &attr.title_size)); 2705 if (ret < 0) goto abort; 2706 attr.title_needs_free = true; 2707 } 2708 2709 *p_end = off; 2710 ret = TRUE; 2711 2712 abort: 2713 return ret; 2714 } 2715 2716 void md_free_ref_defs(MD_CTX* ctx) 2717 { 2718 int i; 2719 2720 for(i = 0; i < ctx.n_ref_defs; i++) { 2721 MD_REF_DEF* def = &ctx.ref_defs[i]; 2722 2723 if(def.label_needs_free) 2724 free(cast(void*)def.label); // Note: const_cast here 2725 if(def.title_needs_free) 2726 free(cast(void*)def.title); // Note: const_cast here 2727 } 2728 2729 free(ctx.ref_defs); 2730 } 2731 2732 2733 /****************************************** 2734 *** Processing Inlines (a.k.a Spans) *** 2735 ******************************************/ 2736 2737 /* We process inlines in few phases: 2738 * 2739 * (1) We go through the block text and collect all significant characters 2740 * which may start/end a span or some other significant position into 2741 * ctx.marks[]. Core of this is what md_collect_marks() does. 2742 * 2743 * We also do some very brief preliminary context-less analysis, whether 2744 * it might be opener or closer (e.g. of an emphasis span). 
2745 * 2746 * This speeds the other steps as we do not need to re-iterate over all 2747 * characters anymore. 2748 * 2749 * (2) We analyze each potential mark types, in order by their precedence. 2750 * 2751 * In each md_analyze_XXX() function, we re-iterate list of the marks, 2752 * skipping already resolved regions (in preceding precedences) and try to 2753 * resolve them. 2754 * 2755 * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark 2756 * them as resolved. 2757 * 2758 * (2.2) For range-type marks, we analyze whether the mark could be closer 2759 * and, if yes, whether there is some preceding opener it could satisfy. 2760 * 2761 * If not we check whether it could be really an opener and if yes, we 2762 * remember it so subsequent closers may resolve it. 2763 * 2764 * (3) Finally, when all marks were analyzed, we render the block contents 2765 * by calling MD_RENDERER::text() callback, interrupting by ::enter_span() 2766 * or ::close_span() whenever we reach a resolved mark. 2767 */ 2768 2769 2770 /* The mark structure. 2771 * 2772 * '\\': Maybe escape sequence. 2773 * '\0': null char. 2774 * '*': Maybe (strong) emphasis start/end. 2775 * '_': Maybe (strong) emphasis start/end. 2776 * '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH). 2777 * '`': Maybe code span start/end. 2778 * '&': Maybe start of entity. 2779 * ';': Maybe end of entity. 2780 * '<': Maybe start of raw HTML or autolink. 2781 * '>': Maybe end of raw HTML or autolink. 2782 * '[': Maybe start of link label or link text. 2783 * '!': Equivalent of '[' for image. 2784 * ']': Maybe end of link label or link text. 2785 * '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS). 2786 * ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS). 2787 * '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS). 2788 * 'D': Dummy mark, it reserves a space for splitting a previous mark 2789 * (e.g. 
emphasis) or to make more space for storing some special data 2790 * related to the preceding mark (e.g. link). 2791 * 2792 * Note that not all instances of these chars in the text imply creation of the 2793 * structure. Only those which have (or may have, after we see more context) 2794 * the special meaning. 2795 * 2796 * (Keep this struct as small as possible to fit as much of them into CPU 2797 * cache line.) 2798 */ 2799 2800 struct MD_MARK { 2801 OFF beg; 2802 OFF end; 2803 2804 /* For unresolved openers, 'prev' and 'next' form the chain of open openers 2805 * of given type 'ch'. 2806 * 2807 * During resolving, we disconnect from the chain and point to the 2808 * corresponding counterpart so opener points to its closer and vice versa. 2809 */ 2810 int prev; 2811 int next; 2812 CHAR ch; 2813 ubyte flags; 2814 }; 2815 2816 /* Mark flags (these apply to ALL mark types). */ 2817 enum MD_MARK_POTENTIAL_OPENER = 0x01; /* Maybe opener. */ 2818 enum MD_MARK_POTENTIAL_CLOSER = 0x02; /* Maybe closer. */ 2819 enum MD_MARK_OPENER = 0x04; /* Definitely opener. */ 2820 enum MD_MARK_CLOSER = 0x08; /* Definitely closer. */ 2821 enum MD_MARK_RESOLVED = 0x10; /* Resolved in any definite way. */ 2822 2823 /* Mark flags specific for various mark types (so they can share bits). */ 2824 enum MD_MARK_EMPH_INTRAWORD = 0x20; /* Helper for the "rule of 3". */ 2825 enum MD_MARK_EMPH_MOD3_0 = 0x40; 2826 enum MD_MARK_EMPH_MOD3_1 = 0x80; 2827 enum MD_MARK_EMPH_MOD3_2 = (0x40 | 0x80); 2828 enum MD_MARK_EMPH_MOD3_MASK = (0x40 | 0x80); 2829 enum MD_MARK_AUTOLINK = 0x20; /* Distinguisher for '<', '>'. */ 2830 enum MD_MARK_VALIDPERMISSIVEAUTOLINK = 0x20; /* For permissive autolinks. 
*/ 2831 2832 MD_MARKCHAIN* md_asterisk_chain(MD_CTX* ctx, uint flags) 2833 { 2834 switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) 2835 { 2836 case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0: return ctx.ASTERISK_OPENERS_intraword_mod3_0; 2837 case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1: return ctx.ASTERISK_OPENERS_intraword_mod3_1; 2838 case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2: return ctx.ASTERISK_OPENERS_intraword_mod3_2; 2839 case MD_MARK_EMPH_MOD3_0: return ctx.ASTERISK_OPENERS_extraword_mod3_0; 2840 case MD_MARK_EMPH_MOD3_1: return ctx.ASTERISK_OPENERS_extraword_mod3_1; 2841 case MD_MARK_EMPH_MOD3_2: return ctx.ASTERISK_OPENERS_extraword_mod3_2; 2842 default: assert(false); 2843 } 2844 } 2845 2846 MD_MARKCHAIN* md_mark_chain(MD_CTX* ctx, int mark_index) 2847 { 2848 MD_MARK* mark = &ctx.marks[mark_index]; 2849 2850 switch(mark.ch) { 2851 case '*': return md_asterisk_chain(ctx, mark.flags); 2852 case '_': return ctx.UNDERSCORE_OPENERS; 2853 case '~': return ctx.TILDE_OPENERS; 2854 case '[': return ctx.BRACKET_OPENERS; 2855 case '|': return ctx.TABLECELLBOUNDARIES; 2856 default: return null; 2857 } 2858 } 2859 2860 MD_MARK* md_push_mark(MD_CTX* ctx) 2861 { 2862 if(ctx.n_marks >= ctx.alloc_marks) { 2863 MD_MARK* new_marks; 2864 2865 ctx.alloc_marks = (ctx.alloc_marks > 0 ? 
ctx.alloc_marks * 2 : 64); 2866 new_marks = cast(MD_MARK*) realloc_safe(ctx.marks, ctx.alloc_marks * MD_MARK.sizeof); 2867 if(new_marks == null) { 2868 ctx.MD_LOG("realloc() failed."); 2869 return null; 2870 } 2871 2872 ctx.marks = new_marks; 2873 } 2874 2875 return &ctx.marks[ctx.n_marks++]; 2876 } 2877 2878 int PUSH_MARK_(MD_CTX* ctx, MD_MARK** mark) 2879 { 2880 *mark = md_push_mark(ctx); 2881 if(*mark == null) 2882 { 2883 return -1; 2884 } 2885 return 0; 2886 } 2887 2888 int PUSH_MARK(MD_CTX* ctx, MD_MARK** mark, CHAR ch_, OFF beg_, OFF end_, int flags_) 2889 { 2890 int ret = PUSH_MARK_(ctx, mark); 2891 if (ret != 0) 2892 return ret; 2893 2894 (*mark).beg = (beg_); 2895 (*mark).end = (end_); 2896 (*mark).prev = -1; 2897 (*mark).next = -1; 2898 (*mark).ch = cast(char)(ch_); 2899 (*mark).flags = cast(ubyte)flags_; 2900 return 0; 2901 } 2902 2903 static void 2904 md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index) 2905 { 2906 if(chain.tail >= 0) 2907 ctx.marks[chain.tail].next = mark_index; 2908 else 2909 chain.head = mark_index; 2910 2911 ctx.marks[mark_index].prev = chain.tail; 2912 chain.tail = mark_index; 2913 } 2914 2915 /* Sometimes, we need to store a pointer into the mark. It is quite rare 2916 * so we do not bother to make MD_MARK use union, and it can only happen 2917 * for dummy marks. */ 2918 void md_mark_store_ptr(MD_CTX* ctx, int mark_index, const(void)* ptr) 2919 { 2920 MD_MARK* mark = &ctx.marks[mark_index]; 2921 assert(mark.ch == 'D'); 2922 2923 /* Check only members beg and end are misused for this. 
*/ 2924 assert((void*).sizeof <= 2 * OFF.sizeof); 2925 memcpy(mark, &ptr, (void*).sizeof); 2926 } 2927 2928 static void* 2929 md_mark_get_ptr(MD_CTX* ctx, int mark_index) 2930 { 2931 void* ptr; 2932 MD_MARK* mark = &ctx.marks[mark_index]; 2933 assert(mark.ch == 'D'); 2934 memcpy(&ptr, mark, (void*).sizeof); 2935 return ptr; 2936 } 2937 2938 static void 2939 md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index) 2940 { 2941 MD_MARK* opener = &ctx.marks[opener_index]; 2942 MD_MARK* closer = &ctx.marks[closer_index]; 2943 2944 /* Remove opener from the list of openers. */ 2945 if(chain != null) { 2946 if(opener.prev >= 0) 2947 ctx.marks[opener.prev].next = opener.next; 2948 else 2949 chain.head = opener.next; 2950 2951 if(opener.next >= 0) 2952 ctx.marks[opener.next].prev = opener.prev; 2953 else 2954 chain.tail = opener.prev; 2955 } 2956 2957 /* Interconnect opener and closer and mark both as resolved. */ 2958 opener.next = closer_index; 2959 opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; 2960 closer.prev = opener_index; 2961 closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; 2962 } 2963 2964 2965 enum MD_ROLLBACK_ALL = 0; 2966 enum MD_ROLLBACK_CROSSING = 1; 2967 2968 /* In the range ctx.marks[opener_index] ... [closer_index], undo some or all 2969 * resolvings accordingly to these rules: 2970 * 2971 * (1) All openers BEFORE the range corresponding to any closer inside the 2972 * range are un-resolved and they are re-added to their respective chains 2973 * of unresolved openers. This ensures we can reuse the opener for closers 2974 * AFTER the range. 2975 * 2976 * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range 2977 * are discarded. 2978 * 2979 * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled 2980 * in (1) are discarded. I.e. pairs of openers and closers which are both 2981 * inside the range are retained as well as any unpaired marks. 
*/
static void
md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
{
    int i;
    int mark_index;

    /* Cut all unresolved openers at the mark index. */
    for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
        MD_MARKCHAIN* chain = &ctx.mark_chains[i];

        while(chain.tail >= opener_index)
            chain.tail = ctx.marks[chain.tail].prev;

        if(chain.tail >= 0)
            ctx.marks[chain.tail].next = -1;
        else
            chain.head = -1;
    }

    /* Go backwards so that un-resolved openers are re-added into their
     * respective chains, in the right order. */
    mark_index = closer_index - 1;
    while(mark_index > opener_index) {
        MD_MARK* mark = &ctx.marks[mark_index];
        int mark_flags = mark.flags;    /* Flags as they were before any reset below. */
        int discard_flag = (how == MD_ROLLBACK_ALL);

        if(mark.flags & MD_MARK_CLOSER) {
            int mark_opener_index = mark.prev;

            /* Undo opener BEFORE the range. */
            if(mark_opener_index < opener_index) {
                MD_MARK* mark_opener = &ctx.marks[mark_opener_index];
                MD_MARKCHAIN* chain;

                mark_opener.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);

                /* The un-resolved opener has to return into ITS OWN chain
                 * (rule (1) above), hence the lookup by mark_opener_index.
                 * Looking the chain up by opener_index would put it into
                 * the chain of the range opener, which may belong to
                 * a different mark character altogether. */
                chain = md_mark_chain(ctx, mark_opener_index);
                if(chain != null) {
                    md_mark_chain_append(ctx, chain, mark_opener_index);
                    discard_flag = 1;
                }
            }
        }

        /* And reset our flags. */
        if(discard_flag)
            mark.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);

        /* Jump as far as we can over unresolved or non-interesting marks. */
        if(how == MD_ROLLBACK_CROSSING && (mark_flags & MD_MARK_CLOSER) && mark.prev > opener_index) {
            /* If we are closer with opener INSIDE the range, there may
             * not be any other crosser inside the subrange. */
            mark_index = mark.prev;
        } else {
            mark_index--;
        }
    }
}

/* (Re)build ctx.mark_char_map: a per-character lookup table where a non-zero
 * entry says the character may start a mark and so md_collect_marks() has to
 * stop at it. Characters of optional extensions are enabled only if the
 * corresponding parser flag is set. */
void md_build_mark_char_map(MD_CTX* ctx)
{
    memset(ctx.mark_char_map.ptr, 0, ctx.mark_char_map.length);

    /* Characters which are always of interest. */
    ctx.mark_char_map['\\'] = 1;
    ctx.mark_char_map['*'] = 1;
    ctx.mark_char_map['_'] = 1;
    ctx.mark_char_map['`'] = 1;
    ctx.mark_char_map['&'] = 1;
    ctx.mark_char_map[';'] = 1;
    ctx.mark_char_map['<'] = 1;
    ctx.mark_char_map['>'] = 1;
    ctx.mark_char_map['['] = 1;
    ctx.mark_char_map['!'] = 1;
    ctx.mark_char_map[']'] = 1;
    ctx.mark_char_map['\0'] = 1;

    /* Characters of the optional extensions. */
    if(ctx.parser.flags & MD_FLAG_STRIKETHROUGH)
        ctx.mark_char_map['~'] = 1;

    if(ctx.parser.flags & MD_FLAG_LATEXMATHSPANS)
        ctx.mark_char_map['$'] = 1;

    if(ctx.parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
        ctx.mark_char_map['@'] = 1;

    if(ctx.parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
        ctx.mark_char_map[':'] = 1;

    if(ctx.parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
        ctx.mark_char_map['.'] = 1;

    if(ctx.parser.flags & MD_FLAG_TABLES)
        ctx.mark_char_map['|'] = 1;

    /* When collapsing whitespace, every whitespace character is of interest. */
    if(ctx.parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
        int i;

        for(i = 0; i < cast(int) (ctx.mark_char_map).sizeof; i++) {
            if(ISWHITESPACE_(cast(CHAR)i))
                ctx.mark_char_map[i] = 1;
        }
    }
}

/* We limit code span marks to lower than 32 backticks. This solves the
 * pathologic case of too many openers, each of different length: Their
 * resolving would be then O(n^2).
*/ 3097 enum CODESPAN_MARK_MAXLEN = 32; 3098 3099 int md_is_code_span(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg, 3100 OFF* p_opener_beg, OFF* p_opener_end, 3101 OFF* p_closer_beg, OFF* p_closer_end, 3102 OFF* last_potential_closers, 3103 int* p_reached_paragraph_end) 3104 { 3105 OFF opener_beg = beg; 3106 OFF opener_end; 3107 OFF closer_beg; 3108 OFF closer_end; 3109 SZ mark_len; 3110 OFF line_end; 3111 int has_space_after_opener = FALSE; 3112 int has_eol_after_opener = FALSE; 3113 int has_space_before_closer = FALSE; 3114 int has_eol_before_closer = FALSE; 3115 int has_only_space = TRUE; 3116 int line_index = 0; 3117 3118 line_end = lines[0].end; 3119 opener_end = opener_beg; 3120 while(opener_end < line_end && ctx.CH(opener_end) == '`') 3121 opener_end++; 3122 has_space_after_opener = (opener_end < line_end && ctx.CH(opener_end) == ' '); 3123 has_eol_after_opener = (opener_end == line_end); 3124 3125 /* The caller needs to know end of the opening mark even if we fail. */ 3126 *p_opener_end = opener_end; 3127 3128 mark_len = opener_end - opener_beg; 3129 if(mark_len > CODESPAN_MARK_MAXLEN) 3130 return FALSE; 3131 3132 /* Check whether we already know there is no closer of this length. 3133 * If so, re-scan does no sense. This fixes issue #59. */ 3134 if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end || 3135 (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end)) 3136 return FALSE; 3137 3138 closer_beg = opener_end; 3139 closer_end = opener_end; 3140 3141 /* Find closer mark. */ 3142 while(TRUE) { 3143 while(closer_beg < line_end && ctx.CH(closer_beg) != '`') { 3144 if(ctx.CH(closer_beg) != ' ') 3145 has_only_space = FALSE; 3146 closer_beg++; 3147 } 3148 closer_end = closer_beg; 3149 while(closer_end < line_end && ctx.CH(closer_end) == '`') 3150 closer_end++; 3151 3152 if(closer_end - closer_beg == mark_len) { 3153 /* Success. 
*/ 3154 has_space_before_closer = (closer_beg > lines[line_index].beg && ctx.CH(closer_beg-1) == ' '); 3155 has_eol_before_closer = (closer_beg == lines[line_index].beg); 3156 break; 3157 } 3158 3159 if(closer_end - closer_beg > 0) { 3160 /* We have found a back-tick which is not part of the closer. */ 3161 has_only_space = FALSE; 3162 3163 /* But if we eventually fail, remember it as a potential closer 3164 * of its own length for future attempts. This mitigates needs for 3165 * rescans. */ 3166 if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) { 3167 if(closer_beg > last_potential_closers[closer_end - closer_beg - 1]) 3168 last_potential_closers[closer_end - closer_beg - 1] = closer_beg; 3169 } 3170 } 3171 3172 if(closer_end >= line_end) { 3173 line_index++; 3174 if(line_index >= n_lines) { 3175 /* Reached end of the paragraph and still nothing. */ 3176 *p_reached_paragraph_end = TRUE; 3177 return FALSE; 3178 } 3179 /* Try on the next line. */ 3180 line_end = lines[line_index].end; 3181 closer_beg = lines[line_index].beg; 3182 } else { 3183 closer_beg = closer_end; 3184 } 3185 } 3186 3187 /* If there is a space or a new line both after and before the opener 3188 * (and if the code span is not made of spaces only), consume one initial 3189 * and one trailing space as part of the marks. */ 3190 if(!has_only_space && 3191 (has_space_after_opener || has_eol_after_opener) && 3192 (has_space_before_closer || has_eol_before_closer)) 3193 { 3194 if(has_space_after_opener) 3195 opener_end++; 3196 else 3197 opener_end = lines[1].beg; 3198 3199 if(has_space_before_closer) 3200 closer_beg--; 3201 else { 3202 closer_beg = lines[line_index-1].end; 3203 /* We need to eat the preceding "\r\n" but not any line trailing 3204 * spaces. 
*/ 3205 while(closer_beg < ctx.size && ctx.ISBLANK(closer_beg)) 3206 closer_beg++; 3207 } 3208 } 3209 3210 *p_opener_beg = opener_beg; 3211 *p_opener_end = opener_end; 3212 *p_closer_beg = closer_beg; 3213 *p_closer_end = closer_end; 3214 return TRUE; 3215 } 3216 3217 static int 3218 md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) 3219 { 3220 OFF off = beg+1; 3221 3222 assert(ctx.CH(beg) == '<'); 3223 3224 /* Check for scheme. */ 3225 if(off >= max_end || !ctx.ISASCII(off)) 3226 return FALSE; 3227 off++; 3228 while(1) { 3229 if(off >= max_end) 3230 return FALSE; 3231 if(off - beg > 32) 3232 return FALSE; 3233 if(ctx.CH(off) == ':' && off - beg >= 3) 3234 break; 3235 if(!ctx.ISALNUM(off) && ctx.CH(off) != '+' && ctx.CH(off) != '-' && ctx.CH(off) != '.') 3236 return FALSE; 3237 off++; 3238 } 3239 3240 /* Check the path after the scheme. */ 3241 while(off < max_end && ctx.CH(off) != '>') { 3242 if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off) || ctx.CH(off) == '<') 3243 return FALSE; 3244 off++; 3245 } 3246 3247 if(off >= max_end) 3248 return FALSE; 3249 3250 assert(ctx.CH(off) == '>'); 3251 *p_end = off+1; 3252 return TRUE; 3253 } 3254 3255 static int 3256 md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) 3257 { 3258 OFF off = beg + 1; 3259 int label_len; 3260 3261 assert(ctx.CH(beg) == '<'); 3262 3263 /* The code should correspond to this regexp: 3264 /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+ 3265 @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? 3266 (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ 3267 */ 3268 3269 /* Username (before '@'). */ 3270 while(off < max_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, ".!#$%&'*+/=?^_`{|}~-"))) 3271 off++; 3272 if(off <= beg+1) 3273 return FALSE; 3274 3275 /* '@' */ 3276 if(off >= max_end || ctx.CH(off) != '@') 3277 return FALSE; 3278 off++; 3279 3280 /* Labels delimited with '.'; each label is sequence of 1 - 62 alnum 3281 * characters or '-', but '-' is not allowed as first or last char. 
*/ 3282 label_len = 0; 3283 while(off < max_end) { 3284 if(ctx.ISALNUM(off)) 3285 label_len++; 3286 else if(ctx.CH(off) == '-' && label_len > 0) 3287 label_len++; 3288 else if(ctx.CH(off) == '.' && label_len > 0 && ctx.CH(off-1) != '-') 3289 label_len = 0; 3290 else 3291 break; 3292 3293 if(label_len > 62) 3294 return FALSE; 3295 3296 off++; 3297 } 3298 3299 if(label_len <= 0 || off >= max_end || ctx.CH(off) != '>' || ctx.CH(off-1) == '-') 3300 return FALSE; 3301 3302 *p_end = off+1; 3303 return TRUE; 3304 } 3305 3306 static int 3307 md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto) 3308 { 3309 if(md_is_autolink_uri(ctx, beg, max_end, p_end)) { 3310 *p_missing_mailto = FALSE; 3311 return TRUE; 3312 } 3313 3314 if(md_is_autolink_email(ctx, beg, max_end, p_end)) { 3315 *p_missing_mailto = TRUE; 3316 return TRUE; 3317 } 3318 3319 return FALSE; 3320 } 3321 3322 /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */ 3323 bool IS_MARK_CHAR(MD_CTX* ctx, OFF off) 3324 { 3325 return (ctx.mark_char_map[cast(ubyte) ctx.CH(off)]) != 0; 3326 } 3327 3328 int md_collect_marks(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, int table_mode) 3329 { 3330 int i; 3331 int ret = 0; 3332 MD_MARK* mark; 3333 OFF[CODESPAN_MARK_MAXLEN] codespan_last_potential_closers = 3334 [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]; 3336 3337 int codespan_scanned_till_paragraph_end = FALSE; 3338 3339 for(i = 0; i < n_lines; i++) 3340 { 3341 const(MD_LINE)* line = &lines[i]; 3342 OFF off = line.beg; 3343 OFF line_end = line.end; 3344 3345 while(true) 3346 { 3347 CHAR ch; 3348 3349 /* Optimization: Use some loop unrolling. 
*/ 3350 while(off + 3 < line_end && !IS_MARK_CHAR(ctx, off+0) && !IS_MARK_CHAR(ctx, off+1) 3351 && !IS_MARK_CHAR(ctx, off+2) && !IS_MARK_CHAR(ctx, off+3)) 3352 off += 4; 3353 while(off < line_end && !IS_MARK_CHAR(ctx, off+0)) 3354 off++; 3355 3356 if(off >= line_end) 3357 break; 3358 3359 ch = ctx.CH(off); 3360 3361 /* A backslash escape. 3362 * It can go beyond line.end as it may involve escaped new 3363 * line to form a hard break. */ 3364 if(ch == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { 3365 /* Hard-break cannot be on the last line of the block. */ 3366 if(!ctx.ISNEWLINE(off+1) || i+1 < n_lines) 3367 { 3368 ret = PUSH_MARK(ctx, &mark, ch, off, off+2, MD_MARK_RESOLVED); 3369 if (ret != 0) goto abort; 3370 } 3371 off += 2; 3372 continue; 3373 } 3374 3375 /* A potential (string) emphasis start/end. */ 3376 if(ch == '*' || ch == '_') { 3377 OFF tmp = off+1; 3378 int left_level; /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */ 3379 int right_level; /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */ 3380 3381 while(tmp < line_end && ctx.CH(tmp) == ch) 3382 tmp++; 3383 3384 if(off == line.beg || ctx.ISUNICODEWHITESPACEBEFORE(off)) 3385 left_level = 0; 3386 else if(ctx.ISUNICODEPUNCTBEFORE(off)) 3387 left_level = 1; 3388 else 3389 left_level = 2; 3390 3391 if(tmp == line_end || ctx.ISUNICODEWHITESPACE(tmp)) 3392 right_level = 0; 3393 else if(ctx.ISUNICODEPUNCT(tmp)) 3394 right_level = 1; 3395 else 3396 right_level = 2; 3397 3398 /* Intra-word underscore doesn't have special meaning. 
*/ 3399 if(ch == '_' && left_level == 2 && right_level == 2) { 3400 left_level = 0; 3401 right_level = 0; 3402 } 3403 3404 if(left_level != 0 || right_level != 0) { 3405 uint flags = 0; 3406 3407 if(left_level > 0 && left_level >= right_level) 3408 flags |= MD_MARK_POTENTIAL_CLOSER; 3409 if(right_level > 0 && right_level >= left_level) 3410 flags |= MD_MARK_POTENTIAL_OPENER; 3411 if(left_level == 2 && right_level == 2) 3412 flags |= MD_MARK_EMPH_INTRAWORD; 3413 3414 /* For "the rule of three" we need to remember the original 3415 * size of the mark (modulo three), before we potentially 3416 * split the mark when being later resolved partially by some 3417 * shorter closer. */ 3418 switch((tmp - off) % 3) 3419 { 3420 case 0: flags |= MD_MARK_EMPH_MOD3_0; break; 3421 case 1: flags |= MD_MARK_EMPH_MOD3_1; break; 3422 case 2: flags |= MD_MARK_EMPH_MOD3_2; break; 3423 default: break; 3424 } 3425 3426 ret = PUSH_MARK(ctx, &mark, ch, off, tmp, flags); 3427 if (ret != 0) goto abort; 3428 3429 /* During resolving, multiple asterisks may have to be 3430 * split into independent span start/ends. Consider e.g. 3431 * "**foo* bar*". Therefore we push also some empty dummy 3432 * marks to have enough space for that. */ 3433 off++; 3434 while(off < tmp) { 3435 ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); 3436 if (ret != 0) goto abort; 3437 off++; 3438 } 3439 continue; 3440 } 3441 3442 off = tmp; 3443 continue; 3444 } 3445 3446 /* A potential code span start/end. 
*/ 3447 if(ch == '`') { 3448 OFF opener_beg, opener_end; 3449 OFF closer_beg, closer_end; 3450 int is_code_span; 3451 3452 is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off, 3453 &opener_beg, &opener_end, &closer_beg, &closer_end, 3454 codespan_last_potential_closers.ptr, 3455 &codespan_scanned_till_paragraph_end); 3456 if(is_code_span) { 3457 ret = PUSH_MARK(ctx, &mark, '`', opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED); 3458 if (ret != 0) goto abort; 3459 ret = PUSH_MARK(ctx, &mark, '`', closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); 3460 if (ret != 0) goto abort; 3461 ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1; 3462 ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2; 3463 3464 off = closer_end; 3465 3466 /* Advance the current line accordingly. */ 3467 while(off > line_end) { 3468 i++; 3469 line++; 3470 line_end = line.end; 3471 } 3472 continue; 3473 } 3474 3475 off = opener_end; 3476 continue; 3477 } 3478 3479 /* A potential entity start. */ 3480 if(ch == '&') { 3481 ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER); 3482 if (ret != 0) goto abort; 3483 off++; 3484 continue; 3485 } 3486 3487 /* A potential entity end. */ 3488 if(ch == ';') { 3489 /* We surely cannot be entity unless the previous mark is '&'. */ 3490 if(ctx.n_marks > 0 && ctx.marks[ctx.n_marks-1].ch == '&') 3491 { 3492 ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); 3493 if (ret != 0) goto abort; 3494 } 3495 3496 off++; 3497 continue; 3498 } 3499 3500 /* A potential autolink or raw HTML start/end. */ 3501 if(ch == '<') { 3502 int is_autolink; 3503 OFF autolink_end; 3504 int missing_mailto; 3505 3506 if(!(ctx.parser.flags & MD_FLAG_NOHTMLSPANS)) { 3507 int is_html; 3508 OFF html_end; 3509 3510 /* Given the nature of the raw HTML, we have to recognize 3511 * it here. Doing so later in md_analyze_lt_gt() could 3512 * open can of worms of quadratic complexity. 
*/ 3513 is_html = md_is_html_any(ctx, lines + i, n_lines - i, off, 3514 lines[n_lines-1].end, &html_end); 3515 if(is_html) { 3516 ret = PUSH_MARK(ctx, &mark, '<', off, off, MD_MARK_OPENER | MD_MARK_RESOLVED); 3517 if (ret != 0) goto abort; 3518 ret = PUSH_MARK(ctx, &mark, '>', html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); 3519 if (ret != 0) goto abort; 3520 ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1; 3521 ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2; 3522 off = html_end; 3523 3524 /* Advance the current line accordingly. */ 3525 while(off > line_end) { 3526 i++; 3527 line++; 3528 line_end = line.end; 3529 } 3530 continue; 3531 } 3532 } 3533 3534 is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end, 3535 &autolink_end, &missing_mailto); 3536 if(is_autolink) { 3537 ret = PUSH_MARK(ctx, &mark, (missing_mailto ? '@' : '<'), off, off+1, 3538 MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); 3539 if (ret != 0) goto abort; 3540 ret = PUSH_MARK(ctx, &mark, '>', autolink_end-1, autolink_end, 3541 MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); 3542 if (ret != 0) goto abort; 3543 ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1; 3544 ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2; 3545 off = autolink_end; 3546 continue; 3547 } 3548 3549 off++; 3550 continue; 3551 } 3552 3553 /* A potential link or its part. */ 3554 if(ch == '[' || (ch == '!' && off+1 < line_end && ctx.CH(off+1) == '[')) { 3555 OFF tmp = (ch == '[' ? off+1 : off+2); 3556 ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER); 3557 if (ret != 0) goto abort; 3558 off = tmp; 3559 /* Two dummies to make enough place for data we need if it is 3560 * a link. 
*/ 3561 ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); 3562 if (ret != 0) goto abort; 3563 ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); 3564 if (ret != 0) goto abort; 3565 continue; 3566 } 3567 if(ch == ']') { 3568 ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); 3569 if (ret != 0) goto abort; 3570 off++; 3571 continue; 3572 } 3573 3574 /* A potential permissive e-mail autolink. */ 3575 if(ch == '@') { 3576 if(line.beg + 1 <= off && ctx.ISALNUM(off-1) && 3577 off + 3 < line.end && ctx.ISALNUM(off+1)) 3578 { 3579 ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER); 3580 if (ret != 0) goto abort; 3581 /* Push a dummy as a reserve for a closer. */ 3582 ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); 3583 if (ret != 0) goto abort; 3584 } 3585 3586 off++; 3587 continue; 3588 } 3589 3590 /* A potential permissive URL autolink. */ 3591 if(ch == ':') 3592 { 3593 static struct Scheme 3594 { 3595 const(CHAR)* scheme; 3596 SZ scheme_size; 3597 const(CHAR)* suffix; 3598 SZ suffix_size; 3599 } 3600 3601 static immutable Scheme[] scheme_map = 3602 [ 3603 Scheme("http", 4, "//", 2), 3604 Scheme("https", 5, "//", 2), 3605 Scheme("ftp", 3, "//", 2) 3606 ]; 3607 3608 int scheme_index; 3609 3610 for(scheme_index = 0; scheme_index < cast(int) (scheme_map.length); scheme_index++) { 3611 const(CHAR)* scheme = scheme_map[scheme_index].scheme; 3612 const SZ scheme_size = scheme_map[scheme_index].scheme_size; 3613 const(CHAR)* suffix = scheme_map[scheme_index].suffix; 3614 const SZ suffix_size = scheme_map[scheme_index].suffix_size; 3615 3616 if(line.beg + scheme_size <= off && md_ascii_eq(ctx.STR(off-scheme_size), scheme, scheme_size) && 3617 (line.beg + scheme_size == off || ctx.ISWHITESPACE(off-scheme_size-1) || ctx.ISANYOF(off-scheme_size-1, "*_~([")) && 3618 off + 1 + suffix_size < line.end && md_ascii_eq(ctx.STR(off+1), suffix, suffix_size)) 3619 { 3620 ret = PUSH_MARK(ctx, &mark, ch, off-scheme_size, off+1+suffix_size, 
MD_MARK_POTENTIAL_OPENER); 3621 if (ret != 0) goto abort; 3622 /* Push a dummy as a reserve for a closer. */ 3623 ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); 3624 if (ret != 0) goto abort; 3625 off += 1 + suffix_size; 3626 continue; 3627 } 3628 } 3629 3630 off++; 3631 continue; 3632 } 3633 3634 /* A potential permissive WWW autolink. */ 3635 if(ch == '.') { 3636 if(line.beg + 3 <= off && md_ascii_eq(ctx.STR(off-3), "www", 3) && 3637 (line.beg + 3 == off || ctx.ISWHITESPACE(off-4) || ctx.ISANYOF(off-4, "*_~([")) && 3638 off + 1 < line_end) 3639 { 3640 ret = PUSH_MARK(ctx, &mark, ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER); 3641 if (ret != 0) goto abort; 3642 /* Push a dummy as a reserve for a closer. */ 3643 ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0); 3644 if (ret != 0) goto abort; 3645 off++; 3646 continue; 3647 } 3648 3649 off++; 3650 continue; 3651 } 3652 3653 /* A potential table cell boundary. */ 3654 if(table_mode && ch == '|') { 3655 ret = PUSH_MARK(ctx, &mark, ch, off, off+1, 0); 3656 if (ret != 0) goto abort; 3657 off++; 3658 continue; 3659 } 3660 3661 /* A potential strikethrough start/end. */ 3662 if(ch == '~') { 3663 OFF tmp = off+1; 3664 3665 while(tmp < line_end && ctx.CH(tmp) == '~') 3666 tmp++; 3667 3668 ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); 3669 if (ret != 0) goto abort; 3670 off = tmp; 3671 continue; 3672 } 3673 3674 /* A potential equation start/end */ 3675 if(ch == '$') { 3676 /* We can have at most two consecutive $ signs, 3677 * where two dollar signs signify a display equation. */ 3678 OFF tmp = off+1; 3679 3680 while(tmp < line_end && ctx.CH(tmp) == '$') 3681 tmp++; 3682 3683 if (tmp - off <= 2) 3684 { 3685 ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); 3686 if (ret != 0) goto abort; 3687 } 3688 off = tmp; 3689 continue; 3690 } 3691 3692 /* Turn non-trivial whitespace into single space. 
*/ 3693 if(ISWHITESPACE_(ch)) { 3694 OFF tmp = off+1; 3695 3696 while(tmp < line_end && ctx.ISWHITESPACE(tmp)) 3697 tmp++; 3698 3699 if(tmp - off > 1 || ch != ' ') 3700 { 3701 ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_RESOLVED); 3702 if (ret != 0) goto abort; 3703 } 3704 3705 off = tmp; 3706 continue; 3707 } 3708 3709 /* null character. */ 3710 if(ch == '\0') { 3711 ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_RESOLVED); 3712 if (ret != 0) goto abort; 3713 off++; 3714 continue; 3715 } 3716 3717 off++; 3718 } 3719 } 3720 3721 /* Add a dummy mark at the end of the mark vector to simplify 3722 * process_inlines(). */ 3723 ret = PUSH_MARK(ctx, &mark, 127, ctx.size, ctx.size, MD_MARK_RESOLVED); 3724 if (ret != 0) goto abort; 3725 3726 abort: 3727 return ret; 3728 } 3729 3730 static void 3731 md_analyze_bracket(MD_CTX* ctx, int mark_index) 3732 { 3733 /* We cannot really resolve links here as for that we would need 3734 * more context. E.g. a following pair of brackets (reference link), 3735 * or enclosing pair of brackets (if the inner is the link, the outer 3736 * one cannot be.) 3737 * 3738 * Therefore we here only construct a list of resolved '[' ']' pairs 3739 * ordered by position of the closer. This allows ur to analyze what is 3740 * or is not link in the right order, from inside to outside in case 3741 * of nested brackets. 3742 * 3743 * The resolving itself is deferred into md_resolve_links(). 3744 */ 3745 3746 MD_MARK* mark = &ctx.marks[mark_index]; 3747 3748 if(mark.flags & MD_MARK_POTENTIAL_OPENER) { 3749 md_mark_chain_append(ctx, ctx.BRACKET_OPENERS, mark_index); 3750 return; 3751 } 3752 3753 if(ctx.BRACKET_OPENERS.tail >= 0) { 3754 /* Pop the opener from the chain. 
*/ 3755 int opener_index = ctx.BRACKET_OPENERS.tail; 3756 MD_MARK* opener = &ctx.marks[opener_index]; 3757 if(opener.prev >= 0) 3758 ctx.marks[opener.prev].next = -1; 3759 else 3760 ctx.BRACKET_OPENERS.head = -1; 3761 ctx.BRACKET_OPENERS.tail = opener.prev; 3762 3763 /* Interconnect the opener and closer. */ 3764 opener.next = mark_index; 3765 mark.prev = opener_index; 3766 3767 /* Add the pair into chain of potential links for md_resolve_links(). 3768 * Note we misuse opener.prev for this as opener.next points to its 3769 * closer. */ 3770 if(ctx.unresolved_link_tail >= 0) 3771 ctx.marks[ctx.unresolved_link_tail].prev = opener_index; 3772 else 3773 ctx.unresolved_link_head = opener_index; 3774 ctx.unresolved_link_tail = opener_index; 3775 opener.prev = -1; 3776 } 3777 } 3778 3779 /* Forward declaration. */ 3780 static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, 3781 int mark_beg, int mark_end); 3782 3783 static int 3784 md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) 3785 { 3786 int opener_index = ctx.unresolved_link_head; 3787 OFF last_link_beg = 0; 3788 OFF last_link_end = 0; 3789 OFF last_img_beg = 0; 3790 OFF last_img_end = 0; 3791 3792 while(opener_index >= 0) { 3793 MD_MARK* opener = &ctx.marks[opener_index]; 3794 int closer_index = opener.next; 3795 MD_MARK* closer = &ctx.marks[closer_index]; 3796 int next_index = opener.prev; 3797 MD_MARK* next_opener; 3798 MD_MARK* next_closer; 3799 MD_LINK_ATTR attr; 3800 int is_link = FALSE; 3801 3802 if(next_index >= 0) { 3803 next_opener = &ctx.marks[next_index]; 3804 next_closer = &ctx.marks[next_opener.next]; 3805 } else { 3806 next_opener = null; 3807 next_closer = null; 3808 } 3809 3810 /* If nested ("[ [ ] ]"), we need to make sure that: 3811 * - The outer does not end inside of (...) belonging to the inner. 3812 * - The outer cannot be link if the inner is link (i.e. not image). 
3813 * 3814 * (Note we here analyze from inner to outer as the marks are ordered 3815 * by closer.beg.) 3816 */ 3817 if((opener.beg < last_link_beg && closer.end < last_link_end) || 3818 (opener.beg < last_img_beg && closer.end < last_img_end) || 3819 (opener.beg < last_link_end && opener.ch == '[')) 3820 { 3821 opener_index = next_index; 3822 continue; 3823 } 3824 3825 if(next_opener != null && next_opener.beg == closer.end) { 3826 if(next_closer.beg > closer.end + 1) { 3827 /* Might be full reference link. */ 3828 is_link = md_is_link_reference(ctx, lines, n_lines, next_opener.beg, next_closer.end, &attr); 3829 } else { 3830 /* Might be shortcut reference link. */ 3831 is_link = md_is_link_reference(ctx, lines, n_lines, opener.beg, closer.end, &attr); 3832 } 3833 3834 if(is_link < 0) 3835 return -1; 3836 3837 if(is_link) { 3838 /* Eat the 2nd "[...]". */ 3839 closer.end = next_closer.end; 3840 } 3841 } else { 3842 if(closer.end < ctx.size && ctx.CH(closer.end) == '(') { 3843 /* Might be inline link. */ 3844 OFF inline_link_end = uint.max; 3845 3846 is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer.end, &inline_link_end, &attr); 3847 if(is_link < 0) 3848 return -1; 3849 3850 /* Check the closing ')' is not inside an already resolved range 3851 * (i.e. a range with a higher priority), e.g. a code span. */ 3852 if(is_link) { 3853 int i = closer_index + 1; 3854 3855 while(i < ctx.n_marks) { 3856 MD_MARK* mark = &ctx.marks[i]; 3857 3858 if(mark.beg >= inline_link_end) 3859 break; 3860 if((mark.flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) { 3861 if(ctx.marks[mark.next].beg >= inline_link_end) { 3862 /* Cancel the link status. 
*/ 3863 if(attr.title_needs_free) 3864 free(cast(void*)(attr.title)); 3865 is_link = FALSE; 3866 break; 3867 } 3868 3869 i = mark.next + 1; 3870 } else { 3871 i++; 3872 } 3873 } 3874 } 3875 3876 if(is_link) { 3877 /* Eat the "(...)" */ 3878 closer.end = inline_link_end; 3879 } 3880 } 3881 3882 if(!is_link) { 3883 /* Might be collapsed reference link. */ 3884 is_link = md_is_link_reference(ctx, lines, n_lines, opener.beg, closer.end, &attr); 3885 if(is_link < 0) 3886 return -1; 3887 } 3888 } 3889 3890 if(is_link) { 3891 /* Resolve the brackets as a link. */ 3892 opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; 3893 closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; 3894 3895 /* If it is a link, we store the destination and title in the two 3896 * dummy marks after the opener. */ 3897 assert(ctx.marks[opener_index+1].ch == 'D'); 3898 ctx.marks[opener_index+1].beg = attr.dest_beg; 3899 ctx.marks[opener_index+1].end = attr.dest_end; 3900 3901 assert(ctx.marks[opener_index+2].ch == 'D'); 3902 md_mark_store_ptr(ctx, opener_index+2, attr.title); 3903 if(attr.title_needs_free) 3904 md_mark_chain_append(ctx, ctx.PTR_CHAIN, opener_index+2); 3905 ctx.marks[opener_index+2].prev = attr.title_size; 3906 3907 if(opener.ch == '[') { 3908 last_link_beg = opener.beg; 3909 last_link_end = closer.end; 3910 } else { 3911 last_img_beg = opener.beg; 3912 last_img_end = closer.end; 3913 } 3914 3915 md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); 3916 } 3917 3918 opener_index = next_index; 3919 } 3920 3921 return 0; 3922 } 3923 3924 /* Analyze whether the mark '&' starts a HTML entity. 3925 * If so, update its flags as well as flags of corresponding closer ';'. */ 3926 static void 3927 md_analyze_entity(MD_CTX* ctx, int mark_index) 3928 { 3929 MD_MARK* opener = &ctx.marks[mark_index]; 3930 MD_MARK* closer; 3931 OFF off; 3932 3933 /* Cannot be entity if there is no closer as the next mark. 
* (Any other mark between would mean strange character which cannot be
     * part of the entity.)
     *
     * So we can do all the work on '&' and do not call this later for the
     * closing mark ';'.
     */
    if(mark_index + 1 >= ctx.n_marks)
        return;
    closer = &ctx.marks[mark_index+1];
    if(closer.ch != ';')
        return;

    if(md_is_entity(ctx, opener.beg, closer.end, &off)) {
        assert(off == closer.end);

        md_resolve_range(ctx, null, mark_index, mark_index+1);
        opener.end = closer.end;
    }
}

/* Record the mark at mark_index as a table cell boundary: flag it as
 * resolved, link it into the ctx.TABLECELLBOUNDARIES chain and bump the
 * boundary counter.
 */
static void
md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
{
    MD_MARK* pipe = &ctx.marks[mark_index];

    pipe.flags |= MD_MARK_RESOLVED;

    md_mark_chain_append(ctx, ctx.TABLECELLBOUNDARIES, mark_index);
    ctx.n_table_cell_boundaries++;
}

/* Split a longer emphasis mark into two.
 *
 * The original mark is shortened by n characters and the dummy 'D' mark
 * located (size - n) slots after it becomes a copy covering those trailing
 * n characters. May only be called if an adequate number of dummy 'D' marks
 * follows the mark.
 *
 * Returns the index of the newly populated mark.
 */
static int
md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
{
    MD_MARK* whole = &ctx.marks[mark_index];
    int split_index = mark_index + (whole.end - whole.beg - n);
    MD_MARK* split = &ctx.marks[split_index];

    assert(whole.end - whole.beg > n);
    assert(split.ch == 'D');

    /* Clone everything first, then give each half its own range. */
    memcpy(split, whole, MD_MARK.sizeof);
    whole.end -= n;
    split.beg = whole.end;

    return split_index;
}

/* Analyze one emphasis mark.
 *
 * If the mark may act as a closer, the most recent compatible opener is
 * looked up; when one exists, the longer of the two marks is split so both
 * have the same length and the pair is resolved. Otherwise, if the mark may
 * act as an opener, it is queued in its chain to wait for a closer.
 */
static void
md_analyze_emph(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);

    /* If we can be a closer, try to resolve with the preceding opener. */
    if(mark.flags & MD_MARK_POTENTIAL_CLOSER) {
        MD_MARK* opener = null;
        int opener_index;

        if(mark.ch == '*') {
            MD_MARKCHAIN*[6] candidates;
            int n_candidates = 0;
            uint flags = mark.flags;
            uint mod3 = flags & MD_MARK_EMPH_MOD3_MASK;
            bool extraword = !(flags & MD_MARK_EMPH_INTRAWORD);

            /* Apply "rule of three". (This is why asterisk opener marks are
             * kept in multiple chains.) */
            candidates[n_candidates++] = ctx.ASTERISK_OPENERS_intraword_mod3_0;
            if(mod3 != MD_MARK_EMPH_MOD3_2)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_intraword_mod3_1;
            if(mod3 != MD_MARK_EMPH_MOD3_1)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_intraword_mod3_2;
            candidates[n_candidates++] = ctx.ASTERISK_OPENERS_extraword_mod3_0;
            if(extraword || mod3 != MD_MARK_EMPH_MOD3_2)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_extraword_mod3_1;
            if(extraword || mod3 != MD_MARK_EMPH_MOD3_1)
                candidates[n_candidates++] = ctx.ASTERISK_OPENERS_extraword_mod3_2;

            /* The opener is the most recent mark among the allowed chains,
             * i.e. the chain tail that ends furthest in the text. */
            for(int k = 0; k < n_candidates; k++) {
                int tail_index = candidates[k].tail;

                if(tail_index < 0)
                    continue;

                MD_MARK* tail_mark = &ctx.marks[tail_index];
                if(opener == null || tail_mark.end > opener.end) {
                    opener_index = tail_index;
                    opener = tail_mark;
                }
            }
        } else if(chain.tail >= 0) {
            /* Simple emph. mark: only its own chain matters. */
            opener_index = chain.tail;
            opener = &ctx.marks[opener_index];
        }

        /* Resolve, if we have found matching opener. */
        if(opener != null) {
            SZ opener_size = opener.end - opener.beg;
            SZ closer_size = mark.end - mark.beg;

            if(opener_size > closer_size) {
                /* Only the inner part of the opener pairs with us; the
                 * split-off part is re-queued as the opener to resolve. */
                opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
                md_mark_chain_append(ctx, md_mark_chain(ctx, opener_index), opener_index);
            } else if(opener_size < closer_size) {
                md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
            }

            md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
            md_resolve_range(ctx, chain, opener_index, mark_index);
            return;
        }
    }

    /* If we could not resolve as closer, we may yet be an opener. */
    if(mark.flags & MD_MARK_POTENTIAL_OPENER)
        md_mark_chain_append(ctx, chain, mark_index);
}

/* Analyze a tilde mark (strike-through).
 *
 * We attempt to be Github Flavored Markdown compatible here. GFM says
 * that length of the tilde sequence is not important at all. Note that
 * implies the ctx.TILDE_OPENERS chain can have at most one item.
 */
static void
md_analyze_tilde(MD_CTX* ctx, int mark_index)
{
    int opener_index = ctx.TILDE_OPENERS.head;

    if(opener_index < 0) {
        /* Nothing is pending, so we can only be an opener. */
        md_mark_chain_append(ctx, ctx.TILDE_OPENERS, mark_index);
        return;
    }

    /* The chain already contains an opener, so we may resolve the span. */
    md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
    md_resolve_range(ctx, ctx.TILDE_OPENERS, opener_index, mark_index);
}

static void
md_analyze_dollar(MD_CTX* ctx, int mark_index)
{
    /* This should mimic the way inline equations work in LaTeX, so there
     * can only ever be one item in the chain (i.e. the dollars can't be
     * nested). This is basically the same as the md_analyze_tilde function,
     * except that we require matching openers and closers to be of the same
     * length.
*
     * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
    if(ctx.DOLLAR_OPENERS.head >= 0) {
        int opener_index = ctx.DOLLAR_OPENERS.head;
        MD_MARK* open = &ctx.marks[opener_index];
        MD_MARK* close = &ctx.marks[mark_index];

        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);
        if (open.end - open.beg == close.end - close.beg) {
            /* Same number of '$': we are the matching closer. */
            md_resolve_range(ctx, ctx.DOLLAR_OPENERS, opener_index, mark_index);
        } else {
            /* We don't match the opener, so discard old opener and insert as opener */
            md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index);
        }
    } else {
        /* No unmatched openers, so we are opener */
        md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index);
    }
}

/* Try to recognize a permissive URL/WWW autolink starting at the given mark.
 *
 * The text after the mark must form a domain with at least one period and
 * no underscore in its last two segments; an optional path may follow. On
 * success the opener is collapsed to zero length, the following dummy 'D'
 * mark becomes the (zero length) closer placed at the end of the link, and
 * the pair is resolved. On failure nothing is changed.
 */
static void
md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
{
    MD_MARK* opener = &ctx.marks[mark_index];
    int closer_index = mark_index + 1;
    MD_MARK* closer = &ctx.marks[closer_index];
    MD_MARK* limit_mark;
    OFF off = opener.end;
    int n_dots = 0;
    int underscore_in_last_seg = FALSE;
    int underscore_in_prev_seg = FALSE;
    int paren_depth = 0;

    /* Check for domain. */
    for(; off < ctx.size; off++) {
        if(ctx.ISALNUM(off) || ctx.CH(off) == '-')
            continue;

        if(ctx.CH(off) == '.') {
            /* We must see at least one period. A period starts a new
             * segment, so the underscore tracking shifts by one. */
            n_dots++;
            underscore_in_prev_seg = underscore_in_last_seg;
            underscore_in_last_seg = FALSE;
        } else if(ctx.CH(off) == '_') {
            /* No underscore may be present in the last two domain segments. */
            underscore_in_last_seg = TRUE;
        } else {
            break;
        }
    }
    /* A trailing period does not belong to the domain. */
    if(off > opener.end && ctx.CH(off-1) == '.') {
        off--;
        n_dots--;
    }
    if(off <= opener.end || n_dots == 0 || underscore_in_prev_seg || underscore_in_last_seg)
        return;

    /* Check for path. It may not run over the next resolved mark. */
    limit_mark = closer + 1;
    while(limit_mark.ch == 'D' || !(limit_mark.flags & MD_MARK_RESOLVED))
        limit_mark++;
    while(off < limit_mark.beg && ctx.CH(off) != '<' && !ctx.ISWHITESPACE(off) && !ctx.ISNEWLINE(off)) {
        /* Parenthesis must be balanced. */
        if(ctx.CH(off) == '(') {
            paren_depth++;
        } else if(ctx.CH(off) == ')') {
            if(paren_depth <= 0)
                break;
            paren_depth--;
        }

        off++;
    }
    /* These cannot be the last char. In such case they are more likely
     * normal punctuation. */
    if(ctx.ISANYOF(off-1, "?!.,:*_~"))
        off--;

    /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
     * length so all the contents becomes the link text. */
    assert(closer.ch == 'D');
    opener.end = opener.beg;
    closer.ch = opener.ch;
    closer.beg = off;
    closer.end = off;
    md_resolve_range(ctx, null, mark_index, closer_index);
}

/* The permissive autolinks do not have to be enclosed in '<' '>' but we
 * instead impose stricter rules what is understood as an e-mail address
 * here. Actually any non-alphanumeric characters with exception of '.'
 * are prohibited both in username and after '@'. */
static void
md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
{
    MD_MARK* opener = &ctx.marks[mark_index];
    int closer_index;
    MD_MARK* closer;
    OFF beg = opener.beg;
    OFF end = opener.end;
    int dot_count = 0;

    assert(ctx.CH(beg) == '@');

    /* Scan for name before '@'.
*/
    while(beg > 0 && (ctx.ISALNUM(beg-1) || ctx.ISANYOF(beg-1, ".-_+")))
        beg--;

    /* Scan for domain after '@'. */
    while(end < ctx.size && (ctx.ISALNUM(end) || ctx.ISANYOF(end, ".-_"))) {
        if(ctx.CH(end) == '.')
            dot_count++;
        end++;
    }
    if(ctx.CH(end-1) == '.') {  /* Final '.' not part of it. */
        dot_count--;
        end--;
    }
    else if(ctx.ISANYOF2(end-1, '-', '_')) /* These are forbidden at the end. */
        return;
    if(ctx.CH(end-1) == '@' || dot_count == 0)
        return;

    /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
     * length so all the contents becomes the link text. */
    closer_index = mark_index + 1;
    closer = &ctx.marks[closer_index];
    assert(closer.ch == 'D');

    opener.beg = beg;
    opener.end = beg;
    closer.ch = opener.ch;
    closer.beg = end;
    closer.end = end;
    md_resolve_range(ctx, null, mark_index, closer_index);
}

/* Run the per-character analyzers over the marks in [mark_beg, mark_end).
 *
 * Only unresolved marks whose character is listed in mark_chars are
 * analyzed; already resolved spans are jumped over as a whole. The lines
 * and n_lines parameters are not used by this function.
 */
static void
md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                 int mark_beg, int mark_end, const(CHAR)* mark_chars)
{
    for(int i = mark_beg; i < mark_end; i++) {
        MD_MARK* mark = &ctx.marks[i];

        /* Skip resolved spans. */
        if(mark.flags & MD_MARK_RESOLVED) {
            if(mark.flags & MD_MARK_OPENER) {
                assert(i < mark.next);
                /* Jump to the closer; the loop increment steps past it. */
                i = mark.next;
            }
            continue;
        }

        /* Skip marks we do not want to deal with. */
        if(!ISANYOF_(mark.ch, mark_chars))
            continue;

        /* Analyze the mark. */
        switch(mark.ch) {
            case '[':
            case '!':
            case ']':
                md_analyze_bracket(ctx, i);
                break;

            case '&':
                md_analyze_entity(ctx, i);
                break;

            case '|':
                md_analyze_table_cell_boundary(ctx, i);
                break;

            case '_':
            case '*':
                md_analyze_emph(ctx, i);
                break;

            case '~':
                md_analyze_tilde(ctx, i);
                break;

            case '$':
                md_analyze_dollar(ctx, i);
                break;

            case '.':
            case ':':
                md_analyze_permissive_url_autolink(ctx, i);
                break;

            case '@':
                md_analyze_permissive_email_autolink(ctx, i);
                break;

            default:
                break;
        }
    }
}

/* Analyze marks (build ctx.marks). */
static int
md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
{
    int ret;

    /* Reset the previously collected stack of marks. */
    ctx.n_marks = 0;

    /* Collect all marks. */
    ret = (md_collect_marks(ctx, lines, n_lines, table_mode));
    if (ret < 0) goto abort;

    /* We analyze marks in few groups to handle their precedence. */
    /* (1) Entities; code spans; autolinks; raw HTML. */
    md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "&");

    if(table_mode) {
        /* (2) Analyze table cell boundaries.
         * Note we reset ctx.TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
         * not after, because caller may need it. */
        assert(n_lines == 1);
        ctx.TABLECELLBOUNDARIES.head = -1;
        ctx.TABLECELLBOUNDARIES.tail = -1;
        ctx.n_table_cell_boundaries = 0;
        md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "|");
        return ret;
    }

    /* (3) Links. */
    md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "[]!");
    ret = (md_resolve_links(ctx, lines, n_lines));
    if (ret < 0) goto abort;
    ctx.BRACKET_OPENERS.head = -1;
    ctx.BRACKET_OPENERS.tail = -1;
    ctx.unresolved_link_head = -1;
    ctx.unresolved_link_tail = -1;

    /* (4) Emphasis and strong emphasis; permissive autolinks.
*/
    md_analyze_link_contents(ctx, lines, n_lines, 0, ctx.n_marks);

abort:
    return ret;
}

/* Analyze emphasis, strike-through, LaTeX math and permissive autolink
 * marks in [mark_beg, mark_end), then empty all opener chains that analysis
 * may have filled.
 */
static void
md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                         int mark_beg, int mark_end)
{
    MD_MARKCHAIN*[9] used_chains;

    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, "*_~$@:.");

    used_chains[0] = ctx.ASTERISK_OPENERS_extraword_mod3_0;
    used_chains[1] = ctx.ASTERISK_OPENERS_extraword_mod3_1;
    used_chains[2] = ctx.ASTERISK_OPENERS_extraword_mod3_2;
    used_chains[3] = ctx.ASTERISK_OPENERS_intraword_mod3_0;
    used_chains[4] = ctx.ASTERISK_OPENERS_intraword_mod3_1;
    used_chains[5] = ctx.ASTERISK_OPENERS_intraword_mod3_2;
    used_chains[6] = ctx.UNDERSCORE_OPENERS;
    used_chains[7] = ctx.TILDE_OPENERS;
    used_chains[8] = ctx.DOLLAR_OPENERS;

    /* -1 in both head and tail denotes an empty chain. */
    foreach(MD_MARKCHAIN* c; used_chains) {
        c.head = -1;
        c.tail = -1;
    }
}

/* Emit the enter (enter != 0) or leave (enter == 0) callback of a link-like
 * span of the given type.
 *
 * The href and title attributes are built from dest/title first; both
 * builders are always released before returning. Returns a negative value
 * when building an attribute fails, otherwise the value returned by
 * MD_ENTER_SPAN / MD_LEAVE_SPAN.
 */
static int
md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
                      const(CHAR)* dest, SZ dest_size, int prohibit_escapes_in_dest,
                      const(CHAR)* title, SZ title_size)
{
    MD_ATTRIBUTE_BUILD href_build = MD_ATTRIBUTE_BUILD.init;
    MD_ATTRIBUTE_BUILD title_build = MD_ATTRIBUTE_BUILD.init;
    MD_SPAN_A_DETAIL det;
    int ret = 0;

    /* Note we here rely on fact that MD_SPAN_A_DETAIL and
     * MD_SPAN_IMG_DETAIL are binary-compatible. */
    memset(&det, 0, MD_SPAN_A_DETAIL.sizeof);

    ret = md_build_attribute(ctx, dest, dest_size,
                             (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
                             &det.href, &href_build);
    if(ret >= 0)
        ret = md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build);
    if(ret >= 0)
        ret = enter ? MD_ENTER_SPAN(ctx, type, &det) : MD_LEAVE_SPAN(ctx, type, &det);

    md_free_attribute(ctx, &href_build);
    md_free_attribute(ctx, &title_build);
    return ret;
}

/* Render the output, accordingly to the analyzed ctx.marks. */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    MD_TEXTTYPE text_type;
    const(MD_LINE)* line = lines;
    MD_MARK* prev_mark = null;
    MD_MARK* mark;
    OFF off = lines[0].beg;
    OFF end = lines[n_lines-1].end;
    int enforce_hardbreak = 0;
    int ret = 0;

    /* Find first resolved mark. Note there is always at least one resolved
     * mark, the dummy last one after the end of the latest line we actually
     * never really reach. This saves us of a lot of special checks and cases
     * in this function. */
    mark = ctx.marks;
    while(!(mark.flags & MD_MARK_RESOLVED))
        mark++;

    text_type = MD_TEXT_NORMAL;

    while(1) {
        /* Process the text up to the next mark or end-of-line. */
        OFF tmp = (line.end < mark.beg ? line.end : mark.beg);
        if(tmp > off) {
            ret = MD_TEXT(ctx, text_type, ctx.STR(off), tmp - off);
            if (ret != 0) goto abort;
            off = tmp;
        }

        /* If reached the mark, process it and move to next one. */
        if(off >= mark.beg) {
            switch(mark.ch) {
                case '\\':      /* Backslash escape. */
                    if(ctx.ISNEWLINE(mark.beg+1))
                        enforce_hardbreak = 1;
                    else
                    {
                        ret = MD_TEXT(ctx, text_type, ctx.STR(mark.beg+1), 1);
                        if (ret != 0) goto abort;
                    }
                    break;

                case ' ':       /* Non-trivial space.
*/
                    ret = MD_TEXT(ctx, text_type, " ", 1);
                    if (ret != 0) goto abort;
                    break;

                case '`':       /* Code span. */
                    if(mark.flags & MD_MARK_OPENER) {
                        ret = MD_ENTER_SPAN(ctx, MD_SPAN_CODE, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_CODE;
                    } else {
                        ret = MD_LEAVE_SPAN(ctx, MD_SPAN_CODE, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '_':
                case '*':       /* Emphasis, strong emphasis. */
                    /* A mark of odd length contributes one <em>; every
                     * remaining pair of characters contributes one
                     * <strong>. Closers mirror the order of openers. */
                    if(mark.flags & MD_MARK_OPENER) {
                        if((mark.end - off) % 2) {
                            ret = MD_ENTER_SPAN(ctx, MD_SPAN_EM, null);
                            if (ret != 0) goto abort;
                            off++;
                        }
                        while(off + 1 < mark.end) {
                            ret = MD_ENTER_SPAN(ctx, MD_SPAN_STRONG, null);
                            if (ret != 0) goto abort;
                            off += 2;
                        }
                    } else {
                        while(off + 1 < mark.end) {
                            ret = MD_LEAVE_SPAN(ctx, MD_SPAN_STRONG, null);
                            if (ret != 0) goto abort;
                            off += 2;
                        }
                        if((mark.end - off) % 2) {
                            ret = MD_LEAVE_SPAN(ctx, MD_SPAN_EM, null);
                            if (ret != 0) goto abort;
                            off++;
                        }
                    }
                    break;

                case '~':       /* Strike-through. */
                    if(mark.flags & MD_MARK_OPENER)
                    {
                        ret = MD_ENTER_SPAN(ctx, MD_SPAN_DEL, null);
                        if (ret != 0) goto abort;
                    }
                    else
                    {
                        ret = MD_LEAVE_SPAN(ctx, MD_SPAN_DEL, null);
                        if (ret != 0) goto abort;
                    }
                    break;

                case '$':       /* LaTeX math; odd mark length is inline, even is display. */
                    if(mark.flags & MD_MARK_OPENER) {
                        ret = MD_ENTER_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_LATEXMATH;
                    } else {
                        ret = MD_LEAVE_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '[':       /* Link, image. */
                case '!':
                case ']':
                {
                    /* The two dummy marks following the opener carry the
                     * link destination and the link title. */
                    const MD_MARK* opener = (mark.ch != ']' ? mark : &ctx.marks[mark.prev]);
                    const MD_MARK* dest_mark = opener+1;
                    const MD_MARK* title_mark = opener+2;

                    assert(dest_mark.ch == 'D');
                    assert(title_mark.ch == 'D');

                    ret = (md_enter_leave_span_a(ctx, (mark.ch != ']') ? 1 : 0,
                                (opener.ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
                                ctx.STR(dest_mark.beg), dest_mark.end - dest_mark.beg, FALSE,
                                cast(char*) md_mark_get_ptr(ctx, cast(int)(title_mark - ctx.marks)), title_mark.prev));
                    if (ret < 0) goto abort;

                    /* link/image closer may span multiple lines. */
                    if(mark.ch == ']') {
                        while(mark.end > line.end)
                            line++;
                    }

                    break;
                }

                case '<':
                case '>':       /* Autolink or raw HTML. */
                    if(!(mark.flags & MD_MARK_AUTOLINK)) {
                        /* Raw HTML. */
                        if(mark.flags & MD_MARK_OPENER)
                            text_type = MD_TEXT_HTML;
                        else
                            text_type = MD_TEXT_NORMAL;
                        break;
                    }
                    /* Pass through, if auto-link. */
                    goto case '.';

                case '@':       /* Permissive e-mail autolink. */
                case ':':       /* Permissive URL autolink. */
                case '.':       /* Permissive WWW autolink. */
                {
                    MD_MARK* opener = ((mark.flags & MD_MARK_OPENER) ? mark : &ctx.marks[mark.prev]);
                    MD_MARK* closer = &ctx.marks[opener.next];
                    const(CHAR)* dest = ctx.STR(opener.end);
                    SZ dest_size = closer.beg - opener.end;

                    /* For permissive auto-links we do not know closer mark
                     * position at the time of md_collect_marks(), therefore
                     * it can be out-of-order in ctx.marks[].
                     *
                     * With this flag, we make sure that we output the closer
                     * only if we processed the opener. */
                    if(mark.flags & MD_MARK_OPENER)
                        closer.flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;

                    if(opener.ch == '@' || opener.ch == '.') {
                        /* Prepend the 7-character scheme in the temporary
                         * buffer. */
                        dest_size += 7;
                        ret = MD_TEMP_BUFFER(ctx, dest_size * CHAR.sizeof);
                        if (ret < 0) goto abort;
                        memcpy(ctx.buffer,
                               (opener.ch == '@' ? "mailto:" : "http://").ptr,
                               7 * CHAR.sizeof);
                        memcpy(ctx.buffer + 7, dest, (dest_size-7) * CHAR.sizeof);
                        dest = ctx.buffer;
                    }

                    if(closer.flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
                    {
                        ret = (md_enter_leave_span_a(ctx, (mark.flags & MD_MARK_OPENER),
                                    MD_SPAN_A, dest, dest_size, TRUE, null, 0));
                        if (ret < 0) goto abort;
                    }
                    break;
                }

                case '&':       /* Entity. */
                    ret = MD_TEXT(ctx, MD_TEXT_ENTITY, ctx.STR(mark.beg), mark.end - mark.beg);
                    if (ret != 0) goto abort;
                    break;

                case '\0':
                    ret = MD_TEXT(ctx, MD_TEXT_NULLCHAR, "", 1);
                    if (ret != 0) goto abort;
                    break;

                case 127:
                    goto abort;

                default:
                    break;
            }

            off = mark.end;

            /* Move to next resolved mark. */
            prev_mark = mark;
            mark++;
            while(!(mark.flags & MD_MARK_RESOLVED) || mark.beg < off)
                mark++;
        }

        /* If reached end of line, move to next one. */
        if(off >= line.end) {
            /* If it is the last line, we are done. */
            if(off >= end)
                break;

            if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
                OFF tmp2;

                assert(prev_mark != null);
                assert(ISANYOF2_(prev_mark.ch, '`', '$') && (prev_mark.flags & MD_MARK_OPENER));
                assert(ISANYOF2_(mark.ch, '`', '$') && (mark.flags & MD_MARK_CLOSER));

                /* Inside a code span, trailing line whitespace has to be
                 * outputted. */
                tmp2 = off;
                while(off < ctx.size && ctx.ISBLANK(off))
                    off++;
                if(off > tmp2)
                {
                    ret = MD_TEXT(ctx, text_type, ctx.STR(tmp2), off-tmp2);
                    if (ret != 0) goto abort;
                }

                /* and new lines are transformed into single spaces. */
                if(prev_mark.end < off && off < mark.beg)
                {
                    ret = MD_TEXT(ctx, text_type, " ", 1);
                    if (ret != 0) goto abort;
                }
            } else if(text_type == MD_TEXT_HTML) {
                /* Inside raw HTML, we output the new line verbatim, including
                 * any trailing spaces. */
                OFF tmp2 = off;

                while(tmp2 < end && ctx.ISBLANK(tmp2))
                    tmp2++;
                if(tmp2 > off)
                {
                    ret = MD_TEXT(ctx, MD_TEXT_HTML, ctx.STR(off), tmp2 - off);
                    if (ret != 0) goto abort;
                }
                ret = MD_TEXT(ctx, MD_TEXT_HTML, "\n", 1);
                if (ret != 0) goto abort;
            } else {
                /* Output soft or hard line break. */
                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;

                if(text_type == MD_TEXT_NORMAL) {
                    if(enforce_hardbreak)
                        break_type = MD_TEXT_BR;
                    else if((ctx.CH(line.end) == ' ' && ctx.CH(line.end+1) == ' '))
                        break_type = MD_TEXT_BR;
                }

                ret = MD_TEXT(ctx, break_type, "\n", 1);
                if (ret != 0) goto abort;
            }

            /* Move to the next line. */
            line++;
            off = line.beg;

            enforce_hardbreak = 0;
        }
    }

abort:
    return ret;
}


/***************************
 ***  Processing Tables  ***
 ***************************/

/* Decode the column alignments from the table underline line [beg, end).
 *
 * For each of the n_align columns the next run of '-' is located; a ':'
 * right before the run sets bit 0 of the lookup index and a ':' right after
 * it sets bit 1, which selects default, left, right or center alignment.
 * The results are stored into align_[0 .. n_align].
 */
void md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align_, int n_align)
{
    static immutable MD_ALIGN[] align_map =
    [
        MD_ALIGN_DEFAULT,
        MD_ALIGN_LEFT,
        MD_ALIGN_RIGHT,
        MD_ALIGN_CENTER
    ];
    OFF off = beg;

    for(int col = 0; col < n_align; col++) {
        int index = 0;  /* index into align_map[] */

        /* Find the start of the column's dash run. */
        while(ctx.CH(off) != '-')
            off++;
        if(off > beg && ctx.CH(off-1) == ':')
            index |= 1;

        /* Skip over the dash run. */
        while(off < end && ctx.CH(off) == '-')
            off++;
        if(off < end && ctx.CH(off) == ':')
            index |= 2;

        align_[col] = align_map[index];
    }
}

/* Emit one table cell: the text in [beg, end) is trimmed of surrounding
 * whitespace and processed as normal inline contents between the enter and
 * leave callbacks of a cell_type block with the given alignment.
 */
int md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align_, OFF beg, OFF end)
{
    MD_LINE line;
    MD_BLOCK_TD_DETAIL det;
    int ret;

    while(beg < end && ctx.ISWHITESPACE(beg))
        beg++;
    while(end > beg && ctx.ISWHITESPACE(end-1))
        end--;

    det.align_ = align_;
    line.beg = beg;
    line.end = end;

    ret = MD_ENTER_BLOCK(ctx, cell_type, &det);
    if(ret != 0)
        return ret;

    ret = md_process_normal_block_contents(ctx, &line, 1);
    if(ret < 0)
        return ret;

    return MD_LEAVE_BLOCK(ctx, cell_type, &det);
}

int md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
                         const MD_ALIGN* align_, int col_count)
{
    MD_LINE line;
    OFF* pipe_offs = null;
    int i, j, n;
    int ret = 0;

    line.beg = beg;
    line.end = end;

    /* Break the line into table cells by identifying pipe characters who
     * form the cell boundary.
*/
    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
    if (ret < 0) goto abort;

    /* We have to remember the cell boundaries in local buffer because
     * ctx.marks[] shall be reused during cell contents processing. */
    n = ctx.n_table_cell_boundaries;
    pipe_offs = cast(OFF*) malloc(n * OFF.sizeof);
    if(pipe_offs == null) {
        ctx.MD_LOG("malloc() failed.");
        ret = -1;
        goto abort;
    }
    for(i = ctx.TABLECELLBOUNDARIES.head, j = 0; i >= 0; i = ctx.marks[i].next) {
        MD_MARK* mark = &ctx.marks[i];
        pipe_offs[j++] = mark.beg;
    }

    /* Process cells. */
    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TR, null);
    if (ret != 0) goto abort;

    j = 0;
    /* Cell before the first pipe (the row does not start with '|'). */
    if(beg < pipe_offs[0] && j < col_count)
    {
        ret = (md_process_table_cell(ctx, cell_type, align_[j++], beg, pipe_offs[0]));
        if (ret < 0) goto abort;
    }
    /* Cells enclosed between two consecutive pipes. */
    for(i = 0; i < n-1 && j < col_count; i++)
    {
        ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[i]+1, pipe_offs[i+1]));
        if (ret < 0) goto abort;
    }
    /* Cell after the last pipe (the row does not end with '|'). */
    if(pipe_offs[n-1] < end-1 && j < col_count)
    {
        ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[n-1]+1, end));
        if (ret < 0) goto abort;
    }
    /* Make sure we call enough table cells even if the current table contains
     * too few of them. */
    while(j < col_count)
    {
        ret = (md_process_table_cell(ctx, cell_type, align_[j++], 0, 0));
        if (ret < 0) goto abort;
    }

    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TR, null);
    if (ret != 0) goto abort;

abort:
    free(pipe_offs);

    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}

/* Emit a whole table block.
 *
 * lines[0] is the header row, lines[1] the underline the column alignments
 * are decoded from, and every following line is a body row. The header is
 * emitted inside a THEAD block with TH cells, the body rows inside a TBODY
 * block with TD cells. Returns -1 if the alignment array cannot be
 * allocated, otherwise the first non-zero result of a callback or row.
 */
int md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
{
    MD_ALIGN* align_;
    int row;
    int ret = 0;

    /* At least two lines have to be present: The column headers and the line
     * with the underlines. */
    assert(n_lines >= 2);

    align_ = cast(MD_ALIGN*) malloc(col_count * MD_ALIGN.sizeof);
    if(align_ == null) {
        ctx.MD_LOG("malloc() failed.");
        ret = -1;
        goto abort;
    }

    md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align_, col_count);

    /* Header. */
    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_THEAD, null);
    if (ret != 0) goto abort;
    ret = md_process_table_row(ctx, MD_BLOCK_TH,
                lines[0].beg, lines[0].end, align_, col_count);
    if (ret < 0) goto abort;
    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_THEAD, null);
    if (ret != 0) goto abort;

    /* Body: everything after the underline. */
    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TBODY, null);
    if (ret != 0) goto abort;
    for(row = 2; row < n_lines; row++) {
        ret = md_process_table_row(ctx, MD_BLOCK_TD,
                    lines[row].beg, lines[row].end, align_, col_count);
        if (ret < 0) goto abort;
    }
    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TBODY, null);
    if (ret != 0) goto abort;

abort:
    free(align_);
    return ret;
}

int md_is_table_row(MD_CTX* ctx, OFF beg, OFF* p_end)
{
    MD_LINE line;
    int i;
    int ret = FALSE;

    line.beg = beg;
    line.end = beg;

    /* Find end of line.
*/
    while(line.end < ctx.size && !ctx.ISNEWLINE(line.end))
        line.end++;

    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
    if (ret < 0) goto abort;

    if(ctx.TABLECELLBOUNDARIES.head >= 0) {
        if(p_end != null)
            *p_end = line.end;
        ret = TRUE;
    }

abort:
    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}


/**************************
 ***  Processing Block  ***
 **************************/

/* Bits stored in MD_BLOCK.flags. */
enum MD_BLOCK_CONTAINER_OPENER   = 0x01;
enum MD_BLOCK_CONTAINER_CLOSER   = 0x02;
enum MD_BLOCK_CONTAINER          = (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER);
enum MD_BLOCK_LOOSE_LIST         = 0x04;
enum MD_BLOCK_SETEXT_HEADER      = 0x08;

/* Compact (8 byte) block record. The three narrow fields are exposed
 * through property-style accessors working with wider types; the setters
 * narrow the value to the width of the backing field.
 */
struct MD_BLOCK
{
nothrow:
@nogc:
    ubyte type_;
    ubyte flags_;
    ushort data_;

    MD_BLOCKTYPE type() const { return type_; }
    void type(MD_BLOCKTYPE value) { type_ = cast(ubyte)value; }

    uint flags() const { return flags_; }
    void flags(uint value) { flags_ = cast(ubyte)value; }

    /* MD_BLOCK_H:      Header level (1 - 6)
     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
     */
    uint data() const { return data_; }
    /* The backing field is 16 bits wide, so narrow to ushort. (Narrowing to
     * ubyte would silently drop bits 8-15, e.g. turning a column count of
     * 256 into 0.) */
    void data(uint value) { data_ = cast(ushort)value; }

    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
     * MD_BLOCK_LI:     Task mark offset in the input doc.
     * MD_BLOCK_OL:     Start item number.
     */
    uint n_lines;
}

static assert(MD_BLOCK.sizeof == 8);

/* State of one container block. The one-byte is_loose_ / is_task_ fields
 * are exposed through uint accessors.
 */
struct MD_CONTAINER
{
nothrow:
@nogc:

    CHAR ch;

    ubyte is_loose_;
    ubyte is_task_;

    uint is_loose() { return is_loose_; }
    void is_loose(uint value) { is_loose_ = cast(ubyte)value; }

    uint is_task() { return is_task_; }
    void is_task(uint value) { is_task_ = cast(ubyte)value; }

    uint start;
    uint mark_indent;
    uint contents_indent;
    OFF block_byte_off;
    OFF task_mark_off;
}


/* Analyze and then render the inline contents of the given lines.
 *
 * Regardless of success, the temporary memory blocks referenced from
 * ctx.PTR_CHAIN are released and the chain is emptied before returning.
 * Returns the result of the first failing step, or of md_process_inlines().
 */
int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    int i;
    int ret;

    ret = md_analyze_inlines(ctx, lines, n_lines, FALSE);
    if(ret >= 0)
        ret = md_process_inlines(ctx, lines, n_lines);

    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}

int md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
{
    /* Indentation is emitted in chunks of this string; the loop below is
     * correct for any non-empty chunk length. */
    static immutable string indent_chunk_str = " ";

    int i;
    int ret = 0;

    for(i = 0; i < n_lines; i++) {
        const MD_VERBATIMLINE* line = &lines[i];
        int indent = line.indent;

        assert(indent >= 0);

        /* Output code indentation. */
        while(indent > cast(int)(indent_chunk_str.length)) {
            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, cast(SZ)(indent_chunk_str.length));
            if (ret != 0) goto abort;
            indent -= indent_chunk_str.length;
        }
        if(indent > 0)
        {
            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, indent);
            if (ret != 0) goto abort;
        }

        /* Output the code line itself.
*/
        ret = MD_TEXT_INSECURE(ctx, text_type, ctx.STR(line.beg), line.end - line.beg);
        if (ret != 0) goto abort;

        /* Enforce end-of-line. */
        ret = MD_TEXT(ctx, text_type, "\n", 1);
        if (ret != 0) goto abort;
    }

abort:
    return ret;
}

/* Emit the text of a code block.
 *
 * For a fenced block the first line (the opening fence) is dropped; for an
 * indented block empty lines at the start and at the end are dropped. The
 * remaining lines, if any, are emitted verbatim as MD_TEXT_CODE.
 */
static int
md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const(MD_VERBATIMLINE)* lines, int n_lines)
{
    if(is_fenced) {
        /* Skip the first line in case of fenced code: It is the fence.
         * (Only the starting fence is present due to logic in md_analyze_line().) */
        lines++;
        n_lines--;
    } else {
        /* Ignore blank lines at start/end of indented code block. */
        for(; n_lines > 0 && lines[0].beg == lines[0].end; lines++)
            n_lines--;
        while(n_lines > 0 && lines[n_lines-1].beg == lines[n_lines-1].end)
            n_lines--;
    }

    return (n_lines == 0)
                ? 0
                : md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
}

/* Fill the detail structure of a fenced code block.
 *
 * The fence line is read from the memory right behind the block record.
 * After skipping the fence characters and trimming spaces, the whole rest of
 * the line becomes det.info and its first whitespace-delimited word becomes
 * det.lang; det.fence_char receives the fence character. Returns a negative
 * value if building either attribute fails.
 */
int md_setup_fenced_code_detail(MD_CTX* ctx, const(MD_BLOCK)* block, MD_BLOCK_CODE_DETAIL* det,
                                MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
{
    const(MD_VERBATIMLINE)* fence_line = cast(const(MD_VERBATIMLINE)*)(block + 1);
    OFF beg = fence_line.beg;
    OFF end = fence_line.end;
    OFF lang_end;
    CHAR fence_ch = ctx.CH(fence_line.beg);
    int ret = 0;

    /* Skip the fence itself. */
    while(beg < ctx.size && ctx.CH(beg) == fence_ch)
        beg++;
    /* Trim initial spaces. */
    while(beg < ctx.size && ctx.CH(beg) == ' ')
        beg++;

    /* Trim trailing spaces. */
    while(end > beg && ctx.CH(end-1) == ' ')
        end--;

    /* Build info string attribute. */
    ret = md_build_attribute(ctx, ctx.STR(beg), end - beg, 0, &det.info, info_build);
    if(ret < 0)
        return ret;

    /* Build lang string attribute: the info string up to the first
     * whitespace. */
    for(lang_end = beg; lang_end < end && !ctx.ISWHITESPACE(lang_end); lang_end++)
    {
    }
    ret = md_build_attribute(ctx, ctx.STR(beg), lang_end - beg, 0, &det.lang, lang_build);
    if(ret < 0)
        return ret;

    det.fence_char = fence_ch;
    return ret;
}

static int
md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
{
    static union HeaderOrCode
    {
        MD_BLOCK_H_DETAIL header;
        MD_BLOCK_CODE_DETAIL code;
    }
    HeaderOrCode det;
    MD_ATTRIBUTE_BUILD info_build;
    MD_ATTRIBUTE_BUILD lang_build;
    int is_in_tight_list;
    int clean_fence_code_detail = FALSE;
    int ret = 0;

    memset(&det, 0, det.sizeof);

    if(ctx.n_containers == 0)
        is_in_tight_list = FALSE;
    else
        is_in_tight_list = !ctx.containers[ctx.n_containers-1].is_loose;

    switch(block.type)
    {
        case MD_BLOCK_H:
            det.header.level = block.data;
            break;

        case MD_BLOCK_CODE:
            /* For fenced code block, we may need to set the info string. */
            if(block.data != 0) {
                memset(&det.code, 0, MD_BLOCK_CODE_DETAIL.sizeof);
                clean_fence_code_detail = TRUE;
                ret = (md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
                if (ret < 0) goto abort;
            }
            break;

        default:
            /* Noop. */
            break;
    }

    /* A paragraph inside a tight list is emitted without its <p> block. */
    if(!is_in_tight_list || block.type != MD_BLOCK_P)
    {
        ret = MD_ENTER_BLOCK(ctx, block.type, cast(void*) &det);
        if (ret != 0) goto abort;
    }

    /* Process the block contents accordingly to its type.
*/ 5112 switch(block.type) { 5113 case MD_BLOCK_HR: 5114 /* noop */ 5115 break; 5116 5117 case MD_BLOCK_CODE: 5118 ret = (md_process_code_block_contents(ctx, (block.data != 0), 5119 cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); 5120 if (ret < 0) goto abort; 5121 break; 5122 5123 case MD_BLOCK_HTML: 5124 ret = (md_process_verbatim_block_contents(ctx, MD_TEXT_HTML, 5125 cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); 5126 if (ret < 0) goto abort; 5127 break; 5128 5129 case MD_BLOCK_TABLE: 5130 ret = (md_process_table_block_contents(ctx, block.data, 5131 cast(const(MD_LINE)*)(block + 1), block.n_lines)); 5132 if (ret < 0) goto abort; 5133 break; 5134 5135 default: 5136 ret = (md_process_normal_block_contents(ctx, 5137 cast(const(MD_LINE)*)(block + 1), block.n_lines)); 5138 if (ret < 0) goto abort; 5139 break; 5140 } 5141 5142 if(!is_in_tight_list || block.type != MD_BLOCK_P) 5143 { 5144 ret = MD_LEAVE_BLOCK(ctx, block.type, cast(void*) &det); 5145 if (ret != 0) goto abort; 5146 } 5147 5148 abort: 5149 if(clean_fence_code_detail) { 5150 md_free_attribute(ctx, &info_build); 5151 md_free_attribute(ctx, &lang_build); 5152 } 5153 return ret; 5154 } 5155 5156 int md_process_all_blocks(MD_CTX* ctx) 5157 { 5158 int byte_off = 0; 5159 int ret = 0; 5160 5161 /* ctx.containers now is not needed for detection of lists and list items 5162 * so we reuse it for tracking what lists are loose or tight. We rely 5163 * on the fact the vector is large enough to hold the deepest nesting 5164 * level of lists. */ 5165 ctx.n_containers = 0; 5166 5167 while(byte_off < ctx.n_block_bytes) { 5168 MD_BLOCK* block = cast(MD_BLOCK*)(cast(char*)ctx.block_bytes + byte_off); 5169 static union Det 5170 { 5171 MD_BLOCK_UL_DETAIL ul; 5172 MD_BLOCK_OL_DETAIL ol; 5173 MD_BLOCK_LI_DETAIL li; 5174 } 5175 5176 Det det; 5177 5178 switch(block.type) { 5179 case MD_BLOCK_UL: 5180 det.ul.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? 
FALSE : TRUE; 5181 det.ul.mark = cast(CHAR) block.data; 5182 break; 5183 5184 case MD_BLOCK_OL: 5185 det.ol.start = block.n_lines; 5186 det.ol.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; 5187 det.ol.mark_delimiter = cast(CHAR) block.data; 5188 break; 5189 5190 case MD_BLOCK_LI: 5191 det.li.is_task = (block.data != 0); 5192 det.li.task_mark = cast(CHAR) block.data; 5193 det.li.task_mark_offset = cast(OFF) block.n_lines; 5194 break; 5195 5196 default: 5197 /* noop */ 5198 break; 5199 } 5200 5201 if(block.flags & MD_BLOCK_CONTAINER) { 5202 if(block.flags & MD_BLOCK_CONTAINER_CLOSER) { 5203 ret = MD_LEAVE_BLOCK(ctx, block.type, &det); 5204 if (ret != 0) goto abort; 5205 5206 if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL || block.type == MD_BLOCK_QUOTE) 5207 ctx.n_containers--; 5208 } 5209 5210 if(block.flags & MD_BLOCK_CONTAINER_OPENER) { 5211 ret = MD_ENTER_BLOCK(ctx, block.type, &det); 5212 if (ret != 0) goto abort; 5213 5214 if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL) { 5215 ctx.containers[ctx.n_containers].is_loose = (block.flags & MD_BLOCK_LOOSE_LIST); 5216 ctx.n_containers++; 5217 } else if(block.type == MD_BLOCK_QUOTE) { 5218 /* This causes that any text in a block quote, even if 5219 * nested inside a tight list item, is wrapped with 5220 * <p>...</p>. 
*/ 5221 ctx.containers[ctx.n_containers].is_loose = TRUE; 5222 ctx.n_containers++; 5223 } 5224 } 5225 } else { 5226 ret = (md_process_leaf_block(ctx, block)); 5227 if (ret < 0) goto abort; 5228 5229 if(block.type == MD_BLOCK_CODE || block.type == MD_BLOCK_HTML) 5230 byte_off += block.n_lines * MD_VERBATIMLINE.sizeof; 5231 else 5232 byte_off += block.n_lines * MD_LINE.sizeof; 5233 } 5234 5235 byte_off += MD_BLOCK.sizeof; 5236 } 5237 5238 ctx.n_block_bytes = 0; 5239 5240 abort: 5241 return ret; 5242 } 5243 5244 5245 /************************************ 5246 *** Grouping Lines into Blocks *** 5247 ************************************/ 5248 5249 static void* 5250 md_push_block_bytes(MD_CTX* ctx, int n_bytes) 5251 { 5252 void* ptr; 5253 5254 if(ctx.n_block_bytes + n_bytes > ctx.alloc_block_bytes) { 5255 void* new_block_bytes; 5256 5257 ctx.alloc_block_bytes = (ctx.alloc_block_bytes > 0 ? ctx.alloc_block_bytes * 2 : 512); 5258 new_block_bytes = realloc_safe(ctx.block_bytes, ctx.alloc_block_bytes); 5259 if(new_block_bytes == null) { 5260 ctx.MD_LOG("realloc() failed."); 5261 return null; 5262 } 5263 5264 /* Fix the .current_block after the reallocation. 
*/ 5265 if(ctx.current_block != null) { 5266 OFF off_current_block = cast(uint)( cast(char*) ctx.current_block - cast(char*) ctx.block_bytes ); 5267 ctx.current_block = cast(MD_BLOCK*) (cast(char*) new_block_bytes + off_current_block); 5268 } 5269 5270 ctx.block_bytes = new_block_bytes; 5271 } 5272 5273 ptr = cast(char*)ctx.block_bytes + ctx.n_block_bytes; 5274 ctx.n_block_bytes += n_bytes; 5275 return ptr; 5276 } 5277 5278 static int 5279 md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line) 5280 { 5281 MD_BLOCK* block; 5282 5283 assert(ctx.current_block == null); 5284 5285 block = cast(MD_BLOCK*) md_push_block_bytes(ctx, MD_BLOCK.sizeof); 5286 if(block == null) 5287 return -1; 5288 5289 switch(line.type) { 5290 case MD_LINE_HR: 5291 block.type = MD_BLOCK_HR; 5292 break; 5293 5294 case MD_LINE_ATXHEADER: 5295 case MD_LINE_SETEXTHEADER: 5296 block.type = MD_BLOCK_H; 5297 break; 5298 5299 case MD_LINE_FENCEDCODE: 5300 case MD_LINE_INDENTEDCODE: 5301 block.type = MD_BLOCK_CODE; 5302 break; 5303 5304 case MD_LINE_TEXT: 5305 block.type = MD_BLOCK_P; 5306 break; 5307 5308 case MD_LINE_HTML: 5309 block.type = MD_BLOCK_HTML; 5310 break; 5311 5312 case MD_LINE_BLANK: 5313 case MD_LINE_SETEXTUNDERLINE: 5314 case MD_LINE_TABLEUNDERLINE: 5315 default: 5316 assert(false); 5317 } 5318 5319 block.flags = 0; 5320 block.data = line.data; 5321 block.n_lines = 0; 5322 5323 ctx.current_block = block; 5324 return 0; 5325 } 5326 5327 /* Eat from start of current (textual) block any reference definitions and 5328 * remember them so we can resolve any links referring to them. 5329 * 5330 * (Reference definitions can only be at start of it as they cannot break 5331 * a paragraph.) 
*/
int md_consume_link_reference_definitions(MD_CTX* ctx)
{
    MD_LINE* lines = cast(MD_LINE*) (ctx.current_block + 1);
    int n_lines = ctx.current_block.n_lines;
    int n = 0;

    /* Compute how many lines at the start of the block form one or more
     * reference definitions. */
    while(n < n_lines) {
        int n_link_ref_lines;

        n_link_ref_lines = md_is_link_reference_definition(ctx,
                                    lines + n, n_lines - n);
        /* Not a reference definition? */
        if(n_link_ref_lines == 0)
            break;

        /* We fail if it is the ref. def. but it could not be stored due
         * a memory allocation error. */
        if(n_link_ref_lines < 0)
            return -1;

        n += n_link_ref_lines;
    }

    /* If there was at least one reference definition, we need to remove
     * its lines from the block, or perhaps even the whole block. */
    if(n > 0) {
        if(n == n_lines) {
            /* Remove complete block. */
            ctx.n_block_bytes -= n * MD_LINE.sizeof;
            ctx.n_block_bytes -= MD_BLOCK.sizeof;
            ctx.current_block = null;
        } else {
            /* Remove just some initial lines from the block. */
            memmove(lines, lines + n, (n_lines - n) * MD_LINE.sizeof);
            ctx.current_block.n_lines -= n;
            ctx.n_block_bytes -= n * MD_LINE.sizeof;
        }
    }

    return 0;
}

/* Finish the block currently being built, if any.
 *
 * Leading link reference definitions are stripped from paragraphs and setext
 * headers. A setext header then loses its underline line, or, when only the
 * underline is left, is turned into a paragraph that stays the current block.
 * Returns a negative value on failure. */
static int
md_end_current_block(MD_CTX* ctx)
{
    int ret = 0;

    if(ctx.current_block == null)
        return ret;

    /* Check whether there is a reference definition. (We do this here instead
     * of in md_analyze_line() because reference definition can take multiple
     * lines.) */
    if(ctx.current_block.type == MD_BLOCK_P ||
       (ctx.current_block.type == MD_BLOCK_H && (ctx.current_block.flags & MD_BLOCK_SETEXT_HEADER)))
    {
        MD_LINE* lines = cast(MD_LINE*) (ctx.current_block + 1);
        if(ctx.CH(lines[0].beg) == '[') {
            ret = (md_consume_link_reference_definitions(ctx));
            if (ret < 0) goto abort;
            /* The whole block consisted of reference definitions. */
            if(ctx.current_block == null)
                return ret;
        }
    }

    if(ctx.current_block.type == MD_BLOCK_H && (ctx.current_block.flags & MD_BLOCK_SETEXT_HEADER)) {
        int n_lines = ctx.current_block.n_lines;

        if(n_lines > 1) {
            /* Get rid of the underline. */
            ctx.current_block.n_lines--;
            ctx.n_block_bytes -= MD_LINE.sizeof;
        } else {
            /* Only the underline has left after eating the ref. defs.
             * Keep the line as beginning of a new ordinary paragraph. */
            ctx.current_block.type = MD_BLOCK_P;
            return 0;
        }
    }

    /* Mark we are not building any block anymore. */
    ctx.current_block = null;

abort:
    return ret;
}

/* Append a line record for the analyzed line to the current block. Code and
 * raw HTML blocks store MD_VERBATIMLINE records (which also keep the
 * indentation), all other blocks store MD_LINE records. Returns -1 if the
 * record cannot be allocated. */
static int
md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
{
    assert(ctx.current_block != null);

    if(ctx.current_block.type == MD_BLOCK_CODE || ctx.current_block.type == MD_BLOCK_HTML) {
        MD_VERBATIMLINE* line;

        line = cast(MD_VERBATIMLINE*) md_push_block_bytes(ctx, MD_VERBATIMLINE.sizeof);
        if(line == null)
            return -1;

        line.indent = analysis.indent;
        line.beg = analysis.beg;
        line.end = analysis.end;
    } else {
        MD_LINE* line;

        line = cast(MD_LINE*) md_push_block_bytes(ctx, MD_LINE.sizeof);
        if(line == null)
            return -1;

        line.beg = analysis.beg;
        line.end = analysis.end;
    }
    ctx.current_block.n_lines++;

    return 0;
}

/* End the current block and append a container record (opener or closer,
 * as given by flags). The start value is stored in the record's n_lines
 * member. Returns a negative value on failure. */
static int
md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, uint start,
                        uint data, uint flags)
{
    MD_BLOCK* block;
    int ret = 0;

    ret = (md_end_current_block(ctx));
    if (ret < 0) goto abort;

    block = cast(MD_BLOCK*) md_push_block_bytes(ctx, MD_BLOCK.sizeof);
    if(block == null)
        return -1;

    block.type = type;
    block.flags = flags;
    block.data = data;
    block.n_lines = start;

abort:
    return ret;
}



/***********************
 ***  Line Analysis  ***
 ***********************/

/* Check whether the line starting at beg is a thematic break made of the
 * character found at beg. On success *p_end is set to the end of the line.
 * On failure *p_killer is set to the offset where the scan stopped. */
static int
md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
{
    OFF off = beg + 1;
    int n = 1;

    while(off < ctx.size && (ctx.CH(off) == ctx.CH(beg) || ctx.CH(off) == ' ' || ctx.CH(off) == '\t')) {
        if(ctx.CH(off) == ctx.CH(beg))
            n++;
        off++;
    }

    if(n < 3) {
        *p_killer = off;
        return FALSE;
    }

    /* Nothing else can be present on the line. */
    if(off < ctx.size && !ctx.ISNEWLINE(off)) {
        *p_killer = off;
        return FALSE;
    }

    *p_end = off;
    return TRUE;
}

/* Check whether the line starting at beg (a '#' character) opens an ATX
 * header. On success *p_level receives the number of '#' characters and both
 * *p_beg and *p_end are set just behind the spaces following them. */
static int
md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, uint* p_level)
{
    int n;
    OFF off = beg + 1;

    while(off < ctx.size && ctx.CH(off) == '#' && off - beg < 7)
        off++;
    n = off - beg;

    if(n > 6)
        return FALSE;
    *p_level = n;

    if(!(ctx.parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx.size &&
       ctx.CH(off) != ' ' && ctx.CH(off) != '\t' && !ctx.ISNEWLINE(off))
        return FALSE;

    while(off < ctx.size && ctx.CH(off) == ' ')
        off++;
    *p_beg = off;
    *p_end = off;
    return TRUE;
}

/* Check whether the line starting at beg is a setext underline, i.e. a run
 * of the character found at beg optionally followed by spaces. On success
 * *p_level is 1 for '=' and 2 otherwise, and *p_end is the end of the line. */
static int
md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, uint* p_level)
{
    OFF off = beg + 1;

    while(off < ctx.size && ctx.CH(off) == ctx.CH(beg))
        off++;

    /* Optionally, space(s) can follow. */
    while(off < ctx.size && ctx.CH(off) == ' ')
        off++;

    /* But nothing more is allowed on the line. */
    if(off < ctx.size && !ctx.ISNEWLINE(off))
        return FALSE;

    *p_level = (ctx.CH(beg) == '=' ? 1 : 2);
    *p_end = off;
    return TRUE;
}

/* Check whether the line starting at beg is a table underline. At least one
 * pipe has to be present. On success *p_end is the end of the line and
 * *p_col_count the number of cell underlines found. */
int md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, uint* p_col_count)
{
    OFF off = beg;
    int found_pipe = FALSE;
    uint col_count = 0;

    if(off < ctx.size && ctx.CH(off) == '|') {
        found_pipe = TRUE;
        off++;
        while(off < ctx.size && ctx.ISWHITESPACE(off))
            off++;
    }

    while(1) {
        OFF cell_beg;
        int delimited = FALSE;

        /* Cell underline ("-----", ":----", "----:" or ":----:") */
        cell_beg = off;
        if(off < ctx.size && ctx.CH(off) == ':')
            off++;
        while(off < ctx.size && ctx.CH(off) == '-')
            off++;
        if(off < ctx.size && ctx.CH(off) == ':')
            off++;
        if(off - cell_beg < 3)
            return FALSE;

        col_count++;

        /* Pipe delimiter (optional at the end of line). */
        while(off < ctx.size && ctx.ISWHITESPACE(off))
            off++;
        if(off < ctx.size && ctx.CH(off) == '|') {
            delimited = TRUE;
            found_pipe = TRUE;
            off++;
            while(off < ctx.size && ctx.ISWHITESPACE(off))
                off++;
        }

        /* Success, if we reach end of line. */
        if(off >= ctx.size || ctx.ISNEWLINE(off))
            break;

        if(!delimited)
            return FALSE;
    }

    if(!found_pipe)
        return FALSE;

    *p_end = off;
    *p_col_count = col_count;
    return TRUE;
}

/* Check whether the line starting at beg opens a fenced code block. The
 * length of the fence is remembered in ctx.code_fence_length. */
static int
md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
{
    OFF off = beg;

    while(off < ctx.size && ctx.CH(off) == ctx.CH(beg))
        off++;

    /* Fence must have at least three characters. */
    if(off - beg < 3)
        return FALSE;

    ctx.code_fence_length = off - beg;

    /* Optionally, space(s) can follow.
*/
    while(off < ctx.size && ctx.CH(off) == ' ')
        off++;

    /* Optionally, an info string can follow. */
    while(off < ctx.size && !ctx.ISNEWLINE(off)) {
        /* Backtick-based fence must not contain '`' in the info string. */
        if(ctx.CH(beg) == '`' && ctx.CH(off) == '`')
            return FALSE;
        off++;
    }

    *p_end = off;
    return TRUE;
}

/* Check whether the line starting at beg closes the fenced code block which
 * was opened with a fence of ctx.code_fence_length characters ch. */
static int
md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
{
    OFF off = beg;
    int ret = FALSE;

    /* Closing fence must have at least the same length and use same char as
     * opening one. */
    while(off < ctx.size && ctx.CH(off) == ch)
        off++;
    if(off - beg < ctx.code_fence_length)
        goto out_;

    /* Optionally, space(s) can follow */
    while(off < ctx.size && ctx.CH(off) == ' ')
        off++;

    /* But nothing more is allowed on the line. */
    if(off < ctx.size && !ctx.ISNEWLINE(off))
        goto out_;

    ret = TRUE;

out_:
    /* Note we set *p_end even on failure: If we are not closing fence, caller
     * would eat the line anyway without any parsing. */
    *p_end = off;
    return ret;
}

/* Returns type of the raw HTML block, or FALSE if it is not HTML block.
 * (Refer to CommonMark specification for details about the types.)
 */
int md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
{
    /* Type 6 is started by a long list of allowed tags. We use two-level
     * tree to speed-up the search. */

    /* Every tag list is terminated by Xend, a string with a null pointer. */
    static immutable string Xend = null;
    static immutable string[] t1 = [ "script", "pre", "style", Xend ];
    static immutable string[] a6 = [ "address", "article", "aside", Xend ];
    static immutable string[] b6 = [ "base", "basefont", "blockquote", "body", Xend ];
    static immutable string[] c6 = [ "caption", "center", "col", "colgroup", Xend ];
    static immutable string[] d6 = [ "dd", "details", "dialog", "dir",
                                     "div", "dl", "dt", Xend ];
    static immutable string[] f6 = [ "fieldset", "figcaption", "figure", "footer",
                                     "form", "frame", "frameset", Xend ];
    static immutable string[] h6 = [ "h1", "head", "header", "hr", "html", Xend ];
    static immutable string[] i6 = [ "iframe", Xend ];
    static immutable string[] l6 = [ "legend", "li", "link", Xend ];
    static immutable string[] m6 = [ "main", "menu", "menuitem", Xend ];
    static immutable string[] n6 = [ "nav", "noframes", Xend ];
    static immutable string[] o6 = [ "ol", "optgroup", "option", Xend ];
    static immutable string[] p6 = [ "p", "param", Xend ];
    static immutable string[] s6 = [ "section", "source", "summary", Xend ];
    static immutable string[] t6 = [ "table", "tbody", "td", "tfoot", "th",
                                     "thead", "title", "tr", "track", Xend ];
    static immutable string[] u6 = [ "ul", Xend ];
    static immutable string[] xx = [ Xend ];

    /* First level of the tree: one slot per initial letter 'a'..'z'. */
    immutable(string)*[26] map6;
    map6[0] = a6.ptr;
    map6[1] = b6.ptr;
    map6[2] = c6.ptr;
    map6[3] = d6.ptr;
    map6[4] = xx.ptr;
    map6[5] = f6.ptr;
    map6[6] = xx.ptr;
    map6[7] = h6.ptr;
    map6[8] = i6.ptr;
    map6[9] = xx.ptr;
    map6[10] = xx.ptr;
    map6[11] = l6.ptr;
    map6[12] = m6.ptr;
    map6[13] = n6.ptr;
    map6[14] = o6.ptr;
    map6[15] = p6.ptr;
    map6[16] = xx.ptr;
    map6[17] = xx.ptr;
    map6[18] = s6.ptr;
    map6[19] = t6.ptr;
    map6[20] = u6.ptr;
    map6[21] = xx.ptr;
    map6[22] = xx.ptr;
    map6[23] = xx.ptr;
    map6[24] = xx.ptr;
    map6[25] = xx.ptr;

    OFF off = beg + 1;
    int i;

    /* Check for type 1: <script, <pre, or <style */
    for(i = 0; t1[i].ptr != null; i++)
    {
        if(off + t1[i].length <= ctx.size)
        {
            if(md_ascii_case_eq(ctx.STR(off), t1[i].ptr, cast(uint)(t1[i].length)))
                return 1;
        }
    }

    /* Check for type 2: <!-- */
    if(off + 3 < ctx.size && ctx.CH(off) == '!' && ctx.CH(off+1) == '-' && ctx.CH(off+2) == '-')
        return 2;

    /* Check for type 3: <? */
    if(off < ctx.size && ctx.CH(off) == '?')
        return 3;

    /* Check for type 4 or 5: <! */
    if(off < ctx.size && ctx.CH(off) == '!') {
        /* Check for type 4: <! followed by uppercase letter. */
        if(off + 1 < ctx.size && ctx.ISUPPER(off+1))
            return 4;

        /* Check for type 5: <![CDATA[ */
        if(off + 8 < ctx.size) {
            if(md_ascii_eq(ctx.STR(off), "![CDATA[", 8 * CHAR.sizeof))
                return 5;
        }
    }

    /* Check for type 6: Many possible starting tags listed above. */
    if(off + 1 < ctx.size && (ctx.ISALPHA(off) || (ctx.CH(off) == '/' && ctx.ISALPHA(off+1)))) {
        int slot;
        const(string)* tags;

        if(ctx.CH(off) == '/')
            off++;

        slot = (ctx.ISUPPER(off) ? ctx.CH(off) - 'A' : ctx.CH(off) - 'a');
        tags = map6[slot];

        for(i = 0; tags[i].ptr != null; i++) {
            if(off + tags[i].length <= ctx.size) {
                if(md_ascii_case_eq(ctx.STR(off), tags[i].ptr, cast(uint) tags[i].length)) {
                    OFF tmp = cast(uint)(off + tags[i].length);
                    if(tmp >= ctx.size)
                        return 6;
                    if(ctx.ISBLANK(tmp) || ctx.ISNEWLINE(tmp) || ctx.CH(tmp) == '>')
                        return 6;
                    if(tmp+1 < ctx.size && ctx.CH(tmp) == '/' && ctx.CH(tmp+1) == '>')
                        return 6;
                    break;
                }
            }
        }
    }

    /* Check for type 7: any COMPLETE other opening or closing tag.
*/
    if(off + 1 < ctx.size) {
        OFF end;

        if(md_is_html_tag(ctx, null, 0, beg, ctx.size, &end)) {
            /* Only optional whitespace and new line may follow. */
            while(end < ctx.size && ctx.ISWHITESPACE(end))
                end++;
            if(end >= ctx.size || ctx.ISNEWLINE(end))
                return 7;
        }
    }

    return FALSE;
}

/* Case sensitive check whether there is a substring 'what' between 'beg'
 * and end of line.
 *
 * On success *p_end is set just behind the match; otherwise it is set to
 * the offset where the search stopped. */
static int
md_line_contains(MD_CTX* ctx, OFF beg, const(CHAR)* what, SZ what_len, OFF* p_end)
{
    OFF i;
    /* '<=' so that a match ending exactly at the end of the input is still
     * tested; the comparison then covers [i, ctx.size) and stays in bounds. */
    for(i = beg; i + what_len <= ctx.size; i++) {
        if(ctx.ISNEWLINE(i))
            break;
        if(memcmp(ctx.STR(i), what, what_len * CHAR.sizeof) == 0) {
            *p_end = i + what_len;
            return TRUE;
        }
    }

    *p_end = i;
    return FALSE;
}

/* Returns type of HTML block end condition or FALSE if not an end condition.
 *
 * Note it fills p_end even when it is not end condition as the caller
 * does not need to analyze contents of a raw HTML block.
 */
int md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
{
    switch(ctx.html_block_type) {
        case 1:
        {
            OFF off = beg;

            while(off < ctx.size && !ctx.ISNEWLINE(off)) {
                if(ctx.CH(off) == '<') {
                    /* The length guards keep the requested comparison inside
                     * the input when it ends with an incomplete closing tag. */
                    if(off + 9 <= ctx.size && md_ascii_case_eq(ctx.STR(off), "</script>", 9)) {
                        *p_end = off + 9;
                        return TRUE;
                    }

                    if(off + 8 <= ctx.size && md_ascii_case_eq(ctx.STR(off), "</style>", 8)) {
                        *p_end = off + 8;
                        return TRUE;
                    }

                    if(off + 6 <= ctx.size && md_ascii_case_eq(ctx.STR(off), "</pre>", 6)) {
                        *p_end = off + 6;
                        return TRUE;
                    }
                }

                off++;
            }
            *p_end = off;
            return FALSE;
        }

        case 2:
            return (md_line_contains(ctx, beg, "-->", 3, p_end) ? 2 : FALSE);

        case 3:
            return (md_line_contains(ctx, beg, "?>", 2, p_end) ? 3 : FALSE);

        case 4:
            return (md_line_contains(ctx, beg, ">", 1, p_end) ? 4 : FALSE);

        case 5:
            return (md_line_contains(ctx, beg, "]]>", 3, p_end) ? 5 : FALSE);

        case 6:     /* Pass through */
        case 7:
            *p_end = beg;
            /* The end of the input counts as a blank line too; checking it
             * first avoids looking at a character at offset ctx.size. */
            return ((beg >= ctx.size || ctx.ISNEWLINE(beg)) ? ctx.html_block_type : FALSE);

        default:
            assert(false);
    }
}


/* Check whether a newly found container mark can continue the list of the
 * pivot container: it has to use the same mark character and must not be
 * indented beyond the pivot's contents. Block quotes never match. */
static int
md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
{
    /* Block quote has no "items" like lists. */
    if(container.ch == '>')
        return FALSE;

    if(container.ch != pivot.ch)
        return FALSE;
    if(container.mark_indent > pivot.contents_indent)
        return FALSE;

    return TRUE;
}

/* Append a copy of *container to ctx.containers, growing the array when
 * needed. Returns -1 (after logging) if the array cannot be grown. */
static int
md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
{
    if(ctx.n_containers >= ctx.alloc_containers) {
        MD_CONTAINER* new_containers;
        /* Kept in a local until the reallocation succeeds, so that
         * ctx.alloc_containers never claims more than ctx.containers holds. */
        const new_alloc_containers = (ctx.alloc_containers > 0 ? ctx.alloc_containers * 2 : 16);

        new_containers = cast(MD_CONTAINER*) realloc_safe(ctx.containers, new_alloc_containers * MD_CONTAINER.sizeof);
        if (new_containers == null) {
            ctx.MD_LOG("realloc() failed.");
            return -1;
        }

        ctx.containers = new_containers;
        ctx.alloc_containers = new_alloc_containers;
    }

    memcpy(&ctx.containers[ctx.n_containers++], container, MD_CONTAINER.sizeof);
    return 0;
}

/* Emit opener records for the last n_children containers in ctx.containers:
 * a list record followed by a list item record for list marks, or a block
 * quote record for '>'. Returns a negative value on failure. */
static int
md_enter_child_containers(MD_CTX* ctx, int n_children, uint data)
{
    int i;
    int ret = 0;

    for(i = ctx.n_containers - n_children; i < ctx.n_containers; i++) {
        MD_CONTAINER* c = &ctx.containers[i];
        int is_ordered_list = FALSE;

        switch(c.ch) {
            case ')':
            case '.':
                is_ordered_list = TRUE;
                /* Pass through */
                goto case '-';

            case '-':
            case '+':
            case '*':
                /* Remember offset in ctx.block_bytes so we can revisit the
                 * block if we detect it is a loose list. */
                ret = (md_end_current_block(ctx));
                if (ret < 0) goto abort;
                c.block_byte_off = ctx.n_block_bytes;

                ret = (md_push_container_bytes(ctx,
                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
                                c.start, data, MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                                c.task_mark_off,
                                (c.is_task ? ctx.CH(c.task_mark_off) : 0),
                                MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                break;

            case '>':
                ret = (md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
                if (ret < 0) goto abort;
                break;

            default:
                assert(false);
        }
    }

abort:
    return ret;
}

/* Emit closer records for the innermost containers and pop them from
 * ctx.containers until only n_keep of them remain. Returns a negative value
 * on failure. */
static int
md_leave_child_containers(MD_CTX* ctx, int n_keep)
{
    int ret = 0;

    while(ctx.n_containers > n_keep) {
        MD_CONTAINER* c = &ctx.containers[ctx.n_containers-1];
        int is_ordered_list = FALSE;

        switch(c.ch) {
            case ')':
            case '.':
                is_ordered_list = TRUE;
                /* Pass through */
                goto case '-';

            case '-':
            case '+':
            case '*':
                ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                                c.task_mark_off, (c.is_task ? ctx.CH(c.task_mark_off) : 0),
                                MD_BLOCK_CONTAINER_CLOSER));
                if (ret < 0) goto abort;
                ret = (md_push_container_bytes(ctx,
                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
                                c.ch, MD_BLOCK_CONTAINER_CLOSER));
                if (ret < 0) goto abort;
                break;

            case '>':
                ret = (md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
                                0, MD_BLOCK_CONTAINER_CLOSER));
                if (ret < 0) goto abort;
                break;

            default:
                assert(false);
        }

        ctx.n_containers--;
    }

abort:
    return ret;
}

/* Check whether a container mark (block quote '>', bullet, or ordered list
 * number followed by '.' or ')') starts at beg. On success *p_container is
 * filled in and *p_end is set just behind the mark. Nothing is recognized
 * when indent reaches ctx.code_indent_offset. */
static int
md_is_container_mark(MD_CTX* ctx, uint indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
{
    OFF off = beg;
    OFF max_end;

    if(indent >= ctx.code_indent_offset)
        return FALSE;

    /* Check for block quote mark. */
    if(off < ctx.size && ctx.CH(off) == '>') {
        off++;
        p_container.ch = '>';
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off;
        return TRUE;
    }

    /* Check for list item bullet mark. */
    if(off+1 < ctx.size && ctx.ISANYOF(off, "-+*") && (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1))) {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off + 1;
        return TRUE;
    }

    /* Check for ordered list item marks. */
    max_end = off + 9;
    if(max_end > ctx.size)
        max_end = ctx.size;
    p_container.start = 0;
    while(off < max_end && ctx.ISDIGIT(off)) {
        p_container.start = p_container.start * 10 + ctx.CH(off) - '0';
        off++;
    }
    /* 'off > beg': the delimiter has to be preceded by at least one digit,
     * otherwise a bare ". " or ") " would start an ordered list. */
    if(off > beg && off+1 < ctx.size && (ctx.CH(off) == '.' || ctx.CH(off) == ')') && (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1))) {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + off - beg + 1;
        *p_end = off + 1;
        return TRUE;
    }

    return FALSE;
}

/* Skip blank characters starting at beg and return how many columns they
 * add to total_indent; a tab advances to the next multiple of four.
 * *p_end is set to the first non-blank offset. */
uint md_line_indentation(MD_CTX* ctx, uint total_indent, OFF beg, OFF* p_end)
{
    OFF off = beg;
    uint indent = total_indent;

    while(off < ctx.size && ctx.ISBLANK(off)) {
        if(ctx.CH(off) == '\t')
            indent = (indent + 4) & ~3;
        else
            indent++;
        off++;
    }

    *p_end = off;
    return indent - total_indent;
}

static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 };

/* Analyze type of the line and find some its properties. This serves as a
 * main input for determining type and boundaries of a block. */
int md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
                    const(MD_LINE_ANALYSIS)* pivot_line, MD_LINE_ANALYSIS* line)
{
    uint total_indent = 0;
    int n_parents = 0;
    int n_brothers = 0;
    int n_children = 0;
    MD_CONTAINER container = { 0 };
    int prev_line_has_list_loosening_effect = ctx.last_line_has_list_loosening_effect;
    OFF off = beg;
    OFF hr_killer = 0;
    int ret = 0;

    line.indent = md_line_indentation(ctx, total_indent, off, &off);
    total_indent += line.indent;
    line.beg = off;

    /* Given the indentation and block quote marks '>', determine how many of
     * the current containers are our parents. */
    while(n_parents < ctx.n_containers) {
        MD_CONTAINER* c = &ctx.containers[n_parents];

        if(c.ch == '>' && line.indent < ctx.code_indent_offset &&
            off < ctx.size && ctx.CH(off) == '>')
        {
            /* Block quote mark.
*/ 6116 off++; 6117 total_indent++; 6118 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6119 total_indent += line.indent; 6120 6121 /* The optional 1st space after '>' is part of the block quote mark. */ 6122 if(line.indent > 0) 6123 line.indent--; 6124 6125 line.beg = off; 6126 } else if(c.ch != '>' && line.indent >= c.contents_indent) { 6127 /* List. */ 6128 line.indent -= c.contents_indent; 6129 } else { 6130 break; 6131 } 6132 6133 n_parents++; 6134 } 6135 6136 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6137 /* Blank line does not need any real indentation to be nested inside 6138 * a list. */ 6139 if(n_brothers + n_children == 0) { 6140 while(n_parents < ctx.n_containers && ctx.containers[n_parents].ch != '>') 6141 n_parents++; 6142 } 6143 } 6144 6145 while(TRUE) { 6146 /* Check whether we are fenced code continuation. */ 6147 if(pivot_line.type == MD_LINE_FENCEDCODE) { 6148 line.beg = off; 6149 6150 /* We are another MD_LINE_FENCEDCODE unless we are closing fence 6151 * which we transform into MD_LINE_BLANK. */ 6152 if(line.indent < ctx.code_indent_offset) { 6153 if(md_is_closing_code_fence(ctx, ctx.CH(pivot_line.beg), off, &off)) { 6154 line.type = MD_LINE_BLANK; 6155 ctx.last_line_has_list_loosening_effect = FALSE; 6156 break; 6157 } 6158 } 6159 6160 /* Change indentation accordingly to the initial code fence. */ 6161 if(n_parents == ctx.n_containers) { 6162 if(line.indent > pivot_line.indent) 6163 line.indent -= pivot_line.indent; 6164 else 6165 line.indent = 0; 6166 6167 line.type = MD_LINE_FENCEDCODE; 6168 break; 6169 } 6170 } 6171 6172 /* Check whether we are HTML block continuation. */ 6173 if(pivot_line.type == MD_LINE_HTML && ctx.html_block_type > 0) { 6174 int html_block_type; 6175 6176 html_block_type = md_is_html_block_end_condition(ctx, off, &off); 6177 if(html_block_type > 0) { 6178 assert(html_block_type == ctx.html_block_type); 6179 6180 /* Make sure this is the last line of the block. 
*/ 6181 ctx.html_block_type = 0; 6182 6183 /* Some end conditions serve as blank lines at the same time. */ 6184 if(html_block_type == 6 || html_block_type == 7) { 6185 line.type = MD_LINE_BLANK; 6186 line.indent = 0; 6187 break; 6188 } 6189 } 6190 6191 if(n_parents == ctx.n_containers) { 6192 line.type = MD_LINE_HTML; 6193 break; 6194 } 6195 } 6196 6197 /* Check for blank line. */ 6198 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6199 if(pivot_line.type == MD_LINE_INDENTEDCODE && n_parents == ctx.n_containers) { 6200 line.type = MD_LINE_INDENTEDCODE; 6201 if(line.indent > ctx.code_indent_offset) 6202 line.indent -= ctx.code_indent_offset; 6203 else 6204 line.indent = 0; 6205 ctx.last_line_has_list_loosening_effect = FALSE; 6206 } else { 6207 line.type = MD_LINE_BLANK; 6208 ctx.last_line_has_list_loosening_effect = (n_parents > 0 && 6209 n_brothers + n_children == 0 && 6210 ctx.containers[n_parents-1].ch != '>'); 6211 6212 /* See https://github.com/mity/md4c/issues/6 6213 * 6214 * This ugly checking tests we are in (yet empty) list item but not 6215 * its very first line (with the list item mark). 6216 * 6217 * If we are such blank line, then any following non-blank line 6218 * which would be part of this list item actually ends the list 6219 * because "a list item can begin with at most one blank line." 6220 */ 6221 if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' && 6222 n_brothers + n_children == 0 && ctx.current_block == null && 6223 ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof) 6224 { 6225 MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof); 6226 if(top_block.type == MD_BLOCK_LI) 6227 ctx.last_list_item_starts_with_two_blank_lines = TRUE; 6228 } 6229 } 6230 break; 6231 } else { 6232 /* This is 2nd half of the hack. If the flag is set (that is there 6233 * were 2nd blank line at the start of the list item) and we would also 6234 * belonging to such list item, then interrupt the list. 
*/ 6235 ctx.last_line_has_list_loosening_effect = FALSE; 6236 if(ctx.last_list_item_starts_with_two_blank_lines) { 6237 if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' && 6238 n_brothers + n_children == 0 && ctx.current_block == null && 6239 ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof) 6240 { 6241 MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof); 6242 if(top_block.type == MD_BLOCK_LI) 6243 n_parents--; 6244 } 6245 6246 ctx.last_list_item_starts_with_two_blank_lines = FALSE; 6247 } 6248 } 6249 6250 /* Check whether we are Setext underline. */ 6251 if(line.indent < ctx.code_indent_offset && pivot_line.type == MD_LINE_TEXT 6252 && (ctx.CH(off) == '=' || ctx.CH(off) == '-') 6253 && (n_parents == ctx.n_containers)) 6254 { 6255 uint level; 6256 6257 if(md_is_setext_underline(ctx, off, &off, &level)) { 6258 line.type = MD_LINE_SETEXTUNDERLINE; 6259 line.data = level; 6260 break; 6261 } 6262 } 6263 6264 /* Check for thematic break line. */ 6265 if(line.indent < ctx.code_indent_offset && ctx.ISANYOF(off, "-_*") && off >= hr_killer) { 6266 if(md_is_hr_line(ctx, off, &off, &hr_killer)) { 6267 line.type = MD_LINE_HR; 6268 break; 6269 } 6270 } 6271 6272 /* Check for "brother" container. I.e. whether we are another list item 6273 * in already started list. */ 6274 if(n_parents < ctx.n_containers && n_brothers + n_children == 0) { 6275 OFF tmp; 6276 6277 if(md_is_container_mark(ctx, line.indent, off, &tmp, &container) && 6278 md_is_container_compatible(&ctx.containers[n_parents], &container)) 6279 { 6280 pivot_line = &md_dummy_blank_line; 6281 6282 off = tmp; 6283 6284 total_indent += container.contents_indent - container.mark_indent; 6285 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6286 total_indent += line.indent; 6287 line.beg = off; 6288 6289 /* Some of the following whitespace actually still belongs to the mark. 
*/ 6290 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6291 container.contents_indent++; 6292 } else if(line.indent <= ctx.code_indent_offset) { 6293 container.contents_indent += line.indent; 6294 line.indent = 0; 6295 } else { 6296 container.contents_indent += 1; 6297 line.indent--; 6298 } 6299 6300 ctx.containers[n_parents].mark_indent = container.mark_indent; 6301 ctx.containers[n_parents].contents_indent = container.contents_indent; 6302 6303 n_brothers++; 6304 continue; 6305 } 6306 } 6307 6308 /* Check for indented code. 6309 * Note indented code block cannot interrupt a paragraph. */ 6310 if(line.indent >= ctx.code_indent_offset && 6311 (pivot_line.type == MD_LINE_BLANK || pivot_line.type == MD_LINE_INDENTEDCODE)) 6312 { 6313 line.type = MD_LINE_INDENTEDCODE; 6314 assert(line.indent >= ctx.code_indent_offset); 6315 line.indent -= ctx.code_indent_offset; 6316 line.data = 0; 6317 break; 6318 } 6319 6320 /* Check for start of a new container block. */ 6321 if(line.indent < ctx.code_indent_offset && 6322 md_is_container_mark(ctx, line.indent, off, &off, &container)) 6323 { 6324 if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers && 6325 (off >= ctx.size || ctx.ISNEWLINE(off)) && container.ch != '>') 6326 { 6327 /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */ 6328 } else if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers && 6329 (container.ch == '.' || container.ch == ')') && container.start != 1) 6330 { 6331 /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */ 6332 } else { 6333 total_indent += container.contents_indent - container.mark_indent; 6334 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6335 total_indent += line.indent; 6336 6337 line.beg = off; 6338 line.data = container.ch; 6339 6340 /* Some of the following whitespace actually still belongs to the mark. 
*/ 6341 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6342 container.contents_indent++; 6343 } else if(line.indent <= ctx.code_indent_offset) { 6344 container.contents_indent += line.indent; 6345 line.indent = 0; 6346 } else { 6347 container.contents_indent += 1; 6348 line.indent--; 6349 } 6350 6351 if(n_brothers + n_children == 0) 6352 pivot_line = &md_dummy_blank_line; 6353 6354 if(n_children == 0) 6355 { 6356 ret = (md_leave_child_containers(ctx, n_parents + n_brothers)); 6357 if (ret < 0) goto abort; 6358 } 6359 6360 n_children++; 6361 ret = (md_push_container(ctx, &container)); 6362 if (ret < 0) goto abort; 6363 continue; 6364 } 6365 } 6366 6367 /* Check whether we are table continuation. */ 6368 if(pivot_line.type == MD_LINE_TABLE && md_is_table_row(ctx, off, &off) && 6369 n_parents == ctx.n_containers) 6370 { 6371 line.type = MD_LINE_TABLE; 6372 break; 6373 } 6374 6375 /* Check for ATX header. */ 6376 if(line.indent < ctx.code_indent_offset && ctx.CH(off) == '#') { 6377 uint level; 6378 6379 if(md_is_atxheader_line(ctx, off, &line.beg, &off, &level)) { 6380 line.type = MD_LINE_ATXHEADER; 6381 line.data = level; 6382 break; 6383 } 6384 } 6385 6386 /* Check whether we are starting code fence. */ 6387 if(ctx.CH(off) == '`' || ctx.CH(off) == '~') { 6388 if(md_is_opening_code_fence(ctx, off, &off)) { 6389 line.type = MD_LINE_FENCEDCODE; 6390 line.data = 1; 6391 break; 6392 } 6393 } 6394 6395 /* Check for start of raw HTML block. */ 6396 if(ctx.CH(off) == '<' && !(ctx.parser.flags & MD_FLAG_NOHTMLBLOCKS)) 6397 { 6398 ctx.html_block_type = md_is_html_block_start_condition(ctx, off); 6399 6400 /* HTML block type 7 cannot interrupt paragraph. */ 6401 if(ctx.html_block_type == 7 && pivot_line.type == MD_LINE_TEXT) 6402 ctx.html_block_type = 0; 6403 6404 if(ctx.html_block_type > 0) { 6405 /* The line itself also may immediately close the block. 
*/ 6406 if(md_is_html_block_end_condition(ctx, off, &off) == ctx.html_block_type) { 6407 /* Make sure this is the last line of the block. */ 6408 ctx.html_block_type = 0; 6409 } 6410 6411 line.type = MD_LINE_HTML; 6412 break; 6413 } 6414 } 6415 6416 /* Check for table underline. */ 6417 if((ctx.parser.flags & MD_FLAG_TABLES) && pivot_line.type == MD_LINE_TEXT && 6418 (ctx.CH(off) == '|' || ctx.CH(off) == '-' || ctx.CH(off) == ':') && 6419 n_parents == ctx.n_containers) 6420 { 6421 uint col_count; 6422 6423 if(ctx.current_block != null && ctx.current_block.n_lines == 1 && 6424 md_is_table_underline(ctx, off, &off, &col_count) && 6425 md_is_table_row(ctx, pivot_line.beg, null)) 6426 { 6427 line.data = col_count; 6428 line.type = MD_LINE_TABLEUNDERLINE; 6429 break; 6430 } 6431 } 6432 6433 /* By default, we are normal text line. */ 6434 line.type = MD_LINE_TEXT; 6435 if(pivot_line.type == MD_LINE_TEXT && n_brothers + n_children == 0) { 6436 /* Lazy continuation. */ 6437 n_parents = ctx.n_containers; 6438 } 6439 6440 /* Check for task mark. */ 6441 if((ctx.parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 && 6442 ISANYOF_(ctx.containers[ctx.n_containers-1].ch, "-+*.)")) 6443 { 6444 OFF tmp = off; 6445 6446 while(tmp < ctx.size && tmp < off + 3 && ctx.ISBLANK(tmp)) 6447 tmp++; 6448 if(tmp + 2 < ctx.size && ctx.CH(tmp) == '[' && 6449 ctx.ISANYOF(tmp+1, "xX ") && ctx.CH(tmp+2) == ']' && 6450 (tmp + 3 == ctx.size || ctx.ISBLANK(tmp+3) || ctx.ISNEWLINE(tmp+3))) 6451 { 6452 MD_CONTAINER* task_container = (n_children > 0 ? &ctx.containers[ctx.n_containers-1] : &container); 6453 task_container.is_task = TRUE; 6454 task_container.task_mark_off = tmp + 1; 6455 off = tmp + 3; 6456 while(ctx.ISWHITESPACE(off)) 6457 off++; 6458 line.beg = off; 6459 } 6460 } 6461 6462 break; 6463 } 6464 6465 /* Scan for end of the line. 6466 * 6467 * Note this is quite a bottleneck of the parsing as we here iterate almost 6468 * over compete document. 
6469 */ 6470 { 6471 /* Optimization: Use some loop unrolling. */ 6472 while(off + 3 < ctx.size && !ctx.ISNEWLINE(off+0) && !ctx.ISNEWLINE(off+1) 6473 && !ctx.ISNEWLINE(off+2) && !ctx.ISNEWLINE(off+3)) 6474 off += 4; 6475 while(off < ctx.size && !ctx.ISNEWLINE(off)) 6476 off++; 6477 } 6478 6479 /* Set end of the line. */ 6480 line.end = off; 6481 6482 /* But for ATX header, we should exclude the optional trailing mark. */ 6483 if(line.type == MD_LINE_ATXHEADER) { 6484 OFF tmp = line.end; 6485 while(tmp > line.beg && ctx.CH(tmp-1) == ' ') 6486 tmp--; 6487 while(tmp > line.beg && ctx.CH(tmp-1) == '#') 6488 tmp--; 6489 if(tmp == line.beg || ctx.CH(tmp-1) == ' ' || (ctx.parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)) 6490 line.end = tmp; 6491 } 6492 6493 /* Trim trailing spaces. */ 6494 if(line.type != MD_LINE_INDENTEDCODE && line.type != MD_LINE_FENCEDCODE) { 6495 while(line.end > line.beg && ctx.CH(line.end-1) == ' ') 6496 line.end--; 6497 } 6498 6499 /* Eat also the new line. */ 6500 if(off < ctx.size && ctx.CH(off) == '\r') 6501 off++; 6502 if(off < ctx.size && ctx.CH(off) == '\n') 6503 off++; 6504 6505 *p_end = off; 6506 6507 /* If we belong to a list after seeing a blank line, the list is loose. */ 6508 if(prev_line_has_list_loosening_effect && line.type != MD_LINE_BLANK && n_parents + n_brothers > 0) { 6509 MD_CONTAINER* c = &ctx.containers[n_parents + n_brothers - 1]; 6510 if(c.ch != '>') { 6511 MD_BLOCK* block = cast(MD_BLOCK*) ((cast(char*)ctx.block_bytes) + c.block_byte_off); 6512 block.flags = block.flags | MD_BLOCK_LOOSE_LIST; 6513 } 6514 } 6515 6516 /* Leave any containers we are not part of anymore. */ 6517 if(n_children == 0 && n_parents + n_brothers < ctx.n_containers) 6518 { 6519 ret = (md_leave_child_containers(ctx, n_parents + n_brothers)); 6520 if (ret < 0) goto abort; 6521 } 6522 6523 /* Enter any container we found a mark for. 
*/
    if(n_brothers > 0) {
        assert(n_brothers == 1);
        ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                    ctx.containers[n_parents].task_mark_off,
                    (ctx.containers[n_parents].is_task ? ctx.CH(ctx.containers[n_parents].task_mark_off) : 0),
                    MD_BLOCK_CONTAINER_CLOSER));
        if (ret < 0) goto abort;
        ret = (md_push_container_bytes(ctx, MD_BLOCK_LI,
                    container.task_mark_off,
                    (container.is_task ? ctx.CH(container.task_mark_off) : 0),
                    MD_BLOCK_CONTAINER_OPENER));
        if (ret < 0) goto abort;
        ctx.containers[n_parents].is_task = container.is_task;
        ctx.containers[n_parents].task_mark_off = container.task_mark_off;
    }

    if(n_children > 0)
    {
        ret = (md_enter_child_containers(ctx, n_children, line.data));
        if (ret < 0) goto abort;
    }

abort:
    return ret;
}

/* Feed one already-analyzed line into the block builder.
 *
 * Depending on line.type this ends the current leaf block, starts a new one,
 * re-types the current block (Setext header, table) or simply appends the
 * line to the current block.
 *
 * Params:
 *   ctx          = parser context.
 *   p_pivot_line = in/out; the line the current block is compared against.
 *                  It is reset to &md_dummy_blank_line whenever a block gets
 *                  closed here, or set to `line` when `line` opens a block.
 *   line         = the line to process; its type may be rewritten to
 *                  MD_LINE_TEXT in the Setext-underline downgrade case.
 *
 * Returns: 0 from the early-exit paths; otherwise the result of the last
 * helper call. Negative helper results are propagated immediately.
 */
int md_process_line(MD_CTX* ctx, const(MD_LINE_ANALYSIS)** p_pivot_line, MD_LINE_ANALYSIS* line)
{
    const(MD_LINE_ANALYSIS)* pivot_line = *p_pivot_line;
    int ret = 0;

    /* Blank line ends current leaf block. */
    if(line.type == MD_LINE_BLANK) {
        ret = md_end_current_block(ctx);
        if(ret < 0)
            return ret;
        *p_pivot_line = &md_dummy_blank_line;
        return 0;
    }

    /* Some line types form block on their own. */
    if(line.type == MD_LINE_HR || line.type == MD_LINE_ATXHEADER) {
        ret = md_end_current_block(ctx);
        if(ret < 0)
            return ret;

        /* Add our single-line block. */
        ret = md_start_new_block(ctx, line);
        if(ret < 0)
            return ret;
        ret = md_add_line_into_current_block(ctx, line);
        if(ret < 0)
            return ret;
        ret = md_end_current_block(ctx);
        if(ret < 0)
            return ret;
        *p_pivot_line = &md_dummy_blank_line;
        return 0;
    }

    /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */
    if(line.type == MD_LINE_SETEXTUNDERLINE) {
        assert(ctx.current_block != null);
        ctx.current_block.type = MD_BLOCK_H;
        ctx.current_block.data = line.data;
        ctx.current_block.flags = ctx.current_block.flags | MD_BLOCK_SETEXT_HEADER;
        ret = md_add_line_into_current_block(ctx, line);
        if(ret < 0)
            return ret;
        ret = md_end_current_block(ctx);
        if(ret < 0)
            return ret;
        if(ctx.current_block == null) {
            *p_pivot_line = &md_dummy_blank_line;
        } else {
            /* This happens if we have consumed all the body as link ref. defs.
             * and downgraded the underline into start of a new paragraph block. */
            line.type = MD_LINE_TEXT;
            *p_pivot_line = line;
        }
        return 0;
    }

    /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */
    if(line.type == MD_LINE_TABLEUNDERLINE) {
        assert(ctx.current_block != null);
        assert(ctx.current_block.n_lines == 1);
        ctx.current_block.type = MD_BLOCK_TABLE;
        ctx.current_block.data = line.data;
        assert(pivot_line != &md_dummy_blank_line);
        /* The pivot (the table header row) is re-typed in place, hence the
         * cast that strips const from the pivot pointer. */
        (cast(MD_LINE_ANALYSIS*)pivot_line).type = MD_LINE_TABLE;
        ret = md_add_line_into_current_block(ctx, line);
        if(ret < 0)
            return ret;
        return 0;
    }

    /* The current block also ends if the line has different type. */
    if(line.type != pivot_line.type) {
        ret = md_end_current_block(ctx);
        if(ret < 0)
            return ret;
    }

    /* The current line may start a new block. */
    if(ctx.current_block == null) {
        ret = md_start_new_block(ctx, line);
        if(ret < 0)
            return ret;
        *p_pivot_line = line;
    }

    /* In all other cases the line is just a continuation of the current block. */
    return md_add_line_into_current_block(ctx, line);
}

/* Drive the whole parse: analyze and process the input line by line, then
 * build the reference-definition hashtable, leave all containers and process
 * the collected blocks, all wrapped in an MD_BLOCK_DOC enter/leave pair.
 *
 * Two MD_LINE_ANALYSIS slots are used alternately so that the slot currently
 * serving as the pivot line is never overwritten by the next analysis.
 *
 * Returns: 0 on success, otherwise the first failing step's result.
 */
int md_process_doc(MD_CTX *ctx)
{
    const(MD_LINE_ANALYSIS)* pivot_line = &md_dummy_blank_line;
    MD_LINE_ANALYSIS[2] line_buf;
    MD_LINE_ANALYSIS* line = &line_buf[0];
    OFF off = 0;
    int ret = 0;

    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_DOC, null);
    if (ret != 0) goto abort;

    while(off < ctx.size) {
        /* Never analyze into the slot the pivot still points at. */
        if(line == pivot_line)
            line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);

        ret = (md_analyze_line(ctx, off, &off, pivot_line, line));
        if (ret < 0) goto abort;
        ret = (md_process_line(ctx, &pivot_line, line));
        if (ret < 0) goto abort;
    }

    /* Close the last leaf block. Its result used to be dropped here, unlike
     * every other call in this function; a failure must abort as well. */
    ret = (md_end_current_block(ctx));
    if (ret < 0) goto abort;

    ret = (md_build_ref_def_hashtable(ctx));
    if (ret < 0) goto abort;

    /* Process all blocks. */
    ret = (md_leave_child_containers(ctx, 0));
    if (ret < 0) goto abort;
    ret = (md_process_all_blocks(ctx));
    if (ret < 0) goto abort;

    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_DOC, null);
    if (ret != 0) goto abort;

abort:

    debug(bench)
    /* Output some memory consumption statistics. */
    /* NOTE(review): this branch is only compiled with -debug=bench; it passes
     * a static char array where C-style pointers are presumably expected
     * (sprintf, MD_LOG) -- confirm it still builds before relying on it. */
    {
        char[256] buffer;
        sprintf(buffer, "Alloced %u bytes for block buffer.",
                    cast(uint)(ctx.alloc_block_bytes));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for containers buffer.",
                    cast(uint)(ctx.alloc_containers * MD_CONTAINER.sizeof));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for marks buffer.",
                    cast(uint)(ctx.alloc_marks * MD_MARK.sizeof));
        ctx.MD_LOG(buffer);

        sprintf(buffer, "Alloced %u bytes for aux. buffer.",
                    cast(uint)(ctx.alloc_buffer * MD_CHAR.sizeof));
        ctx.MD_LOG(buffer);
    }

    return ret;
}


/********************
 ***  Public API  ***
 ********************/

/**
 * Parse the Markdown document stored in the string 'text' of size 'size'.
 * The renderer provides callbacks to be called during the parsing so the
 * caller can render the document on the screen or convert the Markdown
 * to another format.
 *
 * Zero is returned on success. If a runtime error occurs (e.g. a memory
 * allocation fails, 'parser' is null, 'text' is null while 'size' is
 * non-zero, or 'parser.abi_version' is unsupported), -1 is returned. If the
 * processing is aborted due to any callback returning non-zero, the return
 * value of that callback is returned.
 */
int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
{
    MD_CTX ctx;
    int ret;

    /* Reject arguments that would otherwise be dereferenced below. */
    if(parser == null)
        return -1;
    if(text == null && size > 0)
        return -1;

    if(parser.abi_version != 0) {
        if(parser.debug_log != null)
            parser.debug_log("Unsupported abi_version.", userdata);
        return -1;
    }

    /* Setup context structure. */
    memset(&ctx, 0, MD_CTX.sizeof);
    ctx.text = text;
    ctx.size = size;
    memcpy(&ctx.parser, parser, MD_PARSER.sizeof);
    ctx.userdata = userdata;
    /* With indented code blocks disabled the threshold becomes the largest
     * OFF value, so no indentation can ever reach it. */
    ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? cast(OFF)(-1) : 4;
    md_build_mark_char_map(&ctx);
    ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));

    /* Reset all unresolved opener mark chains. */
    foreach(ref chain; ctx.mark_chains) {
        chain.head = -1;
        chain.tail = -1;
    }
    ctx.unresolved_link_head = -1;
    ctx.unresolved_link_tail = -1;

    /* All the work. */
    ret = md_process_doc(&ctx);

    /* Clean-up. Runs on success and failure alike. */
    md_free_ref_defs(&ctx);
    md_free_ref_def_hashtable(&ctx);
    free(ctx.buffer);
    free(ctx.marks);
    free(ctx.block_bytes);
    free(ctx.containers);

    return ret;
}

//
// HTML ENTITIES
//

/* Most entities are formed by single Unicode codepoint, few by two codepoints.
 * Single-codepoint entities have codepoints[1] set to zero. */
struct entity
{
    const(char)* name;
    uint[2] codepoints;
}

/* The table is generated from https://html.spec.whatwg.org/entities.json
 *
 * NOTE(review): the name strings below hold the referenced characters
 * themselves rather than "&Name;" style entity references -- this looks like
 * an encoding/extraction artifact; confirm against the generator output. */
static immutable entity[] entity_table =
[
    entity( "Æ", [ 198, 0 ] ),
    entity( "&", [ 38, 0 ] ),
    entity( "Á", [ 193, 0 ] ),
    entity( "Ă", [ 258, 0 ] ),
    entity( "Â", [ 194, 0 ] ),
    entity( "А", [ 1040, 0 ] ),
    entity( "𝔄", [ 120068, 0 ] ),
    entity( "À", [ 192, 0 ] ),
    entity( "Α", [ 913, 0 ] ),
    entity( "Ā", [ 256, 0 ] ),
    entity( "⩓", [ 10835, 0 ] ),
    entity( "Ą", [ 260, 0 ] ),
    entity( "𝔸", [ 120120, 0 ] ),
    entity( "⁡", [ 8289, 0 ] ),
    entity( "Å", [ 197, 0 ] ),
    entity( "𝒜", [ 119964, 0 ] ),
    entity( "≔", [ 8788, 0 ] ),
    entity( "Ã", [ 195, 0 ] ),
    entity( "Ä", [ 196, 0 ] ),
    entity( "∖", [ 8726, 0 ] ),
    entity( "⫧", [ 10983, 0 ] ),
    entity( "⌆", [ 8966, 0 ] ),
    entity( "Б", [ 1041, 0 ] ),
    entity( "∵", [ 8757, 0 ] ),
    entity( "ℬ", [ 8492, 0 ] ),
    entity( "Β", [ 914, 0 ] ),
    entity( "𝔅", [ 120069, 0 ] ),
    entity( "𝔹", [ 120121, 0 ] ),
    entity( "˘", [ 728, 0 ] ),
    entity( "ℬ", [ 8492, 0 ] ),
    entity( "≎", [ 8782, 0 ] ),
    entity( "Ч", [ 1063, 0 ] ),
    entity( "©", [ 169, 0 ] ),
    entity( "Ć", [ 262, 0 ] ),
    entity( "⋒", [ 8914, 0 ] ),
    entity( "ⅅ", [ 8517, 0 ] ),
    entity( "ℭ", [ 8493, 0 ] ),
    entity( "Č", [ 268, 0 ] ),
    entity( "Ç", [ 199, 0 ] ),
    entity( "Ĉ", [ 264, 0 ] ),
    entity( "∰", [ 8752, 0 ] ),
    entity( "Ċ", [ 266, 0 ] ),
6812 entity( "¸", [ 184, 0 ] ), 6813 entity( "·", [ 183, 0 ] ), 6814 entity( "ℭ", [ 8493, 0 ] ), 6815 entity( "Χ", [ 935, 0 ] ), 6816 entity( "⊙", [ 8857, 0 ] ), 6817 entity( "⊖", [ 8854, 0 ] ), 6818 entity( "⊕", [ 8853, 0 ] ), 6819 entity( "⊗", [ 8855, 0 ] ), 6820 entity( "∲", [ 8754, 0 ] ), 6821 entity( "”", [ 8221, 0 ] ), 6822 entity( "’", [ 8217, 0 ] ), 6823 entity( "∷", [ 8759, 0 ] ), 6824 entity( "⩴", [ 10868, 0 ] ), 6825 entity( "≡", [ 8801, 0 ] ), 6826 entity( "∯", [ 8751, 0 ] ), 6827 entity( "∮", [ 8750, 0 ] ), 6828 entity( "ℂ", [ 8450, 0 ] ), 6829 entity( "∐", [ 8720, 0 ] ), 6830 entity( "∳", [ 8755, 0 ] ), 6831 entity( "⨯", [ 10799, 0 ] ), 6832 entity( "𝒞", [ 119966, 0 ] ), 6833 entity( "⋓", [ 8915, 0 ] ), 6834 entity( "≍", [ 8781, 0 ] ), 6835 entity( "ⅅ", [ 8517, 0 ] ), 6836 entity( "⤑", [ 10513, 0 ] ), 6837 entity( "Ђ", [ 1026, 0 ] ), 6838 entity( "Ѕ", [ 1029, 0 ] ), 6839 entity( "Џ", [ 1039, 0 ] ), 6840 entity( "‡", [ 8225, 0 ] ), 6841 entity( "↡", [ 8609, 0 ] ), 6842 entity( "⫤", [ 10980, 0 ] ), 6843 entity( "Ď", [ 270, 0 ] ), 6844 entity( "Д", [ 1044, 0 ] ), 6845 entity( "∇", [ 8711, 0 ] ), 6846 entity( "Δ", [ 916, 0 ] ), 6847 entity( "𝔇", [ 120071, 0 ] ), 6848 entity( "´", [ 180, 0 ] ), 6849 entity( "˙", [ 729, 0 ] ), 6850 entity( "˝", [ 733, 0 ] ), 6851 entity( "`", [ 96, 0 ] ), 6852 entity( "˜", [ 732, 0 ] ), 6853 entity( "⋄", [ 8900, 0 ] ), 6854 entity( "ⅆ", [ 8518, 0 ] ), 6855 entity( "𝔻", [ 120123, 0 ] ), 6856 entity( "¨", [ 168, 0 ] ), 6857 entity( "⃜", [ 8412, 0 ] ), 6858 entity( "≐", [ 8784, 0 ] ), 6859 entity( "∯", [ 8751, 0 ] ), 6860 entity( "¨", [ 168, 0 ] ), 6861 entity( "⇓", [ 8659, 0 ] ), 6862 entity( "⇐", [ 8656, 0 ] ), 6863 entity( "⇔", [ 8660, 0 ] ), 6864 entity( "⫤", [ 10980, 0 ] ), 6865 entity( "⟸", [ 10232, 0 ] ), 6866 entity( "⟺", [ 10234, 0 ] ), 6867 entity( "⟹", [ 10233, 0 ] ), 6868 entity( "⇒", [ 8658, 0 ] ), 6869 entity( "⊨", [ 8872, 0 ] ), 6870 entity( "⇑", [ 8657, 0 ] ), 6871 entity( "⇕", [ 8661, 0 ] ), 6872 entity( "∥", 
[ 8741, 0 ] ), 6873 entity( "↓", [ 8595, 0 ] ), 6874 entity( "⤓", [ 10515, 0 ] ), 6875 entity( "⇵", [ 8693, 0 ] ), 6876 entity( "̑", [ 785, 0 ] ), 6877 entity( "⥐", [ 10576, 0 ] ), 6878 entity( "⥞", [ 10590, 0 ] ), 6879 entity( "↽", [ 8637, 0 ] ), 6880 entity( "⥖", [ 10582, 0 ] ), 6881 entity( "⥟", [ 10591, 0 ] ), 6882 entity( "⇁", [ 8641, 0 ] ), 6883 entity( "⥗", [ 10583, 0 ] ), 6884 entity( "⊤", [ 8868, 0 ] ), 6885 entity( "↧", [ 8615, 0 ] ), 6886 entity( "⇓", [ 8659, 0 ] ), 6887 entity( "𝒟", [ 119967, 0 ] ), 6888 entity( "Đ", [ 272, 0 ] ), 6889 entity( "Ŋ", [ 330, 0 ] ), 6890 entity( "Ð", [ 208, 0 ] ), 6891 entity( "É", [ 201, 0 ] ), 6892 entity( "Ě", [ 282, 0 ] ), 6893 entity( "Ê", [ 202, 0 ] ), 6894 entity( "Э", [ 1069, 0 ] ), 6895 entity( "Ė", [ 278, 0 ] ), 6896 entity( "𝔈", [ 120072, 0 ] ), 6897 entity( "È", [ 200, 0 ] ), 6898 entity( "∈", [ 8712, 0 ] ), 6899 entity( "Ē", [ 274, 0 ] ), 6900 entity( "◻", [ 9723, 0 ] ), 6901 entity( "▫", [ 9643, 0 ] ), 6902 entity( "Ę", [ 280, 0 ] ), 6903 entity( "𝔼", [ 120124, 0 ] ), 6904 entity( "Ε", [ 917, 0 ] ), 6905 entity( "⩵", [ 10869, 0 ] ), 6906 entity( "≂", [ 8770, 0 ] ), 6907 entity( "⇌", [ 8652, 0 ] ), 6908 entity( "ℰ", [ 8496, 0 ] ), 6909 entity( "⩳", [ 10867, 0 ] ), 6910 entity( "Η", [ 919, 0 ] ), 6911 entity( "Ë", [ 203, 0 ] ), 6912 entity( "∃", [ 8707, 0 ] ), 6913 entity( "ⅇ", [ 8519, 0 ] ), 6914 entity( "Ф", [ 1060, 0 ] ), 6915 entity( "𝔉", [ 120073, 0 ] ), 6916 entity( "◼", [ 9724, 0 ] ), 6917 entity( "▪", [ 9642, 0 ] ), 6918 entity( "𝔽", [ 120125, 0 ] ), 6919 entity( "∀", [ 8704, 0 ] ), 6920 entity( "ℱ", [ 8497, 0 ] ), 6921 entity( "ℱ", [ 8497, 0 ] ), 6922 entity( "Ѓ", [ 1027, 0 ] ), 6923 entity( ">", [ 62, 0 ] ), 6924 entity( "Γ", [ 915, 0 ] ), 6925 entity( "Ϝ", [ 988, 0 ] ), 6926 entity( "Ğ", [ 286, 0 ] ), 6927 entity( "Ģ", [ 290, 0 ] ), 6928 entity( "Ĝ", [ 284, 0 ] ), 6929 entity( "Г", [ 1043, 0 ] ), 6930 entity( "Ġ", [ 288, 0 ] ), 6931 entity( "𝔊", [ 120074, 0 ] ), 6932 entity( "⋙", [ 8921, 0 ] ), 6933 
entity( "𝔾", [ 120126, 0 ] ), 6934 entity( "≥", [ 8805, 0 ] ), 6935 entity( "⋛", [ 8923, 0 ] ), 6936 entity( "≧", [ 8807, 0 ] ), 6937 entity( "⪢", [ 10914, 0 ] ), 6938 entity( "≷", [ 8823, 0 ] ), 6939 entity( "⩾", [ 10878, 0 ] ), 6940 entity( "≳", [ 8819, 0 ] ), 6941 entity( "𝒢", [ 119970, 0 ] ), 6942 entity( "≫", [ 8811, 0 ] ), 6943 entity( "Ъ", [ 1066, 0 ] ), 6944 entity( "ˇ", [ 711, 0 ] ), 6945 entity( "^", [ 94, 0 ] ), 6946 entity( "Ĥ", [ 292, 0 ] ), 6947 entity( "ℌ", [ 8460, 0 ] ), 6948 entity( "ℋ", [ 8459, 0 ] ), 6949 entity( "ℍ", [ 8461, 0 ] ), 6950 entity( "─", [ 9472, 0 ] ), 6951 entity( "ℋ", [ 8459, 0 ] ), 6952 entity( "Ħ", [ 294, 0 ] ), 6953 entity( "≎", [ 8782, 0 ] ), 6954 entity( "≏", [ 8783, 0 ] ), 6955 entity( "Е", [ 1045, 0 ] ), 6956 entity( "IJ", [ 306, 0 ] ), 6957 entity( "Ё", [ 1025, 0 ] ), 6958 entity( "Í", [ 205, 0 ] ), 6959 entity( "Î", [ 206, 0 ] ), 6960 entity( "И", [ 1048, 0 ] ), 6961 entity( "İ", [ 304, 0 ] ), 6962 entity( "ℑ", [ 8465, 0 ] ), 6963 entity( "Ì", [ 204, 0 ] ), 6964 entity( "ℑ", [ 8465, 0 ] ), 6965 entity( "Ī", [ 298, 0 ] ), 6966 entity( "ⅈ", [ 8520, 0 ] ), 6967 entity( "⇒", [ 8658, 0 ] ), 6968 entity( "∬", [ 8748, 0 ] ), 6969 entity( "∫", [ 8747, 0 ] ), 6970 entity( "⋂", [ 8898, 0 ] ), 6971 entity( "⁣", [ 8291, 0 ] ), 6972 entity( "⁢", [ 8290, 0 ] ), 6973 entity( "Į", [ 302, 0 ] ), 6974 entity( "𝕀", [ 120128, 0 ] ), 6975 entity( "Ι", [ 921, 0 ] ), 6976 entity( "ℐ", [ 8464, 0 ] ), 6977 entity( "Ĩ", [ 296, 0 ] ), 6978 entity( "І", [ 1030, 0 ] ), 6979 entity( "Ï", [ 207, 0 ] ), 6980 entity( "Ĵ", [ 308, 0 ] ), 6981 entity( "Й", [ 1049, 0 ] ), 6982 entity( "𝔍", [ 120077, 0 ] ), 6983 entity( "𝕁", [ 120129, 0 ] ), 6984 entity( "𝒥", [ 119973, 0 ] ), 6985 entity( "Ј", [ 1032, 0 ] ), 6986 entity( "Є", [ 1028, 0 ] ), 6987 entity( "Х", [ 1061, 0 ] ), 6988 entity( "Ќ", [ 1036, 0 ] ), 6989 entity( "Κ", [ 922, 0 ] ), 6990 entity( "Ķ", [ 310, 0 ] ), 6991 entity( "К", [ 1050, 0 ] ), 6992 entity( "𝔎", [ 120078, 0 ] ), 6993 entity( "𝕂", [ 
120130, 0 ] ), 6994 entity( "𝒦", [ 119974, 0 ] ), 6995 entity( "Љ", [ 1033, 0 ] ), 6996 entity( "<", [ 60, 0 ] ), 6997 entity( "Ĺ", [ 313, 0 ] ), 6998 entity( "Λ", [ 923, 0 ] ), 6999 entity( "⟪", [ 10218, 0 ] ), 7000 entity( "ℒ", [ 8466, 0 ] ), 7001 entity( "↞", [ 8606, 0 ] ), 7002 entity( "Ľ", [ 317, 0 ] ), 7003 entity( "Ļ", [ 315, 0 ] ), 7004 entity( "Л", [ 1051, 0 ] ), 7005 entity( "⟨", [ 10216, 0 ] ), 7006 entity( "←", [ 8592, 0 ] ), 7007 entity( "⇤", [ 8676, 0 ] ), 7008 entity( "⇆", [ 8646, 0 ] ), 7009 entity( "⌈", [ 8968, 0 ] ), 7010 entity( "⟦", [ 10214, 0 ] ), 7011 entity( "⥡", [ 10593, 0 ] ), 7012 entity( "⇃", [ 8643, 0 ] ), 7013 entity( "⥙", [ 10585, 0 ] ), 7014 entity( "⌊", [ 8970, 0 ] ), 7015 entity( "↔", [ 8596, 0 ] ), 7016 entity( "⥎", [ 10574, 0 ] ), 7017 entity( "⊣", [ 8867, 0 ] ), 7018 entity( "↤", [ 8612, 0 ] ), 7019 entity( "⥚", [ 10586, 0 ] ), 7020 entity( "⊲", [ 8882, 0 ] ), 7021 entity( "⧏", [ 10703, 0 ] ), 7022 entity( "⊴", [ 8884, 0 ] ), 7023 entity( "⥑", [ 10577, 0 ] ), 7024 entity( "⥠", [ 10592, 0 ] ), 7025 entity( "↿", [ 8639, 0 ] ), 7026 entity( "⥘", [ 10584, 0 ] ), 7027 entity( "↼", [ 8636, 0 ] ), 7028 entity( "⥒", [ 10578, 0 ] ), 7029 entity( "⇐", [ 8656, 0 ] ), 7030 entity( "⇔", [ 8660, 0 ] ), 7031 entity( "⋚", [ 8922, 0 ] ), 7032 entity( "≦", [ 8806, 0 ] ), 7033 entity( "≶", [ 8822, 0 ] ), 7034 entity( "⪡", [ 10913, 0 ] ), 7035 entity( "⩽", [ 10877, 0 ] ), 7036 entity( "≲", [ 8818, 0 ] ), 7037 entity( "𝔏", [ 120079, 0 ] ), 7038 entity( "⋘", [ 8920, 0 ] ), 7039 entity( "⇚", [ 8666, 0 ] ), 7040 entity( "Ŀ", [ 319, 0 ] ), 7041 entity( "⟵", [ 10229, 0 ] ), 7042 entity( "⟷", [ 10231, 0 ] ), 7043 entity( "⟶", [ 10230, 0 ] ), 7044 entity( "⟸", [ 10232, 0 ] ), 7045 entity( "⟺", [ 10234, 0 ] ), 7046 entity( "⟹", [ 10233, 0 ] ), 7047 entity( "𝕃", [ 120131, 0 ] ), 7048 entity( "↙", [ 8601, 0 ] ), 7049 entity( "↘", [ 8600, 0 ] ), 7050 entity( "ℒ", [ 8466, 0 ] ), 7051 entity( "↰", [ 8624, 0 ] ), 7052 entity( "Ł", [ 321, 0 ] ), 7053 entity( "≪", [ 
8810, 0 ] ), 7054 entity( "⤅", [ 10501, 0 ] ), 7055 entity( "М", [ 1052, 0 ] ), 7056 entity( " ", [ 8287, 0 ] ), 7057 entity( "ℳ", [ 8499, 0 ] ), 7058 entity( "𝔐", [ 120080, 0 ] ), 7059 entity( "∓", [ 8723, 0 ] ), 7060 entity( "𝕄", [ 120132, 0 ] ), 7061 entity( "ℳ", [ 8499, 0 ] ), 7062 entity( "Μ", [ 924, 0 ] ), 7063 entity( "Њ", [ 1034, 0 ] ), 7064 entity( "Ń", [ 323, 0 ] ), 7065 entity( "Ň", [ 327, 0 ] ), 7066 entity( "Ņ", [ 325, 0 ] ), 7067 entity( "Н", [ 1053, 0 ] ), 7068 entity( "​", [ 8203, 0 ] ), 7069 entity( "​", [ 8203, 0 ] ), 7070 entity( "​", [ 8203, 0 ] ), 7071 entity( "​", [ 8203, 0 ] ), 7072 entity( "≫", [ 8811, 0 ] ), 7073 entity( "≪", [ 8810, 0 ] ), 7074 entity( "
", [ 10, 0 ] ), 7075 entity( "𝔑", [ 120081, 0 ] ), 7076 entity( "⁠", [ 8288, 0 ] ), 7077 entity( " ", [ 160, 0 ] ), 7078 entity( "ℕ", [ 8469, 0 ] ), 7079 entity( "⫬", [ 10988, 0 ] ), 7080 entity( "≢", [ 8802, 0 ] ), 7081 entity( "≭", [ 8813, 0 ] ), 7082 entity( "∦", [ 8742, 0 ] ), 7083 entity( "∉", [ 8713, 0 ] ), 7084 entity( "≠", [ 8800, 0 ] ), 7085 entity( "≂̸", [ 8770, 824 ] ), 7086 entity( "∄", [ 8708, 0 ] ), 7087 entity( "≯", [ 8815, 0 ] ), 7088 entity( "≱", [ 8817, 0 ] ), 7089 entity( "≧̸", [ 8807, 824 ] ), 7090 entity( "≫̸", [ 8811, 824 ] ), 7091 entity( "≹", [ 8825, 0 ] ), 7092 entity( "⩾̸", [ 10878, 824 ] ), 7093 entity( "≵", [ 8821, 0 ] ), 7094 entity( "≎̸", [ 8782, 824 ] ), 7095 entity( "≏̸", [ 8783, 824 ] ), 7096 entity( "⋪", [ 8938, 0 ] ), 7097 entity( "⧏̸", [ 10703, 824 ] ), 7098 entity( "⋬", [ 8940, 0 ] ), 7099 entity( "≮", [ 8814, 0 ] ), 7100 entity( "≰", [ 8816, 0 ] ), 7101 entity( "≸", [ 8824, 0 ] ), 7102 entity( "≪̸", [ 8810, 824 ] ), 7103 entity( "⩽̸", [ 10877, 824 ] ), 7104 entity( "≴", [ 8820, 0 ] ), 7105 entity( "⪢̸", [ 10914, 824 ] ), 7106 entity( "⪡̸", [ 10913, 824 ] ), 7107 entity( "⊀", [ 8832, 0 ] ), 7108 entity( "⪯̸", [ 10927, 824 ] ), 7109 entity( "⋠", [ 8928, 0 ] ), 7110 entity( "∌", [ 8716, 0 ] ), 7111 entity( "⋫", [ 8939, 0 ] ), 7112 entity( "⧐̸", [ 10704, 824 ] ), 7113 entity( "⋭", [ 8941, 0 ] ), 7114 entity( "⊏̸", [ 8847, 824 ] ), 7115 entity( "⋢", [ 8930, 0 ] ), 7116 entity( "⊐̸", [ 8848, 824 ] ), 7117 entity( "⋣", [ 8931, 0 ] ), 7118 entity( "⊂⃒", [ 8834, 8402 ] ), 7119 entity( "⊈", [ 8840, 0 ] ), 7120 entity( "⊁", [ 8833, 0 ] ), 7121 entity( "⪰̸", [ 10928, 824 ] ), 7122 entity( "⋡", [ 8929, 0 ] ), 7123 entity( "≿̸", [ 8831, 824 ] ), 7124 entity( "⊃⃒", [ 8835, 8402 ] ), 7125 entity( "⊉", [ 8841, 0 ] ), 7126 entity( "≁", [ 8769, 0 ] ), 7127 entity( "≄", [ 8772, 0 ] ), 7128 entity( "≇", [ 8775, 0 ] ), 7129 entity( "≉", [ 8777, 0 ] ), 7130 entity( "∤", [ 8740, 0 ] ), 7131 entity( "𝒩", [ 119977, 0 ] ), 7132 entity( "Ñ", [ 209, 0 ] ), 
7133 entity( "Ν", [ 925, 0 ] ), 7134 entity( "Œ", [ 338, 0 ] ), 7135 entity( "Ó", [ 211, 0 ] ), 7136 entity( "Ô", [ 212, 0 ] ), 7137 entity( "О", [ 1054, 0 ] ), 7138 entity( "Ő", [ 336, 0 ] ), 7139 entity( "𝔒", [ 120082, 0 ] ), 7140 entity( "Ò", [ 210, 0 ] ), 7141 entity( "Ō", [ 332, 0 ] ), 7142 entity( "Ω", [ 937, 0 ] ), 7143 entity( "Ο", [ 927, 0 ] ), 7144 entity( "𝕆", [ 120134, 0 ] ), 7145 entity( "“", [ 8220, 0 ] ), 7146 entity( "‘", [ 8216, 0 ] ), 7147 entity( "⩔", [ 10836, 0 ] ), 7148 entity( "𝒪", [ 119978, 0 ] ), 7149 entity( "Ø", [ 216, 0 ] ), 7150 entity( "Õ", [ 213, 0 ] ), 7151 entity( "⨷", [ 10807, 0 ] ), 7152 entity( "Ö", [ 214, 0 ] ), 7153 entity( "‾", [ 8254, 0 ] ), 7154 entity( "⏞", [ 9182, 0 ] ), 7155 entity( "⎴", [ 9140, 0 ] ), 7156 entity( "⏜", [ 9180, 0 ] ), 7157 entity( "∂", [ 8706, 0 ] ), 7158 entity( "П", [ 1055, 0 ] ), 7159 entity( "𝔓", [ 120083, 0 ] ), 7160 entity( "Φ", [ 934, 0 ] ), 7161 entity( "Π", [ 928, 0 ] ), 7162 entity( "±", [ 177, 0 ] ), 7163 entity( "ℌ", [ 8460, 0 ] ), 7164 entity( "ℙ", [ 8473, 0 ] ), 7165 entity( "⪻", [ 10939, 0 ] ), 7166 entity( "≺", [ 8826, 0 ] ), 7167 entity( "⪯", [ 10927, 0 ] ), 7168 entity( "≼", [ 8828, 0 ] ), 7169 entity( "≾", [ 8830, 0 ] ), 7170 entity( "″", [ 8243, 0 ] ), 7171 entity( "∏", [ 8719, 0 ] ), 7172 entity( "∷", [ 8759, 0 ] ), 7173 entity( "∝", [ 8733, 0 ] ), 7174 entity( "𝒫", [ 119979, 0 ] ), 7175 entity( "Ψ", [ 936, 0 ] ), 7176 entity( """, [ 34, 0 ] ), 7177 entity( "𝔔", [ 120084, 0 ] ), 7178 entity( "ℚ", [ 8474, 0 ] ), 7179 entity( "𝒬", [ 119980, 0 ] ), 7180 entity( "⤐", [ 10512, 0 ] ), 7181 entity( "®", [ 174, 0 ] ), 7182 entity( "Ŕ", [ 340, 0 ] ), 7183 entity( "⟫", [ 10219, 0 ] ), 7184 entity( "↠", [ 8608, 0 ] ), 7185 entity( "⤖", [ 10518, 0 ] ), 7186 entity( "Ř", [ 344, 0 ] ), 7187 entity( "Ŗ", [ 342, 0 ] ), 7188 entity( "Р", [ 1056, 0 ] ), 7189 entity( "ℜ", [ 8476, 0 ] ), 7190 entity( "∋", [ 8715, 0 ] ), 7191 entity( "⇋", [ 8651, 0 ] ), 7192 entity( "⥯", [ 10607, 0 ] ), 7193 entity( "ℜ", [ 
8476, 0 ] ), 7194 entity( "Ρ", [ 929, 0 ] ), 7195 entity( "⟩", [ 10217, 0 ] ), 7196 entity( "→", [ 8594, 0 ] ), 7197 entity( "⇥", [ 8677, 0 ] ), 7198 entity( "⇄", [ 8644, 0 ] ), 7199 entity( "⌉", [ 8969, 0 ] ), 7200 entity( "⟧", [ 10215, 0 ] ), 7201 entity( "⥝", [ 10589, 0 ] ), 7202 entity( "⇂", [ 8642, 0 ] ), 7203 entity( "⥕", [ 10581, 0 ] ), 7204 entity( "⌋", [ 8971, 0 ] ), 7205 entity( "⊢", [ 8866, 0 ] ), 7206 entity( "↦", [ 8614, 0 ] ), 7207 entity( "⥛", [ 10587, 0 ] ), 7208 entity( "⊳", [ 8883, 0 ] ), 7209 entity( "⧐", [ 10704, 0 ] ), 7210 entity( "⊵", [ 8885, 0 ] ), 7211 entity( "⥏", [ 10575, 0 ] ), 7212 entity( "⥜", [ 10588, 0 ] ), 7213 entity( "↾", [ 8638, 0 ] ), 7214 entity( "⥔", [ 10580, 0 ] ), 7215 entity( "⇀", [ 8640, 0 ] ), 7216 entity( "⥓", [ 10579, 0 ] ), 7217 entity( "⇒", [ 8658, 0 ] ), 7218 entity( "ℝ", [ 8477, 0 ] ), 7219 entity( "⥰", [ 10608, 0 ] ), 7220 entity( "⇛", [ 8667, 0 ] ), 7221 entity( "ℛ", [ 8475, 0 ] ), 7222 entity( "↱", [ 8625, 0 ] ), 7223 entity( "⧴", [ 10740, 0 ] ), 7224 entity( "Щ", [ 1065, 0 ] ), 7225 entity( "Ш", [ 1064, 0 ] ), 7226 entity( "Ь", [ 1068, 0 ] ), 7227 entity( "Ś", [ 346, 0 ] ), 7228 entity( "⪼", [ 10940, 0 ] ), 7229 entity( "Š", [ 352, 0 ] ), 7230 entity( "Ş", [ 350, 0 ] ), 7231 entity( "Ŝ", [ 348, 0 ] ), 7232 entity( "С", [ 1057, 0 ] ), 7233 entity( "𝔖", [ 120086, 0 ] ), 7234 entity( "↓", [ 8595, 0 ] ), 7235 entity( "←", [ 8592, 0 ] ), 7236 entity( "→", [ 8594, 0 ] ), 7237 entity( "↑", [ 8593, 0 ] ), 7238 entity( "Σ", [ 931, 0 ] ), 7239 entity( "∘", [ 8728, 0 ] ), 7240 entity( "𝕊", [ 120138, 0 ] ), 7241 entity( "√", [ 8730, 0 ] ), 7242 entity( "□", [ 9633, 0 ] ), 7243 entity( "⊓", [ 8851, 0 ] ), 7244 entity( "⊏", [ 8847, 0 ] ), 7245 entity( "⊑", [ 8849, 0 ] ), 7246 entity( "⊐", [ 8848, 0 ] ), 7247 entity( "⊒", [ 8850, 0 ] ), 7248 entity( "⊔", [ 8852, 0 ] ), 7249 entity( "𝒮", [ 119982, 0 ] ), 7250 entity( "⋆", [ 8902, 0 ] ), 7251 entity( "⋐", [ 8912, 0 ] ), 7252 entity( "⋐", [ 8912, 0 ] ), 7253 entity( "⊆", [ 8838, 
0 ] ), 7254 entity( "≻", [ 8827, 0 ] ), 7255 entity( "⪰", [ 10928, 0 ] ), 7256 entity( "≽", [ 8829, 0 ] ), 7257 entity( "≿", [ 8831, 0 ] ), 7258 entity( "∋", [ 8715, 0 ] ), 7259 entity( "∑", [ 8721, 0 ] ), 7260 entity( "⋑", [ 8913, 0 ] ), 7261 entity( "⊃", [ 8835, 0 ] ), 7262 entity( "⊇", [ 8839, 0 ] ), 7263 entity( "⋑", [ 8913, 0 ] ), 7264 entity( "Þ", [ 222, 0 ] ), 7265 entity( "™", [ 8482, 0 ] ), 7266 entity( "Ћ", [ 1035, 0 ] ), 7267 entity( "Ц", [ 1062, 0 ] ), 7268 entity( "	", [ 9, 0 ] ), 7269 entity( "Τ", [ 932, 0 ] ), 7270 entity( "Ť", [ 356, 0 ] ), 7271 entity( "Ţ", [ 354, 0 ] ), 7272 entity( "Т", [ 1058, 0 ] ), 7273 entity( "𝔗", [ 120087, 0 ] ), 7274 entity( "∴", [ 8756, 0 ] ), 7275 entity( "Θ", [ 920, 0 ] ), 7276 entity( "  ", [ 8287, 8202 ] ), 7277 entity( " ", [ 8201, 0 ] ), 7278 entity( "∼", [ 8764, 0 ] ), 7279 entity( "≃", [ 8771, 0 ] ), 7280 entity( "≅", [ 8773, 0 ] ), 7281 entity( "≈", [ 8776, 0 ] ), 7282 entity( "𝕋", [ 120139, 0 ] ), 7283 entity( "⃛", [ 8411, 0 ] ), 7284 entity( "𝒯", [ 119983, 0 ] ), 7285 entity( "Ŧ", [ 358, 0 ] ), 7286 entity( "Ú", [ 218, 0 ] ), 7287 entity( "↟", [ 8607, 0 ] ), 7288 entity( "⥉", [ 10569, 0 ] ), 7289 entity( "Ў", [ 1038, 0 ] ), 7290 entity( "Ŭ", [ 364, 0 ] ), 7291 entity( "Û", [ 219, 0 ] ), 7292 entity( "У", [ 1059, 0 ] ), 7293 entity( "Ű", [ 368, 0 ] ), 7294 entity( "𝔘", [ 120088, 0 ] ), 7295 entity( "Ù", [ 217, 0 ] ), 7296 entity( "Ū", [ 362, 0 ] ), 7297 entity( "_", [ 95, 0 ] ), 7298 entity( "⏟", [ 9183, 0 ] ), 7299 entity( "⎵", [ 9141, 0 ] ), 7300 entity( "⏝", [ 9181, 0 ] ), 7301 entity( "⋃", [ 8899, 0 ] ), 7302 entity( "⊎", [ 8846, 0 ] ), 7303 entity( "Ų", [ 370, 0 ] ), 7304 entity( "𝕌", [ 120140, 0 ] ), 7305 entity( "↑", [ 8593, 0 ] ), 7306 entity( "⤒", [ 10514, 0 ] ), 7307 entity( "⇅", [ 8645, 0 ] ), 7308 entity( "↕", [ 8597, 0 ] ), 7309 entity( "⥮", [ 10606, 0 ] ), 7310 entity( "⊥", [ 8869, 0 ] ), 7311 entity( "↥", [ 8613, 0 ] ), 7312 entity( "⇑", [ 8657, 0 ] ), 7313 entity( "⇕", [ 8661, 0 ] ), 7314 entity( 
"↖", [ 8598, 0 ] ), 7315 entity( "↗", [ 8599, 0 ] ), 7316 entity( "ϒ", [ 978, 0 ] ), 7317 entity( "Υ", [ 933, 0 ] ), 7318 entity( "Ů", [ 366, 0 ] ), 7319 entity( "𝒰", [ 119984, 0 ] ), 7320 entity( "Ũ", [ 360, 0 ] ), 7321 entity( "Ü", [ 220, 0 ] ), 7322 entity( "⊫", [ 8875, 0 ] ), 7323 entity( "⫫", [ 10987, 0 ] ), 7324 entity( "В", [ 1042, 0 ] ), 7325 entity( "⊩", [ 8873, 0 ] ), 7326 entity( "⫦", [ 10982, 0 ] ), 7327 entity( "⋁", [ 8897, 0 ] ), 7328 entity( "‖", [ 8214, 0 ] ), 7329 entity( "‖", [ 8214, 0 ] ), 7330 entity( "∣", [ 8739, 0 ] ), 7331 entity( "|", [ 124, 0 ] ), 7332 entity( "❘", [ 10072, 0 ] ), 7333 entity( "≀", [ 8768, 0 ] ), 7334 entity( " ", [ 8202, 0 ] ), 7335 entity( "𝔙", [ 120089, 0 ] ), 7336 entity( "𝕍", [ 120141, 0 ] ), 7337 entity( "𝒱", [ 119985, 0 ] ), 7338 entity( "⊪", [ 8874, 0 ] ), 7339 entity( "Ŵ", [ 372, 0 ] ), 7340 entity( "⋀", [ 8896, 0 ] ), 7341 entity( "𝔚", [ 120090, 0 ] ), 7342 entity( "𝕎", [ 120142, 0 ] ), 7343 entity( "𝒲", [ 119986, 0 ] ), 7344 entity( "𝔛", [ 120091, 0 ] ), 7345 entity( "Ξ", [ 926, 0 ] ), 7346 entity( "𝕏", [ 120143, 0 ] ), 7347 entity( "𝒳", [ 119987, 0 ] ), 7348 entity( "Я", [ 1071, 0 ] ), 7349 entity( "Ї", [ 1031, 0 ] ), 7350 entity( "Ю", [ 1070, 0 ] ), 7351 entity( "Ý", [ 221, 0 ] ), 7352 entity( "Ŷ", [ 374, 0 ] ), 7353 entity( "Ы", [ 1067, 0 ] ), 7354 entity( "𝔜", [ 120092, 0 ] ), 7355 entity( "𝕐", [ 120144, 0 ] ), 7356 entity( "𝒴", [ 119988, 0 ] ), 7357 entity( "Ÿ", [ 376, 0 ] ), 7358 entity( "Ж", [ 1046, 0 ] ), 7359 entity( "Ź", [ 377, 0 ] ), 7360 entity( "Ž", [ 381, 0 ] ), 7361 entity( "З", [ 1047, 0 ] ), 7362 entity( "Ż", [ 379, 0 ] ), 7363 entity( "​", [ 8203, 0 ] ), 7364 entity( "Ζ", [ 918, 0 ] ), 7365 entity( "ℨ", [ 8488, 0 ] ), 7366 entity( "ℤ", [ 8484, 0 ] ), 7367 entity( "𝒵", [ 119989, 0 ] ), 7368 entity( "á", [ 225, 0 ] ), 7369 entity( "ă", [ 259, 0 ] ), 7370 entity( "∾", [ 8766, 0 ] ), 7371 entity( "∾̳", [ 8766, 819 ] ), 7372 entity( "∿", [ 8767, 0 ] ), 7373 entity( "â", [ 226, 0 ] ), 7374 entity( 
"´", [ 180, 0 ] ), 7375 entity( "а", [ 1072, 0 ] ), 7376 entity( "æ", [ 230, 0 ] ), 7377 entity( "⁡", [ 8289, 0 ] ), 7378 entity( "𝔞", [ 120094, 0 ] ), 7379 entity( "à", [ 224, 0 ] ), 7380 entity( "ℵ", [ 8501, 0 ] ), 7381 entity( "ℵ", [ 8501, 0 ] ), 7382 entity( "α", [ 945, 0 ] ), 7383 entity( "ā", [ 257, 0 ] ), 7384 entity( "⨿", [ 10815, 0 ] ), 7385 entity( "&", [ 38, 0 ] ), 7386 entity( "∧", [ 8743, 0 ] ), 7387 entity( "⩕", [ 10837, 0 ] ), 7388 entity( "⩜", [ 10844, 0 ] ), 7389 entity( "⩘", [ 10840, 0 ] ), 7390 entity( "⩚", [ 10842, 0 ] ), 7391 entity( "∠", [ 8736, 0 ] ), 7392 entity( "⦤", [ 10660, 0 ] ), 7393 entity( "∠", [ 8736, 0 ] ), 7394 entity( "∡", [ 8737, 0 ] ), 7395 entity( "⦨", [ 10664, 0 ] ), 7396 entity( "⦩", [ 10665, 0 ] ), 7397 entity( "⦪", [ 10666, 0 ] ), 7398 entity( "⦫", [ 10667, 0 ] ), 7399 entity( "⦬", [ 10668, 0 ] ), 7400 entity( "⦭", [ 10669, 0 ] ), 7401 entity( "⦮", [ 10670, 0 ] ), 7402 entity( "⦯", [ 10671, 0 ] ), 7403 entity( "∟", [ 8735, 0 ] ), 7404 entity( "⊾", [ 8894, 0 ] ), 7405 entity( "⦝", [ 10653, 0 ] ), 7406 entity( "∢", [ 8738, 0 ] ), 7407 entity( "Å", [ 197, 0 ] ), 7408 entity( "⍼", [ 9084, 0 ] ), 7409 entity( "ą", [ 261, 0 ] ), 7410 entity( "𝕒", [ 120146, 0 ] ), 7411 entity( "≈", [ 8776, 0 ] ), 7412 entity( "⩰", [ 10864, 0 ] ), 7413 entity( "⩯", [ 10863, 0 ] ), 7414 entity( "≊", [ 8778, 0 ] ), 7415 entity( "≋", [ 8779, 0 ] ), 7416 entity( "'", [ 39, 0 ] ), 7417 entity( "≈", [ 8776, 0 ] ), 7418 entity( "≊", [ 8778, 0 ] ), 7419 entity( "å", [ 229, 0 ] ), 7420 entity( "𝒶", [ 119990, 0 ] ), 7421 entity( "*", [ 42, 0 ] ), 7422 entity( "≈", [ 8776, 0 ] ), 7423 entity( "≍", [ 8781, 0 ] ), 7424 entity( "ã", [ 227, 0 ] ), 7425 entity( "ä", [ 228, 0 ] ), 7426 entity( "∳", [ 8755, 0 ] ), 7427 entity( "⨑", [ 10769, 0 ] ), 7428 entity( "⫭", [ 10989, 0 ] ), 7429 entity( "≌", [ 8780, 0 ] ), 7430 entity( "϶", [ 1014, 0 ] ), 7431 entity( "‵", [ 8245, 0 ] ), 7432 entity( "∽", [ 8765, 0 ] ), 7433 entity( "⋍", [ 8909, 0 ] ), 7434 entity( "⊽", [ 
8893, 0 ] ), 7435 entity( "⌅", [ 8965, 0 ] ), 7436 entity( "⌅", [ 8965, 0 ] ), 7437 entity( "⎵", [ 9141, 0 ] ), 7438 entity( "⎶", [ 9142, 0 ] ), 7439 entity( "≌", [ 8780, 0 ] ), 7440 entity( "б", [ 1073, 0 ] ), 7441 entity( "„", [ 8222, 0 ] ), 7442 entity( "∵", [ 8757, 0 ] ), 7443 entity( "∵", [ 8757, 0 ] ), 7444 entity( "⦰", [ 10672, 0 ] ), 7445 entity( "϶", [ 1014, 0 ] ), 7446 entity( "ℬ", [ 8492, 0 ] ), 7447 entity( "β", [ 946, 0 ] ), 7448 entity( "ℶ", [ 8502, 0 ] ), 7449 entity( "≬", [ 8812, 0 ] ), 7450 entity( "𝔟", [ 120095, 0 ] ), 7451 entity( "⋂", [ 8898, 0 ] ), 7452 entity( "◯", [ 9711, 0 ] ), 7453 entity( "⋃", [ 8899, 0 ] ), 7454 entity( "⨀", [ 10752, 0 ] ), 7455 entity( "⨁", [ 10753, 0 ] ), 7456 entity( "⨂", [ 10754, 0 ] ), 7457 entity( "⨆", [ 10758, 0 ] ), 7458 entity( "★", [ 9733, 0 ] ), 7459 entity( "▽", [ 9661, 0 ] ), 7460 entity( "△", [ 9651, 0 ] ), 7461 entity( "⨄", [ 10756, 0 ] ), 7462 entity( "⋁", [ 8897, 0 ] ), 7463 entity( "⋀", [ 8896, 0 ] ), 7464 entity( "⤍", [ 10509, 0 ] ), 7465 entity( "⧫", [ 10731, 0 ] ), 7466 entity( "▪", [ 9642, 0 ] ), 7467 entity( "▴", [ 9652, 0 ] ), 7468 entity( "▾", [ 9662, 0 ] ), 7469 entity( "◂", [ 9666, 0 ] ), 7470 entity( "▸", [ 9656, 0 ] ), 7471 entity( "␣", [ 9251, 0 ] ), 7472 entity( "▒", [ 9618, 0 ] ), 7473 entity( "░", [ 9617, 0 ] ), 7474 entity( "▓", [ 9619, 0 ] ), 7475 entity( "█", [ 9608, 0 ] ), 7476 entity( "=⃥", [ 61, 8421 ] ), 7477 entity( "≡⃥", [ 8801, 8421 ] ), 7478 entity( "⌐", [ 8976, 0 ] ), 7479 entity( "𝕓", [ 120147, 0 ] ), 7480 entity( "⊥", [ 8869, 0 ] ), 7481 entity( "⊥", [ 8869, 0 ] ), 7482 entity( "⋈", [ 8904, 0 ] ), 7483 entity( "╗", [ 9559, 0 ] ), 7484 entity( "╔", [ 9556, 0 ] ), 7485 entity( "╖", [ 9558, 0 ] ), 7486 entity( "╓", [ 9555, 0 ] ), 7487 entity( "═", [ 9552, 0 ] ), 7488 entity( "╦", [ 9574, 0 ] ), 7489 entity( "╩", [ 9577, 0 ] ), 7490 entity( "╤", [ 9572, 0 ] ), 7491 entity( "╧", [ 9575, 0 ] ), 7492 entity( "╝", [ 9565, 0 ] ), 7493 entity( "╚", [ 9562, 0 ] ), 7494 entity( "╜", [ 
9564, 0 ] ), 7495 entity( "╙", [ 9561, 0 ] ), 7496 entity( "║", [ 9553, 0 ] ), 7497 entity( "╬", [ 9580, 0 ] ), 7498 entity( "╣", [ 9571, 0 ] ), 7499 entity( "╠", [ 9568, 0 ] ), 7500 entity( "╫", [ 9579, 0 ] ), 7501 entity( "╢", [ 9570, 0 ] ), 7502 entity( "╟", [ 9567, 0 ] ), 7503 entity( "⧉", [ 10697, 0 ] ), 7504 entity( "╕", [ 9557, 0 ] ), 7505 entity( "╒", [ 9554, 0 ] ), 7506 entity( "┐", [ 9488, 0 ] ), 7507 entity( "┌", [ 9484, 0 ] ), 7508 entity( "─", [ 9472, 0 ] ), 7509 entity( "╥", [ 9573, 0 ] ), 7510 entity( "╨", [ 9576, 0 ] ), 7511 entity( "┬", [ 9516, 0 ] ), 7512 entity( "┴", [ 9524, 0 ] ), 7513 entity( "⊟", [ 8863, 0 ] ), 7514 entity( "⊞", [ 8862, 0 ] ), 7515 entity( "⊠", [ 8864, 0 ] ), 7516 entity( "╛", [ 9563, 0 ] ), 7517 entity( "╘", [ 9560, 0 ] ), 7518 entity( "┘", [ 9496, 0 ] ), 7519 entity( "└", [ 9492, 0 ] ), 7520 entity( "│", [ 9474, 0 ] ), 7521 entity( "╪", [ 9578, 0 ] ), 7522 entity( "╡", [ 9569, 0 ] ), 7523 entity( "╞", [ 9566, 0 ] ), 7524 entity( "┼", [ 9532, 0 ] ), 7525 entity( "┤", [ 9508, 0 ] ), 7526 entity( "├", [ 9500, 0 ] ), 7527 entity( "‵", [ 8245, 0 ] ), 7528 entity( "˘", [ 728, 0 ] ), 7529 entity( "¦", [ 166, 0 ] ), 7530 entity( "𝒷", [ 119991, 0 ] ), 7531 entity( "⁏", [ 8271, 0 ] ), 7532 entity( "∽", [ 8765, 0 ] ), 7533 entity( "⋍", [ 8909, 0 ] ), 7534 entity( "\", [ 92, 0 ] ), 7535 entity( "⧅", [ 10693, 0 ] ), 7536 entity( "⟈", [ 10184, 0 ] ), 7537 entity( "•", [ 8226, 0 ] ), 7538 entity( "•", [ 8226, 0 ] ), 7539 entity( "≎", [ 8782, 0 ] ), 7540 entity( "⪮", [ 10926, 0 ] ), 7541 entity( "≏", [ 8783, 0 ] ), 7542 entity( "≏", [ 8783, 0 ] ), 7543 entity( "ć", [ 263, 0 ] ), 7544 entity( "∩", [ 8745, 0 ] ), 7545 entity( "⩄", [ 10820, 0 ] ), 7546 entity( "⩉", [ 10825, 0 ] ), 7547 entity( "⩋", [ 10827, 0 ] ), 7548 entity( "⩇", [ 10823, 0 ] ), 7549 entity( "⩀", [ 10816, 0 ] ), 7550 entity( "∩︀", [ 8745, 65024 ] ), 7551 entity( "⁁", [ 8257, 0 ] ), 7552 entity( "ˇ", [ 711, 0 ] ), 7553 entity( "⩍", [ 10829, 0 ] ), 7554 entity( "č", [ 269, 0 ] 
), 7555 entity( "ç", [ 231, 0 ] ), 7556 entity( "ĉ", [ 265, 0 ] ), 7557 entity( "⩌", [ 10828, 0 ] ), 7558 entity( "⩐", [ 10832, 0 ] ), 7559 entity( "ċ", [ 267, 0 ] ), 7560 entity( "¸", [ 184, 0 ] ), 7561 entity( "⦲", [ 10674, 0 ] ), 7562 entity( "¢", [ 162, 0 ] ), 7563 entity( "·", [ 183, 0 ] ), 7564 entity( "𝔠", [ 120096, 0 ] ), 7565 entity( "ч", [ 1095, 0 ] ), 7566 entity( "✓", [ 10003, 0 ] ), 7567 entity( "✓", [ 10003, 0 ] ), 7568 entity( "χ", [ 967, 0 ] ), 7569 entity( "○", [ 9675, 0 ] ), 7570 entity( "⧃", [ 10691, 0 ] ), 7571 entity( "ˆ", [ 710, 0 ] ), 7572 entity( "≗", [ 8791, 0 ] ), 7573 entity( "↺", [ 8634, 0 ] ), 7574 entity( "↻", [ 8635, 0 ] ), 7575 entity( "®", [ 174, 0 ] ), 7576 entity( "Ⓢ", [ 9416, 0 ] ), 7577 entity( "⊛", [ 8859, 0 ] ), 7578 entity( "⊚", [ 8858, 0 ] ), 7579 entity( "⊝", [ 8861, 0 ] ), 7580 entity( "≗", [ 8791, 0 ] ), 7581 entity( "⨐", [ 10768, 0 ] ), 7582 entity( "⫯", [ 10991, 0 ] ), 7583 entity( "⧂", [ 10690, 0 ] ), 7584 entity( "♣", [ 9827, 0 ] ), 7585 entity( "♣", [ 9827, 0 ] ), 7586 entity( ":", [ 58, 0 ] ), 7587 entity( "≔", [ 8788, 0 ] ), 7588 entity( "≔", [ 8788, 0 ] ), 7589 entity( ",", [ 44, 0 ] ), 7590 entity( "@", [ 64, 0 ] ), 7591 entity( "∁", [ 8705, 0 ] ), 7592 entity( "∘", [ 8728, 0 ] ), 7593 entity( "∁", [ 8705, 0 ] ), 7594 entity( "ℂ", [ 8450, 0 ] ), 7595 entity( "≅", [ 8773, 0 ] ), 7596 entity( "⩭", [ 10861, 0 ] ), 7597 entity( "∮", [ 8750, 0 ] ), 7598 entity( "𝕔", [ 120148, 0 ] ), 7599 entity( "∐", [ 8720, 0 ] ), 7600 entity( "©", [ 169, 0 ] ), 7601 entity( "℗", [ 8471, 0 ] ), 7602 entity( "↵", [ 8629, 0 ] ), 7603 entity( "✗", [ 10007, 0 ] ), 7604 entity( "𝒸", [ 119992, 0 ] ), 7605 entity( "⫏", [ 10959, 0 ] ), 7606 entity( "⫑", [ 10961, 0 ] ), 7607 entity( "⫐", [ 10960, 0 ] ), 7608 entity( "⫒", [ 10962, 0 ] ), 7609 entity( "⋯", [ 8943, 0 ] ), 7610 entity( "⤸", [ 10552, 0 ] ), 7611 entity( "⤵", [ 10549, 0 ] ), 7612 entity( "⋞", [ 8926, 0 ] ), 7613 entity( "⋟", [ 8927, 0 ] ), 7614 entity( "↶", [ 8630, 0 ] ), 7615 
entity( "⤽", [ 10557, 0 ] ), 7616 entity( "∪", [ 8746, 0 ] ), 7617 entity( "⩈", [ 10824, 0 ] ), 7618 entity( "⩆", [ 10822, 0 ] ), 7619 entity( "⩊", [ 10826, 0 ] ), 7620 entity( "⊍", [ 8845, 0 ] ), 7621 entity( "⩅", [ 10821, 0 ] ), 7622 entity( "∪︀", [ 8746, 65024 ] ), 7623 entity( "↷", [ 8631, 0 ] ), 7624 entity( "⤼", [ 10556, 0 ] ), 7625 entity( "⋞", [ 8926, 0 ] ), 7626 entity( "⋟", [ 8927, 0 ] ), 7627 entity( "⋎", [ 8910, 0 ] ), 7628 entity( "⋏", [ 8911, 0 ] ), 7629 entity( "¤", [ 164, 0 ] ), 7630 entity( "↶", [ 8630, 0 ] ), 7631 entity( "↷", [ 8631, 0 ] ), 7632 entity( "⋎", [ 8910, 0 ] ), 7633 entity( "⋏", [ 8911, 0 ] ), 7634 entity( "∲", [ 8754, 0 ] ), 7635 entity( "∱", [ 8753, 0 ] ), 7636 entity( "⌭", [ 9005, 0 ] ), 7637 entity( "⇓", [ 8659, 0 ] ), 7638 entity( "⥥", [ 10597, 0 ] ), 7639 entity( "†", [ 8224, 0 ] ), 7640 entity( "ℸ", [ 8504, 0 ] ), 7641 entity( "↓", [ 8595, 0 ] ), 7642 entity( "‐", [ 8208, 0 ] ), 7643 entity( "⊣", [ 8867, 0 ] ), 7644 entity( "⤏", [ 10511, 0 ] ), 7645 entity( "˝", [ 733, 0 ] ), 7646 entity( "ď", [ 271, 0 ] ), 7647 entity( "д", [ 1076, 0 ] ), 7648 entity( "ⅆ", [ 8518, 0 ] ), 7649 entity( "‡", [ 8225, 0 ] ), 7650 entity( "⇊", [ 8650, 0 ] ), 7651 entity( "⩷", [ 10871, 0 ] ), 7652 entity( "°", [ 176, 0 ] ), 7653 entity( "δ", [ 948, 0 ] ), 7654 entity( "⦱", [ 10673, 0 ] ), 7655 entity( "⥿", [ 10623, 0 ] ), 7656 entity( "𝔡", [ 120097, 0 ] ), 7657 entity( "⇃", [ 8643, 0 ] ), 7658 entity( "⇂", [ 8642, 0 ] ), 7659 entity( "⋄", [ 8900, 0 ] ), 7660 entity( "⋄", [ 8900, 0 ] ), 7661 entity( "♦", [ 9830, 0 ] ), 7662 entity( "♦", [ 9830, 0 ] ), 7663 entity( "¨", [ 168, 0 ] ), 7664 entity( "ϝ", [ 989, 0 ] ), 7665 entity( "⋲", [ 8946, 0 ] ), 7666 entity( "÷", [ 247, 0 ] ), 7667 entity( "÷", [ 247, 0 ] ), 7668 entity( "⋇", [ 8903, 0 ] ), 7669 entity( "⋇", [ 8903, 0 ] ), 7670 entity( "ђ", [ 1106, 0 ] ), 7671 entity( "⌞", [ 8990, 0 ] ), 7672 entity( "⌍", [ 8973, 0 ] ), 7673 entity( "$", [ 36, 0 ] ), 7674 entity( "𝕕", [ 120149, 0 ] ), 7675 entity( 
"˙", [ 729, 0 ] ), 7676 entity( "≐", [ 8784, 0 ] ), 7677 entity( "≑", [ 8785, 0 ] ), 7678 entity( "∸", [ 8760, 0 ] ), 7679 entity( "∔", [ 8724, 0 ] ), 7680 entity( "⊡", [ 8865, 0 ] ), 7681 entity( "⌆", [ 8966, 0 ] ), 7682 entity( "↓", [ 8595, 0 ] ), 7683 entity( "⇊", [ 8650, 0 ] ), 7684 entity( "⇃", [ 8643, 0 ] ), 7685 entity( "⇂", [ 8642, 0 ] ), 7686 entity( "⤐", [ 10512, 0 ] ), 7687 entity( "⌟", [ 8991, 0 ] ), 7688 entity( "⌌", [ 8972, 0 ] ), 7689 entity( "𝒹", [ 119993, 0 ] ), 7690 entity( "ѕ", [ 1109, 0 ] ), 7691 entity( "⧶", [ 10742, 0 ] ), 7692 entity( "đ", [ 273, 0 ] ), 7693 entity( "⋱", [ 8945, 0 ] ), 7694 entity( "▿", [ 9663, 0 ] ), 7695 entity( "▾", [ 9662, 0 ] ), 7696 entity( "⇵", [ 8693, 0 ] ), 7697 entity( "⥯", [ 10607, 0 ] ), 7698 entity( "⦦", [ 10662, 0 ] ), 7699 entity( "џ", [ 1119, 0 ] ), 7700 entity( "⟿", [ 10239, 0 ] ), 7701 entity( "⩷", [ 10871, 0 ] ), 7702 entity( "≑", [ 8785, 0 ] ), 7703 entity( "é", [ 233, 0 ] ), 7704 entity( "⩮", [ 10862, 0 ] ), 7705 entity( "ě", [ 283, 0 ] ), 7706 entity( "≖", [ 8790, 0 ] ), 7707 entity( "ê", [ 234, 0 ] ), 7708 entity( "≕", [ 8789, 0 ] ), 7709 entity( "э", [ 1101, 0 ] ), 7710 entity( "ė", [ 279, 0 ] ), 7711 entity( "ⅇ", [ 8519, 0 ] ), 7712 entity( "≒", [ 8786, 0 ] ), 7713 entity( "𝔢", [ 120098, 0 ] ), 7714 entity( "⪚", [ 10906, 0 ] ), 7715 entity( "è", [ 232, 0 ] ), 7716 entity( "⪖", [ 10902, 0 ] ), 7717 entity( "⪘", [ 10904, 0 ] ), 7718 entity( "⪙", [ 10905, 0 ] ), 7719 entity( "⏧", [ 9191, 0 ] ), 7720 entity( "ℓ", [ 8467, 0 ] ), 7721 entity( "⪕", [ 10901, 0 ] ), 7722 entity( "⪗", [ 10903, 0 ] ), 7723 entity( "ē", [ 275, 0 ] ), 7724 entity( "∅", [ 8709, 0 ] ), 7725 entity( "∅", [ 8709, 0 ] ), 7726 entity( "∅", [ 8709, 0 ] ), 7727 entity( " ", [ 8196, 0 ] ), 7728 entity( " ", [ 8197, 0 ] ), 7729 entity( " ", [ 8195, 0 ] ), 7730 entity( "ŋ", [ 331, 0 ] ), 7731 entity( " ", [ 8194, 0 ] ), 7732 entity( "ę", [ 281, 0 ] ), 7733 entity( "𝕖", [ 120150, 0 ] ), 7734 entity( "⋕", [ 8917, 0 ] ), 7735 entity( "⧣", [ 
10723, 0 ] ), 7736 entity( "⩱", [ 10865, 0 ] ), 7737 entity( "ε", [ 949, 0 ] ), 7738 entity( "ε", [ 949, 0 ] ), 7739 entity( "ϵ", [ 1013, 0 ] ), 7740 entity( "≖", [ 8790, 0 ] ), 7741 entity( "≕", [ 8789, 0 ] ), 7742 entity( "≂", [ 8770, 0 ] ), 7743 entity( "⪖", [ 10902, 0 ] ), 7744 entity( "⪕", [ 10901, 0 ] ), 7745 entity( "=", [ 61, 0 ] ), 7746 entity( "≟", [ 8799, 0 ] ), 7747 entity( "≡", [ 8801, 0 ] ), 7748 entity( "⩸", [ 10872, 0 ] ), 7749 entity( "⧥", [ 10725, 0 ] ), 7750 entity( "≓", [ 8787, 0 ] ), 7751 entity( "⥱", [ 10609, 0 ] ), 7752 entity( "ℯ", [ 8495, 0 ] ), 7753 entity( "≐", [ 8784, 0 ] ), 7754 entity( "≂", [ 8770, 0 ] ), 7755 entity( "η", [ 951, 0 ] ), 7756 entity( "ð", [ 240, 0 ] ), 7757 entity( "ë", [ 235, 0 ] ), 7758 entity( "€", [ 8364, 0 ] ), 7759 entity( "!", [ 33, 0 ] ), 7760 entity( "∃", [ 8707, 0 ] ), 7761 entity( "ℰ", [ 8496, 0 ] ), 7762 entity( "ⅇ", [ 8519, 0 ] ), 7763 entity( "≒", [ 8786, 0 ] ), 7764 entity( "ф", [ 1092, 0 ] ), 7765 entity( "♀", [ 9792, 0 ] ), 7766 entity( "ffi", [ 64259, 0 ] ), 7767 entity( "ff", [ 64256, 0 ] ), 7768 entity( "ffl", [ 64260, 0 ] ), 7769 entity( "𝔣", [ 120099, 0 ] ), 7770 entity( "fi", [ 64257, 0 ] ), 7771 entity( "fj", [ 102, 106 ] ), 7772 entity( "♭", [ 9837, 0 ] ), 7773 entity( "fl", [ 64258, 0 ] ), 7774 entity( "▱", [ 9649, 0 ] ), 7775 entity( "ƒ", [ 402, 0 ] ), 7776 entity( "𝕗", [ 120151, 0 ] ), 7777 entity( "∀", [ 8704, 0 ] ), 7778 entity( "⋔", [ 8916, 0 ] ), 7779 entity( "⫙", [ 10969, 0 ] ), 7780 entity( "⨍", [ 10765, 0 ] ), 7781 entity( "½", [ 189, 0 ] ), 7782 entity( "½", [ 189, 0 ] ), 7783 entity( "⅓", [ 8531, 0 ] ), 7784 entity( "¼", [ 188, 0 ] ), 7785 entity( "¼", [ 188, 0 ] ), 7786 entity( "⅕", [ 8533, 0 ] ), 7787 entity( "⅙", [ 8537, 0 ] ), 7788 entity( "⅛", [ 8539, 0 ] ), 7789 entity( "⅔", [ 8532, 0 ] ), 7790 entity( "⅖", [ 8534, 0 ] ), 7791 entity( "¾", [ 190, 0 ] ), 7792 entity( "¾", [ 190, 0 ] ), 7793 entity( "⅗", [ 8535, 0 ] ), 7794 entity( "⅜", [ 8540, 0 ] ), 7795 entity( "⅘", [ 8536, 0 
] ), 7796 entity( "⅚", [ 8538, 0 ] ), 7797 entity( "⅝", [ 8541, 0 ] ), 7798 entity( "⅞", [ 8542, 0 ] ), 7799 entity( "⁄", [ 8260, 0 ] ), 7800 entity( "⌢", [ 8994, 0 ] ), 7801 entity( "𝒻", [ 119995, 0 ] ), 7802 entity( "≧", [ 8807, 0 ] ), 7803 entity( "⪌", [ 10892, 0 ] ), 7804 entity( "ǵ", [ 501, 0 ] ), 7805 entity( "γ", [ 947, 0 ] ), 7806 entity( "ϝ", [ 989, 0 ] ), 7807 entity( "⪆", [ 10886, 0 ] ), 7808 entity( "ğ", [ 287, 0 ] ), 7809 entity( "ĝ", [ 285, 0 ] ), 7810 entity( "г", [ 1075, 0 ] ), 7811 entity( "ġ", [ 289, 0 ] ), 7812 entity( "≥", [ 8805, 0 ] ), 7813 entity( "⋛", [ 8923, 0 ] ), 7814 entity( "≥", [ 8805, 0 ] ), 7815 entity( "≧", [ 8807, 0 ] ), 7816 entity( "⩾", [ 10878, 0 ] ), 7817 entity( "⩾", [ 10878, 0 ] ), 7818 entity( "⪩", [ 10921, 0 ] ), 7819 entity( "⪀", [ 10880, 0 ] ), 7820 entity( "⪂", [ 10882, 0 ] ), 7821 entity( "⪄", [ 10884, 0 ] ), 7822 entity( "⋛︀", [ 8923, 65024 ] ), 7823 entity( "⪔", [ 10900, 0 ] ), 7824 entity( "𝔤", [ 120100, 0 ] ), 7825 entity( "≫", [ 8811, 0 ] ), 7826 entity( "⋙", [ 8921, 0 ] ), 7827 entity( "ℷ", [ 8503, 0 ] ), 7828 entity( "ѓ", [ 1107, 0 ] ), 7829 entity( "≷", [ 8823, 0 ] ), 7830 entity( "⪒", [ 10898, 0 ] ), 7831 entity( "⪥", [ 10917, 0 ] ), 7832 entity( "⪤", [ 10916, 0 ] ), 7833 entity( "≩", [ 8809, 0 ] ), 7834 entity( "⪊", [ 10890, 0 ] ), 7835 entity( "⪊", [ 10890, 0 ] ), 7836 entity( "⪈", [ 10888, 0 ] ), 7837 entity( "⪈", [ 10888, 0 ] ), 7838 entity( "≩", [ 8809, 0 ] ), 7839 entity( "⋧", [ 8935, 0 ] ), 7840 entity( "𝕘", [ 120152, 0 ] ), 7841 entity( "`", [ 96, 0 ] ), 7842 entity( "ℊ", [ 8458, 0 ] ), 7843 entity( "≳", [ 8819, 0 ] ), 7844 entity( "⪎", [ 10894, 0 ] ), 7845 entity( "⪐", [ 10896, 0 ] ), 7846 entity( ">", [ 62, 0 ] ), 7847 entity( "⪧", [ 10919, 0 ] ), 7848 entity( "⩺", [ 10874, 0 ] ), 7849 entity( "⋗", [ 8919, 0 ] ), 7850 entity( "⦕", [ 10645, 0 ] ), 7851 entity( "⩼", [ 10876, 0 ] ), 7852 entity( "⪆", [ 10886, 0 ] ), 7853 entity( "⥸", [ 10616, 0 ] ), 7854 entity( "⋗", [ 8919, 0 ] ), 7855 entity( "⋛", [ 
8923, 0 ] ), 7856 entity( "⪌", [ 10892, 0 ] ), 7857 entity( "≷", [ 8823, 0 ] ), 7858 entity( "≳", [ 8819, 0 ] ), 7859 entity( "≩︀", [ 8809, 65024 ] ), 7860 entity( "≩︀", [ 8809, 65024 ] ), 7861 entity( "⇔", [ 8660, 0 ] ), 7862 entity( " ", [ 8202, 0 ] ), 7863 entity( "½", [ 189, 0 ] ), 7864 entity( "ℋ", [ 8459, 0 ] ), 7865 entity( "ъ", [ 1098, 0 ] ), 7866 entity( "↔", [ 8596, 0 ] ), 7867 entity( "⥈", [ 10568, 0 ] ), 7868 entity( "↭", [ 8621, 0 ] ), 7869 entity( "ℏ", [ 8463, 0 ] ), 7870 entity( "ĥ", [ 293, 0 ] ), 7871 entity( "♥", [ 9829, 0 ] ), 7872 entity( "♥", [ 9829, 0 ] ), 7873 entity( "…", [ 8230, 0 ] ), 7874 entity( "⊹", [ 8889, 0 ] ), 7875 entity( "𝔥", [ 120101, 0 ] ), 7876 entity( "⤥", [ 10533, 0 ] ), 7877 entity( "⤦", [ 10534, 0 ] ), 7878 entity( "⇿", [ 8703, 0 ] ), 7879 entity( "∻", [ 8763, 0 ] ), 7880 entity( "↩", [ 8617, 0 ] ), 7881 entity( "↪", [ 8618, 0 ] ), 7882 entity( "𝕙", [ 120153, 0 ] ), 7883 entity( "―", [ 8213, 0 ] ), 7884 entity( "𝒽", [ 119997, 0 ] ), 7885 entity( "ℏ", [ 8463, 0 ] ), 7886 entity( "ħ", [ 295, 0 ] ), 7887 entity( "⁃", [ 8259, 0 ] ), 7888 entity( "‐", [ 8208, 0 ] ), 7889 entity( "í", [ 237, 0 ] ), 7890 entity( "⁣", [ 8291, 0 ] ), 7891 entity( "î", [ 238, 0 ] ), 7892 entity( "и", [ 1080, 0 ] ), 7893 entity( "е", [ 1077, 0 ] ), 7894 entity( "¡", [ 161, 0 ] ), 7895 entity( "⇔", [ 8660, 0 ] ), 7896 entity( "𝔦", [ 120102, 0 ] ), 7897 entity( "ì", [ 236, 0 ] ), 7898 entity( "ⅈ", [ 8520, 0 ] ), 7899 entity( "⨌", [ 10764, 0 ] ), 7900 entity( "∭", [ 8749, 0 ] ), 7901 entity( "⧜", [ 10716, 0 ] ), 7902 entity( "℩", [ 8489, 0 ] ), 7903 entity( "ij", [ 307, 0 ] ), 7904 entity( "ī", [ 299, 0 ] ), 7905 entity( "ℑ", [ 8465, 0 ] ), 7906 entity( "ℐ", [ 8464, 0 ] ), 7907 entity( "ℑ", [ 8465, 0 ] ), 7908 entity( "ı", [ 305, 0 ] ), 7909 entity( "⊷", [ 8887, 0 ] ), 7910 entity( "Ƶ", [ 437, 0 ] ), 7911 entity( "∈", [ 8712, 0 ] ), 7912 entity( "℅", [ 8453, 0 ] ), 7913 entity( "∞", [ 8734, 0 ] ), 7914 entity( "⧝", [ 10717, 0 ] ), 7915 entity( "ı", [ 305, 
0 ] ), 7916 entity( "∫", [ 8747, 0 ] ), 7917 entity( "⊺", [ 8890, 0 ] ), 7918 entity( "ℤ", [ 8484, 0 ] ), 7919 entity( "⊺", [ 8890, 0 ] ), 7920 entity( "⨗", [ 10775, 0 ] ), 7921 entity( "⨼", [ 10812, 0 ] ), 7922 entity( "ё", [ 1105, 0 ] ), 7923 entity( "į", [ 303, 0 ] ), 7924 entity( "𝕚", [ 120154, 0 ] ), 7925 entity( "ι", [ 953, 0 ] ), 7926 entity( "⨼", [ 10812, 0 ] ), 7927 entity( "¿", [ 191, 0 ] ), 7928 entity( "𝒾", [ 119998, 0 ] ), 7929 entity( "∈", [ 8712, 0 ] ), 7930 entity( "⋹", [ 8953, 0 ] ), 7931 entity( "⋵", [ 8949, 0 ] ), 7932 entity( "⋴", [ 8948, 0 ] ), 7933 entity( "⋳", [ 8947, 0 ] ), 7934 entity( "∈", [ 8712, 0 ] ), 7935 entity( "⁢", [ 8290, 0 ] ), 7936 entity( "ĩ", [ 297, 0 ] ), 7937 entity( "і", [ 1110, 0 ] ), 7938 entity( "ï", [ 239, 0 ] ), 7939 entity( "ĵ", [ 309, 0 ] ), 7940 entity( "й", [ 1081, 0 ] ), 7941 entity( "𝔧", [ 120103, 0 ] ), 7942 entity( "ȷ", [ 567, 0 ] ), 7943 entity( "𝕛", [ 120155, 0 ] ), 7944 entity( "𝒿", [ 119999, 0 ] ), 7945 entity( "ј", [ 1112, 0 ] ), 7946 entity( "є", [ 1108, 0 ] ), 7947 entity( "κ", [ 954, 0 ] ), 7948 entity( "ϰ", [ 1008, 0 ] ), 7949 entity( "ķ", [ 311, 0 ] ), 7950 entity( "к", [ 1082, 0 ] ), 7951 entity( "𝔨", [ 120104, 0 ] ), 7952 entity( "ĸ", [ 312, 0 ] ), 7953 entity( "х", [ 1093, 0 ] ), 7954 entity( "ќ", [ 1116, 0 ] ), 7955 entity( "𝕜", [ 120156, 0 ] ), 7956 entity( "𝓀", [ 120000, 0 ] ), 7957 entity( "⇚", [ 8666, 0 ] ), 7958 entity( "⇐", [ 8656, 0 ] ), 7959 entity( "⤛", [ 10523, 0 ] ), 7960 entity( "⤎", [ 10510, 0 ] ), 7961 entity( "≦", [ 8806, 0 ] ), 7962 entity( "⪋", [ 10891, 0 ] ), 7963 entity( "⥢", [ 10594, 0 ] ), 7964 entity( "ĺ", [ 314, 0 ] ), 7965 entity( "⦴", [ 10676, 0 ] ), 7966 entity( "ℒ", [ 8466, 0 ] ), 7967 entity( "λ", [ 955, 0 ] ), 7968 entity( "⟨", [ 10216, 0 ] ), 7969 entity( "⦑", [ 10641, 0 ] ), 7970 entity( "⟨", [ 10216, 0 ] ), 7971 entity( "⪅", [ 10885, 0 ] ), 7972 entity( "«", [ 171, 0 ] ), 7973 entity( "←", [ 8592, 0 ] ), 7974 entity( "⇤", [ 8676, 0 ] ), 7975 entity( "⤟", [ 10527, 0 ] 
), 7976 entity( "⤝", [ 10525, 0 ] ), 7977 entity( "↩", [ 8617, 0 ] ), 7978 entity( "↫", [ 8619, 0 ] ), 7979 entity( "⤹", [ 10553, 0 ] ), 7980 entity( "⥳", [ 10611, 0 ] ), 7981 entity( "↢", [ 8610, 0 ] ), 7982 entity( "⪫", [ 10923, 0 ] ), 7983 entity( "⤙", [ 10521, 0 ] ), 7984 entity( "⪭", [ 10925, 0 ] ), 7985 entity( "⪭︀", [ 10925, 65024 ] ), 7986 entity( "⤌", [ 10508, 0 ] ), 7987 entity( "❲", [ 10098, 0 ] ), 7988 entity( "{", [ 123, 0 ] ), 7989 entity( "[", [ 91, 0 ] ), 7990 entity( "⦋", [ 10635, 0 ] ), 7991 entity( "⦏", [ 10639, 0 ] ), 7992 entity( "⦍", [ 10637, 0 ] ), 7993 entity( "ľ", [ 318, 0 ] ), 7994 entity( "ļ", [ 316, 0 ] ), 7995 entity( "⌈", [ 8968, 0 ] ), 7996 entity( "{", [ 123, 0 ] ), 7997 entity( "л", [ 1083, 0 ] ), 7998 entity( "⤶", [ 10550, 0 ] ), 7999 entity( "“", [ 8220, 0 ] ), 8000 entity( "„", [ 8222, 0 ] ), 8001 entity( "⥧", [ 10599, 0 ] ), 8002 entity( "⥋", [ 10571, 0 ] ), 8003 entity( "↲", [ 8626, 0 ] ), 8004 entity( "≤", [ 8804, 0 ] ), 8005 entity( "←", [ 8592, 0 ] ), 8006 entity( "↢", [ 8610, 0 ] ), 8007 entity( "↽", [ 8637, 0 ] ), 8008 entity( "↼", [ 8636, 0 ] ), 8009 entity( "⇇", [ 8647, 0 ] ), 8010 entity( "↔", [ 8596, 0 ] ), 8011 entity( "⇆", [ 8646, 0 ] ), 8012 entity( "⇋", [ 8651, 0 ] ), 8013 entity( "↭", [ 8621, 0 ] ), 8014 entity( "⋋", [ 8907, 0 ] ), 8015 entity( "⋚", [ 8922, 0 ] ), 8016 entity( "≤", [ 8804, 0 ] ), 8017 entity( "≦", [ 8806, 0 ] ), 8018 entity( "⩽", [ 10877, 0 ] ), 8019 entity( "⩽", [ 10877, 0 ] ), 8020 entity( "⪨", [ 10920, 0 ] ), 8021 entity( "⩿", [ 10879, 0 ] ), 8022 entity( "⪁", [ 10881, 0 ] ), 8023 entity( "⪃", [ 10883, 0 ] ), 8024 entity( "⋚︀", [ 8922, 65024 ] ), 8025 entity( "⪓", [ 10899, 0 ] ), 8026 entity( "⪅", [ 10885, 0 ] ), 8027 entity( "⋖", [ 8918, 0 ] ), 8028 entity( "⋚", [ 8922, 0 ] ), 8029 entity( "⪋", [ 10891, 0 ] ), 8030 entity( "≶", [ 8822, 0 ] ), 8031 entity( "≲", [ 8818, 0 ] ), 8032 entity( "⥼", [ 10620, 0 ] ), 8033 entity( "⌊", [ 8970, 0 ] ), 8034 entity( "𝔩", [ 120105, 0 ] ), 8035 entity( "≶", 
[ 8822, 0 ] ), 8036 entity( "⪑", [ 10897, 0 ] ), 8037 entity( "↽", [ 8637, 0 ] ), 8038 entity( "↼", [ 8636, 0 ] ), 8039 entity( "⥪", [ 10602, 0 ] ), 8040 entity( "▄", [ 9604, 0 ] ), 8041 entity( "љ", [ 1113, 0 ] ), 8042 entity( "≪", [ 8810, 0 ] ), 8043 entity( "⇇", [ 8647, 0 ] ), 8044 entity( "⌞", [ 8990, 0 ] ), 8045 entity( "⥫", [ 10603, 0 ] ), 8046 entity( "◺", [ 9722, 0 ] ), 8047 entity( "ŀ", [ 320, 0 ] ), 8048 entity( "⎰", [ 9136, 0 ] ), 8049 entity( "⎰", [ 9136, 0 ] ), 8050 entity( "≨", [ 8808, 0 ] ), 8051 entity( "⪉", [ 10889, 0 ] ), 8052 entity( "⪉", [ 10889, 0 ] ), 8053 entity( "⪇", [ 10887, 0 ] ), 8054 entity( "⪇", [ 10887, 0 ] ), 8055 entity( "≨", [ 8808, 0 ] ), 8056 entity( "⋦", [ 8934, 0 ] ), 8057 entity( "⟬", [ 10220, 0 ] ), 8058 entity( "⇽", [ 8701, 0 ] ), 8059 entity( "⟦", [ 10214, 0 ] ), 8060 entity( "⟵", [ 10229, 0 ] ), 8061 entity( "⟷", [ 10231, 0 ] ), 8062 entity( "⟼", [ 10236, 0 ] ), 8063 entity( "⟶", [ 10230, 0 ] ), 8064 entity( "↫", [ 8619, 0 ] ), 8065 entity( "↬", [ 8620, 0 ] ), 8066 entity( "⦅", [ 10629, 0 ] ), 8067 entity( "𝕝", [ 120157, 0 ] ), 8068 entity( "⨭", [ 10797, 0 ] ), 8069 entity( "⨴", [ 10804, 0 ] ), 8070 entity( "∗", [ 8727, 0 ] ), 8071 entity( "_", [ 95, 0 ] ), 8072 entity( "◊", [ 9674, 0 ] ), 8073 entity( "◊", [ 9674, 0 ] ), 8074 entity( "⧫", [ 10731, 0 ] ), 8075 entity( "(", [ 40, 0 ] ), 8076 entity( "⦓", [ 10643, 0 ] ), 8077 entity( "⇆", [ 8646, 0 ] ), 8078 entity( "⌟", [ 8991, 0 ] ), 8079 entity( "⇋", [ 8651, 0 ] ), 8080 entity( "⥭", [ 10605, 0 ] ), 8081 entity( "‎", [ 8206, 0 ] ), 8082 entity( "⊿", [ 8895, 0 ] ), 8083 entity( "‹", [ 8249, 0 ] ), 8084 entity( "𝓁", [ 120001, 0 ] ), 8085 entity( "↰", [ 8624, 0 ] ), 8086 entity( "≲", [ 8818, 0 ] ), 8087 entity( "⪍", [ 10893, 0 ] ), 8088 entity( "⪏", [ 10895, 0 ] ), 8089 entity( "[", [ 91, 0 ] ), 8090 entity( "‘", [ 8216, 0 ] ), 8091 entity( "‚", [ 8218, 0 ] ), 8092 entity( "ł", [ 322, 0 ] ), 8093 entity( "<", [ 60, 0 ] ), 8094 entity( "⪦", [ 10918, 0 ] ), 8095 entity( "⩹", [ 
10873, 0 ] ), 8096 entity( "⋖", [ 8918, 0 ] ), 8097 entity( "⋋", [ 8907, 0 ] ), 8098 entity( "⋉", [ 8905, 0 ] ), 8099 entity( "⥶", [ 10614, 0 ] ), 8100 entity( "⩻", [ 10875, 0 ] ), 8101 entity( "⦖", [ 10646, 0 ] ), 8102 entity( "◃", [ 9667, 0 ] ), 8103 entity( "⊴", [ 8884, 0 ] ), 8104 entity( "◂", [ 9666, 0 ] ), 8105 entity( "⥊", [ 10570, 0 ] ), 8106 entity( "⥦", [ 10598, 0 ] ), 8107 entity( "≨︀", [ 8808, 65024 ] ), 8108 entity( "≨︀", [ 8808, 65024 ] ), 8109 entity( "∺", [ 8762, 0 ] ), 8110 entity( "¯", [ 175, 0 ] ), 8111 entity( "♂", [ 9794, 0 ] ), 8112 entity( "✠", [ 10016, 0 ] ), 8113 entity( "✠", [ 10016, 0 ] ), 8114 entity( "↦", [ 8614, 0 ] ), 8115 entity( "↦", [ 8614, 0 ] ), 8116 entity( "↧", [ 8615, 0 ] ), 8117 entity( "↤", [ 8612, 0 ] ), 8118 entity( "↥", [ 8613, 0 ] ), 8119 entity( "▮", [ 9646, 0 ] ), 8120 entity( "⨩", [ 10793, 0 ] ), 8121 entity( "м", [ 1084, 0 ] ), 8122 entity( "—", [ 8212, 0 ] ), 8123 entity( "∡", [ 8737, 0 ] ), 8124 entity( "𝔪", [ 120106, 0 ] ), 8125 entity( "℧", [ 8487, 0 ] ), 8126 entity( "µ", [ 181, 0 ] ), 8127 entity( "∣", [ 8739, 0 ] ), 8128 entity( "*", [ 42, 0 ] ), 8129 entity( "⫰", [ 10992, 0 ] ), 8130 entity( "·", [ 183, 0 ] ), 8131 entity( "−", [ 8722, 0 ] ), 8132 entity( "⊟", [ 8863, 0 ] ), 8133 entity( "∸", [ 8760, 0 ] ), 8134 entity( "⨪", [ 10794, 0 ] ), 8135 entity( "⫛", [ 10971, 0 ] ), 8136 entity( "…", [ 8230, 0 ] ), 8137 entity( "∓", [ 8723, 0 ] ), 8138 entity( "⊧", [ 8871, 0 ] ), 8139 entity( "𝕞", [ 120158, 0 ] ), 8140 entity( "∓", [ 8723, 0 ] ), 8141 entity( "𝓂", [ 120002, 0 ] ), 8142 entity( "∾", [ 8766, 0 ] ), 8143 entity( "μ", [ 956, 0 ] ), 8144 entity( "⊸", [ 8888, 0 ] ), 8145 entity( "⊸", [ 8888, 0 ] ), 8146 entity( "⋙̸", [ 8921, 824 ] ), 8147 entity( "≫⃒", [ 8811, 8402 ] ), 8148 entity( "≫̸", [ 8811, 824 ] ), 8149 entity( "⇍", [ 8653, 0 ] ), 8150 entity( "⇎", [ 8654, 0 ] ), 8151 entity( "⋘̸", [ 8920, 824 ] ), 8152 entity( "≪⃒", [ 8810, 8402 ] ), 8153 entity( "≪̸", [ 8810, 824 ] ), 8154 entity( "⇏", [ 8655, 0 ] 
), 8155 entity( "⊯", [ 8879, 0 ] ), 8156 entity( "⊮", [ 8878, 0 ] ), 8157 entity( "∇", [ 8711, 0 ] ), 8158 entity( "ń", [ 324, 0 ] ), 8159 entity( "∠⃒", [ 8736, 8402 ] ), 8160 entity( "≉", [ 8777, 0 ] ), 8161 entity( "⩰̸", [ 10864, 824 ] ), 8162 entity( "≋̸", [ 8779, 824 ] ), 8163 entity( "ʼn", [ 329, 0 ] ), 8164 entity( "≉", [ 8777, 0 ] ), 8165 entity( "♮", [ 9838, 0 ] ), 8166 entity( "♮", [ 9838, 0 ] ), 8167 entity( "ℕ", [ 8469, 0 ] ), 8168 entity( " ", [ 160, 0 ] ), 8169 entity( "≎̸", [ 8782, 824 ] ), 8170 entity( "≏̸", [ 8783, 824 ] ), 8171 entity( "⩃", [ 10819, 0 ] ), 8172 entity( "ň", [ 328, 0 ] ), 8173 entity( "ņ", [ 326, 0 ] ), 8174 entity( "≇", [ 8775, 0 ] ), 8175 entity( "⩭̸", [ 10861, 824 ] ), 8176 entity( "⩂", [ 10818, 0 ] ), 8177 entity( "н", [ 1085, 0 ] ), 8178 entity( "–", [ 8211, 0 ] ), 8179 entity( "≠", [ 8800, 0 ] ), 8180 entity( "⇗", [ 8663, 0 ] ), 8181 entity( "⤤", [ 10532, 0 ] ), 8182 entity( "↗", [ 8599, 0 ] ), 8183 entity( "↗", [ 8599, 0 ] ), 8184 entity( "≐̸", [ 8784, 824 ] ), 8185 entity( "≢", [ 8802, 0 ] ), 8186 entity( "⤨", [ 10536, 0 ] ), 8187 entity( "≂̸", [ 8770, 824 ] ), 8188 entity( "∄", [ 8708, 0 ] ), 8189 entity( "∄", [ 8708, 0 ] ), 8190 entity( "𝔫", [ 120107, 0 ] ), 8191 entity( "≧̸", [ 8807, 824 ] ), 8192 entity( "≱", [ 8817, 0 ] ), 8193 entity( "≱", [ 8817, 0 ] ), 8194 entity( "≧̸", [ 8807, 824 ] ), 8195 entity( "⩾̸", [ 10878, 824 ] ), 8196 entity( "⩾̸", [ 10878, 824 ] ), 8197 entity( "≵", [ 8821, 0 ] ), 8198 entity( "≯", [ 8815, 0 ] ), 8199 entity( "≯", [ 8815, 0 ] ), 8200 entity( "⇎", [ 8654, 0 ] ), 8201 entity( "↮", [ 8622, 0 ] ), 8202 entity( "⫲", [ 10994, 0 ] ), 8203 entity( "∋", [ 8715, 0 ] ), 8204 entity( "⋼", [ 8956, 0 ] ), 8205 entity( "⋺", [ 8954, 0 ] ), 8206 entity( "∋", [ 8715, 0 ] ), 8207 entity( "њ", [ 1114, 0 ] ), 8208 entity( "⇍", [ 8653, 0 ] ), 8209 entity( "≦̸", [ 8806, 824 ] ), 8210 entity( "↚", [ 8602, 0 ] ), 8211 entity( "‥", [ 8229, 0 ] ), 8212 entity( "≰", [ 8816, 0 ] ), 8213 entity( "↚", [ 8602, 0 ] ), 
8214 entity( "↮", [ 8622, 0 ] ), 8215 entity( "≰", [ 8816, 0 ] ), 8216 entity( "≦̸", [ 8806, 824 ] ), 8217 entity( "⩽̸", [ 10877, 824 ] ), 8218 entity( "⩽̸", [ 10877, 824 ] ), 8219 entity( "≮", [ 8814, 0 ] ), 8220 entity( "≴", [ 8820, 0 ] ), 8221 entity( "≮", [ 8814, 0 ] ), 8222 entity( "⋪", [ 8938, 0 ] ), 8223 entity( "⋬", [ 8940, 0 ] ), 8224 entity( "∤", [ 8740, 0 ] ), 8225 entity( "𝕟", [ 120159, 0 ] ), 8226 entity( "¬", [ 172, 0 ] ), 8227 entity( "∉", [ 8713, 0 ] ), 8228 entity( "⋹̸", [ 8953, 824 ] ), 8229 entity( "⋵̸", [ 8949, 824 ] ), 8230 entity( "∉", [ 8713, 0 ] ), 8231 entity( "⋷", [ 8951, 0 ] ), 8232 entity( "⋶", [ 8950, 0 ] ), 8233 entity( "∌", [ 8716, 0 ] ), 8234 entity( "∌", [ 8716, 0 ] ), 8235 entity( "⋾", [ 8958, 0 ] ), 8236 entity( "⋽", [ 8957, 0 ] ), 8237 entity( "∦", [ 8742, 0 ] ), 8238 entity( "∦", [ 8742, 0 ] ), 8239 entity( "⫽⃥", [ 11005, 8421 ] ), 8240 entity( "∂̸", [ 8706, 824 ] ), 8241 entity( "⨔", [ 10772, 0 ] ), 8242 entity( "⊀", [ 8832, 0 ] ), 8243 entity( "⋠", [ 8928, 0 ] ), 8244 entity( "⪯̸", [ 10927, 824 ] ), 8245 entity( "⊀", [ 8832, 0 ] ), 8246 entity( "⪯̸", [ 10927, 824 ] ), 8247 entity( "⇏", [ 8655, 0 ] ), 8248 entity( "↛", [ 8603, 0 ] ), 8249 entity( "⤳̸", [ 10547, 824 ] ), 8250 entity( "↝̸", [ 8605, 824 ] ), 8251 entity( "↛", [ 8603, 0 ] ), 8252 entity( "⋫", [ 8939, 0 ] ), 8253 entity( "⋭", [ 8941, 0 ] ), 8254 entity( "⊁", [ 8833, 0 ] ), 8255 entity( "⋡", [ 8929, 0 ] ), 8256 entity( "⪰̸", [ 10928, 824 ] ), 8257 entity( "𝓃", [ 120003, 0 ] ), 8258 entity( "∤", [ 8740, 0 ] ), 8259 entity( "∦", [ 8742, 0 ] ), 8260 entity( "≁", [ 8769, 0 ] ), 8261 entity( "≄", [ 8772, 0 ] ), 8262 entity( "≄", [ 8772, 0 ] ), 8263 entity( "∤", [ 8740, 0 ] ), 8264 entity( "∦", [ 8742, 0 ] ), 8265 entity( "⋢", [ 8930, 0 ] ), 8266 entity( "⋣", [ 8931, 0 ] ), 8267 entity( "⊄", [ 8836, 0 ] ), 8268 entity( "⫅̸", [ 10949, 824 ] ), 8269 entity( "⊈", [ 8840, 0 ] ), 8270 entity( "⊂⃒", [ 8834, 8402 ] ), 8271 entity( "⊈", [ 8840, 0 ] ), 8272 entity( "⫅̸", [ 10949, 
824 ] ), 8273 entity( "⊁", [ 8833, 0 ] ), 8274 entity( "⪰̸", [ 10928, 824 ] ), 8275 entity( "⊅", [ 8837, 0 ] ), 8276 entity( "⫆̸", [ 10950, 824 ] ), 8277 entity( "⊉", [ 8841, 0 ] ), 8278 entity( "⊃⃒", [ 8835, 8402 ] ), 8279 entity( "⊉", [ 8841, 0 ] ), 8280 entity( "⫆̸", [ 10950, 824 ] ), 8281 entity( "≹", [ 8825, 0 ] ), 8282 entity( "ñ", [ 241, 0 ] ), 8283 entity( "≸", [ 8824, 0 ] ), 8284 entity( "⋪", [ 8938, 0 ] ), 8285 entity( "⋬", [ 8940, 0 ] ), 8286 entity( "⋫", [ 8939, 0 ] ), 8287 entity( "⋭", [ 8941, 0 ] ), 8288 entity( "ν", [ 957, 0 ] ), 8289 entity( "#", [ 35, 0 ] ), 8290 entity( "№", [ 8470, 0 ] ), 8291 entity( " ", [ 8199, 0 ] ), 8292 entity( "⊭", [ 8877, 0 ] ), 8293 entity( "⤄", [ 10500, 0 ] ), 8294 entity( "≍⃒", [ 8781, 8402 ] ), 8295 entity( "⊬", [ 8876, 0 ] ), 8296 entity( "≥⃒", [ 8805, 8402 ] ), 8297 entity( ">⃒", [ 62, 8402 ] ), 8298 entity( "⧞", [ 10718, 0 ] ), 8299 entity( "⤂", [ 10498, 0 ] ), 8300 entity( "≤⃒", [ 8804, 8402 ] ), 8301 entity( "<⃒", [ 60, 8402 ] ), 8302 entity( "⊴⃒", [ 8884, 8402 ] ), 8303 entity( "⤃", [ 10499, 0 ] ), 8304 entity( "⊵⃒", [ 8885, 8402 ] ), 8305 entity( "∼⃒", [ 8764, 8402 ] ), 8306 entity( "⇖", [ 8662, 0 ] ), 8307 entity( "⤣", [ 10531, 0 ] ), 8308 entity( "↖", [ 8598, 0 ] ), 8309 entity( "↖", [ 8598, 0 ] ), 8310 entity( "⤧", [ 10535, 0 ] ), 8311 entity( "Ⓢ", [ 9416, 0 ] ), 8312 entity( "ó", [ 243, 0 ] ), 8313 entity( "⊛", [ 8859, 0 ] ), 8314 entity( "⊚", [ 8858, 0 ] ), 8315 entity( "ô", [ 244, 0 ] ), 8316 entity( "о", [ 1086, 0 ] ), 8317 entity( "⊝", [ 8861, 0 ] ), 8318 entity( "ő", [ 337, 0 ] ), 8319 entity( "⨸", [ 10808, 0 ] ), 8320 entity( "⊙", [ 8857, 0 ] ), 8321 entity( "⦼", [ 10684, 0 ] ), 8322 entity( "œ", [ 339, 0 ] ), 8323 entity( "⦿", [ 10687, 0 ] ), 8324 entity( "𝔬", [ 120108, 0 ] ), 8325 entity( "˛", [ 731, 0 ] ), 8326 entity( "ò", [ 242, 0 ] ), 8327 entity( "⧁", [ 10689, 0 ] ), 8328 entity( "⦵", [ 10677, 0 ] ), 8329 entity( "Ω", [ 937, 0 ] ), 8330 entity( "∮", [ 8750, 0 ] ), 8331 entity( "↺", [ 8634, 0 ] 
), 8332 entity( "⦾", [ 10686, 0 ] ), 8333 entity( "⦻", [ 10683, 0 ] ), 8334 entity( "‾", [ 8254, 0 ] ), 8335 entity( "⧀", [ 10688, 0 ] ), 8336 entity( "ō", [ 333, 0 ] ), 8337 entity( "ω", [ 969, 0 ] ), 8338 entity( "ο", [ 959, 0 ] ), 8339 entity( "⦶", [ 10678, 0 ] ), 8340 entity( "⊖", [ 8854, 0 ] ), 8341 entity( "𝕠", [ 120160, 0 ] ), 8342 entity( "⦷", [ 10679, 0 ] ), 8343 entity( "⦹", [ 10681, 0 ] ), 8344 entity( "⊕", [ 8853, 0 ] ), 8345 entity( "∨", [ 8744, 0 ] ), 8346 entity( "↻", [ 8635, 0 ] ), 8347 entity( "⩝", [ 10845, 0 ] ), 8348 entity( "ℴ", [ 8500, 0 ] ), 8349 entity( "ℴ", [ 8500, 0 ] ), 8350 entity( "ª", [ 170, 0 ] ), 8351 entity( "º", [ 186, 0 ] ), 8352 entity( "⊶", [ 8886, 0 ] ), 8353 entity( "⩖", [ 10838, 0 ] ), 8354 entity( "⩗", [ 10839, 0 ] ), 8355 entity( "⩛", [ 10843, 0 ] ), 8356 entity( "ℴ", [ 8500, 0 ] ), 8357 entity( "ø", [ 248, 0 ] ), 8358 entity( "⊘", [ 8856, 0 ] ), 8359 entity( "õ", [ 245, 0 ] ), 8360 entity( "⊗", [ 8855, 0 ] ), 8361 entity( "⨶", [ 10806, 0 ] ), 8362 entity( "ö", [ 246, 0 ] ), 8363 entity( "⌽", [ 9021, 0 ] ), 8364 entity( "∥", [ 8741, 0 ] ), 8365 entity( "¶", [ 182, 0 ] ), 8366 entity( "∥", [ 8741, 0 ] ), 8367 entity( "⫳", [ 10995, 0 ] ), 8368 entity( "⫽", [ 11005, 0 ] ), 8369 entity( "∂", [ 8706, 0 ] ), 8370 entity( "п", [ 1087, 0 ] ), 8371 entity( "%", [ 37, 0 ] ), 8372 entity( ".", [ 46, 0 ] ), 8373 entity( "‰", [ 8240, 0 ] ), 8374 entity( "⊥", [ 8869, 0 ] ), 8375 entity( "‱", [ 8241, 0 ] ), 8376 entity( "𝔭", [ 120109, 0 ] ), 8377 entity( "φ", [ 966, 0 ] ), 8378 entity( "ϕ", [ 981, 0 ] ), 8379 entity( "ℳ", [ 8499, 0 ] ), 8380 entity( "☎", [ 9742, 0 ] ), 8381 entity( "π", [ 960, 0 ] ), 8382 entity( "⋔", [ 8916, 0 ] ), 8383 entity( "ϖ", [ 982, 0 ] ), 8384 entity( "ℏ", [ 8463, 0 ] ), 8385 entity( "ℎ", [ 8462, 0 ] ), 8386 entity( "ℏ", [ 8463, 0 ] ), 8387 entity( "+", [ 43, 0 ] ), 8388 entity( "⨣", [ 10787, 0 ] ), 8389 entity( "⊞", [ 8862, 0 ] ), 8390 entity( "⨢", [ 10786, 0 ] ), 8391 entity( "∔", [ 8724, 0 ] ), 8392 entity( 
"⨥", [ 10789, 0 ] ), 8393 entity( "⩲", [ 10866, 0 ] ), 8394 entity( "±", [ 177, 0 ] ), 8395 entity( "⨦", [ 10790, 0 ] ), 8396 entity( "⨧", [ 10791, 0 ] ), 8397 entity( "±", [ 177, 0 ] ), 8398 entity( "⨕", [ 10773, 0 ] ), 8399 entity( "𝕡", [ 120161, 0 ] ), 8400 entity( "£", [ 163, 0 ] ), 8401 entity( "≺", [ 8826, 0 ] ), 8402 entity( "⪳", [ 10931, 0 ] ), 8403 entity( "⪷", [ 10935, 0 ] ), 8404 entity( "≼", [ 8828, 0 ] ), 8405 entity( "⪯", [ 10927, 0 ] ), 8406 entity( "≺", [ 8826, 0 ] ), 8407 entity( "⪷", [ 10935, 0 ] ), 8408 entity( "≼", [ 8828, 0 ] ), 8409 entity( "⪯", [ 10927, 0 ] ), 8410 entity( "⪹", [ 10937, 0 ] ), 8411 entity( "⪵", [ 10933, 0 ] ), 8412 entity( "⋨", [ 8936, 0 ] ), 8413 entity( "≾", [ 8830, 0 ] ), 8414 entity( "′", [ 8242, 0 ] ), 8415 entity( "ℙ", [ 8473, 0 ] ), 8416 entity( "⪵", [ 10933, 0 ] ), 8417 entity( "⪹", [ 10937, 0 ] ), 8418 entity( "⋨", [ 8936, 0 ] ), 8419 entity( "∏", [ 8719, 0 ] ), 8420 entity( "⌮", [ 9006, 0 ] ), 8421 entity( "⌒", [ 8978, 0 ] ), 8422 entity( "⌓", [ 8979, 0 ] ), 8423 entity( "∝", [ 8733, 0 ] ), 8424 entity( "∝", [ 8733, 0 ] ), 8425 entity( "≾", [ 8830, 0 ] ), 8426 entity( "⊰", [ 8880, 0 ] ), 8427 entity( "𝓅", [ 120005, 0 ] ), 8428 entity( "ψ", [ 968, 0 ] ), 8429 entity( " ", [ 8200, 0 ] ), 8430 entity( "𝔮", [ 120110, 0 ] ), 8431 entity( "⨌", [ 10764, 0 ] ), 8432 entity( "𝕢", [ 120162, 0 ] ), 8433 entity( "⁗", [ 8279, 0 ] ), 8434 entity( "𝓆", [ 120006, 0 ] ), 8435 entity( "ℍ", [ 8461, 0 ] ), 8436 entity( "⨖", [ 10774, 0 ] ), 8437 entity( "?", [ 63, 0 ] ), 8438 entity( "≟", [ 8799, 0 ] ), 8439 entity( """, [ 34, 0 ] ), 8440 entity( "⇛", [ 8667, 0 ] ), 8441 entity( "⇒", [ 8658, 0 ] ), 8442 entity( "⤜", [ 10524, 0 ] ), 8443 entity( "⤏", [ 10511, 0 ] ), 8444 entity( "⥤", [ 10596, 0 ] ), 8445 entity( "∽̱", [ 8765, 817 ] ), 8446 entity( "ŕ", [ 341, 0 ] ), 8447 entity( "√", [ 8730, 0 ] ), 8448 entity( "⦳", [ 10675, 0 ] ), 8449 entity( "⟩", [ 10217, 0 ] ), 8450 entity( "⦒", [ 10642, 0 ] ), 8451 entity( "⦥", [ 10661, 0 ] ), 8452 
entity( "⟩", [ 10217, 0 ] ), 8453 entity( "»", [ 187, 0 ] ), 8454 entity( "→", [ 8594, 0 ] ), 8455 entity( "⥵", [ 10613, 0 ] ), 8456 entity( "⇥", [ 8677, 0 ] ), 8457 entity( "⤠", [ 10528, 0 ] ), 8458 entity( "⤳", [ 10547, 0 ] ), 8459 entity( "⤞", [ 10526, 0 ] ), 8460 entity( "↪", [ 8618, 0 ] ), 8461 entity( "↬", [ 8620, 0 ] ), 8462 entity( "⥅", [ 10565, 0 ] ), 8463 entity( "⥴", [ 10612, 0 ] ), 8464 entity( "↣", [ 8611, 0 ] ), 8465 entity( "↝", [ 8605, 0 ] ), 8466 entity( "⤚", [ 10522, 0 ] ), 8467 entity( "∶", [ 8758, 0 ] ), 8468 entity( "ℚ", [ 8474, 0 ] ), 8469 entity( "⤍", [ 10509, 0 ] ), 8470 entity( "❳", [ 10099, 0 ] ), 8471 entity( "}", [ 125, 0 ] ), 8472 entity( "]", [ 93, 0 ] ), 8473 entity( "⦌", [ 10636, 0 ] ), 8474 entity( "⦎", [ 10638, 0 ] ), 8475 entity( "⦐", [ 10640, 0 ] ), 8476 entity( "ř", [ 345, 0 ] ), 8477 entity( "ŗ", [ 343, 0 ] ), 8478 entity( "⌉", [ 8969, 0 ] ), 8479 entity( "}", [ 125, 0 ] ), 8480 entity( "р", [ 1088, 0 ] ), 8481 entity( "⤷", [ 10551, 0 ] ), 8482 entity( "⥩", [ 10601, 0 ] ), 8483 entity( "”", [ 8221, 0 ] ), 8484 entity( "”", [ 8221, 0 ] ), 8485 entity( "↳", [ 8627, 0 ] ), 8486 entity( "ℜ", [ 8476, 0 ] ), 8487 entity( "ℛ", [ 8475, 0 ] ), 8488 entity( "ℜ", [ 8476, 0 ] ), 8489 entity( "ℝ", [ 8477, 0 ] ), 8490 entity( "▭", [ 9645, 0 ] ), 8491 entity( "®", [ 174, 0 ] ), 8492 entity( "⥽", [ 10621, 0 ] ), 8493 entity( "⌋", [ 8971, 0 ] ), 8494 entity( "𝔯", [ 120111, 0 ] ), 8495 entity( "⇁", [ 8641, 0 ] ), 8496 entity( "⇀", [ 8640, 0 ] ), 8497 entity( "⥬", [ 10604, 0 ] ), 8498 entity( "ρ", [ 961, 0 ] ), 8499 entity( "ϱ", [ 1009, 0 ] ), 8500 entity( "→", [ 8594, 0 ] ), 8501 entity( "↣", [ 8611, 0 ] ), 8502 entity( "⇁", [ 8641, 0 ] ), 8503 entity( "⇀", [ 8640, 0 ] ), 8504 entity( "⇄", [ 8644, 0 ] ), 8505 entity( "⇌", [ 8652, 0 ] ), 8506 entity( "⇉", [ 8649, 0 ] ), 8507 entity( "↝", [ 8605, 0 ] ), 8508 entity( "⋌", [ 8908, 0 ] ), 8509 entity( "˚", [ 730, 0 ] ), 8510 entity( "≓", [ 8787, 0 ] ), 8511 entity( "⇄", [ 8644, 0 ] ), 8512 entity( 
"⇌", [ 8652, 0 ] ), 8513 entity( "‏", [ 8207, 0 ] ), 8514 entity( "⎱", [ 9137, 0 ] ), 8515 entity( "⎱", [ 9137, 0 ] ), 8516 entity( "⫮", [ 10990, 0 ] ), 8517 entity( "⟭", [ 10221, 0 ] ), 8518 entity( "⇾", [ 8702, 0 ] ), 8519 entity( "⟧", [ 10215, 0 ] ), 8520 entity( "⦆", [ 10630, 0 ] ), 8521 entity( "𝕣", [ 120163, 0 ] ), 8522 entity( "⨮", [ 10798, 0 ] ), 8523 entity( "⨵", [ 10805, 0 ] ), 8524 entity( ")", [ 41, 0 ] ), 8525 entity( "⦔", [ 10644, 0 ] ), 8526 entity( "⨒", [ 10770, 0 ] ), 8527 entity( "⇉", [ 8649, 0 ] ), 8528 entity( "›", [ 8250, 0 ] ), 8529 entity( "𝓇", [ 120007, 0 ] ), 8530 entity( "↱", [ 8625, 0 ] ), 8531 entity( "]", [ 93, 0 ] ), 8532 entity( "’", [ 8217, 0 ] ), 8533 entity( "’", [ 8217, 0 ] ), 8534 entity( "⋌", [ 8908, 0 ] ), 8535 entity( "⋊", [ 8906, 0 ] ), 8536 entity( "▹", [ 9657, 0 ] ), 8537 entity( "⊵", [ 8885, 0 ] ), 8538 entity( "▸", [ 9656, 0 ] ), 8539 entity( "⧎", [ 10702, 0 ] ), 8540 entity( "⥨", [ 10600, 0 ] ), 8541 entity( "℞", [ 8478, 0 ] ), 8542 entity( "ś", [ 347, 0 ] ), 8543 entity( "‚", [ 8218, 0 ] ), 8544 entity( "≻", [ 8827, 0 ] ), 8545 entity( "⪴", [ 10932, 0 ] ), 8546 entity( "⪸", [ 10936, 0 ] ), 8547 entity( "š", [ 353, 0 ] ), 8548 entity( "≽", [ 8829, 0 ] ), 8549 entity( "⪰", [ 10928, 0 ] ), 8550 entity( "ş", [ 351, 0 ] ), 8551 entity( "ŝ", [ 349, 0 ] ), 8552 entity( "⪶", [ 10934, 0 ] ), 8553 entity( "⪺", [ 10938, 0 ] ), 8554 entity( "⋩", [ 8937, 0 ] ), 8555 entity( "⨓", [ 10771, 0 ] ), 8556 entity( "≿", [ 8831, 0 ] ), 8557 entity( "с", [ 1089, 0 ] ), 8558 entity( "⋅", [ 8901, 0 ] ), 8559 entity( "⊡", [ 8865, 0 ] ), 8560 entity( "⩦", [ 10854, 0 ] ), 8561 entity( "⇘", [ 8664, 0 ] ), 8562 entity( "⤥", [ 10533, 0 ] ), 8563 entity( "↘", [ 8600, 0 ] ), 8564 entity( "↘", [ 8600, 0 ] ), 8565 entity( "§", [ 167, 0 ] ), 8566 entity( ";", [ 59, 0 ] ), 8567 entity( "⤩", [ 10537, 0 ] ), 8568 entity( "∖", [ 8726, 0 ] ), 8569 entity( "∖", [ 8726, 0 ] ), 8570 entity( "✶", [ 10038, 0 ] ), 8571 entity( "𝔰", [ 120112, 0 ] ), 8572 entity( "⌢", 
[ 8994, 0 ] ), 8573 entity( "♯", [ 9839, 0 ] ), 8574 entity( "щ", [ 1097, 0 ] ), 8575 entity( "ш", [ 1096, 0 ] ), 8576 entity( "∣", [ 8739, 0 ] ), 8577 entity( "∥", [ 8741, 0 ] ), 8578 entity( "­", [ 173, 0 ] ), 8579 entity( "σ", [ 963, 0 ] ), 8580 entity( "ς", [ 962, 0 ] ), 8581 entity( "ς", [ 962, 0 ] ), 8582 entity( "∼", [ 8764, 0 ] ), 8583 entity( "⩪", [ 10858, 0 ] ), 8584 entity( "≃", [ 8771, 0 ] ), 8585 entity( "≃", [ 8771, 0 ] ), 8586 entity( "⪞", [ 10910, 0 ] ), 8587 entity( "⪠", [ 10912, 0 ] ), 8588 entity( "⪝", [ 10909, 0 ] ), 8589 entity( "⪟", [ 10911, 0 ] ), 8590 entity( "≆", [ 8774, 0 ] ), 8591 entity( "⨤", [ 10788, 0 ] ), 8592 entity( "⥲", [ 10610, 0 ] ), 8593 entity( "←", [ 8592, 0 ] ), 8594 entity( "∖", [ 8726, 0 ] ), 8595 entity( "⨳", [ 10803, 0 ] ), 8596 entity( "⧤", [ 10724, 0 ] ), 8597 entity( "∣", [ 8739, 0 ] ), 8598 entity( "⌣", [ 8995, 0 ] ), 8599 entity( "⪪", [ 10922, 0 ] ), 8600 entity( "⪬", [ 10924, 0 ] ), 8601 entity( "⪬︀", [ 10924, 65024 ] ), 8602 entity( "ь", [ 1100, 0 ] ), 8603 entity( "/", [ 47, 0 ] ), 8604 entity( "⧄", [ 10692, 0 ] ), 8605 entity( "⌿", [ 9023, 0 ] ), 8606 entity( "𝕤", [ 120164, 0 ] ), 8607 entity( "♠", [ 9824, 0 ] ), 8608 entity( "♠", [ 9824, 0 ] ), 8609 entity( "∥", [ 8741, 0 ] ), 8610 entity( "⊓", [ 8851, 0 ] ), 8611 entity( "⊓︀", [ 8851, 65024 ] ), 8612 entity( "⊔", [ 8852, 0 ] ), 8613 entity( "⊔︀", [ 8852, 65024 ] ), 8614 entity( "⊏", [ 8847, 0 ] ), 8615 entity( "⊑", [ 8849, 0 ] ), 8616 entity( "⊏", [ 8847, 0 ] ), 8617 entity( "⊑", [ 8849, 0 ] ), 8618 entity( "⊐", [ 8848, 0 ] ), 8619 entity( "⊒", [ 8850, 0 ] ), 8620 entity( "⊐", [ 8848, 0 ] ), 8621 entity( "⊒", [ 8850, 0 ] ), 8622 entity( "□", [ 9633, 0 ] ), 8623 entity( "□", [ 9633, 0 ] ), 8624 entity( "▪", [ 9642, 0 ] ), 8625 entity( "▪", [ 9642, 0 ] ), 8626 entity( "→", [ 8594, 0 ] ), 8627 entity( "𝓈", [ 120008, 0 ] ), 8628 entity( "∖", [ 8726, 0 ] ), 8629 entity( "⌣", [ 8995, 0 ] ), 8630 entity( "⋆", [ 8902, 0 ] ), 8631 entity( "☆", [ 9734, 0 ] ), 8632 
entity( "★", [ 9733, 0 ] ), 8633 entity( "ϵ", [ 1013, 0 ] ), 8634 entity( "ϕ", [ 981, 0 ] ), 8635 entity( "¯", [ 175, 0 ] ), 8636 entity( "⊂", [ 8834, 0 ] ), 8637 entity( "⫅", [ 10949, 0 ] ), 8638 entity( "⪽", [ 10941, 0 ] ), 8639 entity( "⊆", [ 8838, 0 ] ), 8640 entity( "⫃", [ 10947, 0 ] ), 8641 entity( "⫁", [ 10945, 0 ] ), 8642 entity( "⫋", [ 10955, 0 ] ), 8643 entity( "⊊", [ 8842, 0 ] ), 8644 entity( "⪿", [ 10943, 0 ] ), 8645 entity( "⥹", [ 10617, 0 ] ), 8646 entity( "⊂", [ 8834, 0 ] ), 8647 entity( "⊆", [ 8838, 0 ] ), 8648 entity( "⫅", [ 10949, 0 ] ), 8649 entity( "⊊", [ 8842, 0 ] ), 8650 entity( "⫋", [ 10955, 0 ] ), 8651 entity( "⫇", [ 10951, 0 ] ), 8652 entity( "⫕", [ 10965, 0 ] ), 8653 entity( "⫓", [ 10963, 0 ] ), 8654 entity( "≻", [ 8827, 0 ] ), 8655 entity( "⪸", [ 10936, 0 ] ), 8656 entity( "≽", [ 8829, 0 ] ), 8657 entity( "⪰", [ 10928, 0 ] ), 8658 entity( "⪺", [ 10938, 0 ] ), 8659 entity( "⪶", [ 10934, 0 ] ), 8660 entity( "⋩", [ 8937, 0 ] ), 8661 entity( "≿", [ 8831, 0 ] ), 8662 entity( "∑", [ 8721, 0 ] ), 8663 entity( "♪", [ 9834, 0 ] ), 8664 entity( "¹", [185, 0 ] ), 8665 entity( "¹", [ 185, 0 ] ), 8666 entity( "²", [178, 0 ] ), 8667 entity( "²", [ 178, 0 ] ), 8668 entity( "³", [179, 0 ] ), 8669 entity( "³", [ 179, 0 ] ), 8670 entity( "⊃", [ 8835, 0 ] ), 8671 entity( "⫆", [ 10950, 0 ] ), 8672 entity( "⪾", [ 10942, 0 ] ), 8673 entity( "⫘", [ 10968, 0 ] ), 8674 entity( "⊇", [ 8839, 0 ] ), 8675 entity( "⫄", [ 10948, 0 ] ), 8676 entity( "⟉", [ 10185, 0 ] ), 8677 entity( "⫗", [ 10967, 0 ] ), 8678 entity( "⥻", [ 10619, 0 ] ), 8679 entity( "⫂", [ 10946, 0 ] ), 8680 entity( "⫌", [ 10956, 0 ] ), 8681 entity( "⊋", [ 8843, 0 ] ), 8682 entity( "⫀", [ 10944, 0 ] ), 8683 entity( "⊃", [ 8835, 0 ] ), 8684 entity( "⊇", [ 8839, 0 ] ), 8685 entity( "⫆", [ 10950, 0 ] ), 8686 entity( "⊋", [ 8843, 0 ] ), 8687 entity( "⫌", [ 10956, 0 ] ), 8688 entity( "⫈", [ 10952, 0 ] ), 8689 entity( "⫔", [ 10964, 0 ] ), 8690 entity( "⫖", [ 10966, 0 ] ), 8691 entity( "⇙", [ 8665, 0 ] ), 8692 
entity( "⤦", [ 10534, 0 ] ), 8693 entity( "↙", [ 8601, 0 ] ), 8694 entity( "↙", [ 8601, 0 ] ), 8695 entity( "⤪", [ 10538, 0 ] ), 8696 entity( "ß", [ 223, 0 ] ), 8697 entity( "⌖", [ 8982, 0 ] ), 8698 entity( "τ", [ 964, 0 ] ), 8699 entity( "⎴", [ 9140, 0 ] ), 8700 entity( "ť", [ 357, 0 ] ), 8701 entity( "ţ", [ 355, 0 ] ), 8702 entity( "т", [ 1090, 0 ] ), 8703 entity( "⃛", [ 8411, 0 ] ), 8704 entity( "⌕", [ 8981, 0 ] ), 8705 entity( "𝔱", [ 120113, 0 ] ), 8706 entity( "∴", [ 8756, 0 ] ), 8707 entity( "∴", [ 8756, 0 ] ), 8708 entity( "θ", [ 952, 0 ] ), 8709 entity( "ϑ", [ 977, 0 ] ), 8710 entity( "ϑ", [ 977, 0 ] ), 8711 entity( "≈", [ 8776, 0 ] ), 8712 entity( "∼", [ 8764, 0 ] ), 8713 entity( " ", [ 8201, 0 ] ), 8714 entity( "≈", [ 8776, 0 ] ), 8715 entity( "∼", [ 8764, 0 ] ), 8716 entity( "þ", [ 254, 0 ] ), 8717 entity( "˜", [ 732, 0 ] ), 8718 entity( "×", [ 215, 0 ] ), 8719 entity( "⊠", [ 8864, 0 ] ), 8720 entity( "⨱", [ 10801, 0 ] ), 8721 entity( "⨰", [ 10800, 0 ] ), 8722 entity( "∭", [ 8749, 0 ] ), 8723 entity( "⤨", [ 10536, 0 ] ), 8724 entity( "⊤", [ 8868, 0 ] ), 8725 entity( "⌶", [ 9014, 0 ] ), 8726 entity( "⫱", [ 10993, 0 ] ), 8727 entity( "𝕥", [ 120165, 0 ] ), 8728 entity( "⫚", [ 10970, 0 ] ), 8729 entity( "⤩", [ 10537, 0 ] ), 8730 entity( "‴", [ 8244, 0 ] ), 8731 entity( "™", [ 8482, 0 ] ), 8732 entity( "▵", [ 9653, 0 ] ), 8733 entity( "▿", [ 9663, 0 ] ), 8734 entity( "◃", [ 9667, 0 ] ), 8735 entity( "⊴", [ 8884, 0 ] ), 8736 entity( "≜", [ 8796, 0 ] ), 8737 entity( "▹", [ 9657, 0 ] ), 8738 entity( "⊵", [ 8885, 0 ] ), 8739 entity( "◬", [ 9708, 0 ] ), 8740 entity( "≜", [ 8796, 0 ] ), 8741 entity( "⨺", [ 10810, 0 ] ), 8742 entity( "⨹", [ 10809, 0 ] ), 8743 entity( "⧍", [ 10701, 0 ] ), 8744 entity( "⨻", [ 10811, 0 ] ), 8745 entity( "⏢", [ 9186, 0 ] ), 8746 entity( "𝓉", [ 120009, 0 ] ), 8747 entity( "ц", [ 1094, 0 ] ), 8748 entity( "ћ", [ 1115, 0 ] ), 8749 entity( "ŧ", [ 359, 0 ] ), 8750 entity( "≬", [ 8812, 0 ] ), 8751 entity( "↞", [ 8606, 0 ] ), 8752 entity( "↠", 
[ 8608, 0 ] ), 8753 entity( "⇑", [ 8657, 0 ] ), 8754 entity( "⥣", [ 10595, 0 ] ), 8755 entity( "ú", [ 250, 0 ] ), 8756 entity( "↑", [ 8593, 0 ] ), 8757 entity( "ў", [ 1118, 0 ] ), 8758 entity( "ŭ", [ 365, 0 ] ), 8759 entity( "û", [ 251, 0 ] ), 8760 entity( "у", [ 1091, 0 ] ), 8761 entity( "⇅", [ 8645, 0 ] ), 8762 entity( "ű", [ 369, 0 ] ), 8763 entity( "⥮", [ 10606, 0 ] ), 8764 entity( "⥾", [ 10622, 0 ] ), 8765 entity( "𝔲", [ 120114, 0 ] ), 8766 entity( "ù", [ 249, 0 ] ), 8767 entity( "↿", [ 8639, 0 ] ), 8768 entity( "↾", [ 8638, 0 ] ), 8769 entity( "▀", [ 9600, 0 ] ), 8770 entity( "⌜", [ 8988, 0 ] ), 8771 entity( "⌜", [ 8988, 0 ] ), 8772 entity( "⌏", [ 8975, 0 ] ), 8773 entity( "◸", [ 9720, 0 ] ), 8774 entity( "ū", [ 363, 0 ] ), 8775 entity( "¨", [ 168, 0 ] ), 8776 entity( "ų", [ 371, 0 ] ), 8777 entity( "𝕦", [ 120166, 0 ] ), 8778 entity( "↑", [ 8593, 0 ] ), 8779 entity( "↕", [ 8597, 0 ] ), 8780 entity( "↿", [ 8639, 0 ] ), 8781 entity( "↾", [ 8638, 0 ] ), 8782 entity( "⊎", [ 8846, 0 ] ), 8783 entity( "υ", [ 965, 0 ] ), 8784 entity( "ϒ", [ 978, 0 ] ), 8785 entity( "υ", [ 965, 0 ] ), 8786 entity( "⇈", [ 8648, 0 ] ), 8787 entity( "⌝", [ 8989, 0 ] ), 8788 entity( "⌝", [ 8989, 0 ] ), 8789 entity( "⌎", [ 8974, 0 ] ), 8790 entity( "ů", [ 367, 0 ] ), 8791 entity( "◹", [ 9721, 0 ] ), 8792 entity( "𝓊", [ 120010, 0 ] ), 8793 entity( "⋰", [ 8944, 0 ] ), 8794 entity( "ũ", [ 361, 0 ] ), 8795 entity( "▵", [ 9653, 0 ] ), 8796 entity( "▴", [ 9652, 0 ] ), 8797 entity( "⇈", [ 8648, 0 ] ), 8798 entity( "ü", [ 252, 0 ] ), 8799 entity( "⦧", [ 10663, 0 ] ), 8800 entity( "⇕", [ 8661, 0 ] ), 8801 entity( "⫨", [ 10984, 0 ] ), 8802 entity( "⫩", [ 10985, 0 ] ), 8803 entity( "⊨", [ 8872, 0 ] ), 8804 entity( "⦜", [ 10652, 0 ] ), 8805 entity( "ϵ", [ 1013, 0 ] ), 8806 entity( "ϰ", [ 1008, 0 ] ), 8807 entity( "∅", [ 8709, 0 ] ), 8808 entity( "ϕ", [ 981, 0 ] ), 8809 entity( "ϖ", [ 982, 0 ] ), 8810 entity( "∝", [ 8733, 0 ] ), 8811 entity( "↕", [ 8597, 0 ] ), 8812 entity( "ϱ", [ 1009, 0 ] ), 8813 
entity( "ς", [ 962, 0 ] ), 8814 entity( "⊊︀", [ 8842, 65024 ] ), 8815 entity( "⫋︀", [ 10955, 65024 ] ), 8816 entity( "⊋︀", [ 8843, 65024 ] ), 8817 entity( "⫌︀", [ 10956, 65024 ] ), 8818 entity( "ϑ", [ 977, 0 ] ), 8819 entity( "⊲", [ 8882, 0 ] ), 8820 entity( "⊳", [ 8883, 0 ] ), 8821 entity( "в", [ 1074, 0 ] ), 8822 entity( "⊢", [ 8866, 0 ] ), 8823 entity( "∨", [ 8744, 0 ] ), 8824 entity( "⊻", [ 8891, 0 ] ), 8825 entity( "≚", [ 8794, 0 ] ), 8826 entity( "⋮", [ 8942, 0 ] ), 8827 entity( "|", [ 124, 0 ] ), 8828 entity( "|", [ 124, 0 ] ), 8829 entity( "𝔳", [ 120115, 0 ] ), 8830 entity( "⊲", [ 8882, 0 ] ), 8831 entity( "⊂⃒", [ 8834, 8402 ] ), 8832 entity( "⊃⃒", [ 8835, 8402 ] ), 8833 entity( "𝕧", [ 120167, 0 ] ), 8834 entity( "∝", [ 8733, 0 ] ), 8835 entity( "⊳", [ 8883, 0 ] ), 8836 entity( "𝓋", [ 120011, 0 ] ), 8837 entity( "⫋︀", [ 10955, 65024 ] ), 8838 entity( "⊊︀", [ 8842, 65024 ] ), 8839 entity( "⫌︀", [ 10956, 65024 ] ), 8840 entity( "⊋︀", [ 8843, 65024 ] ), 8841 entity( "⦚", [ 10650, 0 ] ), 8842 entity( "ŵ", [ 373, 0 ] ), 8843 entity( "⩟", [ 10847, 0 ] ), 8844 entity( "∧", [ 8743, 0 ] ), 8845 entity( "≙", [ 8793, 0 ] ), 8846 entity( "℘", [ 8472, 0 ] ), 8847 entity( "𝔴", [ 120116, 0 ] ), 8848 entity( "𝕨", [ 120168, 0 ] ), 8849 entity( "℘", [ 8472, 0 ] ), 8850 entity( "≀", [ 8768, 0 ] ), 8851 entity( "≀", [ 8768, 0 ] ), 8852 entity( "𝓌", [ 120012, 0 ] ), 8853 entity( "⋂", [ 8898, 0 ] ), 8854 entity( "◯", [ 9711, 0 ] ), 8855 entity( "⋃", [ 8899, 0 ] ), 8856 entity( "▽", [ 9661, 0 ] ), 8857 entity( "𝔵", [ 120117, 0 ] ), 8858 entity( "⟺", [ 10234, 0 ] ), 8859 entity( "⟷", [ 10231, 0 ] ), 8860 entity( "ξ", [ 958, 0 ] ), 8861 entity( "⟸", [ 10232, 0 ] ), 8862 entity( "⟵", [ 10229, 0 ] ), 8863 entity( "⟼", [ 10236, 0 ] ), 8864 entity( "⋻", [ 8955, 0 ] ), 8865 entity( "⨀", [ 10752, 0 ] ), 8866 entity( "𝕩", [ 120169, 0 ] ), 8867 entity( "⨁", [ 10753, 0 ] ), 8868 entity( "⨂", [ 10754, 0 ] ), 8869 entity( "⟹", [ 10233, 0 ] ), 8870 entity( "⟶", [ 10230, 0 ] ), 8871 entity( 
"𝓍", [ 120013, 0 ] ), 8872 entity( "⨆", [ 10758, 0 ] ), 8873 entity( "⨄", [ 10756, 0 ] ), 8874 entity( "△", [ 9651, 0 ] ), 8875 entity( "⋁", [ 8897, 0 ] ), 8876 entity( "⋀", [ 8896, 0 ] ), 8877 entity( "ý", [ 253, 0 ] ), 8878 entity( "я", [ 1103, 0 ] ), 8879 entity( "ŷ", [ 375, 0 ] ), 8880 entity( "ы", [ 1099, 0 ] ), 8881 entity( "¥", [ 165, 0 ] ), 8882 entity( "𝔶", [ 120118, 0 ] ), 8883 entity( "ї", [ 1111, 0 ] ), 8884 entity( "𝕪", [ 120170, 0 ] ), 8885 entity( "𝓎", [ 120014, 0 ] ), 8886 entity( "ю", [ 1102, 0 ] ), 8887 entity( "ÿ", [ 255, 0 ] ), 8888 entity( "ź", [ 378, 0 ] ), 8889 entity( "ž", [ 382, 0 ] ), 8890 entity( "з", [ 1079, 0 ] ), 8891 entity( "ż", [ 380, 0 ] ), 8892 entity( "ℨ", [ 8488, 0 ] ), 8893 entity( "ζ", [ 950, 0 ] ), 8894 entity( "𝔷", [ 120119, 0 ] ), 8895 entity( "ж", [ 1078, 0 ] ), 8896 entity( "⇝", [ 8669, 0 ] ), 8897 entity( "𝕫", [ 120171, 0 ] ), 8898 entity( "𝓏", [ 120015, 0 ] ), 8899 entity( "‍", [ 8205, 0 ] ), 8900 entity( "‌", [ 8204, 0 ] ), 8901 ]; 8902 8903 8904 struct entity_key 8905 { 8906 const(char)* name; 8907 size_t name_size; 8908 } 8909 8910 extern(C) int entity_cmp(scope const(void)* p_key, scope const(void)* p_entity) 8911 { 8912 entity_key* key = cast(entity_key*) p_key; 8913 entity* ent = cast(entity*) p_entity; 8914 return strncmp(key.name, ent.name, key.name_size); 8915 } 8916 8917 const(entity)* entity_lookup(const(char)* name, size_t name_size) 8918 { 8919 entity_key key = entity_key(name, name_size); 8920 const(void)* result = bsearch(&key, cast(const(void)*)entity_table.ptr, entity_table.length, entity.sizeof, &entity_cmp); 8921 return cast(const(entity)*)result; 8922 } 8923 8924 // 8925 // HTML RENDERING 8926 // 8927 8928 /* If set, debug output from md_parse() is sent to stderr. 
*/ 8929 enum MD_RENDER_FLAG_DEBUG = 0x0001; 8930 8931 enum MD_RENDER_FLAG_VERBATIM_ENTITIES = 0x0002; 8932 8933 8934 struct MD_RENDER_HTML 8935 { 8936 void function(const(MD_CHAR)*, MD_SIZE, void*) nothrow @nogc process_output; 8937 void* userdata; 8938 uint flags; 8939 int image_nesting_level; 8940 char[256] escape_map; 8941 } 8942 8943 8944 /***************************************** 8945 *** HTML rendering helper functions *** 8946 *****************************************/ 8947 8948 /* 8949 #define ISDIGIT(ch) 8950 #define ISLOWER(ch) 8951 #define ISUPPER(ch) 8952 */ 8953 bool ISALNUM_HTML(CHAR ch) 8954 { 8955 return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9'); 8956 } 8957 8958 void render_text(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size) 8959 { 8960 r.process_output(text, size, r.userdata); 8961 } 8962 8963 void RENDER_LITERAL(MD_RENDER_HTML* r, const(MD_CHAR)* literal) 8964 { 8965 render_text(r, literal, cast(uint) strlen(literal)); 8966 } 8967 8968 /* Some characters need to be escaped in normal HTML text. */ 8969 bool HTML_NEED_ESCAPE(MD_RENDER_HTML* r, CHAR ch) 8970 { 8971 return (r.escape_map[cast(ubyte)(ch)] != 0); 8972 } 8973 8974 void render_html_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size) 8975 { 8976 MD_OFFSET beg = 0; 8977 MD_OFFSET off = 0; 8978 8979 while(1) { 8980 /* Optimization: Use some loop unrolling. 
*/ 8981 while(off + 3 < size && !HTML_NEED_ESCAPE(r, data[off+0]) && !HTML_NEED_ESCAPE(r, data[off+1]) 8982 && !HTML_NEED_ESCAPE(r, data[off+2]) && !HTML_NEED_ESCAPE(r, data[off+3])) 8983 off += 4; 8984 while(off < size && !HTML_NEED_ESCAPE(r, data[off])) 8985 off++; 8986 8987 if(off > beg) 8988 render_text(r, data + beg, off - beg); 8989 8990 if(off < size) { 8991 switch(data[off]) { 8992 case '&': RENDER_LITERAL(r, "&"); break; 8993 case '<': RENDER_LITERAL(r, "<"); break; 8994 case '>': RENDER_LITERAL(r, ">"); break; 8995 case '"': RENDER_LITERAL(r, """); break; 8996 default: break; 8997 } 8998 off++; 8999 } else { 9000 break; 9001 } 9002 beg = off; 9003 } 9004 } 9005 9006 9007 bool URL_NEED_ESCAPE(CHAR ch) 9008 { 9009 return (!ISALNUM_HTML(ch) && strchr("-_.+!*'(),%#@?=;:/,+$", ch) == null); 9010 } 9011 9012 void render_url_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size) 9013 { 9014 static immutable(MD_CHAR)[] hex_chars = "0123456789ABCDEF"; 9015 MD_OFFSET beg = 0; 9016 MD_OFFSET off = 0; 9017 9018 while(1) { 9019 while(off < size && !URL_NEED_ESCAPE(data[off])) 9020 off++; 9021 if(off > beg) 9022 render_text(r, data + beg, off - beg); 9023 9024 if(off < size) { 9025 char[3] hex; 9026 9027 switch(data[off]) { 9028 case '&': RENDER_LITERAL(r, "&"); break; 9029 case '\'': RENDER_LITERAL(r, "'"); break; 9030 default: 9031 hex[0] = '%'; 9032 hex[1] = hex_chars[(cast(uint)data[off] >> 4) & 0xf]; 9033 hex[2] = hex_chars[(cast(uint)data[off] >> 0) & 0xf]; 9034 render_text(r, hex.ptr, 3); 9035 break; 9036 } 9037 off++; 9038 } else { 9039 break; 9040 } 9041 9042 beg = off; 9043 } 9044 } 9045 9046 uint hex_val(char ch) 9047 { 9048 if('0' <= ch && ch <= '9') 9049 return ch - '0'; 9050 if('A' <= ch && ch <= 'Z') 9051 return ch - 'A' + 10; 9052 else 9053 return ch - 'a' + 10; 9054 } 9055 9056 alias appendFunc = nothrow @nogc void function(MD_RENDER_HTML*, const(MD_CHAR)*, MD_SIZE); 9057 9058 void render_utf8_codepoint(MD_RENDER_HTML* r, uint codepoint, 9059 
appendFunc fn_append) 9060 { 9061 static immutable(MD_CHAR)[] utf8_replacement_char = [ 0xef, 0xbf, 0xbd ]; 9062 9063 char[4] utf8; 9064 size_t n; 9065 9066 if(codepoint <= 0x7f) { 9067 n = 1; 9068 utf8[0] = cast(ubyte) codepoint; 9069 } else if(codepoint <= 0x7ff) { 9070 n = 2; 9071 utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); 9072 utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); 9073 } else if(codepoint <= 0xffff) { 9074 n = 3; 9075 utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); 9076 utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); 9077 utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); 9078 } else { 9079 n = 4; 9080 utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); 9081 utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); 9082 utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); 9083 utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); 9084 } 9085 9086 if(0 < codepoint && codepoint <= 0x10ffff) 9087 fn_append(r, utf8.ptr, cast(uint)n); 9088 else 9089 fn_append(r, utf8_replacement_char.ptr, 3); 9090 } 9091 9092 /* Translate entity to its UTF-8 equivalent, or output the verbatim one 9093 * if such entity is unknown (or if the translation is disabled). */ 9094 void render_entity(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size, 9095 appendFunc fn_append) 9096 { 9097 if(r.flags & MD_RENDER_FLAG_VERBATIM_ENTITIES) { 9098 fn_append(r, text, size); 9099 return; 9100 } 9101 9102 /* We assume UTF-8 output is what is desired. */ 9103 if(size > 3 && text[1] == '#') { 9104 uint codepoint = 0; 9105 9106 if(text[2] == 'x' || text[2] == 'X') { 9107 /* Hexadecimal entity (e.g. "�")). */ 9108 MD_SIZE i; 9109 for(i = 3; i < size-1; i++) 9110 codepoint = 16 * codepoint + hex_val(text[i]); 9111 } else { 9112 /* Decimal entity (e.g. "&1234;") */ 9113 MD_SIZE i; 9114 for(i = 2; i < size-1; i++) 9115 codepoint = 10 * codepoint + (text[i] - '0'); 9116 } 9117 9118 render_utf8_codepoint(r, codepoint, fn_append); 9119 return; 9120 } else { 9121 /* Named entity (e.g. " "). 
*/ 9122 const(entity)* ent; 9123 9124 ent = entity_lookup(text, size); 9125 if(ent != null) { 9126 render_utf8_codepoint(r, ent.codepoints[0], fn_append); 9127 if(ent.codepoints[1]) 9128 render_utf8_codepoint(r, ent.codepoints[1], fn_append); 9129 return; 9130 } 9131 } 9132 9133 fn_append(r, text, size); 9134 } 9135 9136 void render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr, 9137 appendFunc fn_append) 9138 { 9139 int i; 9140 9141 for(i = 0; attr.substr_offsets[i] < attr.size; i++) { 9142 MD_TEXTTYPE type = attr.substr_types[i]; 9143 MD_OFFSET off = attr.substr_offsets[i]; 9144 MD_SIZE size = attr.substr_offsets[i+1] - off; 9145 const MD_CHAR* text = attr.text + off; 9146 9147 switch(type) { 9148 case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, &render_text); break; 9149 case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break; 9150 default: fn_append(r, text, size); break; 9151 } 9152 } 9153 } 9154 9155 9156 void render_open_ol_block(MD_RENDER_HTML* r, const(MD_BLOCK_OL_DETAIL)* det) 9157 { 9158 char[64] buf; 9159 9160 if(det.start == 1) { 9161 RENDER_LITERAL(r, "<ol>\n"); 9162 return; 9163 } 9164 9165 snprintf(buf.ptr, buf.length, "<ol start=\"%u\">\n", det.start); 9166 RENDER_LITERAL(r, buf.ptr); 9167 } 9168 9169 void render_open_li_block(MD_RENDER_HTML* r, const(MD_BLOCK_LI_DETAIL)* det) 9170 { 9171 if(det.is_task) { 9172 RENDER_LITERAL(r, "<li class=\"task-list-item\">" ~ 9173 "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled"); 9174 if(det.task_mark == 'x' || det.task_mark == 'X') 9175 RENDER_LITERAL(r, " checked"); 9176 RENDER_LITERAL(r, ">"); 9177 } else { 9178 RENDER_LITERAL(r, "<li>"); 9179 } 9180 } 9181 9182 void render_open_code_block(MD_RENDER_HTML* r, const(MD_BLOCK_CODE_DETAIL)* det) 9183 { 9184 RENDER_LITERAL(r, "<pre><code"); 9185 9186 /* If known, output the HTML 5 attribute class="language-LANGNAME". 
*/ 9187 if(det.lang.text != null) { 9188 RENDER_LITERAL(r, " class=\"language-"); 9189 render_attribute(r, &det.lang, &render_html_escaped); 9190 RENDER_LITERAL(r, "\""); 9191 } 9192 9193 RENDER_LITERAL(r, ">"); 9194 } 9195 9196 void render_open_td_block(MD_RENDER_HTML* r, const(MD_CHAR)* cell_type, const(MD_BLOCK_TD_DETAIL)* det) 9197 { 9198 RENDER_LITERAL(r, "<"); 9199 RENDER_LITERAL(r, cell_type); 9200 9201 switch(det.align_) 9202 { 9203 case MD_ALIGN_LEFT: RENDER_LITERAL(r, " align=\"left\">"); break; 9204 case MD_ALIGN_CENTER: RENDER_LITERAL(r, " align=\"center\">"); break; 9205 case MD_ALIGN_RIGHT: RENDER_LITERAL(r, " align=\"right\">"); break; 9206 default: RENDER_LITERAL(r, ">"); break; 9207 } 9208 } 9209 9210 void render_open_a_span(MD_RENDER_HTML* r, const(MD_SPAN_A_DETAIL)* det) 9211 { 9212 RENDER_LITERAL(r, "<a href=\""); 9213 render_attribute(r, &det.href, &render_url_escaped); 9214 9215 if(det.title.text != null) { 9216 RENDER_LITERAL(r, "\" title=\""); 9217 render_attribute(r, &det.title, &render_html_escaped); 9218 } 9219 9220 RENDER_LITERAL(r, "\">"); 9221 } 9222 9223 void render_open_img_span(MD_RENDER_HTML* r, const(MD_SPAN_IMG_DETAIL)* det) 9224 { 9225 RENDER_LITERAL(r, "<img src=\""); 9226 render_attribute(r, &det.src, &render_url_escaped); 9227 9228 RENDER_LITERAL(r, "\" alt=\""); 9229 9230 r.image_nesting_level++; 9231 } 9232 9233 void render_close_img_span(MD_RENDER_HTML* r, const(MD_SPAN_IMG_DETAIL)* det) 9234 { 9235 if(det.title.text != null) { 9236 RENDER_LITERAL(r, "\" title=\""); 9237 render_attribute(r, &det.title, &render_html_escaped); 9238 } 9239 RENDER_LITERAL(r, "\" />"); 9240 r.image_nesting_level--; 9241 } 9242 9243 9244 /************************************** 9245 *** HTML renderer implementation *** 9246 **************************************/ 9247 9248 int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) 9249 { 9250 static immutable(MD_CHAR)*[6] head = [ "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" ]; 
MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    switch(type)
    {
        case MD_BLOCK_DOC:      /* noop */ break;
        case MD_BLOCK_QUOTE:    RENDER_LITERAL(r, "<blockquote>\n"); break;
        case MD_BLOCK_UL:       RENDER_LITERAL(r, "<ul>\n"); break;
        case MD_BLOCK_OL:       render_open_ol_block(r, cast(const(MD_BLOCK_OL_DETAIL)*)detail); break;
        case MD_BLOCK_LI:       render_open_li_block(r, cast(const(MD_BLOCK_LI_DETAIL)*)detail); break;
        case MD_BLOCK_HR:       RENDER_LITERAL(r, "<hr />\n"); break;
        case MD_BLOCK_H:        RENDER_LITERAL(r, head[(cast(MD_BLOCK_H_DETAIL*)detail).level - 1]); break;
        case MD_BLOCK_CODE:     render_open_code_block(r, cast(const(MD_BLOCK_CODE_DETAIL)*) detail); break;
        case MD_BLOCK_HTML:     /* noop */ break;
        case MD_BLOCK_P:        RENDER_LITERAL(r, "<p>"); break;
        case MD_BLOCK_TABLE:    RENDER_LITERAL(r, "<table>\n"); break;
        case MD_BLOCK_THEAD:    RENDER_LITERAL(r, "<thead>\n"); break;
        case MD_BLOCK_TBODY:    RENDER_LITERAL(r, "<tbody>\n"); break;
        case MD_BLOCK_TR:       RENDER_LITERAL(r, "<tr>\n"); break;
        case MD_BLOCK_TH:       render_open_td_block(r, "th", cast(MD_BLOCK_TD_DETAIL*)detail); break;
        case MD_BLOCK_TD:       render_open_td_block(r, "td", cast(MD_BLOCK_TD_DETAIL*)detail); break;
        default:                assert(false);
    }

    return 0;
}

/* Block-leave callback of the HTML renderer: emits the closing tag of the
 * block which is being left.
 *
 * Param type is the kind of the block.
 * Param detail is only read for MD_BLOCK_H, where it is treated as
 * MD_BLOCK_H_DETAIL and its level selects the closing heading tag.
 * Param userdata has to point to the MD_RENDER_HTML of this rendering run.
 *
 * Always returns 0. Unknown block types hit assert(false).
 */
int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
    /* Closing heading tags, indexed by (level - 1).
     * NOTE(review): assumes the parser only reports levels 1 to 6. */
    static immutable(MD_CHAR)*[6] head = [ "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" ];
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    switch(type) {
        case MD_BLOCK_DOC:      /*noop*/ break;
        case MD_BLOCK_QUOTE:    RENDER_LITERAL(r, "</blockquote>\n"); break;
        case MD_BLOCK_UL:       RENDER_LITERAL(r, "</ul>\n"); break;
        case MD_BLOCK_OL:       RENDER_LITERAL(r, "</ol>\n"); break;
        case MD_BLOCK_LI:       RENDER_LITERAL(r, "</li>\n"); break;
        case MD_BLOCK_HR:       /*noop*/ break;
        case MD_BLOCK_H:        RENDER_LITERAL(r, head[(cast(MD_BLOCK_H_DETAIL*)detail).level - 1]); break;
        case MD_BLOCK_CODE:     RENDER_LITERAL(r, "</code></pre>\n"); break;
        case MD_BLOCK_HTML:     /* noop */ break;
        case MD_BLOCK_P:        RENDER_LITERAL(r, "</p>\n"); break;
        case MD_BLOCK_TABLE:    RENDER_LITERAL(r, "</table>\n"); break;
        case MD_BLOCK_THEAD:    RENDER_LITERAL(r, "</thead>\n"); break;
        case MD_BLOCK_TBODY:    RENDER_LITERAL(r, "</tbody>\n"); break;
        case MD_BLOCK_TR:       RENDER_LITERAL(r, "</tr>\n"); break;
        case MD_BLOCK_TH:       RENDER_LITERAL(r, "</th>\n"); break;
        case MD_BLOCK_TD:       RENDER_LITERAL(r, "</td>\n"); break;
        default:                assert(false);
    }

    return 0;
}

/* Span-enter callback of the HTML renderer: emits the opening tag of the
 * span which is being entered.
 *
 * Param type is the kind of the span.
 * Param detail is only read for MD_SPAN_A and MD_SPAN_IMG, where it is
 * handed over to render_open_a_span() or render_open_img_span().
 * Param userdata has to point to the MD_RENDER_HTML of this rendering run.
 *
 * While r.image_nesting_level is above zero no tag is written at all; only
 * the nesting of images is tracked.
 *
 * Always returns 0. Unknown span types hit assert(false).
 */
int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    if(r.image_nesting_level > 0) {
        /* We are inside an image, i.e. rendering the ALT attribute of
         * <IMG> tag. No tag may be written here, but an image nested in the
         * label still has to be counted. Otherwise leave_span_callback()
         * cannot tell the end of the nested image from the end of the
         * outermost one.
         *
         * NOTE(review): relies on render_open_img_span() raising
         * image_nesting_level from zero and on render_close_img_span()
         * lowering it back; both live outside of this block -- confirm. */
        if(type == MD_SPAN_IMG)
            r.image_nesting_level++;
        return 0;
    }

    switch(type) {
        case MD_SPAN_EM:                RENDER_LITERAL(r, "<em>"); break;
        case MD_SPAN_STRONG:            RENDER_LITERAL(r, "<strong>"); break;
        case MD_SPAN_A:                 render_open_a_span(r, cast(MD_SPAN_A_DETAIL*) detail); break;
        case MD_SPAN_IMG:               render_open_img_span(r, cast(MD_SPAN_IMG_DETAIL*) detail); break;
        case MD_SPAN_CODE:              RENDER_LITERAL(r, "<code>"); break;
        case MD_SPAN_DEL:               RENDER_LITERAL(r, "<del>"); break;
        case MD_SPAN_LATEXMATH:         RENDER_LITERAL(r, "<equation>"); break;
        case MD_SPAN_LATEXMATH_DISPLAY: RENDER_LITERAL(r, "<equation type=\"display\">"); break;
        default:                        assert(false);
    }

    return 0;
}

/* Span-leave callback of the HTML renderer: emits the closing tag of the
 * span which is being left.
 *
 * Param type is the kind of the span.
 * Param detail is only read for MD_SPAN_IMG, where it is handed over to
 * render_close_img_span().
 * Param userdata has to point to the MD_RENDER_HTML of this rendering run.
 *
 * Always returns 0. Unknown span types hit assert(false).
 */
int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    if(r.image_nesting_level > 0) {
        /* We are inside an image, i.e. rendering the ALT attribute of
         * <IMG> tag. Only the end of the outermost image (level 1) closes
         * the tag. The end of a nested image merely undoes the increment
         * done in enter_span_callback(), so the rest of the outer label
         * still lands in the ALT attribute and the tag is closed with the
         * detail of the outer image. */
        if(type == MD_SPAN_IMG) {
            if(r.image_nesting_level == 1)
                render_close_img_span(r, cast(MD_SPAN_IMG_DETAIL*) detail);
            else
                r.image_nesting_level--;
        }
        return 0;
    }

    switch(type) {
        case MD_SPAN_EM:                RENDER_LITERAL(r, "</em>"); break;
        case MD_SPAN_STRONG:            RENDER_LITERAL(r, "</strong>"); break;
        case MD_SPAN_A:                 RENDER_LITERAL(r, "</a>"); break;
        case MD_SPAN_IMG:               /*noop, handled above*/ break;
        case MD_SPAN_CODE:              RENDER_LITERAL(r, "</code>"); break;
        case MD_SPAN_DEL:               RENDER_LITERAL(r, "</del>"); break;
        case MD_SPAN_LATEXMATH:         /*fall through*/
        case MD_SPAN_LATEXMATH_DISPLAY: RENDER_LITERAL(r, "</equation>"); break;
        default:                        assert(false);
    }

    return 0;
}

/* Text callback of the HTML renderer: writes one piece of text.
 *
 * Param type selects how the text is written:
 *  - MD_TEXT_NULLCHAR: codepoint 0x0000 is passed to render_utf8_codepoint().
 *  - MD_TEXT_BR: "<br />\n", or a single space while inside an image.
 *  - MD_TEXT_SOFTBR: "\n", or a single space while inside an image.
 *  - MD_TEXT_HTML: the text is written verbatim via render_text().
 *  - MD_TEXT_ENTITY: the text is passed to render_entity().
 *  - anything else: the text is written via render_html_escaped().
 * Params text and size describe the text; they are not used for
 * MD_TEXT_NULLCHAR, MD_TEXT_BR and MD_TEXT_SOFTBR.
 * Param userdata has to point to the MD_RENDER_HTML of this rendering run.
 *
 * Always returns 0.
 */
int text_callback(MD_TEXTTYPE type, const(MD_CHAR)* text, MD_SIZE size, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;

    switch(type) {
        case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, &render_text); break;
        case MD_TEXT_BR:        RENDER_LITERAL(r, (r.image_nesting_level == 0 ? "<br />\n" : " ")); break;
        case MD_TEXT_SOFTBR:    RENDER_LITERAL(r, (r.image_nesting_level == 0 ? "\n" : " ")); break;
        case MD_TEXT_HTML:      render_text(r, text, size); break;
        case MD_TEXT_ENTITY:    render_entity(r, text, size, &render_html_escaped); break;
        default:                render_html_escaped(r, text, size); break;
    }

    return 0;
}

/* Debug-log callback of the HTML renderer: prints msg to stderr, prefixed
 * with "MD4C: ", but only if MD_RENDER_FLAG_DEBUG is set in the flags of the
 * MD_RENDER_HTML which userdata points to. Otherwise the message is dropped.
 */
void debug_log_callback(const(char)* msg, void* userdata)
{
    MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata;
    if(r.flags & MD_RENDER_FLAG_DEBUG)
        fprintf(stderr, "MD4C: %s\n", msg);
}


/* Render Markdown into HTML.
 *
 * Note that only the contents of the <body> tag are generated. The caller
 * must generate the HTML header/footer manually before/after calling
 * md_render_html().
 *
 * Params input and input_size specify the Markdown input.
 * Callback process_output() gets called with chunks of HTML output.
 * (A typical implementation may just output the bytes to a file or append
 * them to some buffer.)
 * Param userdata is just propagated back to the process_output() callback.
 * Param parser_flags are the parser flags propagated to md_parse().
 * Param renderer_flags is a bitmask of MD_RENDER_FLAG_xxxx.
 *
 * The return value is the one of md_parse():
 * Returns -1 on error (if md_parse() fails.)
 * Returns 0 on success.
 */
int md_render_html(const(MD_CHAR)* input, MD_SIZE input_size,
                   void function(const(MD_CHAR)*, MD_SIZE, void*) nothrow @nogc process_output,
                   void* userdata, uint parser_flags, uint renderer_flags)
{
    /* The last initializer starts the renderer with image_nesting_level-style
     * state at zero (NOTE(review): field order of MD_RENDER_HTML is declared
     * elsewhere -- confirm). */
    MD_RENDER_HTML render = MD_RENDER_HTML(process_output, userdata, renderer_flags, 0);

    /* Build the escape map: clear every entry, then mark the four
     * characters flagged here. */
    render.escape_map[] = '\x00';
    render.escape_map['"'] = 1;
    render.escape_map['&'] = 1;
    render.escape_map['<'] = 1;
    render.escape_map['>'] = 1;

    /* Wire the renderer callbacks into the parser.
     * NOTE(review): the leading 0 and the trailing null fill fields of
     * MD_PARSER which are declared elsewhere -- confirm their meaning. */
    MD_PARSER parser = MD_PARSER(
        0,
        parser_flags,
        &enter_block_callback,
        &leave_block_callback,
        &enter_span_callback,
        &leave_span_callback,
        &text_callback,
        &debug_log_callback,
        null
    );

    return md_parse(input, input_size, &parser, cast(void*) &render);
}