1 // FIXME: xml namespace support??? 2 // FIXME: https://developer.mozilla.org/en-US/docs/Web/API/Element/insertAdjacentHTML 3 // FIXME: parentElement is parentNode that skips DocumentFragment etc but will be hard to work in with my compatibility... 4 5 // FIXME: the scriptable list is quite arbitrary 6 7 8 // xml entity references?! 9 10 /++ 11 This is an html DOM implementation, started with cloning 12 what the browser offers in Javascript, but going well beyond 13 it in convenience. 14 15 If you can do it in Javascript, you can probably do it with 16 this module, and much more. 17 18 --- 19 import arsd.dom; 20 21 void main() { 22 auto document = new Document("<html><p>paragraph</p></html>"); 23 writeln(document.querySelector("p")); 24 document.root.innerHTML = "<p>hey</p>"; 25 writeln(document); 26 } 27 --- 28 29 BTW: this file optionally depends on `arsd.characterencodings`, to 30 help it correctly read files from the internet. You should be able to 31 get characterencodings.d from the same place you got this file. 32 33 If you want it to stand alone, just always use the `Document.parseUtf8` 34 function or the constructor that takes a string. 35 36 Symbol_groups: 37 38 core_functionality = 39 40 These members provide core functionality. The members on these classes 41 will provide most your direct interaction. 42 43 bonus_functionality = 44 45 These provide additional functionality for special use cases. 46 47 implementations = 48 49 These provide implementations of other functionality. 50 +/ 51 module arsd.dom; 52 53 static import arsd.core; 54 import arsd.core : encodeUriComponent, decodeUriComponent; 55 56 // FIXME: support the css standard namespace thing in the selectors too 57 58 version(with_arsd_jsvar) 59 import arsd.jsvar; 60 else { 61 enum scriptable = "arsd_jsvar_compatible"; 62 } 63 64 // this is only meant to be used at compile time, as a filter for opDispatch 65 // lists the attributes we want to allow without the use of .attr 66 bool isConvenientAttribute(string name) { 67 static immutable list = [ 68 "name", "id", "href", "value", 69 "checked", "selected", "type", 70 "src", "content", "pattern", 71 "placeholder", "required", "alt", 72 "rel", 73 "method", "action", "enctype" 74 ]; 75 foreach(l; list) 76 if(name == l) return true; 77 return false; 78 } 79 80 81 // FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode 82 // FIXME: failing to close a paragraph sometimes messes things up too 83 84 // FIXME: it would be kinda cool to have some support for internal DTDs 85 // and maybe XPath as well, to some extent 86 /* 87 we could do 88 meh this sux 89 90 auto xpath = XPath(element); 91 92 // get the first p 93 xpath.p[0].a["href"] 94 */ 95 96 97 /++ 98 The main document interface, including a html or xml parser. 99 100 There's three main ways to create a Document: 101 102 If you want to parse something and inspect the tags, you can use the [this|constructor]: 103 --- 104 // create and parse some HTML in one call 105 auto document = new Document("<html></html>"); 106 107 // or some XML 108 auto document = new Document("<xml></xml>", true, true); // strict mode enabled 109 110 // or better yet: 111 auto document = new XmlDocument("<xml></xml>"); // specialized subclass 112 --- 113 114 If you want to download something and parse it in one call, the [fromUrl] static function can help: 115 --- 116 auto document = Document.fromUrl("http://dlang.org/"); 117 --- 118 (note that this requires my [arsd.characterencodings] and [arsd.http2] libraries) 119 120 And, if you need to inspect things like `<%= foo %>` tags and comments, you can add them to the dom like this, with the [enableAddingSpecialTagsToDom] 121 and [parseUtf8] or [parseGarbage] functions: 122 --- 123 auto document = new Document(); 124 document.enableAddingSpecialTagsToDom(); 125 document.parseUtf8("<example></example>", true, true); // changes the trues to false to switch from xml to html mode 126 --- 127 128 You can also modify things like [selfClosedElements] and [rawSourceElements] before calling the `parse` family of functions to do further advanced tasks. 129 130 However you parse it, it will put a few things into special variables. 131 132 [root] contains the root document. 133 [prolog] contains the instructions before the root (like `<!DOCTYPE html>`). To keep the original things, you will need to [enableAddingSpecialTagsToDom] first, otherwise the library will return generic strings in there. [piecesBeforeRoot] will have other parsed instructions, if [enableAddingSpecialTagsToDom] is called. 134 [piecesAfterRoot] will contain any xml-looking data after the root tag is closed. 135 136 Most often though, you will not need to look at any of that data, since `Document` itself has methods like [querySelector], [appendChild], and more which will forward to the root [Element] for you. 137 +/ 138 /// Group: core_functionality 139 class Document : FileResource, DomParent { 140 inout(Document) asDocument() inout { return this; } 141 inout(Element) asElement() inout { return null; } 142 143 /++ 144 These three functions, `processTagOpen`, `processTagClose`, and `processNodeWhileParsing`, allow you to process elements as they are parsed and choose to not append them to the dom tree. 145 146 147 `processTagOpen` is called as soon as it reads the tag name and attributes into the passed `Element` structure, in order 148 of appearance in the file. `processTagClose` is called similarly, when that tag has been closed. In between, all descendant 149 nodes - including tags as well as text and other nodes - are passed to `processNodeWhileParsing`. Finally, after `processTagClose`, 150 the node itself is passed to `processNodeWhileParsing` only after its children. 151 152 So, given: 153 154 ```xml 155 <thing> 156 <child> 157 <grandchild></grandchild> 158 </child> 159 </thing> 160 ``` 161 162 It would call: 163 164 $(NUMBERED_LIST 165 * processTagOpen(thing) 166 * processNodeWhileParsing(thing, whitespace text) // the newlines, spaces, and tabs between the thing tag and child tag 167 * processTagOpen(child) 168 * processNodeWhileParsing(child, whitespace text) 169 * processTagOpen(grandchild) 170 * processTagClose(grandchild) 171 * processNodeWhileParsing(child, grandchild) 172 * processNodeWhileParsing(child, whitespace text) // whitespace after the grandchild 173 * processTagClose(child) 174 * processNodeWhileParsing(thing, child) 175 * processNodeWhileParsing(thing, whitespace text) 176 * processTagClose(thing) 177 ) 178 179 The Element objects passed to those functions are the same ones you'd see; the tag open and tag close calls receive the same 180 object, so you can compare them with the `is` operator if you want. 181 182 The default behavior of each function is that `processTagOpen` and `processTagClose` do nothing. 183 `processNodeWhileParsing`'s default behavior is to call `parent.appendChild(child)`, in order to 184 build the dom tree. If you do not want the dom tree, you can do override this function to do nothing. 185 186 If you do not choose to append child to parent in `processNodeWhileParsing`, the garbage collector is free to clean up 187 the node even as the document is not finished parsing, allowing memory use to stay lower. Memory use will tend to scale 188 approximately with the max depth in the element tree rather the entire document size. 189 190 To cancel processing before the end of a document, you'll have to throw an exception and catch it at your call to parse. 191 There is no other way to stop early and there are no concrete plans to add one. 192 193 There are several approaches to use this: you might might use `processTagOpen` and `processTagClose` to keep a stack or 194 other state variables to process nodes as they come and never add them to the actual tree. You might also build partial 195 subtrees to use all the convenient methods in `processTagClose`, but then not add that particular node to the rest of the 196 tree to keep memory usage down. 197 198 Examples: 199 200 Suppose you have a large array of items under the root element you'd like to process individually, without 201 taking all the items into memory at once. You can do that with code like this: 202 --- 203 import arsd.dom; 204 class MyStream : XmlDocument { 205 this(string s) { super(s); } // need to forward the constructor we use 206 207 override void processNodeWhileParsing(Element parent, Element child) { 208 // don't append anything to the root node, since we don't need them 209 // all in the tree - that'd take too much memory - 210 // but still build any subtree for each individual item for ease of processing 211 if(parent is root) 212 return; 213 else 214 super.processNodeWhileParsing(parent, child); 215 } 216 217 int count; 218 override void processTagClose(Element element) { 219 if(element.tagName == "item") { 220 // process the element here with all the regular dom functions on `element` 221 count++; 222 // can still use dom functions on the subtree we built 223 assert(element.requireSelector("name").textContent == "sample"); 224 } 225 } 226 } 227 228 void main() { 229 // generate an example file with a million items 230 string xml = "<list>"; 231 foreach(i; 0 .. 1_000_000) { 232 xml ~= "<item><name>sample</name><type>example</type></item>"; 233 } 234 xml ~= "</list>"; 235 236 auto document = new MyStream(xml); 237 assert(document.count == 1_000_000); 238 } 239 --- 240 241 This example runs in about 1/10th of the memory and 2/3 of the time on my computer relative to a default [XmlDocument] full tree dom. 242 243 By overriding these three functions to fit the specific document and processing requirements you have, you might realize even bigger 244 gains over the normal full document tree while still getting most the benefits of the convenient dom functions. 245 246 Tip: if you use a [Utf8Stream] instead of a string, you might be able to bring the memory use further down. The easiest way to do that 247 is something like this when loading from a file: 248 249 --- 250 import std.stdio; 251 auto file = File("filename.xml", "rb"); 252 auto textStream = new Utf8Stream(() { 253 // get more 254 auto buffer = new char[](32 * 1024); 255 return cast(string) file.rawRead(buffer); 256 }, () { 257 // has more 258 return !file.eof; 259 }); 260 261 auto document = new XmlDocument(textStream); 262 --- 263 264 You'll need to forward a constructor in your subclasses that takes `Utf8Stream` too if you want to subclass to override the streaming parsing functions. 265 266 Note that if you do save parts of the document strings or objects, it might prevent the GC from freeing that string block anyway, since dom.d will often slice into its buffer while parsing instead of copying strings. It will depend on your specific case to know if this actually saves memory or not for you. 267 268 Bugs: 269 Even if you use a [Utf8Stream] to feed data and decline to append to the tree, the entire xml text is likely to 270 end up in memory anyway. 271 272 See_Also: 273 [Document#examples]'s high level streaming example. 274 275 History: 276 `processNodeWhileParsing` was added January 6, 2023. 277 278 `processTagOpen` and `processTagClose` were added February 21, 2025. 279 +/ 280 void processTagOpen(Element what) { 281 } 282 283 /// ditto 284 void processTagClose(Element what) { 285 } 286 287 /// ditto 288 void processNodeWhileParsing(Element parent, Element child) { 289 parent.appendChild(child); 290 } 291 292 /++ 293 Convenience method for web scraping. Requires [arsd.http2] to be 294 included in the build as well as [arsd.characterencodings]. 295 296 This will download the file from the given url and create a document 297 off it, using a strict constructor or a [parseGarbage], depending on 298 the value of `strictMode`. 299 +/ 300 static Document fromUrl()(string url, bool strictMode = false) { 301 import arsd.http2; 302 auto client = new HttpClient(); 303 304 auto req = client.navigateTo(Uri(url), HttpVerb.GET); 305 auto res = req.waitForCompletion(); 306 307 auto document = new Document(); 308 if(strictMode) { 309 document.parse(cast(string) res.content, true, true, res.contentTypeCharset); 310 } else { 311 document.parseGarbage(cast(string) res.content); 312 } 313 314 return document; 315 } 316 317 /++ 318 Creates a document with the given source data. If you want HTML behavior, use `caseSensitive` and `struct` set to `false`. For XML mode, set them to `true`. 319 320 Please note that anything after the root element will be found in [piecesAfterRoot]. Comments, processing instructions, and other special tags will be stripped out b default. You can customize this by using the zero-argument constructor and setting callbacks on the [parseSawComment], [parseSawBangInstruction], [parseSawAspCode], [parseSawPhpCode], and [parseSawQuestionInstruction] members, then calling one of the [parseUtf8], [parseGarbage], or [parse] functions. Calling the convenience method, [enableAddingSpecialTagsToDom], will enable all those things at once. 321 322 See_Also: 323 [parseGarbage] 324 [parseUtf8] 325 [parseUrl] 326 +/ 327 this(string data, bool caseSensitive = false, bool strict = false) { 328 parseUtf8(data, caseSensitive, strict); 329 } 330 331 /** 332 Creates an empty document. It has *nothing* in it at all, ready. 333 */ 334 this() { 335 336 } 337 338 /++ 339 This is just something I'm toying with. Right now, you use opIndex to put in css selectors. 340 It returns a struct that forwards calls to all elements it holds, and returns itself so you 341 can chain it. 342 343 Example: document["p"].innerText("hello").addClass("modified"); 344 345 Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClas("modified"); } 346 347 Note: always use function calls (not property syntax) and don't use toString in there for best results. 348 349 You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe 350 you could put in some kind of custom filter function tho. 351 +/ 352 ElementCollection opIndex(string selector) { 353 auto e = ElementCollection(this.root); 354 return e[selector]; 355 } 356 357 string _contentType = "text/html; charset=utf-8"; 358 359 /// If you're using this for some other kind of XML, you can 360 /// set the content type here. 361 /// 362 /// Note: this has no impact on the function of this class. 363 /// It is only used if the document is sent via a protocol like HTTP. 364 /// 365 /// This may be called by parse() if it recognizes the data. Otherwise, 366 /// if you don't set it, it assumes text/html; charset=utf-8. 367 @property string contentType(string mimeType) { 368 _contentType = mimeType; 369 return _contentType; 370 } 371 372 /// implementing the FileResource interface, useful for sending via 373 /// http automatically. 374 @property string filename() const { return null; } 375 376 /// implementing the FileResource interface, useful for sending via 377 /// http automatically. 378 override @property string contentType() const { 379 return _contentType; 380 } 381 382 /// implementing the FileResource interface; it calls toString. 383 override immutable(ubyte)[] getData() const { 384 return cast(immutable(ubyte)[]) this.toString(); 385 } 386 387 388 /* 389 /// Concatenates any consecutive text nodes 390 void normalize() { 391 392 } 393 */ 394 395 /// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them. 396 /// Call this before calling parse(). 397 398 /++ 399 Adds objects to the dom representing things normally stripped out during the default parse, like comments, `<!instructions>`, `<% code%>`, and `<? code?>` all at once. 400 401 Note this will also preserve the prolog and doctype from the original file, if there was one. 402 403 See_Also: 404 [parseSawComment] 405 [parseSawAspCode] 406 [parseSawPhpCode] 407 [parseSawQuestionInstruction] 408 [parseSawBangInstruction] 409 +/ 410 void enableAddingSpecialTagsToDom() { 411 parseSawComment = (string) => true; 412 parseSawAspCode = (string) => true; 413 parseSawPhpCode = (string) => true; 414 parseSawQuestionInstruction = (string) => true; 415 parseSawBangInstruction = (string) => true; 416 } 417 418 /// If the parser sees a html comment, it will call this callback 419 /// <!-- comment --> will call parseSawComment(" comment ") 420 /// Return true if you want the node appended to the document. It will be in a [HtmlComment] object. 421 bool delegate(string) parseSawComment; 422 423 /// If the parser sees <% asp code... %>, it will call this callback. 424 /// It will be passed "% asp code... %" or "%= asp code .. %" 425 /// Return true if you want the node appended to the document. It will be in an [AspCode] object. 426 bool delegate(string) parseSawAspCode; 427 428 /// If the parser sees <?php php code... ?>, it will call this callback. 429 /// It will be passed "?php php code... ?" or "?= asp code .. ?" 430 /// Note: dom.d cannot identify the other php <? code ?> short format. 431 /// Return true if you want the node appended to the document. It will be in a [PhpCode] object. 432 bool delegate(string) parseSawPhpCode; 433 434 /// if it sees a <?xxx> that is not php or asp 435 /// it calls this function with the contents. 436 /// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo") 437 /// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>. 438 /// Return true if you want the node appended to the document. It will be in a [QuestionInstruction] object. 439 bool delegate(string) parseSawQuestionInstruction; 440 441 /// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment), 442 /// it calls this function with the contents. 443 /// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo") 444 /// Return true if you want the node appended to the document. It will be in a [BangInstruction] object. 445 bool delegate(string) parseSawBangInstruction; 446 447 /// Given the kind of garbage you find on the Internet, try to make sense of it. 448 /// Equivalent to document.parse(data, false, false, null); 449 /// (Case-insensitive, non-strict, determine character encoding from the data.) 450 451 /// NOTE: this makes no attempt at added security, but it will try to recover from anything instead of throwing. 452 /// 453 /// It is a template so it lazily imports characterencodings. 454 void parseGarbage()(string data) { 455 parse(data, false, false, null); 456 } 457 458 /// Parses well-formed UTF-8, case-sensitive, XML or XHTML 459 /// Will throw exceptions on things like unclosed tags. 460 void parseStrict(string data, bool pureXmlMode = false) { 461 parseStream(toUtf8Stream(data), true, true, pureXmlMode); 462 } 463 464 /// Parses well-formed UTF-8 in loose mode (by default). Tries to correct 465 /// tag soup, but does NOT try to correct bad character encodings. 466 /// 467 /// They will still throw an exception. 468 void parseUtf8(string data, bool caseSensitive = false, bool strict = false) { 469 parseStream(toUtf8Stream(data), caseSensitive, strict); 470 } 471 472 // this is a template so we get lazy import behavior 473 Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) { 474 import arsd.characterencodings; 475 // gotta determine the data encoding. If you know it, pass it in above to skip all this. 476 if(dataEncoding is null) { 477 dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata); 478 // it can't tell... probably a random 8 bit encoding. Let's check the document itself. 479 // Now, XML and HTML can both list encoding in the document, but we can't really parse 480 // it here without changing a lot of code until we know the encoding. So I'm going to 481 // do some hackish string checking. 482 if(dataEncoding is null) { 483 auto dataAsBytes = cast(immutable(ubyte)[]) rawdata; 484 // first, look for an XML prolog 485 auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\""); 486 if(idx != -1) { 487 idx += "encoding=\"".length; 488 // we're probably past the prolog if it's this far in; we might be looking at 489 // content. Forget about it. 490 if(idx > 100) 491 idx = -1; 492 } 493 // if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5).. 494 if(idx == -1) { 495 idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset="); 496 if(idx != -1) { 497 idx += "charset=".length; 498 if(dataAsBytes[idx] == '"') 499 idx++; 500 } 501 } 502 503 // found something in either branch... 504 if(idx != -1) { 505 // read till a quote or about 12 chars, whichever comes first... 506 auto end = idx; 507 while(end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12) 508 end++; 509 510 dataEncoding = cast(string) dataAsBytes[idx .. end]; 511 } 512 // otherwise, we just don't know. 513 } 514 } 515 516 if(dataEncoding is null) { 517 if(strict) 518 throw new MarkupException("I couldn't figure out the encoding of this document."); 519 else 520 // if we really don't know by here, it means we already tried UTF-8, 521 // looked for utf 16 and 32 byte order marks, and looked for xml or meta 522 // tags... let's assume it's Windows-1252, since that's probably the most 523 // common aside from utf that wouldn't be labeled. 524 525 dataEncoding = "Windows 1252"; 526 } 527 528 // and now, go ahead and convert it. 529 530 string data; 531 532 if(!strict) { 533 // if we're in non-strict mode, we need to check 534 // the document for mislabeling too; sometimes 535 // web documents will say they are utf-8, but aren't 536 // actually properly encoded. If it fails to validate, 537 // we'll assume it's actually Windows encoding - the most 538 // likely candidate for mislabeled garbage. 539 dataEncoding = dataEncoding.toLower(); 540 dataEncoding = dataEncoding.replace(" ", ""); 541 dataEncoding = dataEncoding.replace("-", ""); 542 dataEncoding = dataEncoding.replace("_", ""); 543 if(dataEncoding == "utf8") { 544 try { 545 validate(rawdata); 546 } catch(UTFException e) { 547 dataEncoding = "Windows 1252"; 548 } 549 } 550 } 551 552 if(dataEncoding != "UTF-8") { 553 if(strict) 554 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 555 else { 556 try { 557 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 558 } catch(Exception e) { 559 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252"); 560 } 561 } 562 } else 563 data = rawdata; 564 565 return toUtf8Stream(data); 566 } 567 568 private 569 Utf8Stream toUtf8Stream(in string rawdata) { 570 string data = rawdata; 571 static if(is(Utf8Stream == string)) 572 return data; 573 else 574 return new Utf8Stream(data); 575 } 576 577 /++ 578 List of elements that can be assumed to be self-closed 579 in this document. The default for a Document are a hard-coded 580 list of ones appropriate for HTML. For [XmlDocument], it defaults 581 to empty. You can modify this after construction but before parsing. 582 583 History: 584 Added February 8, 2021 (included in dub release 9.2) 585 586 Changed from `string[]` to `immutable(string)[]` on 587 February 4, 2024 (dub v11.5) to plug a hole discovered 588 by the OpenD compiler's diagnostics. 589 +/ 590 immutable(string)[] selfClosedElements = htmlSelfClosedElements; 591 592 /++ 593 List of elements that contain raw CDATA content for this 594 document, e.g. `<script>` and `<style>` for HTML. The parser 595 will read until the closing string and put everything else 596 in a [RawSource] object for future processing, not trying to 597 do any further child nodes or attributes, etc. 598 599 History: 600 Added February 4, 2024 (dub v11.5) 601 602 +/ 603 immutable(string)[] rawSourceElements = htmlRawSourceElements; 604 605 /++ 606 List of elements that are considered inline for pretty printing. 607 The default for a Document are hard-coded to something appropriate 608 for HTML. For [XmlDocument], it defaults to empty. You can modify 609 this after construction but before parsing. 610 611 History: 612 Added June 21, 2021 (included in dub release 10.1) 613 614 Changed from `string[]` to `immutable(string)[]` on 615 February 4, 2024 (dub v11.5) to plug a hole discovered 616 by the OpenD compiler's diagnostics. 617 +/ 618 immutable(string)[] inlineElements = htmlInlineElements; 619 620 /** 621 Take XMLish data and try to make the DOM tree out of it. 622 623 The goal isn't to be perfect, but to just be good enough to 624 approximate Javascript's behavior. 625 626 If strict, it throws on something that doesn't make sense. 627 (Examples: mismatched tags. It doesn't validate!) 628 If not strict, it tries to recover anyway, and only throws 629 when something is REALLY unworkable. 630 631 If strict is false, it uses a magic list of tags that needn't 632 be closed. If you are writing a document specifically for this, 633 try to avoid such - use self closed tags at least. Easier to parse. 634 635 The dataEncoding argument can be used to pass a specific 636 charset encoding for automatic conversion. If null (which is NOT 637 the default!), it tries to determine from the data itself, 638 using the xml prolog or meta tags, and assumes UTF-8 if unsure. 639 640 If this assumption is wrong, it can throw on non-ascii 641 characters! 642 643 644 Note that it previously assumed the data was encoded as UTF-8, which 645 is why the dataEncoding argument defaults to that. 646 647 So it shouldn't break backward compatibility. 648 649 But, if you want the best behavior on wild data - figuring it out from the document 650 instead of assuming - you'll probably want to change that argument to null. 651 652 This is a template so it lazily imports arsd.characterencodings, which is required 653 to fix up data encodings. 654 655 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 656 dependency. If it is data from the Internet though, a random website, the encoding 657 is often a lie. This function, if dataEncoding == null, can correct for that, or 658 you can try parseGarbage. In those cases, arsd.characterencodings is required to 659 compile. 660 */ 661 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 662 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 663 parseStream(data, caseSensitive, strict); 664 } 665 666 // note: this work best in strict mode, unless data is just a simple string wrapper 667 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false, bool pureXmlMode = false) { 668 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 669 // of my big app. 670 671 assert(data !is null); 672 673 // go through character by character. 674 // if you see a <, consider it a tag. 675 // name goes until the first non tagname character 676 // then see if it self closes or has an attribute 677 678 // if not in a tag, anything not a tag is a big text 679 // node child. It ends as soon as it sees a < 680 681 // Whitespace in text or attributes is preserved, but not between attributes 682 683 // & and friends are converted when I know them, left the same otherwise 684 685 686 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 687 //validate(data); // it *must* be UTF-8 for this to work correctly 688 689 sizediff_t pos = 0; 690 691 clear(); 692 693 loose = !caseSensitive; 694 695 bool sawImproperNesting = false; 696 bool nonNestableHackRequired = false; 697 698 int getLineNumber(sizediff_t p) { 699 return data.getLineNumber(p); 700 } 701 702 void parseError(string message) { 703 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 704 } 705 706 bool eatWhitespace() { 707 bool ateAny = false; 708 while(pos < data.length && data[pos].isSimpleWhite) { 709 pos++; 710 ateAny = true; 711 } 712 return ateAny; 713 } 714 715 string readTagName() { 716 717 data.markDataDiscardable(pos); 718 719 // remember to include : for namespaces 720 // basically just keep going until >, /, or whitespace 721 auto start = pos; 722 while(data[pos] != '>' && data[pos] != '/' && !data[pos].isSimpleWhite) 723 { 724 pos++; 725 if(pos == data.length) { 726 if(strict) 727 throw new Exception("tag name incomplete when file ended"); 728 else 729 break; 730 } 731 } 732 733 if(!caseSensitive) 734 return toLower(data[start..pos]); 735 else 736 return data[start..pos]; 737 } 738 739 string readAttributeName() { 740 // remember to include : for namespaces 741 // basically just keep going until >, /, or whitespace 742 auto start = pos; 743 while(data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && !data[pos].isSimpleWhite) 744 { 745 if(data[pos] == '<') { 746 if(strict) 747 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 748 else 749 break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 750 } 751 pos++; 752 if(pos == data.length) { 753 if(strict) 754 throw new Exception("unterminated attribute name"); 755 else 756 break; 757 } 758 } 759 760 if(!caseSensitive) 761 return toLower(data[start..pos]); 762 else 763 return data[start..pos]; 764 } 765 766 string readAttributeValue() { 767 if(pos >= data.length) { 768 if(strict) 769 throw new Exception("no attribute value before end of file"); 770 else 771 return null; 772 } 773 switch(data[pos]) { 774 case '\'': 775 case '"': 776 auto started = pos; 777 char end = data[pos]; 778 pos++; 779 auto start = pos; 780 while(pos < data.length && data[pos] != end) 781 pos++; 782 if(strict && pos == data.length) 783 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 784 string v = htmlEntitiesDecode(data[start..pos], strict); 785 pos++; // skip over the end 786 return v; 787 default: 788 if(strict) 789 parseError("Attributes must be quoted"); 790 // read until whitespace or terminator (/> or >) 791 auto start = pos; 792 while( 793 pos < data.length && 794 data[pos] != '>' && 795 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 796 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 797 !data[pos].isSimpleWhite) 798 pos++; 799 800 string v = htmlEntitiesDecode(data[start..pos], strict); 801 // don't skip the end - we'll need it later 802 return v; 803 } 804 } 805 806 TextNode readTextNode() { 807 auto start = pos; 808 while(pos < data.length && data[pos] != '<') { 809 pos++; 810 } 811 812 return TextNode.fromUndecodedString(this, data[start..pos]); 813 } 814 815 // this is obsolete! 816 RawSource readCDataNode() { 817 auto start = pos; 818 while(pos < data.length && data[pos] != '<') { 819 pos++; 820 } 821 822 return new RawSource(this, data[start..pos]); 823 } 824 825 826 struct Ele { 827 int type; // element or closing tag or nothing 828 /* 829 type == 0 means regular node, self-closed (element is valid) 830 type == 1 means closing tag (payload is the tag name, element may be valid) 831 type == 2 means you should ignore it completely 832 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 833 type == 4 means the document was totally empty 834 */ 835 Element element; // for type == 0 or type == 3 836 string payload; // for type == 1 837 } 838 // recursively read a tag 839 Ele readElement(string[] parentChain = null) { 840 // FIXME: this is the slowest function in this module, by far, even in strict mode. 841 // Loose mode should perform decently, but strict mode is the important one. 842 if(!strict && parentChain is null) 843 parentChain = []; 844 845 static string[] recentAutoClosedTags; 846 847 if(pos >= data.length) 848 { 849 if(strict) { 850 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 851 } else { 852 if(parentChain.length) 853 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 854 else 855 return Ele(4); // signal emptiness upstream 856 } 857 } 858 859 if(data[pos] != '<') { 860 return Ele(0, readTextNode(), null); 861 } 862 863 enforce(data[pos] == '<'); 864 pos++; 865 if(pos == data.length) { 866 if(strict) 867 throw new MarkupException("Found trailing < at end of file"); 868 // if not strict, we'll just skip the switch 869 } else 870 switch(data[pos]) { 871 // I don't care about these, so I just want to skip them 872 case '!': // might be a comment, a doctype, or a special instruction 873 pos++; 874 875 // FIXME: we should store these in the tree too 876 // though I like having it stripped out tbh. 877 878 if(pos == data.length) { 879 if(strict) 880 throw new MarkupException("<! opened at end of file"); 881 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 882 // comment 883 pos += 2; 884 885 // FIXME: technically, a comment is anything 886 // between -- and -- inside a <!> block. 887 // so in <!-- test -- lol> , the " lol" is NOT a comment 888 // and should probably be handled differently in here, but for now 889 // I'll just keep running until --> since that's the common way 890 891 auto commentStart = pos; 892 while(pos+3 < data.length && data[pos..pos+3] != "-->") 893 pos++; 894 895 auto end = commentStart; 896 897 if(pos + 3 >= data.length) { 898 if(strict) 899 throw new MarkupException("unclosed comment"); 900 end = data.length; 901 pos = data.length; 902 } else { 903 end = pos; 904 assert(data[pos] == '-'); 905 pos++; 906 assert(data[pos] == '-'); 907 pos++; 908 assert(data[pos] == '>'); 909 pos++; 910 } 911 912 if(parseSawComment !is null) 913 if(parseSawComment(data[commentStart .. end])) { 914 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 915 } 916 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 917 pos += 7; 918 919 auto cdataStart = pos; 920 921 ptrdiff_t end = -1; 922 typeof(end) cdataEnd; 923 924 if(pos < data.length) { 925 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 926 end = data[pos .. $].indexOf("]]>"); 927 } 928 929 if(end == -1) { 930 if(strict) 931 throw new MarkupException("Unclosed CDATA section"); 932 end = pos; 933 cdataEnd = pos; 934 } else { 935 cdataEnd = pos + end; 936 pos = cdataEnd + 3; 937 } 938 939 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 940 } else { 941 auto start = pos; 942 while(pos < data.length && data[pos] != '>') 943 pos++; 944 945 auto bangEnds = pos; 946 if(pos == data.length) { 947 if(strict) 948 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 949 } else pos++; // skipping the > 950 951 if(parseSawBangInstruction !is null) 952 if(parseSawBangInstruction(data[start .. bangEnds])) { 953 // FIXME: these should be able to modify the parser state, 954 // doing things like adding entities, somehow. 955 956 return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null); 957 } 958 } 959 960 /* 961 if(pos < data.length && data[pos] == '>') 962 pos++; // skip the > 963 else 964 assert(!strict); 965 */ 966 break; 967 case '%': 968 case '?': 969 /* 970 Here's what we want to support: 971 972 <% asp code %> 973 <%= asp code %> 974 <?php php code ?> 975 <?= php code ?> 976 977 The contents don't really matter, just if it opens with 978 one of the above for, it ends on the two char terminator. 979 980 <?something> 981 this is NOT php code 982 because I've seen this in the wild: <?EM-dummyText> 983 984 This could be php with shorttags which would be cut off 985 prematurely because if(a >) - that > counts as the close 986 of the tag, but since dom.d can't tell the difference 987 between that and the <?EM> real world example, it will 988 not try to look for the ?> ending. 989 990 The difference between this and the asp/php stuff is that it 991 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 992 on >. 993 */ 994 995 char end = data[pos]; 996 auto started = pos; 997 bool isAsp = end == '%'; 998 int currentIndex = 0; 999 bool isPhp = false; 1000 bool isEqualTag = false; 1001 int phpCount = 0; 1002 1003 more: 1004 pos++; // skip the start 1005 if(pos == data.length) { 1006 if(strict) 1007 throw new MarkupException("Unclosed <"~end~" by end of file"); 1008 } else { 1009 currentIndex++; 1010 if(currentIndex == 1 && data[pos] == '=') { 1011 if(!isAsp) 1012 isPhp = true; 1013 isEqualTag = true; 1014 goto more; 1015 } 1016 if(currentIndex == 1 && data[pos] == 'p') 1017 phpCount++; 1018 if(currentIndex == 2 && data[pos] == 'h') 1019 phpCount++; 1020 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 1021 isPhp = true; 1022 1023 if(data[pos] == '>') { 1024 if((isAsp || isPhp) && data[pos - 1] != end) 1025 goto more; 1026 // otherwise we're done 1027 } else 1028 goto more; 1029 } 1030 1031 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 1032 auto code = data[started .. pos]; 1033 1034 1035 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 1036 if(pos < data.length) 1037 pos++; // get past the > 1038 1039 if(isAsp && parseSawAspCode !is null) { 1040 if(parseSawAspCode(code)) { 1041 return Ele(3, new AspCode(this, code), null); 1042 } 1043 } else if(isPhp && parseSawPhpCode !is null) { 1044 if(parseSawPhpCode(code)) { 1045 return Ele(3, new PhpCode(this, code), null); 1046 } 1047 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 1048 if(parseSawQuestionInstruction(code)) { 1049 return Ele(3, new QuestionInstruction(this, code), null); 1050 } 1051 } 1052 break; 1053 case '/': // closing an element 1054 pos++; // skip the start 1055 auto p = pos; 1056 while(pos < data.length && data[pos] != '>') 1057 pos++; 1058 //writefln("</%s>", data[p..pos]); 1059 if(pos == data.length && data[pos-1] != '>') { 1060 if(strict) 1061 throw new MarkupException("File ended before closing tag had a required >"); 1062 else 1063 data ~= ">"; // just hack it in 1064 } 1065 pos++; // skip the '>' 1066 1067 string tname = data[p..pos-1]; 1068 if(!strict) 1069 tname = tname.strip; 1070 if(!caseSensitive) 1071 tname = tname.toLower(); 1072 1073 return Ele(1, null, tname); // closing tag reports itself here 1074 case ' ': // assume it isn't a real element... 1075 if(strict) { 1076 parseError("bad markup - improperly placed <"); 1077 assert(0); // parseError always throws 1078 } else 1079 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 1080 default: 1081 1082 if(!strict) { 1083 // what about something that kinda looks like a tag, but isn't? 1084 auto nextTag = data[pos .. $].indexOf("<"); 1085 auto closeTag = data[pos .. $].indexOf(">"); 1086 if(closeTag != -1 && nextTag != -1) 1087 if(nextTag < closeTag) { 1088 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 1089 1090 auto equal = data[pos .. $].indexOf("=\""); 1091 if(equal != -1 && equal < closeTag) { 1092 // this MIGHT be ok, soldier on 1093 } else { 1094 // definitely no good, this must be a (horribly distorted) text node 1095 pos++; // skip the < we're on - don't want text node to end prematurely 1096 auto node = readTextNode(); 1097 node.contents = "<" ~ node.contents; // put this back 1098 return Ele(0, node, null); 1099 } 1100 } 1101 } 1102 1103 string tagName = readTagName(); 1104 AttributesHolder attributes; 1105 1106 Ele addTag(bool selfClosed) { 1107 if(selfClosed) 1108 pos++; 1109 else { 1110 if(!strict) 1111 if(tagName.isInArray(selfClosedElements)) 1112 // these are de-facto self closed 1113 selfClosed = true; 1114 } 1115 1116 import std.algorithm.comparison; 1117 1118 if(strict) { 1119 enforce(data[pos] == '>', format("got %s when expecting > (possible missing attribute name)\nContext:\n%s", data[pos], data[max(0, pos - data.contextToKeep) .. min(data.length, pos + data.contextToKeep)])); 1120 } else { 1121 // if we got here, it's probably because a slash was in an 1122 // unquoted attribute - don't trust the selfClosed value 1123 if(!selfClosed) 1124 selfClosed = tagName.isInArray(selfClosedElements); 1125 1126 while(pos < data.length && data[pos] != '>') 1127 pos++; 1128 1129 if(pos >= data.length) { 1130 // the tag never closed 1131 assert(data.length != 0); 1132 pos = data.length - 1; // rewinding so it hits the end at the bottom.. 1133 } 1134 } 1135 1136 auto whereThisTagStarted = pos; // for better error messages 1137 1138 pos++; 1139 1140 auto e = createElement(tagName); 1141 e.attributes = attributes; 1142 version(dom_node_indexes) { 1143 if(e.dataset.nodeIndex.length == 0) 1144 e.dataset.nodeIndex = to!string(&(e.attributes)); 1145 } 1146 e.selfClosed = selfClosed; 1147 e.parseAttributes(); 1148 1149 // might temporarily set root to the first element we encounter, 1150 // then the final root element assignment will be at the end of the parse, 1151 // when the recursive work is complete. 1152 if(this.root is null) 1153 this.root = e; 1154 this.processTagOpen(e); 1155 scope(exit) 1156 this.processTagClose(e); 1157 1158 1159 // HACK to handle script and style as a raw data section as it is in HTML browsers 1160 if(!pureXmlMode && tagName.isInArray(rawSourceElements)) { 1161 if(!selfClosed) { 1162 string closer = "</" ~ tagName ~ ">"; 1163 ptrdiff_t ending; 1164 if(pos >= data.length) 1165 ending = -1; 1166 else 1167 ending = indexOf(data[pos..$], closer); 1168 1169 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 1170 /* 1171 if(loose && ending == -1 && pos < data.length) 1172 ending = indexOf(data[pos..$], closer.toUpper()); 1173 */ 1174 if(ending == -1) { 1175 if(strict) 1176 throw new Exception("tag " ~ tagName ~ " never closed"); 1177 else { 1178 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 1179 if(pos < data.length) { 1180 e = new TextNode(this, data[pos .. $]); 1181 pos = data.length; 1182 } 1183 } 1184 } else { 1185 ending += pos; 1186 e.innerRawSource = data[pos..ending]; 1187 pos = ending + closer.length; 1188 } 1189 } 1190 return Ele(0, e, null); 1191 } 1192 1193 bool closed = selfClosed; 1194 1195 void considerHtmlNonNestableElementHack(Element n) { 1196 assert(!strict); 1197 if(!canNestElementsInHtml(e.tagName, n.tagName)) { 1198 // html lets you write <p> para 1 <p> para 1 1199 // but in the dom tree, they should be siblings, not children. 1200 nonNestableHackRequired = true; 1201 } 1202 } 1203 1204 //writef("<%s>", tagName); 1205 while(!closed) { 1206 Ele n; 1207 if(strict) 1208 n = readElement(); 1209 else 1210 n = readElement(parentChain ~ tagName); 1211 1212 if(n.type == 4) return n; // the document is empty 1213 1214 if(n.type == 3 && n.element !is null) { 1215 // special node, append if possible 1216 if(e !is null) 1217 processNodeWhileParsing(e, n.element); 1218 else 1219 piecesBeforeRoot ~= n.element; 1220 } else if(n.type == 0) { 1221 if(!strict) 1222 considerHtmlNonNestableElementHack(n.element); 1223 processNodeWhileParsing(e, n.element); 1224 } else if(n.type == 1) { 1225 bool found = false; 1226 if(n.payload != tagName) { 1227 if(strict) 1228 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 1229 else { 1230 sawImproperNesting = true; 1231 // this is so we don't drop several levels of awful markup 1232 if(n.element) { 1233 if(!strict) 1234 considerHtmlNonNestableElementHack(n.element); 1235 processNodeWhileParsing(e, n.element); 1236 n.element = null; 1237 } 1238 1239 // is the element open somewhere up the chain? 1240 foreach(i, parent; parentChain) 1241 if(parent == n.payload) { 1242 recentAutoClosedTags ~= tagName; 1243 // just rotating it so we don't inadvertently break stuff with vile crap 1244 if(recentAutoClosedTags.length > 4) 1245 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 1246 1247 n.element = e; 1248 return n; 1249 } 1250 1251 /+ 1252 // COMMENTED OUT BLOCK 1253 // dom.d used to replace improper close tags with their 1254 // text so they'd be visible in the output. the html 1255 // spec says to just ignore them, and browsers do indeed 1256 // seem to jsut ignore them, even checking back on IE6. 1257 // so i guess i was wrong to do this (tho tbh i find it kinda 1258 // useful to call out an obvious mistake in the source... 1259 // but for calling out obvious mistakes, just use strict 1260 // mode.) 1261 1262 // if not, this is a text node; we can't fix it up... 1263 1264 // If it's already in the tree somewhere, assume it is closed by algorithm 1265 // and we shouldn't output it - odds are the user just flipped a couple tags 1266 foreach(ele; e.tree) { 1267 if(ele.tagName == n.payload) { 1268 found = true; 1269 break; 1270 } 1271 } 1272 1273 foreach(ele; recentAutoClosedTags) { 1274 if(ele == n.payload) { 1275 found = true; 1276 break; 1277 } 1278 } 1279 1280 if(!found) // if not found in the tree though, it's probably just text 1281 processNodeWhileParsing(e, TextNode.fromUndecodedString(this, "</"~n.payload~">")); 1282 1283 +/ 1284 } 1285 } else { 1286 if(n.element) { 1287 if(!strict) 1288 considerHtmlNonNestableElementHack(n.element); 1289 processNodeWhileParsing(e, n.element); 1290 } 1291 } 1292 1293 if(n.payload == tagName) // in strict mode, this is always true 1294 closed = true; 1295 } else { /*throw new Exception("wtf " ~ tagName);*/ } 1296 } 1297 //writef("</%s>\n", tagName); 1298 return Ele(0, e, null); 1299 } 1300 1301 // if a tag was opened but not closed by end of file, we can arrive here 1302 if(!strict && pos >= data.length) 1303 return addTag(false); 1304 //else if(strict) assert(0); // should be caught before 1305 1306 switch(data[pos]) { 1307 default: assert(0); 1308 case '/': // self closing tag 1309 return addTag(true); 1310 case '>': 1311 return addTag(false); 1312 case ' ': 1313 case '\t': 1314 case '\n': 1315 case '\r': 1316 // there might be attributes... 1317 moreAttributes: 1318 eatWhitespace(); 1319 1320 // same deal as above the switch.... 1321 if(!strict && pos >= data.length) 1322 return addTag(false); 1323 1324 if(strict && pos >= data.length) 1325 throw new MarkupException("tag open, didn't find > before end of file"); 1326 1327 switch(data[pos]) { 1328 case '/': // self closing tag 1329 return addTag(true); 1330 case '>': // closed tag; open -- we now read the contents 1331 return addTag(false); 1332 default: // it is an attribute 1333 string attrName = readAttributeName(); 1334 string attrValue = attrName; 1335 1336 bool ateAny = eatWhitespace(); 1337 // the spec allows this too, sigh https://www.w3.org/TR/REC-xml/#NT-Eq 1338 //if(strict && ateAny) 1339 //throw new MarkupException("inappropriate whitespace after attribute name"); 1340 1341 if(pos >= data.length) { 1342 if(strict) 1343 assert(0, "this should have thrown in readAttributeName"); 1344 else { 1345 data ~= ">"; 1346 goto blankValue; 1347 } 1348 } 1349 if(data[pos] == '=') { 1350 pos++; 1351 1352 ateAny = eatWhitespace(); 1353 // the spec actually allows this! 1354 //if(strict && ateAny) 1355 //throw new MarkupException("inappropriate whitespace after attribute equals"); 1356 1357 attrValue = readAttributeValue(); 1358 1359 eatWhitespace(); 1360 } 1361 1362 blankValue: 1363 1364 if(strict && attrName in attributes) 1365 throw new MarkupException("Repeated attribute: " ~ attrName); 1366 1367 if(attrName.strip().length) 1368 attributes[attrName] = attrValue; 1369 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 1370 1371 if(!strict && pos < data.length && data[pos] == '<') { 1372 // this is the broken tag that doesn't have a > at the end 1373 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 1374 // let's insert one as a hack 1375 goto case '>'; 1376 } 1377 1378 goto moreAttributes; 1379 } 1380 } 1381 } 1382 1383 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 1384 //assert(0); 1385 } 1386 1387 eatWhitespace(); 1388 Ele r; 1389 do { 1390 r = readElement(); // there SHOULD only be one element... 1391 1392 if(r.type == 3 && r.element !is null) 1393 piecesBeforeRoot ~= r.element; 1394 1395 if(r.type == 4) 1396 break; // the document is completely empty... 1397 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 1398 1399 root = r.element; 1400 if(root !is null) 1401 root.parent_ = this; 1402 1403 if(!strict) // in strict mode, we'll just ignore stuff after the xml 1404 while(r.type != 4) { 1405 r = readElement(); 1406 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 1407 if(r.element !is null) 1408 piecesAfterRoot ~= r.element; 1409 } 1410 } 1411 1412 if(root is null) 1413 { 1414 if(strict) 1415 assert(0, "empty document should be impossible in strict mode"); 1416 else 1417 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 1418 } 1419 1420 if(nonNestableHackRequired) { 1421 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 1422 1423 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 1424 // It's hard to handle above though because my code sucks. So, we'll fix it here. 1425 1426 // Where to insert based on the parent (for mixed closed/unclosed <p> tags). See #120 1427 // Kind of inefficient because we can't detect when we recurse back out of a node. 1428 Element[Element] insertLocations; 1429 auto iterator = root.tree; 1430 foreach(ele; iterator) { 1431 if(ele.parentNode is null) 1432 continue; 1433 1434 if(!canNestElementsInHtml(ele.parentNode.tagName, ele.tagName)) { 1435 auto shouldBePreviousSibling = ele.parentNode; 1436 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 1437 if (auto p = holder in insertLocations) { 1438 shouldBePreviousSibling = *p; 1439 assert(shouldBePreviousSibling.parentNode is holder); 1440 } 1441 ele = holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 1442 insertLocations[holder] = ele; 1443 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 1444 } 1445 } 1446 } 1447 } 1448 1449 /* end massive parse function */ 1450 1451 /// Gets the <title> element's innerText, if one exists 1452 @property string title() { 1453 bool doesItMatch(Element e) { 1454 return (e.tagName == "title"); 1455 } 1456 1457 auto e = findFirst(&doesItMatch); 1458 if(e) 1459 return e.innerText(); 1460 return ""; 1461 } 1462 1463 /// Sets the title of the page, creating a <title> element if needed. 1464 @property void title(string t) { 1465 bool doesItMatch(Element e) { 1466 return (e.tagName == "title"); 1467 } 1468 1469 auto e = findFirst(&doesItMatch); 1470 1471 if(!e) { 1472 e = createElement("title"); 1473 auto heads = getElementsByTagName("head"); 1474 if(heads.length) 1475 heads[0].appendChild(e); 1476 } 1477 1478 if(e) 1479 e.innerText = t; 1480 } 1481 1482 // FIXME: would it work to alias root this; ???? might be a good idea 1483 /// These functions all forward to the root element. See the documentation in the Element class. 1484 Element getElementById(string id) { 1485 return root.getElementById(id); 1486 } 1487 1488 /// ditto 1489 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1490 if( is(SomeElementType : Element)) 1491 out(ret) { assert(ret !is null); } 1492 do { 1493 return root.requireElementById!(SomeElementType)(id, file, line); 1494 } 1495 1496 /// ditto 1497 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1498 if( is(SomeElementType : Element)) 1499 out(ret) { assert(ret !is null); } 1500 do { 1501 auto e = cast(SomeElementType) querySelector(selector); 1502 if(e is null) 1503 throw new ElementNotFoundException(SomeElementType.stringof, selector, this.root, file, line); 1504 return e; 1505 } 1506 1507 /// ditto 1508 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 1509 if(is(SomeElementType : Element)) 1510 { 1511 auto e = cast(SomeElementType) querySelector(selector); 1512 return MaybeNullElement!SomeElementType(e); 1513 } 1514 1515 /// ditto 1516 @scriptable 1517 Element querySelector(string selector) { 1518 // see comment below on Document.querySelectorAll 1519 auto s = Selector(selector);//, !loose); 1520 foreach(ref comp; s.components) 1521 if(comp.parts.length && comp.parts[0].separation == 0) 1522 comp.parts[0].separation = -1; 1523 foreach(e; s.getMatchingElementsLazy(this.root)) 1524 return e; 1525 return null; 1526 1527 } 1528 1529 /// ditto 1530 @scriptable 1531 Element[] querySelectorAll(string selector) { 1532 // In standards-compliant code, the document is slightly magical 1533 // in that it is a pseudoelement at top level. It should actually 1534 // match the root as one of its children. 1535 // 1536 // In versions of dom.d before Dec 29 2019, this worked because 1537 // querySelectorAll was willing to return itself. With that bug fix 1538 // (search "arbitrary id asduiwh" in this file for associated unittest) 1539 // this would have failed. Hence adding back the root if it matches the 1540 // selector itself. 1541 // 1542 // I'd love to do this better later. 1543 1544 auto s = Selector(selector);//, !loose); 1545 foreach(ref comp; s.components) 1546 if(comp.parts.length && comp.parts[0].separation == 0) 1547 comp.parts[0].separation = -1; 1548 return s.getMatchingElements(this.root, null); 1549 } 1550 1551 /// ditto 1552 deprecated("use querySelectorAll instead") 1553 Element[] getElementsBySelector(string selector) { 1554 return root.getElementsBySelector(selector); 1555 } 1556 1557 /// ditto 1558 @scriptable 1559 Element[] getElementsByTagName(string tag) { 1560 return root.getElementsByTagName(tag); 1561 } 1562 1563 /// ditto 1564 @scriptable 1565 Element[] getElementsByClassName(string tag) { 1566 return root.getElementsByClassName(tag); 1567 } 1568 1569 /** FIXME: btw, this could just be a lazy range...... */ 1570 Element getFirstElementByTagName(string tag) { 1571 if(loose) 1572 tag = tag.toLower(); 1573 bool doesItMatch(Element e) { 1574 return e.tagName == tag; 1575 } 1576 return findFirst(&doesItMatch); 1577 } 1578 1579 /++ 1580 This returns the <body> element, if there is one. (It different than Javascript, where it is called 'body', because body used to be a keyword in D.) 1581 1582 History: 1583 `body` alias added February 26, 2024 1584 +/ 1585 Element mainBody() { 1586 return getFirstElementByTagName("body"); 1587 } 1588 1589 /// ditto 1590 alias body = mainBody; 1591 1592 /// this uses a weird thing... it's [name=] if no colon and 1593 /// [property=] if colon 1594 string getMeta(string name) { 1595 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1596 auto e = querySelector("head meta["~thing~"="~name~"]"); 1597 if(e is null) 1598 return null; 1599 return e.content; 1600 } 1601 1602 /// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags/ 1603 void setMeta(string name, string value) { 1604 string thing = name.indexOf(":") == -1 ? "name" : "property"; 1605 auto e = querySelector("head meta["~thing~"="~name~"]"); 1606 if(e is null) { 1607 e = requireSelector("head").addChild("meta"); 1608 e.setAttribute(thing, name); 1609 } 1610 1611 e.content = value; 1612 } 1613 1614 ///. 1615 Form[] forms() { 1616 return cast(Form[]) getElementsByTagName("form"); 1617 } 1618 1619 ///. 1620 Form createForm() 1621 out(ret) { 1622 assert(ret !is null); 1623 } 1624 do { 1625 return cast(Form) createElement("form"); 1626 } 1627 1628 ///. 1629 Element createElement(string name) { 1630 if(loose) 1631 name = name.toLower(); 1632 1633 auto e = Element.make(name, null, null, selfClosedElements); 1634 1635 return e; 1636 1637 // return new Element(this, name, null, selfClosed); 1638 } 1639 1640 ///. 1641 Element createFragment() { 1642 return new DocumentFragment(this); 1643 } 1644 1645 ///. 1646 Element createTextNode(string content) { 1647 return new TextNode(this, content); 1648 } 1649 1650 1651 ///. 1652 Element findFirst(bool delegate(Element) doesItMatch) { 1653 if(root is null) 1654 return null; 1655 Element result; 1656 1657 bool goThroughElement(Element e) { 1658 if(doesItMatch(e)) { 1659 result = e; 1660 return true; 1661 } 1662 1663 foreach(child; e.children) { 1664 if(goThroughElement(child)) 1665 return true; 1666 } 1667 1668 return false; 1669 } 1670 1671 goThroughElement(root); 1672 1673 return result; 1674 } 1675 1676 ///. 1677 void clear() { 1678 root = null; 1679 loose = false; 1680 } 1681 1682 private string _prolog = "<!DOCTYPE html>\n"; 1683 private bool prologWasSet = false; // set to true if the user changed it 1684 1685 /++ 1686 Returns or sets the string before the root element. This is, for example, 1687 `<!DOCTYPE html>\n` or similar. 1688 +/ 1689 @property string prolog() const { 1690 // if the user explicitly changed it, do what they want 1691 // or if we didn't keep/find stuff from the document itself, 1692 // we'll use the builtin one as a default. 1693 if(prologWasSet || piecesBeforeRoot.length == 0) 1694 return _prolog; 1695 1696 string p; 1697 foreach(e; piecesBeforeRoot) 1698 p ~= e.toString() ~ "\n"; 1699 return p; 1700 } 1701 1702 /// ditto 1703 void setProlog(string d) { 1704 _prolog = d; 1705 prologWasSet = true; 1706 } 1707 1708 /++ 1709 Returns the document as string form. Please note that if there is anything in [piecesAfterRoot], 1710 they are discarded. If you want to add them to the file, loop over that and append it yourself 1711 (but remember xml isn't supposed to have anything after the root element). 1712 +/ 1713 override string toString() const { 1714 return prolog ~ root.toString(); 1715 } 1716 1717 /++ 1718 Writes it out with whitespace for easier eyeball debugging 1719 1720 Do NOT use for anything other than eyeball debugging, 1721 because whitespace may be significant content in XML. 1722 +/ 1723 string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 1724 import std.string; 1725 string s = prolog.strip; 1726 1727 /* 1728 if(insertComments) s ~= "<!--"; 1729 s ~= "\n"; 1730 if(insertComments) s ~= "-->"; 1731 */ 1732 1733 s ~= root.toPrettyStringImpl(insertComments, indentationLevel, indentWith); 1734 foreach(a; piecesAfterRoot) 1735 s ~= a.toPrettyStringImpl(insertComments, indentationLevel, indentWith); 1736 return s; 1737 } 1738 1739 /// The root element, like `<html>`. Most the methods on Document forward to this object. 1740 Element root; 1741 1742 /// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s 1743 Element[] piecesBeforeRoot; 1744 1745 /// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it 1746 Element[] piecesAfterRoot; 1747 1748 ///. 1749 bool loose; 1750 1751 1752 1753 // what follows are for mutation events that you can observe 1754 void delegate(DomMutationEvent)[] eventObservers; 1755 1756 void dispatchMutationEvent(DomMutationEvent e) { 1757 foreach(o; eventObservers) 1758 o(e); 1759 } 1760 } 1761 1762 /++ 1763 Basic parsing of HTML tag soup 1764 1765 If you simply make a `new Document("some string")` or use [Document.fromUrl] to automatically 1766 download a page (that's function is shorthand for `new Document(arsd.http2.get(your_given_url).contentText)`), 1767 the Document parser will assume it is broken HTML. It will try to fix up things like charset messes, missing 1768 closing tags, flipped tags, inconsistent letter cases, and other forms of commonly found HTML on the web. 1769 1770 It isn't exactly the same as what a HTML5 web browser does in all cases, but it usually it, and where it 1771 disagrees, it is still usually good enough (but sometimes a bug). 1772 +/ 1773 unittest { 1774 auto document = new Document(`<html><body><p>hello <P>there`); 1775 // this will automatically try to normalize the html and fix up broken tags, etc 1776 // so notice how it added the missing closing tags here and made them all lower case 1777 assert(document.toString() == "<!DOCTYPE html>\n<html><body><p>hello </p><p>there</p></body></html>", document.toString()); 1778 } 1779 1780 /++ 1781 Stricter parsing of HTML 1782 1783 When you are writing the HTML yourself, you can remove most ambiguity by making it throw exceptions instead 1784 of trying to automatically fix up things basic parsing tries to do. Using strict mode accomplishes this. 1785 1786 This will help guarantee that you have well-formed HTML, which means it is going to parse a lot more reliably 1787 by all users - browsers, dom.d, other libraries, all behave better with well-formed input... people too! 1788 1789 (note it is not a full *validator*, just a well-formedness checker. Full validation is a lot more work for very 1790 little benefit in my experience, so I stopped here.) 1791 +/ 1792 unittest { 1793 try { 1794 auto document = new Document(`<html><body><p>hello <P>there`, true, true); // turns on strict and case sensitive mode to ctor 1795 assert(0); // never reached, the constructor will throw because strict mode is turned on 1796 } catch(Exception e) { 1797 1798 } 1799 1800 // you can also create the object first, then use the [parseStrict] method 1801 auto document = new Document; 1802 document.parseStrict(`<foo></foo>`); // this is invalid html - no such foo tag - but it is well-formed, since it is opened and closed properly, so it passes 1803 1804 } 1805 1806 /++ 1807 Custom HTML extensions 1808 1809 dom.d is a custom HTML parser, which means you can add custom HTML extensions to it too. It normally reads 1810 and discards things like ASP style `<% ... %>` code as well as XML processing instruction / PHP style embeds `<? ... ?>` 1811 but you can keep this data if you call a function to opt into it in before parsing. 1812 1813 Additionally, you can add special tags to be read like `<script>` to preserve its insides for future processing 1814 via the `.innerRawSource` member. 1815 +/ 1816 unittest { 1817 auto document = new Document; // construct an empty thing first 1818 document.enableAddingSpecialTagsToDom(); // add the special tags like <% ... %> etc 1819 document.rawSourceElements ~= "embedded-plaintext"; // tell it we want a custom 1820 1821 document.parseStrict(`<html> 1822 <% some asp code %> 1823 <script>embedded && javascript</script> 1824 <embedded-plaintext>my <custom> plaintext & stuff</embedded-plaintext> 1825 </html>`); 1826 1827 // please note that if we did `document.toString()` right now, the original source - almost your same 1828 // string you passed to parseStrict - would be spit back out. Meaning the embedded-plaintext still has its 1829 // special text inside it. Another parser won't understand how to use this! So if you want to pass this 1830 // document somewhere else, you need to do some transformations. 1831 // 1832 // This differs from cases like CDATA sections, which dom.d will automatically convert into plain html entities 1833 // on the output that can be read by anyone. 1834 1835 assert(document.root.tagName == "html"); // the root element is normal 1836 1837 int foundCount; 1838 // now let's loop through the whole tree 1839 foreach(element; document.root.tree) { 1840 // the asp thing will be in 1841 if(auto asp = cast(AspCode) element) { 1842 // you use the `asp.source` member to get the code for these 1843 assert(asp.source == "% some asp code %"); 1844 foundCount++; 1845 } else if(element.tagName == "script") { 1846 // and for raw source elements - script, style, or the ones you add, 1847 // you use the innerHTML method to get the code inside 1848 assert(element.innerHTML == "embedded && javascript"); 1849 foundCount++; 1850 } else if(element.tagName == "embedded-plaintext") { 1851 // and innerHTML again 1852 assert(element.innerHTML == "my <custom> plaintext & stuff"); 1853 foundCount++; 1854 } 1855 1856 } 1857 1858 assert(foundCount == 3); 1859 1860 // writeln(document.toString()); 1861 } 1862 1863 // FIXME: <textarea> contents are treated kinda special in html5 as well... 1864 1865 /++ 1866 Demoing CDATA, entities, and non-ascii characters. 1867 1868 The previous example mentioned CDATA, let's show you what that does too. These are all read in as plain strings accessible in the DOM - there is no CDATA, no entities once you get inside the object model - but when you convert back into a string, it will normalize them in a particular way. 1869 1870 This is not exactly standards compliant completely in and out thanks to it doing some transformations... but I find it more useful - it reads the data in consistently and writes it out consistently, both in ways that work well for interop. Take a look: 1871 +/ 1872 unittest { 1873 auto document = new Document(`<html> 1874 <p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p> 1875 <p>¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p> 1876 <p><![CDATA[xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.]]></p> 1877 </html>`, true, true); // strict mode turned on 1878 1879 // Inside the object model, things are simplified to D strings. 1880 auto paragraphs = document.querySelectorAll("p"); 1881 // no surprise on the first paragraph, we wrote it with the character, and it is still there in the D string 1882 assert(paragraphs[0].textContent == "¤ is a non-ascii character. It will be converted to a numbered entity in string output."); 1883 // but note on the second paragraph, the entity has been converted to the appropriate *character* in the object 1884 assert(paragraphs[1].textContent == "¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output."); 1885 // and the CDATA bit is completely gone from the DOM; it just read it in as a text node. The txt content shows the text as a plain string: 1886 assert(paragraphs[2].textContent == "xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too."); 1887 // and the dom node beneath it is just a single text node; no trace of the original CDATA detail is left after parsing. 1888 assert(paragraphs[2].childNodes.length == 1 && paragraphs[2].childNodes[0].nodeType == NodeType.Text); 1889 1890 // And now, in the output string, we can see they are normalized thusly: 1891 assert(document.toString() == "<!DOCTYPE html>\n<html> 1892 <p>¤ is a non-ascii character. It will be converted to a numbered entity in string output.</p> 1893 <p>¤ is the same thing, but as a named entity. It also will be changed to a numbered entity in string output.</p> 1894 <p>xml cdata segments, which can contain <tag> looking things, are converted to encode the embedded special-to-xml characters to entities too.</p> 1895 </html>"); 1896 } 1897 1898 /++ 1899 Streaming parsing 1900 1901 dom.d normally takes a big string and returns a big DOM object tree - hence its name. This is usually the simplest 1902 code to read and write, so I prefer to stick to that, but if you wanna jump through a few hoops, you can still make 1903 dom.d work with streams. 1904 1905 It is awkward - again, dom.d's whole design is based on building the dom tree, but you can do it if you're willing to 1906 subclass a little and trust the garbage collector. Here's how. 1907 +/ 1908 unittest { 1909 bool encountered; 1910 class StreamDocument : Document { 1911 // the normal behavior for this function is to `parent.appendChild(child)` 1912 // but we can override to read it as it is processed and not append it 1913 override void processNodeWhileParsing(Element parent, Element child) { 1914 if(child.tagName == "bar") 1915 encountered = true; 1916 // note that each element's object is created but then discarded as garbage. 1917 // the GC will take care of it, even with a large document, whereas the normal 1918 // object tree could become quite large. 1919 } 1920 1921 this() { 1922 super("<foo><bar></bar></foo>"); 1923 } 1924 } 1925 1926 auto test = new StreamDocument(); 1927 assert(encountered); // it should have been seen 1928 assert(test.querySelector("bar") is null); // but not appended to the dom node, since we didn't append it 1929 } 1930 1931 /++ 1932 Basic parsing of XML. 1933 1934 dom.d is not technically a standards-compliant xml parser and doesn't implement all xml features, 1935 but its stricter parse options together with turning off HTML's special tag handling (e.g. treating 1936 `<script>` and `<style>` the same as any other tag) gets close enough to work fine for a great many 1937 use cases. 1938 1939 For more information, see [XmlDocument]. 1940 +/ 1941 unittest { 1942 auto xml = new XmlDocument(`<my-stuff>hello</my-stuff>`); 1943 } 1944 1945 bool canNestElementsInHtml(string parentTagName, string childTagName) { 1946 switch(parentTagName) { 1947 case "p", "h1", "h2", "h3", "h4", "h5", "h6": 1948 // only should include "phrasing content" 1949 switch(childTagName) { 1950 case "p", "dl", "dt", "dd", "h1", "h2", "h3", "h4", "h5", "h6": 1951 return false; 1952 default: return true; 1953 } 1954 case "dt", "dd": 1955 switch(childTagName) { 1956 case "dd", "dt": 1957 return false; 1958 default: return true; 1959 } 1960 default: 1961 return true; 1962 } 1963 } 1964 1965 interface DomParent { 1966 inout(Document) asDocument() inout; 1967 inout(Element) asElement() inout; 1968 } 1969 1970 /++ 1971 This represents almost everything in the DOM and offers a lot of inspection and manipulation functions. Element, or its subclasses, are what makes the dom tree. 1972 +/ 1973 /// Group: core_functionality 1974 class Element : DomParent { 1975 inout(Document) asDocument() inout { return null; } 1976 inout(Element) asElement() inout { return this; } 1977 1978 /// Returns a collection of elements by selector. 1979 /// See: [Document.opIndex] 1980 ElementCollection opIndex(string selector) { 1981 auto e = ElementCollection(this); 1982 return e[selector]; 1983 } 1984 1985 /++ 1986 Returns the child node with the particular index. 1987 1988 Be aware that child nodes include text nodes, including 1989 whitespace-only nodes. 1990 +/ 1991 Element opIndex(size_t index) { 1992 if(index >= children.length) 1993 return null; 1994 return this.children[index]; 1995 } 1996 1997 /// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done. 1998 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 1999 if( 2000 is(SomeElementType : Element) 2001 ) 2002 out(ret) { 2003 assert(ret !is null); 2004 } 2005 do { 2006 auto e = cast(SomeElementType) getElementById(id); 2007 if(e is null) 2008 throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, this, file, line); 2009 return e; 2010 } 2011 2012 /// ditto but with selectors instead of ids 2013 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 2014 if( 2015 is(SomeElementType : Element) 2016 ) 2017 out(ret) { 2018 assert(ret !is null); 2019 } 2020 do { 2021 auto e = cast(SomeElementType) querySelector(selector); 2022 if(e is null) 2023 throw new ElementNotFoundException(SomeElementType.stringof, selector, this, file, line); 2024 return e; 2025 } 2026 2027 2028 /++ 2029 If a matching selector is found, it returns that Element. Otherwise, the returned object returns null for all methods. 2030 +/ 2031 final MaybeNullElement!SomeElementType optionSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 2032 if(is(SomeElementType : Element)) 2033 { 2034 auto e = cast(SomeElementType) querySelector(selector); 2035 return MaybeNullElement!SomeElementType(e); 2036 } 2037 2038 2039 2040 /// get all the classes on this element 2041 @property string[] classes() const { 2042 // FIXME: remove blank names 2043 auto cs = split(className, " "); 2044 foreach(ref c; cs) 2045 c = c.strip(); 2046 return cs; 2047 } 2048 2049 /++ 2050 The object [classList] returns. 2051 +/ 2052 static struct ClassListHelper { 2053 Element this_; 2054 this(inout(Element) this_) inout { 2055 this.this_ = this_; 2056 } 2057 2058 /// 2059 bool contains(string cn) const { 2060 return this_.hasClass(cn); 2061 } 2062 2063 /// 2064 void add(string cn) { 2065 this_.addClass(cn); 2066 } 2067 2068 /// 2069 void remove(string cn) { 2070 this_.removeClass(cn); 2071 } 2072 2073 /// 2074 void toggle(string cn) { 2075 if(contains(cn)) 2076 remove(cn); 2077 else 2078 add(cn); 2079 } 2080 2081 // this thing supposed to be iterable in javascript but idk how i want to do it in D. meh 2082 /+ 2083 string[] opIndex() const { 2084 return this_.classes; 2085 } 2086 +/ 2087 } 2088 2089 /++ 2090 Returns a helper object to work with classes, just like javascript. 2091 2092 History: 2093 Added August 25, 2022 2094 +/ 2095 @property inout(ClassListHelper) classList() inout { 2096 return inout(ClassListHelper)(this); 2097 } 2098 // FIXME: classList is supposed to whitespace and duplicates when you use it. need to test. 2099 2100 unittest { 2101 Element element = Element.make("div"); 2102 element.classList.add("foo"); 2103 assert(element.classList.contains("foo")); 2104 element.classList.remove("foo"); 2105 assert(!element.classList.contains("foo")); 2106 element.classList.toggle("bar"); 2107 assert(element.classList.contains("bar")); 2108 } 2109 2110 /// ditto 2111 alias classNames = classes; 2112 2113 2114 /// Adds a string to the class attribute. The class attribute is used a lot in CSS. 2115 @scriptable 2116 Element addClass(string c) { 2117 if(hasClass(c)) 2118 return this; // don't add it twice 2119 2120 string cn = getAttribute("class"); 2121 if(cn.length == 0) { 2122 setAttribute("class", c); 2123 return this; 2124 } else { 2125 setAttribute("class", cn ~ " " ~ c); 2126 } 2127 2128 return this; 2129 } 2130 2131 /// Removes a particular class name. 2132 @scriptable 2133 Element removeClass(string c) { 2134 if(!hasClass(c)) 2135 return this; 2136 string n; 2137 foreach(name; classes) { 2138 if(c == name) 2139 continue; // cut it out 2140 if(n.length) 2141 n ~= " "; 2142 n ~= name; 2143 } 2144 2145 className = n.strip(); 2146 2147 return this; 2148 } 2149 2150 /// Returns whether the given class appears in this element. 2151 bool hasClass(string c) const { 2152 string cn = className; 2153 2154 auto idx = cn.indexOf(c); 2155 if(idx == -1) 2156 return false; 2157 2158 foreach(cla; cn.split(" ")) 2159 if(cla.strip == c) 2160 return true; 2161 return false; 2162 2163 /* 2164 int rightSide = idx + c.length; 2165 2166 bool checkRight() { 2167 if(rightSide == cn.length) 2168 return true; // it's the only class 2169 else if(iswhite(cn[rightSide])) 2170 return true; 2171 return false; // this is a substring of something else.. 2172 } 2173 2174 if(idx == 0) { 2175 return checkRight(); 2176 } else { 2177 if(!iswhite(cn[idx - 1])) 2178 return false; // substring 2179 return checkRight(); 2180 } 2181 2182 assert(0); 2183 */ 2184 } 2185 2186 2187 /* ******************************* 2188 DOM Mutation 2189 *********************************/ 2190 /++ 2191 Family of convenience functions to quickly add a tag with some text or 2192 other relevant info (for example, it's a src for an <img> element 2193 instead of inner text). They forward to [Element.make] then calls [appendChild]. 2194 2195 --- 2196 div.addChild("span", "hello there"); 2197 div.addChild("div", Html("<p>children of the div</p>")); 2198 --- 2199 +/ 2200 Element addChild(string tagName, string childInfo = null, string childInfo2 = null) 2201 in { 2202 assert(tagName !is null); 2203 } 2204 out(e) { 2205 //assert(e.parentNode is this); 2206 //assert(e.parentDocument is this.parentDocument); 2207 } 2208 do { 2209 auto e = Element.make(tagName, childInfo, childInfo2); 2210 // FIXME (maybe): if the thing is self closed, we might want to go ahead and 2211 // return the parent. That will break existing code though. 2212 return appendChild(e); 2213 } 2214 2215 /// ditto 2216 Element addChild(Element e) { 2217 return this.appendChild(e); 2218 } 2219 2220 /// ditto 2221 Element addChild(string tagName, Element firstChild, string info2 = null) 2222 in { 2223 assert(firstChild !is null); 2224 } 2225 out(ret) { 2226 assert(ret !is null); 2227 assert(ret.parentNode is this); 2228 assert(firstChild.parentNode is ret); 2229 2230 assert(ret.parentDocument is this.parentDocument); 2231 //assert(firstChild.parentDocument is this.parentDocument); 2232 } 2233 do { 2234 auto e = Element.make(tagName, "", info2); 2235 e.appendChild(firstChild); 2236 this.appendChild(e); 2237 return e; 2238 } 2239 2240 /// ditto 2241 Element addChild(string tagName, in Html innerHtml, string info2 = null) 2242 in { 2243 } 2244 out(ret) { 2245 assert(ret !is null); 2246 assert((cast(DocumentFragment) this !is null) || (ret.parentNode is this), ret.toString);// e.parentNode ? e.parentNode.toString : "null"); 2247 assert(ret.parentDocument is this.parentDocument); 2248 } 2249 do { 2250 auto e = Element.make(tagName, "", info2); 2251 this.appendChild(e); 2252 e.innerHTML = innerHtml.source; 2253 return e; 2254 } 2255 2256 2257 /// Another convenience function. Adds a child directly after the current one, returning 2258 /// the new child. 2259 /// 2260 /// Between this, addChild, and parentNode, you can build a tree as a single expression. 2261 /// See_Also: [addChild] 2262 Element addSibling(string tagName, string childInfo = null, string childInfo2 = null) 2263 in { 2264 assert(tagName !is null); 2265 assert(parentNode !is null); 2266 } 2267 out(e) { 2268 assert(e.parentNode is this.parentNode); 2269 assert(e.parentDocument is this.parentDocument); 2270 } 2271 do { 2272 auto e = Element.make(tagName, childInfo, childInfo2); 2273 return parentNode.insertAfter(this, e); 2274 } 2275 2276 /// ditto 2277 Element addSibling(Element e) { 2278 return parentNode.insertAfter(this, e); 2279 } 2280 2281 /// Convenience function to append text intermixed with other children. 2282 /// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), "."); 2283 /// or div.addChildren("Hello, ", user.name, "!"); 2284 /// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping. 2285 void addChildren(T...)(T t) { 2286 foreach(item; t) { 2287 static if(is(item : Element)) 2288 appendChild(item); 2289 else static if (is(isSomeString!(item))) 2290 appendText(to!string(item)); 2291 else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren"); 2292 } 2293 } 2294 2295 /// Appends the list of children to this element. 2296 void appendChildren(Element[] children) { 2297 foreach(ele; children) 2298 appendChild(ele); 2299 } 2300 2301 /// Removes this element form its current parent and appends it to the given `newParent`. 2302 void reparent(Element newParent) 2303 in { 2304 assert(newParent !is null); 2305 assert(parentNode !is null); 2306 } 2307 out { 2308 assert(this.parentNode is newParent); 2309 //assert(isInArray(this, newParent.children)); 2310 } 2311 do { 2312 parentNode.removeChild(this); 2313 newParent.appendChild(this); 2314 } 2315 2316 /** 2317 Strips this tag out of the document, putting its inner html 2318 as children of the parent. 2319 2320 For example, given: `<p>hello <b>there</b></p>`, if you 2321 call `stripOut` on the `b` element, you'll be left with 2322 `<p>hello there<p>`. 2323 2324 The idea here is to make it easy to get rid of garbage 2325 markup you aren't interested in. 2326 */ 2327 void stripOut() 2328 in { 2329 assert(parentNode !is null); 2330 } 2331 out { 2332 assert(parentNode is null); 2333 assert(children.length == 0); 2334 } 2335 do { 2336 foreach(c; children) 2337 c.parentNode = null; // remove the parent 2338 if(children.length) 2339 parentNode.replaceChild(this, this.children); 2340 else 2341 parentNode.removeChild(this); 2342 this.children.length = 0; // we reparented them all above 2343 } 2344 2345 /// shorthand for `this.parentNode.removeChild(this)` with `parentNode` `null` check 2346 /// if the element already isn't in a tree, it does nothing. 2347 Element removeFromTree() 2348 in { 2349 2350 } 2351 out(var) { 2352 assert(this.parentNode is null); 2353 assert(var is this); 2354 } 2355 do { 2356 if(this.parentNode is null) 2357 return this; 2358 2359 this.parentNode.removeChild(this); 2360 2361 return this; 2362 } 2363 2364 /++ 2365 Wraps this element inside the given element. 2366 It's like `this.replaceWith(what); what.appendchild(this);` 2367 2368 Given: `<b>cool</b>`, if you call `b.wrapIn(new Link("site.com", "my site is "));` 2369 you'll end up with: `<a href="site.com">my site is <b>cool</b></a>`. 2370 +/ 2371 Element wrapIn(Element what) 2372 in { 2373 assert(what !is null); 2374 } 2375 out(ret) { 2376 assert(this.parentNode is what); 2377 assert(ret is what); 2378 } 2379 do { 2380 this.replaceWith(what); 2381 what.appendChild(this); 2382 2383 return what; 2384 } 2385 2386 /// Replaces this element with something else in the tree. 2387 Element replaceWith(Element e) 2388 in { 2389 assert(this.parentNode !is null); 2390 } 2391 do { 2392 e.removeFromTree(); 2393 this.parentNode.replaceChild(this, e); 2394 return e; 2395 } 2396 2397 /** 2398 Fetches the first consecutive text nodes concatenated together. 2399 2400 2401 `firstInnerText` of `<example>some text<span>more text</span></example>` is `some text`. It stops at the first child tag encountered. 2402 2403 See_also: [directText], [innerText] 2404 */ 2405 string firstInnerText() const { 2406 string s; 2407 foreach(child; children) { 2408 if(child.nodeType != NodeType.Text) 2409 break; 2410 2411 s ~= child.nodeValue(); 2412 } 2413 return s; 2414 } 2415 2416 2417 /** 2418 Returns the text directly under this element. 2419 2420 2421 Unlike [innerText], it does not recurse, and unlike [firstInnerText], it continues 2422 past child tags. So, `<example>some <b>bold</b> text</example>` 2423 will return `some text` because it only gets the text, skipping non-text children. 2424 2425 See_also: [firstInnerText], [innerText] 2426 */ 2427 @property string directText() { 2428 string ret; 2429 foreach(e; children) { 2430 if(e.nodeType == NodeType.Text) 2431 ret ~= e.nodeValue(); 2432 } 2433 2434 return ret; 2435 } 2436 2437 /** 2438 Sets the direct text, without modifying other child nodes. 2439 2440 2441 Unlike [innerText], this does *not* remove existing elements in the element. 2442 2443 It only replaces the first text node it sees. 2444 2445 If there are no text nodes, it calls [appendText]. 2446 2447 So, given `<div><img />text here</div>`, it will keep the `<img />`, and replace the `text here`. 2448 */ 2449 @property void directText(string text) { 2450 foreach(e; children) { 2451 if(e.nodeType == NodeType.Text) { 2452 auto it = cast(TextNode) e; 2453 it.contents = text; 2454 return; 2455 } 2456 } 2457 2458 appendText(text); 2459 } 2460 2461 // do nothing, this is primarily a virtual hook 2462 // for links and forms 2463 void setValue(string field, string value) { } 2464 void setValue(string field, string[] value) { } 2465 2466 2467 // this is a thing so i can remove observer support if it gets slow 2468 // I have not implemented all these yet 2469 private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) { 2470 if(parentDocument is null) return; 2471 DomMutationEvent me; 2472 me.operation = operation; 2473 me.target = this; 2474 me.relatedString = s1; 2475 me.relatedString2 = s2; 2476 me.related = r; 2477 me.related2 = r2; 2478 parentDocument.dispatchMutationEvent(me); 2479 } 2480 2481 // putting all the members up front 2482 2483 // this ought to be private. don't use it directly. 2484 Element[] children; 2485 2486 /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object. 2487 string tagName; 2488 2489 /++ 2490 This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead. 2491 2492 History: 2493 `AttributesHolder` replaced `string[string]` on August 22, 2024 2494 +/ 2495 AttributesHolder attributes; 2496 2497 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 2498 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 2499 private bool selfClosed; 2500 2501 private DomParent parent_; 2502 2503 /// Get the parent Document object that contains this element. 2504 /// It may be null, so remember to check for that. 2505 @property inout(Document) parentDocument() inout { 2506 if(this.parent_ is null) 2507 return null; 2508 auto p = cast() this.parent_.asElement; 2509 auto prev = cast() this; 2510 while(p) { 2511 prev = p; 2512 if(p.parent_ is null) 2513 return null; 2514 p = cast() p.parent_.asElement; 2515 } 2516 return cast(inout) prev.parent_.asDocument; 2517 } 2518 2519 /*deprecated*/ @property void parentDocument(Document doc) { 2520 parent_ = doc; 2521 } 2522 2523 /// Returns the parent node in the tree this element is attached to. 2524 inout(Element) parentNode() inout { 2525 if(parent_ is null) 2526 return null; 2527 2528 auto p = parent_.asElement; 2529 2530 if(cast(DocumentFragment) p) { 2531 if(p.parent_ is null) 2532 return null; 2533 else 2534 return p.parent_.asElement; 2535 } 2536 2537 return p; 2538 } 2539 2540 //protected 2541 Element parentNode(Element e) { 2542 parent_ = e; 2543 return e; 2544 } 2545 2546 // these are here for event handlers. Don't forget that this library never fires events. 2547 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 2548 2549 version(dom_with_events) { 2550 EventHandler[][string] bubblingEventHandlers; 2551 EventHandler[][string] capturingEventHandlers; 2552 EventHandler[string] defaultEventHandlers; 2553 2554 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 2555 if(event.length > 2 && event[0..2] == "on") 2556 event = event[2 .. $]; 2557 2558 if(useCapture) 2559 capturingEventHandlers[event] ~= handler; 2560 else 2561 bubblingEventHandlers[event] ~= handler; 2562 } 2563 } 2564 2565 2566 // and now methods 2567 2568 /++ 2569 Convenience function to try to do the right thing for HTML. This is the main way I create elements. 2570 2571 History: 2572 On February 8, 2021, the `selfClosedElements` parameter was added. Previously, it used a private 2573 immutable global list for HTML. It still defaults to the same list, but you can change it now via 2574 the parameter. 2575 See_Also: 2576 [addChild], [addSibling] 2577 +/ 2578 static Element make(string tagName, string childInfo = null, string childInfo2 = null, const string[] selfClosedElements = htmlSelfClosedElements) { 2579 bool selfClosed = tagName.isInArray(selfClosedElements); 2580 2581 Element e; 2582 // want to create the right kind of object for the given tag... 2583 switch(tagName) { 2584 case "#text": 2585 e = new TextNode(null, childInfo); 2586 return e; 2587 // break; 2588 case "table": 2589 e = new Table(null); 2590 break; 2591 case "a": 2592 e = new Link(null); 2593 break; 2594 case "form": 2595 e = new Form(null); 2596 break; 2597 case "tr": 2598 e = new TableRow(null); 2599 break; 2600 case "td", "th": 2601 e = new TableCell(null, tagName); 2602 break; 2603 default: 2604 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 2605 } 2606 2607 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 2608 e.tagName = tagName; 2609 e.selfClosed = selfClosed; 2610 2611 if(childInfo !is null) 2612 switch(tagName) { 2613 /* html5 convenience tags */ 2614 case "audio": 2615 if(childInfo.length) 2616 e.addChild("source", childInfo); 2617 if(childInfo2 !is null) 2618 e.appendText(childInfo2); 2619 break; 2620 case "source": 2621 e.src = childInfo; 2622 if(childInfo2 !is null) 2623 e.type = childInfo2; 2624 break; 2625 /* regular html 4 stuff */ 2626 case "img": 2627 e.src = childInfo; 2628 if(childInfo2 !is null) 2629 e.alt = childInfo2; 2630 break; 2631 case "link": 2632 e.href = childInfo; 2633 if(childInfo2 !is null) 2634 e.rel = childInfo2; 2635 break; 2636 case "option": 2637 e.innerText = childInfo; 2638 if(childInfo2 !is null) 2639 e.value = childInfo2; 2640 break; 2641 case "input": 2642 e.type = "hidden"; 2643 e.name = childInfo; 2644 if(childInfo2 !is null) 2645 e.value = childInfo2; 2646 break; 2647 case "button": 2648 e.innerText = childInfo; 2649 if(childInfo2 !is null) 2650 e.type = childInfo2; 2651 break; 2652 case "a": 2653 e.innerText = childInfo; 2654 if(childInfo2 !is null) 2655 e.href = childInfo2; 2656 break; 2657 case "script": 2658 case "style": 2659 e.innerRawSource = childInfo; 2660 break; 2661 case "meta": 2662 e.name = childInfo; 2663 if(childInfo2 !is null) 2664 e.content = childInfo2; 2665 break; 2666 /* generically, assume we were passed text and perhaps class */ 2667 default: 2668 e.innerText = childInfo; 2669 if(childInfo2.length) 2670 e.className = childInfo2; 2671 } 2672 2673 return e; 2674 } 2675 2676 /// ditto 2677 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 2678 // FIXME: childInfo2 is ignored when info1 is null 2679 auto m = Element.make(tagName, "not null"[0..0], childInfo2); 2680 m.innerHTML = innerHtml.source; 2681 return m; 2682 } 2683 2684 /// ditto 2685 static Element make(string tagName, Element child, string childInfo2 = null) { 2686 auto m = Element.make(tagName, cast(string) null, childInfo2); 2687 m.appendChild(child); 2688 return m; 2689 } 2690 2691 2692 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 2693 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 2694 tagName = _tagName; 2695 foreach(k, v; _attributes) 2696 attributes[k] = v; 2697 selfClosed = _selfClosed; 2698 2699 version(dom_node_indexes) 2700 this.dataset.nodeIndex = to!string(&(this.attributes)); 2701 2702 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 2703 } 2704 2705 /++ 2706 Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 2707 Note also that without a parent document, elements are always in strict, case-sensitive mode. 2708 2709 History: 2710 On February 8, 2021, the `selfClosedElements` parameter was added. It defaults to the same behavior as 2711 before: using the hard-coded list of HTML elements, but it can now be overridden. If you use 2712 [Document.createElement], it will use the list set for the current document. Otherwise, you can pass 2713 something here if you like. 2714 +/ 2715 this(string _tagName, string[string] _attributes = null, const string[] selfClosedElements = htmlSelfClosedElements) { 2716 tagName = _tagName; 2717 foreach(k, v; _attributes) 2718 attributes[k] = v; 2719 selfClosed = tagName.isInArray(selfClosedElements); 2720 2721 // this is meant to reserve some memory. It makes a small, but consistent improvement. 2722 //children.length = 8; 2723 //children.length = 0; 2724 2725 version(dom_node_indexes) 2726 this.dataset.nodeIndex = to!string(&(this.attributes)); 2727 } 2728 2729 private this(Document _parentDocument) { 2730 version(dom_node_indexes) 2731 this.dataset.nodeIndex = to!string(&(this.attributes)); 2732 } 2733 2734 2735 /* ******************************* 2736 Navigating the DOM 2737 *********************************/ 2738 2739 /// Returns the first child of this element. If it has no children, returns null. 2740 /// Remember, text nodes are children too. 2741 @property Element firstChild() { 2742 return children.length ? children[0] : null; 2743 } 2744 2745 /// Returns the last child of the element, or null if it has no children. Remember, text nodes are children too. 2746 @property Element lastChild() { 2747 return children.length ? children[$ - 1] : null; 2748 } 2749 2750 // FIXME UNTESTED 2751 /// the next or previous element you would encounter if you were reading it in the source. May be a text node or other special non-tag object if you enabled them. 2752 Element nextInSource() { 2753 auto n = firstChild; 2754 if(n is null) 2755 n = nextSibling(); 2756 if(n is null) { 2757 auto p = this.parentNode; 2758 while(p !is null && n is null) { 2759 n = p.nextSibling; 2760 } 2761 } 2762 2763 return n; 2764 } 2765 2766 /// ditto 2767 Element previousInSource() { 2768 auto p = previousSibling; 2769 if(p is null) { 2770 auto par = parentNode; 2771 if(par) 2772 p = par.lastChild; 2773 if(p is null) 2774 p = par; 2775 } 2776 return p; 2777 } 2778 2779 /++ 2780 Returns the next or previous sibling that is not a text node. Please note: the behavior with comments is subject to change. Currently, it will return a comment or other nodes if it is in the tree (if you enabled it with [Document.enableAddingSpecialTagsToDom] or [Document.parseSawComment]) and not if you didn't, but the implementation will probably change at some point to skip them regardless. 2781 2782 Equivalent to [previousSibling]/[nextSibling]("*"). 2783 2784 Please note it may return `null`. 2785 +/ 2786 @property Element previousElementSibling() { 2787 return previousSibling("*"); 2788 } 2789 2790 /// ditto 2791 @property Element nextElementSibling() { 2792 return nextSibling("*"); 2793 } 2794 2795 /++ 2796 Returns the next or previous sibling matching the `tagName` filter. The default filter of `null` will return the first sibling it sees, even if it is a comment or text node, or anything else. A filter of `"*"` will match any tag with a name. Otherwise, the string must match the [tagName] of the sibling you want to find. 2797 +/ 2798 @property Element previousSibling(string tagName = null) { 2799 if(this.parentNode is null) 2800 return null; 2801 Element ps = null; 2802 foreach(e; this.parentNode.childNodes) { 2803 if(e is this) 2804 break; 2805 if(tagName == "*" && e.nodeType != NodeType.Text) { 2806 ps = e; 2807 } else if(tagName is null || e.tagName == tagName) 2808 ps = e; 2809 } 2810 2811 return ps; 2812 } 2813 2814 /// ditto 2815 @property Element nextSibling(string tagName = null) { 2816 if(this.parentNode is null) 2817 return null; 2818 Element ns = null; 2819 bool mightBe = false; 2820 foreach(e; this.parentNode.childNodes) { 2821 if(e is this) { 2822 mightBe = true; 2823 continue; 2824 } 2825 if(mightBe) { 2826 if(tagName == "*" && e.nodeType != NodeType.Text) { 2827 ns = e; 2828 break; 2829 } 2830 if(tagName is null || e.tagName == tagName) { 2831 ns = e; 2832 break; 2833 } 2834 } 2835 } 2836 2837 return ns; 2838 } 2839 2840 2841 /++ 2842 Gets the nearest node, going up the chain, with the given tagName 2843 May return null or throw. The type `T` will specify a subclass like 2844 [Form], [Table], or [Link], which it will cast for you when found. 2845 +/ 2846 T getParent(T = Element)(string tagName = null) if(is(T : Element)) { 2847 if(tagName is null) { 2848 static if(is(T == Form)) 2849 tagName = "form"; 2850 else static if(is(T == Table)) 2851 tagName = "table"; 2852 else static if(is(T == Link)) 2853 tagName == "a"; 2854 } 2855 2856 auto par = this.parentNode; 2857 while(par !is null) { 2858 if(tagName is null || par.tagName == tagName) 2859 break; 2860 par = par.parentNode; 2861 } 2862 2863 static if(!is(T == Element)) { 2864 auto t = cast(T) par; 2865 if(t is null) 2866 throw new ElementNotFoundException("", tagName ~ " parent not found", this); 2867 } else 2868 auto t = par; 2869 2870 return t; 2871 } 2872 2873 /++ 2874 Searches this element and the tree of elements under it for one matching the given `id` attribute. 2875 +/ 2876 Element getElementById(string id) { 2877 // FIXME: I use this function a lot, and it's kinda slow 2878 // not terribly slow, but not great. 2879 foreach(e; tree) 2880 if(e.id == id) 2881 return e; 2882 return null; 2883 } 2884 2885 /++ 2886 Returns a child element that matches the given `selector`. 2887 2888 Note: you can give multiple selectors, separated by commas. 2889 It will return the first match it finds. 2890 2891 Tip: to use namespaces, escape the colon in the name: 2892 2893 --- 2894 element.querySelector(`ns\:tag`); // the backticks are raw strings then the backslash is interpreted by querySelector 2895 --- 2896 +/ 2897 @scriptable 2898 Element querySelector(string selector) { 2899 Selector s = Selector(selector); 2900 2901 foreach(ref comp; s.components) 2902 if(comp.parts.length && comp.parts[0].separation > 0) { 2903 // this is illegal in standard dom, but i use it a lot 2904 // gonna insert a :scope thing 2905 2906 SelectorPart part; 2907 part.separation = -1; 2908 part.scopeElement = true; 2909 comp.parts = part ~ comp.parts; 2910 } 2911 2912 foreach(ele; tree) 2913 if(s.matchesElement(ele, this)) 2914 return ele; 2915 return null; 2916 } 2917 2918 /// If the element matches the given selector. Previously known as `matchesSelector`. 2919 @scriptable 2920 bool matches(string selector) { 2921 /+ 2922 bool caseSensitiveTags = true; 2923 if(parentDocument && parentDocument.loose) 2924 caseSensitiveTags = false; 2925 +/ 2926 2927 Selector s = Selector(selector); 2928 return s.matchesElement(this); 2929 } 2930 2931 /// Returns itself or the closest parent that matches the given selector, or null if none found 2932 /// See_also: https://developer.mozilla.org/en-US/docs/Web/API/Element/closest 2933 @scriptable 2934 Element closest(string selector) { 2935 Element e = this; 2936 while(e !is null) { 2937 if(e.matches(selector)) 2938 return e; 2939 e = e.parentNode; 2940 } 2941 return null; 2942 } 2943 2944 /** 2945 Returns elements that match the given CSS selector 2946 2947 * -- all, default if nothing else is there 2948 2949 tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector 2950 2951 It is all additive 2952 2953 OP 2954 2955 space = descendant 2956 > = direct descendant 2957 + = sibling (E+F Matches any F element immediately preceded by a sibling element E) 2958 2959 [foo] Foo is present as an attribute 2960 [foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning". 2961 E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" 2962 E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". 2963 2964 [item$=sdas] ends with 2965 [item^-sdsad] begins with 2966 2967 Quotes are optional here. 2968 2969 Pseudos: 2970 :first-child 2971 :last-child 2972 :link (same as a[href] for our purposes here) 2973 2974 2975 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 2976 2977 2978 2979 This ONLY cares about elements. text, etc, are ignored 2980 2981 2982 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 2983 2984 The name `getElementsBySelector` was the original name, written back before the name `querySelector` was standardized (this library is older than you might think!), but they do the same thing.. 2985 */ 2986 @scriptable 2987 Element[] querySelectorAll(string selector) { 2988 // FIXME: this function could probably use some performance attention 2989 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 2990 2991 2992 bool caseSensitiveTags = true; 2993 if(parentDocument && parentDocument.loose) 2994 caseSensitiveTags = false; 2995 2996 Element[] ret; 2997 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 2998 ret ~= sel.getElements(this, null); 2999 return ret; 3000 } 3001 3002 /// ditto 3003 alias getElementsBySelector = querySelectorAll; 3004 3005 /++ 3006 Returns child elements that have the given class name or tag name. 3007 3008 Please note the standard specifies this should return a live node list. This means, in Javascript for example, if you loop over the value returned by getElementsByTagName and getElementsByClassName and remove the elements, the length of the list will decrease. When I implemented this, I figured that was more trouble than it was worth and returned a plain array instead. By the time I had the infrastructure to make it simple, I didn't want to do the breaking change. 3009 3010 So these is incompatible with Javascript in the face of live dom mutation and will likely remain so. 3011 +/ 3012 Element[] getElementsByClassName(string cn) { 3013 // is this correct? 3014 return getElementsBySelector("." ~ cn); 3015 } 3016 3017 /// ditto 3018 Element[] getElementsByTagName(string tag) { 3019 if(parentDocument && parentDocument.loose) 3020 tag = tag.toLower(); 3021 Element[] ret; 3022 foreach(e; tree) 3023 if(e.tagName == tag || tag == "*") 3024 ret ~= e; 3025 return ret; 3026 } 3027 3028 3029 /* ******************************* 3030 Attributes 3031 *********************************/ 3032 3033 /** 3034 Gets the given attribute value, or null if the 3035 attribute is not set. 3036 3037 Note that the returned string is decoded, so it no longer contains any xml entities. 3038 */ 3039 @scriptable 3040 string getAttribute(string name) const { 3041 if(parentDocument && parentDocument.loose) 3042 name = name.toLower(); 3043 return attributes.get(name, null); 3044 } 3045 3046 /** 3047 Sets an attribute. Returns this for easy chaining 3048 */ 3049 @scriptable 3050 Element setAttribute(string name, string value) { 3051 if(parentDocument && parentDocument.loose) 3052 name = name.toLower(); 3053 3054 // I never use this shit legitimately and neither should you 3055 auto it = name.toLower(); 3056 if(it == "href" || it == "src") { 3057 auto v = value.strip().toLower(); 3058 if(v.startsWith("vbscript:")) 3059 value = value[9..$]; 3060 if(v.startsWith("javascript:")) 3061 value = value[11..$]; 3062 } 3063 3064 attributes[name] = value; 3065 3066 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 3067 3068 return this; 3069 } 3070 3071 /** 3072 Returns if the attribute exists. 3073 */ 3074 @scriptable 3075 bool hasAttribute(string name) { 3076 if(parentDocument && parentDocument.loose) 3077 name = name.toLower(); 3078 3079 if(name in attributes) 3080 return true; 3081 else 3082 return false; 3083 } 3084 3085 /** 3086 Removes the given attribute from the element. 3087 */ 3088 @scriptable 3089 Element removeAttribute(string name) 3090 out(ret) { 3091 assert(ret is this); 3092 } 3093 do { 3094 if(parentDocument && parentDocument.loose) 3095 name = name.toLower(); 3096 if(name in attributes) 3097 attributes.remove(name); 3098 3099 sendObserverEvent(DomMutationOperations.removeAttribute, name); 3100 return this; 3101 } 3102 3103 /** 3104 Gets or sets the class attribute's contents. Returns 3105 an empty string if it has no class. 3106 */ 3107 @property string className() const { 3108 auto c = getAttribute("class"); 3109 if(c is null) 3110 return ""; 3111 return c; 3112 } 3113 3114 /// ditto 3115 @property Element className(string c) { 3116 setAttribute("class", c); 3117 return this; 3118 } 3119 3120 /** 3121 Provides easy access to common HTML attributes, object style. 3122 3123 --- 3124 auto element = Element.make("a"); 3125 a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html"); 3126 string where = a.href; // same as a.getAttribute("href"); 3127 --- 3128 3129 */ 3130 @property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) { 3131 if(v !is null) 3132 setAttribute(name, v); 3133 return getAttribute(name); 3134 } 3135 3136 /** 3137 Old access to attributes. Use [attrs] instead. 3138 3139 DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions. 3140 so I want to remove it. A small whitelist of attributes is still allowed, but others are not. 3141 3142 Instead, use element.attrs.attribute, element.attrs["attribute"], 3143 or element.getAttribute("attribute")/element.setAttribute("attribute"). 3144 */ 3145 @property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) { 3146 static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName"); 3147 } 3148 3149 /* 3150 // this would be nice for convenience, but it broke the getter above. 3151 @property void opDispatch(string name)(bool boolean) if(name != "popFront") { 3152 if(boolean) 3153 setAttribute(name, name); 3154 else 3155 removeAttribute(name); 3156 } 3157 */ 3158 3159 /** 3160 Returns the element's children. 3161 */ 3162 @property inout(Element[]) childNodes() inout { 3163 return children; 3164 } 3165 3166 /++ 3167 HTML5's dataset property. It is an alternate view into attributes with the data- prefix. 3168 Given `<a data-my-property="cool" />`, we get `assert(a.dataset.myProperty == "cool");` 3169 +/ 3170 @property DataSet dataset() { 3171 return DataSet(this); 3172 } 3173 3174 /++ 3175 Gives dot/opIndex access to attributes 3176 --- 3177 ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo") 3178 --- 3179 +/ 3180 @property AttributeSet attrs() { 3181 return AttributeSet(this); 3182 } 3183 3184 /++ 3185 Provides both string and object style (like in Javascript) access to the style attribute. 3186 3187 --- 3188 element.style.color = "red"; // translates into setting `color: red;` in the `style` attribute 3189 --- 3190 +/ 3191 @property ElementStyle style() { 3192 return ElementStyle(this); 3193 } 3194 3195 /++ 3196 This sets the style attribute with a string. 3197 +/ 3198 @property ElementStyle style(string s) { 3199 this.setAttribute("style", s); 3200 return this.style; 3201 } 3202 3203 private void parseAttributes(string[] whichOnes = null) { 3204 /+ 3205 if(whichOnes is null) 3206 whichOnes = attributes.keys; 3207 foreach(attr; whichOnes) { 3208 switch(attr) { 3209 case "id": 3210 3211 break; 3212 case "class": 3213 3214 break; 3215 case "style": 3216 3217 break; 3218 default: 3219 // we don't care about it 3220 } 3221 } 3222 +/ 3223 } 3224 3225 3226 // if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there. 3227 3228 // the next few methods are for implementing interactive kind of things 3229 private CssStyle _computedStyle; 3230 3231 /// Don't use this. It can try to parse out the style element but it isn't complete and if I get back to it, it won't be for a while. 3232 @property CssStyle computedStyle() { 3233 if(_computedStyle is null) { 3234 auto style = this.getAttribute("style"); 3235 /* we'll treat shitty old html attributes as css here */ 3236 if(this.hasAttribute("width")) 3237 style ~= "; width: " ~ this.attrs.width; 3238 if(this.hasAttribute("height")) 3239 style ~= "; height: " ~ this.attrs.height; 3240 if(this.hasAttribute("bgcolor")) 3241 style ~= "; background-color: " ~ this.attrs.bgcolor; 3242 if(this.tagName == "body" && this.hasAttribute("text")) 3243 style ~= "; color: " ~ this.attrs.text; 3244 if(this.hasAttribute("color")) 3245 style ~= "; color: " ~ this.attrs.color; 3246 /* done */ 3247 3248 3249 _computedStyle = computedStyleFactory(this); 3250 } 3251 return _computedStyle; 3252 } 3253 3254 /// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good 3255 version(browser) { 3256 void* expansionHook; ///ditto 3257 int offsetWidth; ///ditto 3258 int offsetHeight; ///ditto 3259 int offsetLeft; ///ditto 3260 int offsetTop; ///ditto 3261 Element offsetParent; ///ditto 3262 bool hasLayout; ///ditto 3263 int zIndex; ///ditto 3264 3265 ///ditto 3266 int absoluteLeft() { 3267 int a = offsetLeft; 3268 auto p = offsetParent; 3269 while(p) { 3270 a += p.offsetLeft; 3271 p = p.offsetParent; 3272 } 3273 3274 return a; 3275 } 3276 3277 ///ditto 3278 int absoluteTop() { 3279 int a = offsetTop; 3280 auto p = offsetParent; 3281 while(p) { 3282 a += p.offsetTop; 3283 p = p.offsetParent; 3284 } 3285 3286 return a; 3287 } 3288 } 3289 3290 // Back to the regular dom functions 3291 3292 public: 3293 3294 3295 /* ******************************* 3296 DOM Mutation 3297 *********************************/ 3298 3299 /// Removes all inner content from the tag; all child text and elements are gone. 3300 void removeAllChildren() 3301 out { 3302 assert(this.children.length == 0); 3303 } 3304 do { 3305 foreach(child; children) 3306 child.parentNode = null; 3307 children = null; 3308 } 3309 3310 /++ 3311 Adds a sibling element before or after this one in the dom. 3312 3313 History: added June 13, 2020 3314 +/ 3315 Element appendSibling(Element e) { 3316 parentNode.insertAfter(this, e); 3317 return e; 3318 } 3319 3320 /// ditto 3321 Element prependSibling(Element e) { 3322 parentNode.insertBefore(this, e); 3323 return e; 3324 } 3325 3326 3327 /++ 3328 Appends the given element to this one. If it already has a parent, it is removed from that tree and moved to this one. 3329 3330 See_also: https://developer.mozilla.org/en-US/docs/Web/API/Node/appendChild 3331 3332 History: 3333 Prior to 1 Jan 2020 (git tag v4.4.1 and below), it required that the given element must not have a parent already. This was in violation of standard, so it changed the behavior to remove it from the existing parent and instead move it here. 3334 +/ 3335 Element appendChild(Element e) 3336 in { 3337 assert(e !is null); 3338 assert(e !is this); 3339 } 3340 out (ret) { 3341 assert((cast(DocumentFragment) this !is null) || (e.parentNode is this), e.toString);// e.parentNode ? e.parentNode.toString : "null"); 3342 assert(e.parentDocument is this.parentDocument); 3343 assert(e is ret); 3344 } 3345 do { 3346 if(e.parentNode !is null) 3347 e.parentNode.removeChild(e); 3348 3349 selfClosed = false; 3350 if(auto frag = cast(DocumentFragment) e) 3351 children ~= frag.children; 3352 else 3353 children ~= e; 3354 3355 e.parentNode = this; 3356 3357 /+ 3358 foreach(item; e.tree) 3359 item.parentDocument = this.parentDocument; 3360 +/ 3361 3362 sendObserverEvent(DomMutationOperations.appendChild, null, null, e); 3363 3364 return e; 3365 } 3366 3367 /// Inserts the second element to this node, right before the first param 3368 Element insertBefore(in Element where, Element what) 3369 in { 3370 assert(where !is null); 3371 assert(where.parentNode is this); 3372 assert(what !is null); 3373 assert(what.parentNode is null); 3374 } 3375 out (ret) { 3376 assert(where.parentNode is this); 3377 assert(what.parentNode is this); 3378 3379 assert(what.parentDocument is this.parentDocument); 3380 assert(ret is what); 3381 } 3382 do { 3383 foreach(i, e; children) { 3384 if(e is where) { 3385 if(auto frag = cast(DocumentFragment) what) { 3386 children = children[0..i] ~ frag.children ~ children[i..$]; 3387 foreach(child; frag.children) 3388 child.parentNode = this; 3389 } else { 3390 children = children[0..i] ~ what ~ children[i..$]; 3391 } 3392 what.parentNode = this; 3393 return what; 3394 } 3395 } 3396 3397 return what; 3398 3399 assert(0); 3400 } 3401 3402 /++ 3403 Inserts the given element `what` as a sibling of the `this` element, after the element `where` in the parent node. 3404 +/ 3405 Element insertAfter(in Element where, Element what) 3406 in { 3407 assert(where !is null); 3408 assert(where.parentNode is this); 3409 assert(what !is null); 3410 assert(what.parentNode is null); 3411 } 3412 out (ret) { 3413 assert(where.parentNode is this); 3414 assert(what.parentNode is this); 3415 assert(what.parentDocument is this.parentDocument); 3416 assert(ret is what); 3417 } 3418 do { 3419 foreach(i, e; children) { 3420 if(e is where) { 3421 if(auto frag = cast(DocumentFragment) what) { 3422 children = children[0 .. i + 1] ~ what.children ~ children[i + 1 .. $]; 3423 foreach(child; frag.children) 3424 child.parentNode = this; 3425 } else 3426 children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $]; 3427 what.parentNode = this; 3428 return what; 3429 } 3430 } 3431 3432 return what; 3433 3434 assert(0); 3435 } 3436 3437 /// swaps one child for a new thing. Returns the old child which is now parentless. 3438 Element swapNode(Element child, Element replacement) 3439 in { 3440 assert(child !is null); 3441 assert(replacement !is null); 3442 assert(child.parentNode is this); 3443 } 3444 out(ret) { 3445 assert(ret is child); 3446 assert(ret.parentNode is null); 3447 assert(replacement.parentNode is this); 3448 assert(replacement.parentDocument is this.parentDocument); 3449 } 3450 do { 3451 foreach(ref c; this.children) 3452 if(c is child) { 3453 c.parentNode = null; 3454 c = replacement; 3455 c.parentNode = this; 3456 return child; 3457 } 3458 assert(0); 3459 } 3460 3461 3462 /++ 3463 Appends the given to the node. 3464 3465 3466 Calling `e.appendText(" hi")` on `<example>text <b>bold</b></example>` 3467 yields `<example>text <b>bold</b> hi</example>`. 3468 3469 See_Also: 3470 [firstInnerText], [directText], [innerText], [appendChild] 3471 +/ 3472 @scriptable 3473 Element appendText(string text) { 3474 Element e = new TextNode(parentDocument, text); 3475 appendChild(e); 3476 return this; 3477 } 3478 3479 /++ 3480 Returns child elements which are of a tag type (excludes text, comments, etc.). 3481 3482 3483 childElements of `<example>text <b>bold</b></example>` is just the `<b>` tag. 3484 3485 Params: 3486 tagName = filter results to only the child elements with the given tag name. 3487 +/ 3488 @property Element[] childElements(string tagName = null) { 3489 Element[] ret; 3490 foreach(c; children) 3491 if(c.nodeType == 1 && (tagName is null || c.tagName == tagName)) 3492 ret ~= c; 3493 return ret; 3494 } 3495 3496 /++ 3497 Appends the given html to the element, returning the elements appended 3498 3499 3500 This is similar to `element.innerHTML += "html string";` in Javascript. 3501 +/ 3502 @scriptable 3503 Element[] appendHtml(string html) { 3504 Document d = new Document("<root>" ~ html ~ "</root>"); 3505 return stealChildren(d.root); 3506 } 3507 3508 /++ 3509 Returns `this` for use inside `with` expressions. 3510 3511 History: 3512 Added December 20, 2024 3513 +/ 3514 inout(Element) self() inout pure @nogc nothrow @safe scope return { 3515 return this; 3516 } 3517 3518 /++ 3519 Inserts a child under this element after the element `where`. 3520 +/ 3521 void insertChildAfter(Element child, Element where) 3522 in { 3523 assert(child !is null); 3524 assert(where !is null); 3525 assert(where.parentNode is this); 3526 assert(!selfClosed); 3527 //assert(isInArray(where, children)); 3528 } 3529 out { 3530 assert(child.parentNode is this); 3531 assert(where.parentNode is this); 3532 //assert(isInArray(where, children)); 3533 //assert(isInArray(child, children)); 3534 } 3535 do { 3536 foreach(ref i, c; children) { 3537 if(c is where) { 3538 i++; 3539 if(auto frag = cast(DocumentFragment) child) { 3540 children = children[0..i] ~ child.children ~ children[i..$]; 3541 //foreach(child; frag.children) 3542 //child.parentNode = this; 3543 } else 3544 children = children[0..i] ~ child ~ children[i..$]; 3545 child.parentNode = this; 3546 break; 3547 } 3548 } 3549 } 3550 3551 /++ 3552 Reparents all the child elements of `e` to `this`, leaving `e` childless. 3553 3554 Params: 3555 e = the element whose children you want to steal 3556 position = an existing child element in `this` before which you want the stolen children to be inserted. If `null`, it will append the stolen children at the end of our current children. 3557 +/ 3558 Element[] stealChildren(Element e, Element position = null) 3559 in { 3560 assert(!selfClosed); 3561 assert(e !is null); 3562 //if(position !is null) 3563 //assert(isInArray(position, children)); 3564 } 3565 out (ret) { 3566 assert(e.children.length == 0); 3567 // all the parentNode is this checks fail because DocumentFragments do not appear in the parent tree, they are invisible... 3568 version(none) 3569 debug foreach(child; ret) { 3570 assert(child.parentNode is this); 3571 assert(child.parentDocument is this.parentDocument); 3572 } 3573 } 3574 do { 3575 foreach(c; e.children) { 3576 c.parentNode = this; 3577 } 3578 if(position is null) 3579 children ~= e.children; 3580 else { 3581 foreach(i, child; children) { 3582 if(child is position) { 3583 children = children[0..i] ~ 3584 e.children ~ 3585 children[i..$]; 3586 break; 3587 } 3588 } 3589 } 3590 3591 auto ret = e.children[]; 3592 e.children.length = 0; 3593 3594 return ret; 3595 } 3596 3597 /// Puts the current element first in our children list. The given element must not have a parent already. 3598 Element prependChild(Element e) 3599 in { 3600 assert(e.parentNode is null); 3601 assert(!selfClosed); 3602 } 3603 out { 3604 assert(e.parentNode is this); 3605 assert(e.parentDocument is this.parentDocument); 3606 assert(children[0] is e); 3607 } 3608 do { 3609 if(auto frag = cast(DocumentFragment) e) { 3610 children = e.children ~ children; 3611 foreach(child; frag.children) 3612 child.parentNode = this; 3613 } else 3614 children = e ~ children; 3615 e.parentNode = this; 3616 return e; 3617 } 3618 3619 3620 /** 3621 Returns a string containing all child elements, formatted such that it could be pasted into 3622 an XML file. 3623 */ 3624 @property string innerHTML(Appender!string where = appender!string()) const { 3625 if(children is null) 3626 return ""; 3627 3628 auto start = where.data.length; 3629 3630 foreach(child; children) { 3631 assert(child !is null); 3632 3633 child.writeToAppender(where); 3634 } 3635 3636 return where.data[start .. $]; 3637 } 3638 3639 /** 3640 Takes some html and replaces the element's children with the tree made from the string. 3641 */ 3642 @property Element innerHTML(string html, bool strict = false) { 3643 if(html.length) 3644 selfClosed = false; 3645 3646 if(html.length == 0) { 3647 // I often say innerHTML = ""; as a shortcut to clear it out, 3648 // so let's optimize that slightly. 3649 removeAllChildren(); 3650 return this; 3651 } 3652 3653 auto doc = new Document(); 3654 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document 3655 3656 children = doc.root.children; 3657 foreach(c; children) { 3658 c.parentNode = this; 3659 } 3660 3661 doc.root.children = null; 3662 3663 return this; 3664 } 3665 3666 /// ditto 3667 @property Element innerHTML(Html html) { 3668 return this.innerHTML = html.source; 3669 } 3670 3671 /** 3672 Replaces this node with the given html string, which is parsed 3673 3674 Note: this invalidates the this reference, since it is removed 3675 from the tree. 3676 3677 Returns the new children that replace this. 3678 */ 3679 @property Element[] outerHTML(string html) { 3680 auto doc = new Document(); 3681 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness 3682 3683 children = doc.root.children; 3684 foreach(c; children) { 3685 c.parentNode = this; 3686 } 3687 3688 stripOut(); 3689 3690 return doc.root.children; 3691 } 3692 3693 /++ 3694 Returns all the html for this element, including the tag itself. 3695 3696 This is equivalent to calling toString(). 3697 +/ 3698 @property string outerHTML() { 3699 return this.toString(); 3700 } 3701 3702 /// This sets the inner content of the element *without* trying to parse it. 3703 /// You can inject any code in there; this serves as an escape hatch from the dom. 3704 /// 3705 /// The only times you might actually need it are for < style > and < script > tags in html. 3706 /// Other than that, innerHTML and/or innerText should do the job. 3707 @property void innerRawSource(string rawSource) { 3708 children.length = 0; 3709 auto rs = new RawSource(parentDocument, rawSource); 3710 children ~= rs; 3711 rs.parentNode = this; 3712 } 3713 3714 /++ 3715 Replaces the element `find`, which must be a child of `this`, with the element `replace`, which must have no parent. 3716 +/ 3717 Element replaceChild(Element find, Element replace) 3718 in { 3719 assert(find !is null); 3720 assert(find.parentNode is this); 3721 assert(replace !is null); 3722 assert(replace.parentNode is null); 3723 } 3724 out(ret) { 3725 assert(ret is replace); 3726 assert(replace.parentNode is this); 3727 assert(replace.parentDocument is this.parentDocument); 3728 assert(find.parentNode is null); 3729 } 3730 do { 3731 // FIXME 3732 //if(auto frag = cast(DocumentFragment) replace) 3733 //return this.replaceChild(frag, replace.children); 3734 for(int i = 0; i < children.length; i++) { 3735 if(children[i] is find) { 3736 replace.parentNode = this; 3737 children[i].parentNode = null; 3738 children[i] = replace; 3739 return replace; 3740 } 3741 } 3742 3743 throw new Exception("no such child ");// ~ find.toString ~ " among " ~ typeid(this).toString);//.toString ~ " magic \n\n\n" ~ find.parentNode.toString); 3744 } 3745 3746 /** 3747 Replaces the given element with a whole group. 3748 */ 3749 void replaceChild(Element find, Element[] replace) 3750 in { 3751 assert(find !is null); 3752 assert(replace !is null); 3753 assert(find.parentNode is this); 3754 debug foreach(r; replace) 3755 assert(r.parentNode is null); 3756 } 3757 out { 3758 assert(find.parentNode is null); 3759 assert(children.length >= replace.length); 3760 debug foreach(child; children) 3761 assert(child !is find); 3762 debug foreach(r; replace) 3763 assert(r.parentNode is this); 3764 } 3765 do { 3766 if(replace.length == 0) { 3767 removeChild(find); 3768 return; 3769 } 3770 assert(replace.length); 3771 for(int i = 0; i < children.length; i++) { 3772 if(children[i] is find) { 3773 children[i].parentNode = null; // this element should now be dead 3774 children[i] = replace[0]; 3775 foreach(e; replace) { 3776 e.parentNode = this; 3777 } 3778 3779 children = .insertAfter(children, i, replace[1..$]); 3780 3781 return; 3782 } 3783 } 3784 3785 throw new Exception("no such child"); 3786 } 3787 3788 3789 /** 3790 Removes the given child from this list. 3791 3792 Returns the removed element. 3793 */ 3794 Element removeChild(Element c) 3795 in { 3796 assert(c !is null); 3797 assert(c.parentNode is this); 3798 } 3799 out { 3800 debug foreach(child; children) 3801 assert(child !is c); 3802 assert(c.parentNode is null); 3803 } 3804 do { 3805 foreach(i, e; children) { 3806 if(e is c) { 3807 children = children[0..i] ~ children [i+1..$]; 3808 c.parentNode = null; 3809 return c; 3810 } 3811 } 3812 3813 throw new Exception("no such child"); 3814 } 3815 3816 /// This removes all the children from this element, returning the old list. 3817 Element[] removeChildren() 3818 out (ret) { 3819 assert(children.length == 0); 3820 debug foreach(r; ret) 3821 assert(r.parentNode is null); 3822 } 3823 do { 3824 Element[] oldChildren = children.dup; 3825 foreach(c; oldChildren) 3826 c.parentNode = null; 3827 3828 children.length = 0; 3829 3830 return oldChildren; 3831 } 3832 3833 /** 3834 Fetch the inside text, with all tags stripped out. 3835 3836 <p>cool <b>api</b> & code dude<p> 3837 innerText of that is "cool api & code dude". 3838 3839 This does not match what real innerText does! 3840 http://perfectionkills.com/the-poor-misunderstood-innerText/ 3841 3842 It is more like [textContent]. 3843 3844 See_Also: 3845 [visibleText], which is closer to what the real `innerText` 3846 does. 3847 */ 3848 @scriptable 3849 @property string innerText() const { 3850 string s; 3851 foreach(child; children) { 3852 if(child.nodeType != NodeType.Text) 3853 s ~= child.innerText; 3854 else 3855 s ~= child.nodeValue(); 3856 } 3857 return s; 3858 } 3859 3860 /// ditto 3861 alias textContent = innerText; 3862 3863 /++ 3864 Gets the element's visible text, similar to how it would look assuming 3865 the document was HTML being displayed by a browser. This means it will 3866 attempt whitespace normalization (unless it is a `<pre>` tag), add `\n` 3867 characters for `<br>` tags, and I reserve the right to make it process 3868 additional css and tags in the future. 3869 3870 If you need specific output, use the more stable [textContent] property 3871 or iterate yourself with [tree] or a recursive function with [children]. 3872 3873 History: 3874 Added March 25, 2022 (dub v10.8) 3875 +/ 3876 string visibleText() const { 3877 return this.visibleTextHelper(this.tagName == "pre"); 3878 } 3879 3880 private string visibleTextHelper(bool pre) const { 3881 string result; 3882 foreach(thing; this.children) { 3883 if(thing.nodeType == NodeType.Text) 3884 result ~= pre ? thing.nodeValue : normalizeWhitespace(thing.nodeValue); 3885 else if(thing.tagName == "br") 3886 result ~= "\n"; 3887 else 3888 result ~= thing.visibleTextHelper(pre || thing.tagName == "pre"); 3889 } 3890 return result; 3891 } 3892 3893 /** 3894 Sets the inside text, replacing all children. You don't 3895 have to worry about entity encoding. 3896 */ 3897 @scriptable 3898 @property void innerText(string text) { 3899 selfClosed = false; 3900 Element e = new TextNode(parentDocument, text); 3901 children = [e]; 3902 e.parentNode = this; 3903 } 3904 3905 /** 3906 Strips this node out of the document, replacing it with the given text 3907 */ 3908 @property void outerText(string text) { 3909 parentNode.replaceChild(this, new TextNode(parentDocument, text)); 3910 } 3911 3912 /** 3913 Same result as innerText; the tag with all inner tags stripped out 3914 */ 3915 @property string outerText() const { 3916 return innerText; 3917 } 3918 3919 3920 /* ******************************* 3921 Miscellaneous 3922 *********************************/ 3923 3924 /// This is a full clone of the element. Alias for cloneNode(true) now. Don't extend it. 3925 @property Element cloned() 3926 /+ 3927 out(ret) { 3928 // FIXME: not sure why these fail... 3929 assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length)); 3930 assert(ret.tagName == this.tagName); 3931 } 3932 do { 3933 +/ 3934 { 3935 return this.cloneNode(true); 3936 } 3937 3938 /// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents. 3939 Element cloneNode(bool deepClone) { 3940 auto e = Element.make(this.tagName); 3941 e.attributes = this.attributes.aadup; 3942 e.selfClosed = this.selfClosed; 3943 3944 if(deepClone) { 3945 foreach(child; children) { 3946 e.appendChild(child.cloneNode(true)); 3947 } 3948 } 3949 3950 3951 return e; 3952 } 3953 3954 /// W3C DOM interface. Only really meaningful on [TextNode] instances, but the interface is present on the base class. 3955 string nodeValue() const { 3956 return ""; 3957 } 3958 3959 // should return int 3960 ///. 3961 @property int nodeType() const { 3962 return 1; 3963 } 3964 3965 3966 invariant () { 3967 debug assert(tagName.indexOf(" ") == -1); 3968 3969 // commented cuz it gets into recursive pain and eff dat. 3970 /+ 3971 if(children !is null) 3972 foreach(child; children) { 3973 // assert(parentNode !is null); 3974 assert(child !is null); 3975 assert(child.parent_.asElement is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parent_.asElement is null ? "null" : child.parent_.asElement.tagName)); 3976 assert(child !is this); 3977 //assert(child !is parentNode); 3978 } 3979 +/ 3980 3981 /+ 3982 // this isn't helping 3983 if(parent_ && parent_.asElement) { 3984 bool found = false; 3985 foreach(child; parent_.asElement.children) 3986 if(child is this) 3987 found = true; 3988 assert(found, format("%s lists %s as parent, but it is not in children", typeid(this), typeid(this.parent_.asElement))); 3989 } 3990 +/ 3991 3992 /+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out 3993 if(parentNode !is null) { 3994 // if you have a parent, you should share the same parentDocument; this is appendChild()'s job 3995 auto lol = cast(TextNode) this; 3996 assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents); 3997 } 3998 +/ 3999 //assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required 4000 // reason is so you can create these without needing a reference to the document 4001 } 4002 4003 /** 4004 Turns the whole element, including tag, attributes, and children, into a string which could be pasted into 4005 an XML file. 4006 */ 4007 override string toString() const { 4008 return writeToAppender(); 4009 } 4010 4011 /++ 4012 Returns if the node would be printed to string as `<tag />` or `<tag></tag>`. In other words, if it has no non-empty text nodes and no element nodes. Please note that whitespace text nodes are NOT considered empty; `Html("<tag> </tag>").isEmpty == false`. 4013 4014 4015 The value is undefined if there are comment or processing instruction nodes. The current implementation returns false if it sees those, assuming the nodes haven't been stripped out during parsing. But I'm not married to the current implementation and reserve the right to change it without notice. 4016 4017 History: 4018 Added December 3, 2021 (dub v10.5) 4019 4020 +/ 4021 public bool isEmpty() const { 4022 foreach(child; this.children) { 4023 // any non-text node is of course not empty since that's a tag 4024 if(child.nodeType != NodeType.Text) 4025 return false; 4026 // or a text node is empty if it is is a null or empty string, so this length check fixes that 4027 if(child.nodeValue.length) 4028 return false; 4029 } 4030 4031 return true; 4032 } 4033 4034 protected string toPrettyStringIndent(bool insertComments, int indentationLevel, string indentWith) const { 4035 if(indentWith is null) 4036 return null; 4037 4038 // at the top we don't have anything to really do 4039 //if(parent_ is null) 4040 //return null; 4041 4042 // I've used isEmpty before but this other check seems better.... 4043 //|| this.isEmpty()) 4044 4045 string s; 4046 4047 if(insertComments) s ~= "<!--"; 4048 s ~= "\n"; 4049 foreach(indent; 0 .. indentationLevel) 4050 s ~= indentWith; 4051 if(insertComments) s ~= "-->"; 4052 4053 return s; 4054 } 4055 4056 /++ 4057 Writes out with formatting. Be warned: formatting changes the contents. Use ONLY 4058 for eyeball debugging. 4059 4060 $(PITFALL 4061 This function is not stable. Its interface and output may change without 4062 notice. The only promise I make is that it will continue to make a best- 4063 effort attempt at being useful for debugging by human eyes. 4064 4065 I have used it in the past for diffing html documents, but even then, it 4066 might change between versions. If it is useful, great, but beware; this 4067 use is at your own risk. 4068 ) 4069 4070 History: 4071 On November 19, 2021, I changed this to `final`. If you were overriding it, 4072 change our override to `toPrettyStringImpl` instead. It now just calls 4073 `toPrettyStringImpl.strip` to be an entry point for a stand-alone call. 4074 4075 If you are calling it as part of another implementation, you might want to 4076 change that call to `toPrettyStringImpl` as well. 4077 4078 I am NOT considering this a breaking change since this function is documented 4079 to only be used for eyeball debugging anyway, which means the exact format is 4080 not specified and the override behavior can generally not be relied upon. 4081 4082 (And I find it extremely unlikely anyone was subclassing anyway, but if you were, 4083 email me, and we'll see what we can do. I'd like to know at least.) 4084 4085 I reserve the right to make future changes in the future without considering 4086 them breaking as well. 4087 +/ 4088 final string toPrettyString(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 4089 return toPrettyStringImpl(insertComments, indentationLevel, indentWith).strip; 4090 } 4091 4092 string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 4093 4094 // first step is to concatenate any consecutive text nodes to simplify 4095 // the white space analysis. this changes the tree! but i'm allowed since 4096 // the comment always says it changes the comments 4097 // 4098 // actually i'm not allowed cuz it is const so i will cheat and lie 4099 /+ 4100 TextNode lastTextChild = null; 4101 for(int a = 0; a < this.children.length; a++) { 4102 auto child = this.children[a]; 4103 if(auto tn = cast(TextNode) child) { 4104 if(lastTextChild) { 4105 lastTextChild.contents ~= tn.contents; 4106 for(int b = a; b < this.children.length - 1; b++) 4107 this.children[b] = this.children[b + 1]; 4108 this.children = this.children[0 .. $-1]; 4109 } else { 4110 lastTextChild = tn; 4111 } 4112 } else { 4113 lastTextChild = null; 4114 } 4115 } 4116 +/ 4117 4118 auto inlineElements = (parentDocument is null ? null : parentDocument.inlineElements); 4119 4120 const(Element)[] children; 4121 4122 TextNode lastTextChild = null; 4123 for(int a = 0; a < this.children.length; a++) { 4124 auto child = this.children[a]; 4125 if(auto tn = cast(const(TextNode)) child) { 4126 if(lastTextChild !is null) { 4127 lastTextChild.contents ~= tn.contents; 4128 } else { 4129 lastTextChild = new TextNode(""); 4130 lastTextChild.parentNode = cast(Element) this; 4131 lastTextChild.contents ~= tn.contents; 4132 children ~= lastTextChild; 4133 } 4134 } else { 4135 lastTextChild = null; 4136 children ~= child; 4137 } 4138 } 4139 4140 string s = toPrettyStringIndent(insertComments, indentationLevel, indentWith); 4141 4142 s ~= "<"; 4143 s ~= tagName; 4144 4145 // i sort these for consistent output. might be more legible 4146 // but especially it keeps it the same for diff purposes. 4147 import std.algorithm : sort; 4148 auto keys = sort(attributes.keys); 4149 foreach(n; keys) { 4150 auto v = attributes[n]; 4151 s ~= " "; 4152 s ~= n; 4153 s ~= "=\""; 4154 s ~= htmlEntitiesEncode(v); 4155 s ~= "\""; 4156 } 4157 4158 if(selfClosed){ 4159 s ~= " />"; 4160 return s; 4161 } 4162 4163 s ~= ">"; 4164 4165 // for simple `<collection><item>text</item><item>text</item></collection>`, let's 4166 // just keep them on the same line 4167 4168 if(isEmpty) { 4169 // no work needed, this is empty so don't indent just for a blank line 4170 } else if(children.length == 1 && children[0].isEmpty) { 4171 // just one empty one, can put it inline too 4172 s ~= children[0].toString(); 4173 } else if(tagName.isInArray(inlineElements) || allAreInlineHtml(children, inlineElements)) { 4174 foreach(child; children) { 4175 s ~= child.toString();//toPrettyString(false, 0, null); 4176 } 4177 } else { 4178 foreach(child; children) { 4179 assert(child !is null); 4180 4181 s ~= child.toPrettyStringImpl(insertComments, indentationLevel + 1, indentWith); 4182 } 4183 4184 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 4185 } 4186 4187 s ~= "</"; 4188 s ~= tagName; 4189 s ~= ">"; 4190 4191 return s; 4192 } 4193 4194 /+ 4195 /// Writes out the opening tag only, if applicable. 4196 string writeTagOnly(Appender!string where = appender!string()) const { 4197 +/ 4198 4199 /++ 4200 This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 4201 Note: the ordering of attributes in the string is undefined. 4202 Returns the string it creates. 4203 4204 Implementation_Notes: 4205 The order of attributes printed by this function is undefined, as permitted by the XML spec. You should NOT rely on any implementation detail noted here. 4206 4207 However, in practice, between June 14, 2019 and August 22, 2024, it actually did sort attributes by key name. After August 22, 2024, it changed to track attribute append order and will print them back out in the order in which the keys were first seen. 4208 4209 This is subject to change again at any time. Use [toPrettyString] if you want a defined output (toPrettyString always sorts by name for consistent diffing). 4210 +/ 4211 string writeToAppender(Appender!string where = appender!string()) const { 4212 assert(tagName !is null); 4213 4214 where.reserve((this.children.length + 1) * 512); 4215 4216 auto start = where.data.length; 4217 4218 where.put("<"); 4219 where.put(tagName); 4220 4221 /+ 4222 import std.algorithm : sort; 4223 auto keys = sort(attributes.keys); 4224 foreach(n; keys) { 4225 auto v = attributes[n]; // I am sorting these for convenience with another project. order of AAs is undefined, so I'm allowed to do it.... and it is still undefined, I might change it back later. 4226 +/ 4227 foreach(n, v; attributes) { 4228 //assert(v !is null); 4229 where.put(" "); 4230 where.put(n); 4231 where.put("=\""); 4232 htmlEntitiesEncode(v, where); 4233 where.put("\""); 4234 } 4235 4236 if(selfClosed){ 4237 where.put(" />"); 4238 return where.data[start .. $]; 4239 } 4240 4241 where.put('>'); 4242 4243 innerHTML(where); 4244 4245 where.put("</"); 4246 where.put(tagName); 4247 where.put('>'); 4248 4249 return where.data[start .. $]; 4250 } 4251 4252 /** 4253 Returns a lazy range of all its children, recursively. 4254 */ 4255 @property ElementStream tree() { 4256 return new ElementStream(this); 4257 } 4258 4259 // I moved these from Form because they are generally useful. 4260 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 4261 // FIXME: add overloads for other label types... 4262 /++ 4263 Adds a form field to this element, normally a `<input>` but `type` can also be `"textarea"`. 4264 4265 This is fairly html specific and the label uses my style. I recommend you view the source before you use it to better understand what it does. 4266 +/ 4267 /// Tags: HTML, HTML5 4268 Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 4269 auto fs = this; 4270 auto i = fs.addChild("label"); 4271 4272 if(!(type == "checkbox" || type == "radio")) 4273 i.addChild("span", label); 4274 4275 Element input; 4276 if(type == "textarea") 4277 input = i.addChild("textarea"). 4278 setAttribute("name", name). 4279 setAttribute("rows", "6"); 4280 else 4281 input = i.addChild("input"). 4282 setAttribute("name", name). 4283 setAttribute("type", type); 4284 4285 if(type == "checkbox" || type == "radio") 4286 i.addChild("span", label); 4287 4288 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 4289 fieldOptions.applyToElement(input); 4290 return i; 4291 } 4292 4293 /// ditto 4294 Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 4295 auto fs = this; 4296 auto i = fs.addChild("label"); 4297 i.addChild(label); 4298 Element input; 4299 if(type == "textarea") 4300 input = i.addChild("textarea"). 4301 setAttribute("name", name). 4302 setAttribute("rows", "6"); 4303 else 4304 input = i.addChild("input"). 4305 setAttribute("name", name). 4306 setAttribute("type", type); 4307 4308 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 4309 fieldOptions.applyToElement(input); 4310 return i; 4311 } 4312 4313 /// ditto 4314 Element addField(string label, string name, FormFieldOptions fieldOptions) { 4315 return addField(label, name, "text", fieldOptions); 4316 } 4317 4318 /// ditto 4319 Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 4320 auto fs = this; 4321 auto i = fs.addChild("label"); 4322 i.addChild("span", label); 4323 auto sel = i.addChild("select").setAttribute("name", name); 4324 4325 foreach(k, opt; options) 4326 sel.addChild("option", opt, k); 4327 4328 // FIXME: implement requirements somehow 4329 4330 return i; 4331 } 4332 4333 /// ditto 4334 Element addSubmitButton(string label = null) { 4335 auto t = this; 4336 auto holder = t.addChild("div"); 4337 holder.addClass("submit-holder"); 4338 auto i = holder.addChild("input"); 4339 i.type = "submit"; 4340 if(label.length) 4341 i.value = label; 4342 return holder; 4343 } 4344 4345 } 4346 4347 // computedStyle could argubaly be removed to bring size down 4348 //pragma(msg, __traits(classInstanceSize, Element)); 4349 //pragma(msg, Element.tupleof); 4350 4351 // FIXME: since Document loosens the input requirements, it should probably be the sub class... 4352 /++ 4353 Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header) 4354 4355 History: 4356 On December 16, 2022, it disabled the special case treatment of `<script>` and `<style>` that [Document] 4357 does for HTML. To get the old behavior back, add `, true` to your constructor call. 4358 +/ 4359 /// Group: core_functionality 4360 class XmlDocument : Document { 4361 /++ 4362 Constructs a stricter-mode XML parser and parses the given data source. 4363 4364 History: 4365 The `Utf8Stream` version of the constructor was added on February 22, 2025. 4366 +/ 4367 this(string data, bool enableHtmlHacks = false) { 4368 this(new Utf8Stream(data), enableHtmlHacks); 4369 } 4370 4371 /// ditto 4372 this(Utf8Stream data, bool enableHtmlHacks = false) { 4373 selfClosedElements = null; 4374 inlineElements = null; 4375 rawSourceElements = null; 4376 contentType = "text/xml; charset=utf-8"; 4377 _prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n"; 4378 4379 parseStream(data, true, true, !enableHtmlHacks); 4380 } 4381 } 4382 4383 unittest { 4384 // FIXME: i should also make XmlDocument do different entities than just html too. 4385 auto str = "<html><style>foo {}</style><script>void function() { a < b; }</script></html>"; 4386 auto document = new Document(str, true, true); 4387 assert(document.requireSelector("style").children[0].tagName == "#raw"); 4388 assert(document.requireSelector("script").children[0].tagName == "#raw"); 4389 try { 4390 auto xml = new XmlDocument(str); 4391 assert(0); 4392 } catch(MarkupException e) { 4393 // failure expected, script special case is not valid XML without a dtd (which isn't here) 4394 } 4395 //assert(xml.requireSelector("style").children[0].tagName == "#raw"); 4396 //assert(xml.requireSelector("script").children[0].tagName == "#raw"); 4397 } 4398 4399 4400 4401 import std.string; 4402 4403 /* domconvenience follows { */ 4404 4405 /// finds comments that match the given txt. Case insensitive, strips whitespace. 4406 /// Group: core_functionality 4407 Element[] findComments(Document document, string txt) { 4408 return findComments(document.root, txt); 4409 } 4410 4411 /// ditto 4412 Element[] findComments(Element element, string txt) { 4413 txt = txt.strip().toLower(); 4414 Element[] ret; 4415 4416 foreach(comment; element.getElementsByTagName("#comment")) { 4417 string t = comment.nodeValue().strip().toLower(); 4418 if(t == txt) 4419 ret ~= comment; 4420 } 4421 4422 return ret; 4423 } 4424 4425 /// An option type that propagates null. See: [Element.optionSelector] 4426 /// Group: implementations 4427 struct MaybeNullElement(SomeElementType) { 4428 this(SomeElementType ele) { 4429 this.element = ele; 4430 } 4431 SomeElementType element; 4432 4433 /// Forwards to the element, wit a null check inserted that propagates null. 4434 auto opDispatch(string method, T...)(T args) { 4435 alias type = typeof(__traits(getMember, element, method)(args)); 4436 static if(is(type : Element)) { 4437 if(element is null) 4438 return MaybeNullElement!type(null); 4439 return __traits(getMember, element, method)(args); 4440 } else static if(is(type == string)) { 4441 if(element is null) 4442 return cast(string) null; 4443 return __traits(getMember, element, method)(args); 4444 } else static if(is(type == void)) { 4445 if(element is null) 4446 return; 4447 __traits(getMember, element, method)(args); 4448 } else { 4449 static assert(0); 4450 } 4451 } 4452 4453 /// Allows implicit casting to the wrapped element. 4454 alias element this; 4455 } 4456 4457 /++ 4458 A collection of elements which forwards methods to the children. 4459 +/ 4460 /// Group: implementations 4461 struct ElementCollection { 4462 /// 4463 this(Element e) { 4464 elements = [e]; 4465 } 4466 4467 /// 4468 this(Element e, string selector) { 4469 elements = e.querySelectorAll(selector); 4470 } 4471 4472 /// 4473 this(Element[] e) { 4474 elements = e; 4475 } 4476 4477 Element[] elements; 4478 //alias elements this; // let it implicitly convert to the underlying array 4479 4480 /// 4481 ElementCollection opIndex(string selector) { 4482 ElementCollection ec; 4483 foreach(e; elements) 4484 ec.elements ~= e.getElementsBySelector(selector); 4485 return ec; 4486 } 4487 4488 /// 4489 Element opIndex(int i) { 4490 return elements[i]; 4491 } 4492 4493 /// if you slice it, give the underlying array for easy forwarding of the 4494 /// collection to range expecting algorithms or looping over. 4495 Element[] opSlice() { 4496 return elements; 4497 } 4498 4499 /// And input range primitives so we can foreach over this 4500 void popFront() { 4501 elements = elements[1..$]; 4502 } 4503 4504 /// ditto 4505 Element front() { 4506 return elements[0]; 4507 } 4508 4509 /// ditto 4510 bool empty() { 4511 return !elements.length; 4512 } 4513 4514 /++ 4515 Collects strings from the collection, concatenating them together 4516 Kinda like running reduce and ~= on it. 4517 4518 --- 4519 document["p"].collect!"innerText"; 4520 --- 4521 +/ 4522 string collect(string method)(string separator = "") { 4523 string text; 4524 foreach(e; elements) { 4525 text ~= mixin("e." ~ method); 4526 text ~= separator; 4527 } 4528 return text; 4529 } 4530 4531 /// Forward method calls to each individual [Element|element] of the collection 4532 /// returns this so it can be chained. 4533 ElementCollection opDispatch(string name, T...)(T t) { 4534 foreach(e; elements) { 4535 mixin("e." ~ name)(t); 4536 } 4537 return this; 4538 } 4539 4540 /++ 4541 Calls [Element.wrapIn] on each member of the collection, but clones the argument `what` for each one. 4542 +/ 4543 ElementCollection wrapIn(Element what) { 4544 foreach(e; elements) { 4545 e.wrapIn(what.cloneNode(false)); 4546 } 4547 4548 return this; 4549 } 4550 4551 /// Concatenates two ElementCollection together. 4552 ElementCollection opBinary(string op : "~")(ElementCollection rhs) { 4553 return ElementCollection(this.elements ~ rhs.elements); 4554 } 4555 } 4556 4557 4558 /// this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions. 4559 /// Group: implementations 4560 mixin template JavascriptStyleDispatch() { 4561 /// 4562 string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want. 4563 if(v !is null) 4564 return set(name, v); 4565 return get(name); 4566 } 4567 4568 /// 4569 string opIndex(string key) const { 4570 return get(key); 4571 } 4572 4573 /// 4574 string opIndexAssign(string value, string field) { 4575 return set(field, value); 4576 } 4577 4578 // FIXME: doesn't seem to work 4579 string* opBinary(string op)(string key) if(op == "in") { 4580 return key in fields; 4581 } 4582 } 4583 4584 /// A proxy object to do the Element class' dataset property. See Element.dataset for more info. 4585 /// 4586 /// Do not create this object directly. 4587 /// Group: implementations 4588 struct DataSet { 4589 /// 4590 this(Element e) { 4591 this._element = e; 4592 } 4593 4594 private Element _element; 4595 /// 4596 string set(string name, string value) { 4597 _element.setAttribute("data-" ~ unCamelCase(name), value); 4598 return value; 4599 } 4600 4601 /// 4602 string get(string name) const { 4603 return _element.getAttribute("data-" ~ unCamelCase(name)); 4604 } 4605 4606 /// 4607 mixin JavascriptStyleDispatch!(); 4608 } 4609 4610 /// Proxy object for attributes which will replace the main opDispatch eventually 4611 /// Group: implementations 4612 struct AttributeSet { 4613 /// Generally, you shouldn't create this yourself, since you can use [Element.attrs] instead. 4614 this(Element e) { 4615 this._element = e; 4616 } 4617 4618 private Element _element; 4619 /++ 4620 Sets a `value` for attribute with `name`. If the attribute doesn't exist, this will create it, even if `value` is `null`. 4621 +/ 4622 string set(string name, string value) { 4623 _element.setAttribute(name, value); 4624 return value; 4625 } 4626 4627 /++ 4628 Provides support for testing presence of an attribute with the `in` operator. 4629 4630 History: 4631 Added December 16, 2020 (dub v10.10) 4632 +/ 4633 auto opBinaryRight(string op : "in")(string name) const 4634 { 4635 return name in _element.attributes; 4636 } 4637 /// 4638 unittest 4639 { 4640 auto doc = new XmlDocument(`<test attr="test"/>`); 4641 assert("attr" in doc.root.attrs); 4642 assert("test" !in doc.root.attrs); 4643 } 4644 4645 /++ 4646 Returns the value of attribute `name`, or `null` if doesn't exist 4647 +/ 4648 string get(string name) const { 4649 return _element.getAttribute(name); 4650 } 4651 4652 /// 4653 mixin JavascriptStyleDispatch!(); 4654 } 4655 4656 private struct InternalAttribute { 4657 // variable length structure 4658 private InternalAttribute* next; 4659 private uint totalLength; 4660 private ushort keyLength; 4661 private char[0] chars; 4662 4663 // this really should be immutable tbh 4664 inout(char)[] key() inout return { 4665 return chars.ptr[0 .. keyLength]; 4666 } 4667 4668 inout(char)[] value() inout return { 4669 return chars.ptr[keyLength .. totalLength]; 4670 } 4671 4672 static InternalAttribute* make(in char[] key, in char[] value) { 4673 // old code was 4674 //auto data = new ubyte[](InternalAttribute.sizeof + key.length + value.length); 4675 //GC.addRange(data.ptr, data.length); // MUST add the range to scan it! 4676 4677 import core.memory; 4678 // but this code is a bit better, notice we did NOT set the NO_SCAN attribute because of the presence of the next pointer 4679 // (this can sometimes be a pessimization over the separate strings but meh, most of these attributes are supposed to be small) 4680 auto obj = cast(InternalAttribute*) GC.calloc(InternalAttribute.sizeof + key.length + value.length); 4681 4682 // assert(key.length > 0); 4683 4684 obj.totalLength = cast(uint) (key.length + value.length); 4685 obj.keyLength = cast(ushort) key.length; 4686 if(key.length != obj.keyLength) 4687 throw new Exception("attribute key overflow"); 4688 if(key.length + value.length != obj.totalLength) 4689 throw new Exception("attribute length overflow"); 4690 4691 obj.key[] = key[]; 4692 obj.value[] = value[]; 4693 4694 return obj; 4695 } 4696 4697 // FIXME: disable default ctor and op new 4698 } 4699 4700 import core.exception; 4701 4702 struct AttributesHolder { 4703 private @system InternalAttribute* attributes; 4704 4705 /+ 4706 invariant() { 4707 const(InternalAttribute)* wtf = attributes; 4708 while(wtf) { 4709 assert(wtf != cast(void*) 1); 4710 assert(wtf.keyLength != 0); 4711 import std.stdio; writeln(wtf.key, "=", wtf.value); 4712 wtf = wtf.next; 4713 } 4714 } 4715 +/ 4716 4717 /+ 4718 It is legal to do foo["key", "default"] to call it with no error... 4719 +/ 4720 string opIndex(scope const char[] key) const { 4721 auto found = find(key); 4722 if(found is null) 4723 throw new RangeError(key.idup); // FIXME 4724 return cast(string) found.value; 4725 } 4726 4727 string get(scope const char[] key, string returnedIfKeyNotFound = null) const { 4728 auto attr = this.find(key); 4729 if(attr is null) 4730 return returnedIfKeyNotFound; 4731 else 4732 return cast(string) attr.value; 4733 } 4734 4735 private string[] keys() const { 4736 string[] ret; 4737 foreach(k, v; this) 4738 ret ~= k; 4739 return ret; 4740 } 4741 4742 /+ 4743 If this were to return a string* it'd be tricky cuz someone could try to rebind it, which is impossible. 4744 4745 This is a breaking change. You can get a similar result though with [get]. 4746 +/ 4747 bool opBinaryRight(string op : "in")(scope const char[] key) const { 4748 return find(key) !is null; 4749 } 4750 4751 private inout(InternalAttribute)* find(scope const char[] key) inout @trusted { 4752 inout(InternalAttribute)* current = attributes; 4753 while(current) { 4754 // assert(current > cast(void*) 1); 4755 if(current.key == key) 4756 return current; 4757 current = current.next; 4758 } 4759 return null; 4760 } 4761 4762 void remove(scope const char[] key) @trusted { 4763 if(attributes is null) 4764 return; 4765 auto current = attributes; 4766 InternalAttribute* previous; 4767 while(current) { 4768 if(current.key == key) 4769 break; 4770 previous = current; 4771 current = current.next; 4772 } 4773 if(current is null) 4774 return; 4775 if(previous is null) 4776 attributes = current.next; 4777 else 4778 previous.next = current.next; 4779 // assert(previous.next != cast(void*) 1); 4780 // assert(attributes != cast(void*) 1); 4781 } 4782 4783 void opIndexAssign(scope const char[] value, scope const char[] key) @trusted { 4784 if(attributes is null) { 4785 attributes = InternalAttribute.make(key, value); 4786 return; 4787 } 4788 auto current = attributes; 4789 4790 if(current.key == key) { 4791 if(current.value != value) { 4792 auto replacement = InternalAttribute.make(key, value); 4793 attributes = replacement; 4794 replacement.next = current.next; 4795 // assert(replacement.next != cast(void*) 1); 4796 // assert(attributes != cast(void*) 1); 4797 } 4798 return; 4799 } 4800 4801 while(current.next) { 4802 if(current.next.key == key) { 4803 if(current.next.value == value) 4804 return; // replacing immutable value with self, no change 4805 break; 4806 } 4807 current = current.next; 4808 } 4809 assert(current !is null); 4810 4811 auto replacement = InternalAttribute.make(key, value); 4812 if(current.next !is null) 4813 replacement.next = current.next.next; 4814 current.next = replacement; 4815 // assert(current.next != cast(void*) 1); 4816 // assert(replacement.next != cast(void*) 1); 4817 } 4818 4819 int opApply(int delegate(string key, string value) dg) const @trusted { 4820 const(InternalAttribute)* current = attributes; 4821 while(current !is null) { 4822 if(auto res = dg(cast(string) current.key, cast(string) current.value)) 4823 return res; 4824 current = current.next; 4825 } 4826 return 0; 4827 } 4828 } 4829 4830 unittest { 4831 AttributesHolder holder; 4832 holder["one"] = "1"; 4833 holder["two"] = "2"; 4834 holder["three"] = "3"; 4835 4836 { 4837 assert("one" in holder); 4838 assert("two" in holder); 4839 assert("three" in holder); 4840 assert("four" !in holder); 4841 4842 int count; 4843 foreach(k, v; holder) { 4844 switch(count) { 4845 case 0: assert(k == "one" && v == "1"); break; 4846 case 1: assert(k == "two" && v == "2"); break; 4847 case 2: assert(k == "three" && v == "3"); break; 4848 default: assert(0); 4849 } 4850 count++; 4851 } 4852 } 4853 4854 holder["two"] = "dos"; 4855 4856 { 4857 assert("one" in holder); 4858 assert("two" in holder); 4859 assert("three" in holder); 4860 assert("four" !in holder); 4861 4862 int count; 4863 foreach(k, v; holder) { 4864 switch(count) { 4865 case 0: assert(k == "one" && v == "1"); break; 4866 case 1: assert(k == "two" && v == "dos"); break; 4867 case 2: assert(k == "three" && v == "3"); break; 4868 default: assert(0); 4869 } 4870 count++; 4871 } 4872 } 4873 4874 holder["four"] = "4"; 4875 4876 { 4877 assert("one" in holder); 4878 assert("two" in holder); 4879 assert("three" in holder); 4880 assert("four" in holder); 4881 4882 int count; 4883 foreach(k, v; holder) { 4884 switch(count) { 4885 case 0: assert(k == "one" && v == "1"); break; 4886 case 1: assert(k == "two" && v == "dos"); break; 4887 case 2: assert(k == "three" && v == "3"); break; 4888 case 3: assert(k == "four" && v == "4"); break; 4889 default: assert(0); 4890 } 4891 count++; 4892 } 4893 } 4894 } 4895 4896 /// for style, i want to be able to set it with a string like a plain attribute, 4897 /// but also be able to do properties Javascript style. 4898 4899 /// Group: implementations 4900 struct ElementStyle { 4901 this(Element parent) { 4902 _element = parent; 4903 _attribute = _element.getAttribute("style"); 4904 originalAttribute = _attribute; 4905 } 4906 4907 ~this() { 4908 if(_attribute !is originalAttribute) 4909 _element.setAttribute("style", _attribute); 4910 } 4911 4912 Element _element; 4913 string _attribute; 4914 string originalAttribute; 4915 4916 /+ 4917 @property ref inout(string) _attribute() inout { 4918 auto s = "style" in _element.attributes; 4919 if(s is null) { 4920 auto e = cast() _element; // const_cast 4921 e.attributes["style"] = ""; // we need something to reference 4922 s = cast(inout) ("style" in e.attributes); 4923 } 4924 4925 assert(s !is null); 4926 return *s; 4927 } 4928 +/ 4929 4930 alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work. 4931 4932 string set(string name, string value) { 4933 if(name.length == 0) 4934 return value; 4935 if(name == "cssFloat") 4936 name = "float"; 4937 else 4938 name = unCamelCase(name); 4939 auto r = rules(); 4940 r[name] = value; 4941 4942 _attribute = ""; 4943 foreach(k, v; r) { 4944 if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */ 4945 continue; 4946 if(_attribute.length) 4947 _attribute ~= " "; 4948 _attribute ~= k ~ ": " ~ v ~ ";"; 4949 } 4950 4951 _element.setAttribute("style", _attribute); // this is to trigger the observer call 4952 4953 return value; 4954 } 4955 string get(string name) const { 4956 if(name == "cssFloat") 4957 name = "float"; 4958 else 4959 name = unCamelCase(name); 4960 auto r = rules(); 4961 if(name in r) 4962 return r[name]; 4963 return null; 4964 } 4965 4966 string[string] rules() const { 4967 string[string] ret; 4968 foreach(rule; _attribute.split(";")) { 4969 rule = rule.strip(); 4970 if(rule.length == 0) 4971 continue; 4972 auto idx = rule.indexOf(":"); 4973 if(idx == -1) 4974 ret[rule] = ""; 4975 else { 4976 auto name = rule[0 .. idx].strip(); 4977 auto value = rule[idx + 1 .. $].strip(); 4978 4979 ret[name] = value; 4980 } 4981 } 4982 4983 return ret; 4984 } 4985 4986 mixin JavascriptStyleDispatch!(); 4987 } 4988 4989 /// Converts a camel cased propertyName to a css style dashed property-name 4990 string unCamelCase(string a) { 4991 string ret; 4992 foreach(c; a) 4993 if((c >= 'A' && c <= 'Z')) 4994 ret ~= "-" ~ toLower("" ~ c)[0]; 4995 else 4996 ret ~= c; 4997 return ret; 4998 } 4999 5000 /// Translates a css style property-name to a camel cased propertyName 5001 string camelCase(string a) { 5002 string ret; 5003 bool justSawDash = false; 5004 foreach(c; a) 5005 if(c == '-') { 5006 justSawDash = true; 5007 } else { 5008 if(justSawDash) { 5009 justSawDash = false; 5010 ret ~= toUpper("" ~ c); 5011 } else 5012 ret ~= c; 5013 } 5014 return ret; 5015 } 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 // domconvenience ends } 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 // @safe: 5038 5039 // NOTE: do *NOT* override toString on Element subclasses. It won't work. 5040 // Instead, override writeToAppender(); 5041 5042 // FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too. 5043 5044 // Stripping them is useful for reading php as html.... but adding them 5045 // is good for building php. 5046 5047 // I need to maintain compatibility with the way it is now too. 5048 5049 import std.string; 5050 import std.exception; 5051 import std.array; 5052 import std.range; 5053 5054 //import std.stdio; 5055 5056 // tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh 5057 // that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's 5058 // most likely a typo so I say kill kill kill. 5059 5060 5061 /++ 5062 This might belong in another module, but it represents a file with a mime type and some data. 5063 Document implements this interface with type = text/html (see Document.contentType for more info) 5064 and data = document.toString, so you can return Documents anywhere web.d expects FileResources. 5065 +/ 5066 /// Group: bonus_functionality 5067 interface FileResource { 5068 /// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png" 5069 @property string contentType() const; 5070 /// the data 5071 immutable(ubyte)[] getData() const; 5072 /++ 5073 filename, return null if none 5074 5075 History: 5076 Added December 25, 2020 5077 +/ 5078 @property string filename() const; 5079 } 5080 5081 5082 5083 5084 ///. 5085 /// Group: bonus_functionality 5086 enum NodeType { Text = 3 } 5087 5088 5089 /// You can use this to do an easy null check or a dynamic cast+null check on any element. 5090 /// Group: core_functionality 5091 T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element)) 5092 in {} 5093 out(ret) { assert(ret !is null); } 5094 do { 5095 auto ret = cast(T) e; 5096 if(ret is null) 5097 throw new ElementNotFoundException(T.stringof, "passed value", e, file, line); 5098 return ret; 5099 } 5100 5101 5102 ///. 5103 /// Group: core_functionality 5104 class DocumentFragment : Element { 5105 ///. 5106 this(Document _parentDocument) { 5107 tagName = "#fragment"; 5108 super(_parentDocument); 5109 } 5110 5111 /++ 5112 Creates a document fragment from the given HTML. Note that the HTML is assumed to close all tags contained inside it. 5113 5114 Since: March 29, 2018 (or git tagged v2.1.0) 5115 +/ 5116 this(Html html) { 5117 this(null); 5118 5119 this.innerHTML = html.source; 5120 } 5121 5122 ///. 5123 override string writeToAppender(Appender!string where = appender!string()) const { 5124 return this.innerHTML(where); 5125 } 5126 5127 override string toPrettyStringImpl(bool insertComments, int indentationLevel, string indentWith) const { 5128 string s; 5129 foreach(child; children) 5130 s ~= child.toPrettyStringImpl(insertComments, indentationLevel, indentWith); 5131 return s; 5132 } 5133 5134 /// DocumentFragments don't really exist in a dom, so they ignore themselves in parent nodes 5135 /* 5136 override inout(Element) parentNode() inout { 5137 return children.length ? children[0].parentNode : null; 5138 } 5139 */ 5140 /+ 5141 override Element parentNode(Element p) { 5142 this.parentNode = p; 5143 foreach(child; children) 5144 child.parentNode = p; 5145 return p; 5146 } 5147 +/ 5148 } 5149 5150 /// Given text, encode all html entities on it - &, <, >, and ". This function also 5151 /// encodes all 8 bit characters as entities, thus ensuring the resultant text will work 5152 /// even if your charset isn't set right. You can suppress with by setting encodeNonAscii = false 5153 /// 5154 /// The output parameter can be given to append to an existing buffer. You don't have to 5155 /// pass one; regardless, the return value will be usable for you, with just the data encoded. 5156 /// Group: core_functionality 5157 string htmlEntitiesEncode(string data, Appender!string output = appender!string(), bool encodeNonAscii = true) { 5158 // if there's no entities, we can save a lot of time by not bothering with the 5159 // decoding loop. This check cuts the net toString time by better than half in my test. 5160 // let me know if it made your tests worse though, since if you use an entity in just about 5161 // every location, the check will add time... but I suspect the average experience is like mine 5162 // since the check gives up as soon as it can anyway. 5163 5164 bool shortcut = true; 5165 foreach(char c; data) { 5166 // non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it. 5167 if(c == '<' || c == '>' || c == '"' || c == '&' || (encodeNonAscii && cast(uint) c > 127)) { 5168 shortcut = false; // there's actual work to be done 5169 break; 5170 } 5171 } 5172 5173 if(shortcut) { 5174 output.put(data); 5175 return data; 5176 } 5177 5178 auto start = output.data.length; 5179 5180 output.reserve(data.length + 64); // grab some extra space for the encoded entities 5181 5182 foreach(dchar d; data) { 5183 if(d == '&') 5184 output.put("&"); 5185 else if (d == '<') 5186 output.put("<"); 5187 else if (d == '>') 5188 output.put(">"); 5189 else if (d == '\"') 5190 output.put("""); 5191 // else if (d == '\'') 5192 // output.put("'"); // if you are in an attribute, it might be important to encode for the same reason as double quotes 5193 // FIXME: should I encode apostrophes too? as '... I could also do space but if your html is so bad that it doesn't 5194 // quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh 5195 // idk about apostrophes though. Might be worth it, might not. 5196 else if (!encodeNonAscii || (d < 128 && d > 0)) 5197 output.put(d); 5198 else 5199 output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";"); 5200 } 5201 5202 //assert(output !is null); // this fails on empty attributes..... 5203 return output.data[start .. $]; 5204 5205 // data = data.replace("\u00a0", " "); 5206 } 5207 5208 /// An alias for htmlEntitiesEncode; it works for xml too 5209 /// Group: core_functionality 5210 string xmlEntitiesEncode(string data) { 5211 return htmlEntitiesEncode(data); 5212 } 5213 5214 /// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters. 5215 /// Group: core_functionality 5216 dchar parseEntity(in dchar[] entity) { 5217 5218 char[128] buffer; 5219 int bpos; 5220 foreach(char c; entity[1 .. $-1]) 5221 buffer[bpos++] = c; 5222 char[] entityAsString = buffer[0 .. bpos]; 5223 5224 int min = 0; 5225 int max = cast(int) availableEntities.length; 5226 5227 keep_looking: 5228 if(min + 1 < max) { 5229 int spot = (max - min) / 2 + min; 5230 if(availableEntities[spot] == entityAsString) { 5231 return availableEntitiesValues[spot]; 5232 } else if(entityAsString < availableEntities[spot]) { 5233 max = spot; 5234 goto keep_looking; 5235 } else { 5236 min = spot; 5237 goto keep_looking; 5238 } 5239 } 5240 5241 switch(entity[1..$-1]) { 5242 case "quot": 5243 return '"'; 5244 case "apos": 5245 return '\''; 5246 case "lt": 5247 return '<'; 5248 case "gt": 5249 return '>'; 5250 case "amp": 5251 return '&'; 5252 // the next are html rather than xml 5253 5254 // and handling numeric entities 5255 default: 5256 if(entity[1] == '#') { 5257 if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) { 5258 auto hex = entity[3..$-1]; 5259 5260 auto p = intFromHex(to!string(hex).toLower()); 5261 return cast(dchar) p; 5262 } else { 5263 auto decimal = entity[2..$-1]; 5264 5265 // dealing with broken html entities 5266 while(decimal.length && (decimal[0] < '0' || decimal[0] > '9')) 5267 decimal = decimal[1 .. $]; 5268 5269 while(decimal.length && (decimal[$-1] < '0' || decimal[$-1] > '9')) 5270 decimal = decimal[0 .. $ - 1]; 5271 5272 if(decimal.length == 0) 5273 return ' '; // this is really broken html 5274 // done with dealing with broken stuff 5275 5276 auto p = std.conv.to!int(decimal); 5277 return cast(dchar) p; 5278 } 5279 } else 5280 return '\ufffd'; // replacement character diamond thing 5281 } 5282 5283 assert(0); 5284 } 5285 5286 unittest { 5287 // not in the binary search 5288 assert(parseEntity("""d) == '"'); 5289 5290 // numeric value 5291 assert(parseEntity("Դ") == '\u0534'); 5292 5293 // not found at all 5294 assert(parseEntity("&asdasdasd;"d) == '\ufffd'); 5295 5296 // random values in the bin search 5297 assert(parseEntity("	"d) == '\t'); 5298 assert(parseEntity("»"d) == '\»'); 5299 5300 // near the middle and edges of the bin search 5301 assert(parseEntity("𝒶"d) == '\U0001d4b6'); 5302 assert(parseEntity("*"d) == '\u002a'); 5303 assert(parseEntity("Æ"d) == '\u00c6'); 5304 assert(parseEntity("‌"d) == '\u200c'); 5305 } 5306 5307 import std.utf; 5308 import std.stdio; 5309 5310 /// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string. 5311 /// By default, it uses loose mode - it will try to return a useful string from garbage input too. 5312 /// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input. 5313 /// Group: core_functionality 5314 string htmlEntitiesDecode(string data, bool strict = false) { 5315 // this check makes a *big* difference; about a 50% improvement of parse speed on my test. 5316 if(data.indexOf("&") == -1) // all html entities begin with & 5317 return data; // if there are no entities in here, we can return the original slice and save some time 5318 5319 char[] a; // this seems to do a *better* job than appender! 5320 5321 char[4] buffer; 5322 5323 bool tryingEntity = false; 5324 bool tryingNumericEntity = false; 5325 bool tryingHexEntity = false; 5326 dchar[16] entityBeingTried; 5327 int entityBeingTriedLength = 0; 5328 int entityAttemptIndex = 0; 5329 5330 foreach(dchar ch; data) { 5331 if(tryingEntity) { 5332 entityAttemptIndex++; 5333 entityBeingTried[entityBeingTriedLength++] = ch; 5334 5335 if(entityBeingTriedLength == 2 && ch == '#') { 5336 tryingNumericEntity = true; 5337 continue; 5338 } else if(tryingNumericEntity && entityBeingTriedLength == 3 && ch == 'x') { 5339 tryingHexEntity = true; 5340 continue; 5341 } 5342 5343 // I saw some crappy html in the wild that looked like &0ї this tries to handle that. 5344 if(ch == '&') { 5345 if(strict) 5346 throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5347 5348 // if not strict, let's try to parse both. 5349 5350 if(entityBeingTried[0 .. entityBeingTriedLength] == "&&") { 5351 a ~= "&"; // double amp means keep the first one, still try to parse the next one 5352 } else { 5353 auto ch2 = parseEntity(entityBeingTried[0 .. entityBeingTriedLength]); 5354 if(ch2 == '\ufffd') { // either someone put this in intentionally (lol) or we failed to get it 5355 // but either way, just abort and keep the plain text 5356 foreach(char c; entityBeingTried[0 .. entityBeingTriedLength - 1]) // cut off the & we're on now 5357 a ~= c; 5358 } else { 5359 a ~= buffer[0.. std.utf.encode(buffer, ch2)]; 5360 } 5361 } 5362 5363 // tryingEntity is still true 5364 goto new_entity; 5365 } else 5366 if(ch == ';') { 5367 tryingEntity = false; 5368 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))]; 5369 } else if(ch == ' ') { 5370 // e.g. you & i 5371 if(strict) 5372 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5373 else { 5374 tryingEntity = false; 5375 a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength - 1]); 5376 a ~= buffer[0 .. std.utf.encode(buffer, ch)]; 5377 } 5378 } else { 5379 if(tryingNumericEntity) { 5380 if(ch < '0' || ch > '9') { 5381 if(tryingHexEntity) { 5382 if(ch < 'A') 5383 goto trouble; 5384 if(ch > 'Z' && ch < 'a') 5385 goto trouble; 5386 if(ch > 'z') 5387 goto trouble; 5388 } else { 5389 trouble: 5390 if(strict) 5391 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5392 tryingEntity = false; 5393 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried[0 .. entityBeingTriedLength]))]; 5394 a ~= ch; 5395 continue; 5396 } 5397 } 5398 } 5399 5400 5401 if(entityAttemptIndex >= 9) { 5402 done: 5403 if(strict) 5404 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5405 else { 5406 tryingEntity = false; 5407 a ~= to!(char[])(entityBeingTried[0 .. entityBeingTriedLength]); 5408 } 5409 } 5410 } 5411 } else { 5412 if(ch == '&') { 5413 new_entity: 5414 tryingEntity = true; 5415 tryingNumericEntity = false; 5416 tryingHexEntity = false; 5417 entityBeingTriedLength = 0; 5418 entityBeingTried[entityBeingTriedLength++] = ch; 5419 entityAttemptIndex = 0; 5420 } else { 5421 a ~= buffer[0 .. std.utf.encode(buffer, ch)]; 5422 } 5423 } 5424 } 5425 5426 if(tryingEntity) { 5427 if(strict) 5428 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried[0 .. entityBeingTriedLength])); 5429 5430 // otherwise, let's try to recover, at least so we don't drop any data 5431 a ~= to!string(entityBeingTried[0 .. entityBeingTriedLength]); 5432 // FIXME: what if we have "cool &"? should we try to parse it? 5433 } 5434 5435 return cast(string) a; // assumeUnique is actually kinda slow, lol 5436 } 5437 5438 unittest { 5439 // error recovery 5440 assert(htmlEntitiesDecode("<&foo") == "<&foo"); // unterminated turned back to thing 5441 assert(htmlEntitiesDecode("<&foo") == "<&foo"); // semi-terminated... parse and carry on (is this really sane?) 5442 assert(htmlEntitiesDecode("loc=en_us&tracknum=111") == "loc=en_us&tracknum=111"); // a bit of both, seen in a real life email 5443 assert(htmlEntitiesDecode("& test") == "& test"); // unterminated, just abort 5444 5445 // in strict mode all of these should fail 5446 try { assert(htmlEntitiesDecode("<&foo", true) == "<&foo"); assert(0); } catch(Exception e) { } 5447 try { assert(htmlEntitiesDecode("<&foo", true) == "<&foo"); assert(0); } catch(Exception e) { } 5448 try { assert(htmlEntitiesDecode("loc=en_us&tracknum=111", true) == "<&foo"); assert(0); } catch(Exception e) { } 5449 try { assert(htmlEntitiesDecode("& test", true) == "& test"); assert(0); } catch(Exception e) { } 5450 5451 // correct cases that should pass the same in strict or loose mode 5452 foreach(strict; [false, true]) { 5453 assert(htmlEntitiesDecode("&hello» win", strict) == "&hello\» win"); 5454 } 5455 } 5456 5457 /// Group: implementations 5458 abstract class SpecialElement : Element { 5459 this(Document _parentDocument) { 5460 super(_parentDocument); 5461 } 5462 5463 ///. 5464 override Element appendChild(Element e) { 5465 assert(0, "Cannot append to a special node"); 5466 } 5467 5468 ///. 5469 @property override int nodeType() const { 5470 return 100; 5471 } 5472 } 5473 5474 ///. 5475 /// Group: implementations 5476 class RawSource : SpecialElement { 5477 ///. 5478 this(Document _parentDocument, string s) { 5479 super(_parentDocument); 5480 source = s; 5481 tagName = "#raw"; 5482 } 5483 5484 ///. 5485 override string nodeValue() const { 5486 return this.toString(); 5487 } 5488 5489 ///. 5490 override string writeToAppender(Appender!string where = appender!string()) const { 5491 where.put(source); 5492 return source; 5493 } 5494 5495 override string toPrettyStringImpl(bool, int, string) const { 5496 return source; 5497 } 5498 5499 5500 override RawSource cloneNode(bool deep) { 5501 return new RawSource(parentDocument, source); 5502 } 5503 5504 ///. 5505 string source; 5506 } 5507 5508 /// Group: implementations 5509 abstract class ServerSideCode : SpecialElement { 5510 this(Document _parentDocument, string type) { 5511 super(_parentDocument); 5512 tagName = "#" ~ type; 5513 } 5514 5515 ///. 5516 override string nodeValue() const { 5517 return this.source; 5518 } 5519 5520 ///. 5521 override string writeToAppender(Appender!string where = appender!string()) const { 5522 auto start = where.data.length; 5523 where.put("<"); 5524 where.put(source); 5525 where.put(">"); 5526 return where.data[start .. $]; 5527 } 5528 5529 override string toPrettyStringImpl(bool, int, string) const { 5530 return "<" ~ source ~ ">"; 5531 } 5532 5533 ///. 5534 string source; 5535 } 5536 5537 ///. 5538 /// Group: implementations 5539 class PhpCode : ServerSideCode { 5540 ///. 5541 this(Document _parentDocument, string s) { 5542 super(_parentDocument, "php"); 5543 source = s; 5544 } 5545 5546 override PhpCode cloneNode(bool deep) { 5547 return new PhpCode(parentDocument, source); 5548 } 5549 } 5550 5551 ///. 5552 /// Group: implementations 5553 class AspCode : ServerSideCode { 5554 ///. 5555 this(Document _parentDocument, string s) { 5556 super(_parentDocument, "asp"); 5557 source = s; 5558 } 5559 5560 override AspCode cloneNode(bool deep) { 5561 return new AspCode(parentDocument, source); 5562 } 5563 } 5564 5565 ///. 5566 /// Group: implementations 5567 class BangInstruction : SpecialElement { 5568 ///. 5569 this(Document _parentDocument, string s) { 5570 super(_parentDocument); 5571 source = s; 5572 tagName = "#bpi"; 5573 } 5574 5575 ///. 5576 override string nodeValue() const { 5577 return this.source; 5578 } 5579 5580 override BangInstruction cloneNode(bool deep) { 5581 return new BangInstruction(parentDocument, source); 5582 } 5583 5584 ///. 5585 override string writeToAppender(Appender!string where = appender!string()) const { 5586 auto start = where.data.length; 5587 where.put("<!"); 5588 where.put(source); 5589 where.put(">"); 5590 return where.data[start .. $]; 5591 } 5592 5593 override string toPrettyStringImpl(bool, int, string) const { 5594 string s; 5595 s ~= "<!"; 5596 s ~= source; 5597 s ~= ">"; 5598 return s; 5599 } 5600 5601 ///. 5602 string source; 5603 } 5604 5605 ///. 5606 /// Group: implementations 5607 class QuestionInstruction : SpecialElement { 5608 ///. 5609 this(Document _parentDocument, string s) { 5610 super(_parentDocument); 5611 source = s; 5612 tagName = "#qpi"; 5613 } 5614 5615 override QuestionInstruction cloneNode(bool deep) { 5616 return new QuestionInstruction(parentDocument, source); 5617 } 5618 5619 ///. 5620 override string nodeValue() const { 5621 return this.source; 5622 } 5623 5624 ///. 5625 override string writeToAppender(Appender!string where = appender!string()) const { 5626 auto start = where.data.length; 5627 where.put("<"); 5628 where.put(source); 5629 where.put(">"); 5630 return where.data[start .. $]; 5631 } 5632 5633 override string toPrettyStringImpl(bool, int, string) const { 5634 string s; 5635 s ~= "<"; 5636 s ~= source; 5637 s ~= ">"; 5638 return s; 5639 } 5640 5641 5642 ///. 5643 string source; 5644 } 5645 5646 ///. 5647 /// Group: implementations 5648 class HtmlComment : SpecialElement { 5649 ///. 5650 this(Document _parentDocument, string s) { 5651 super(_parentDocument); 5652 source = s; 5653 tagName = "#comment"; 5654 } 5655 5656 override HtmlComment cloneNode(bool deep) { 5657 return new HtmlComment(parentDocument, source); 5658 } 5659 5660 ///. 5661 override string nodeValue() const { 5662 return this.source; 5663 } 5664 5665 ///. 5666 override string writeToAppender(Appender!string where = appender!string()) const { 5667 auto start = where.data.length; 5668 where.put("<!--"); 5669 where.put(source); 5670 where.put("-->"); 5671 return where.data[start .. $]; 5672 } 5673 5674 override string toPrettyStringImpl(bool, int, string) const { 5675 string s; 5676 s ~= "<!--"; 5677 s ~= source; 5678 s ~= "-->"; 5679 return s; 5680 } 5681 5682 5683 ///. 5684 string source; 5685 } 5686 5687 5688 5689 5690 ///. 5691 /// Group: implementations 5692 class TextNode : Element { 5693 public: 5694 ///. 5695 this(Document _parentDocument, string e) { 5696 super(_parentDocument); 5697 contents = e; 5698 tagName = "#text"; 5699 } 5700 5701 /// 5702 this(string e) { 5703 this(null, e); 5704 } 5705 5706 string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes 5707 5708 ///. 5709 static TextNode fromUndecodedString(Document _parentDocument, string html) { 5710 auto e = new TextNode(_parentDocument, ""); 5711 e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose); 5712 return e; 5713 } 5714 5715 ///. 5716 override @property TextNode cloneNode(bool deep) { 5717 auto n = new TextNode(parentDocument, contents); 5718 return n; 5719 } 5720 5721 ///. 5722 override string nodeValue() const { 5723 return this.contents; //toString(); 5724 } 5725 5726 ///. 5727 @property override int nodeType() const { 5728 return NodeType.Text; 5729 } 5730 5731 ///. 5732 override string writeToAppender(Appender!string where = appender!string()) const { 5733 string s; 5734 if(contents.length) 5735 s = htmlEntitiesEncode(contents, where); 5736 else 5737 s = ""; 5738 5739 assert(s !is null); 5740 return s; 5741 } 5742 5743 override string toPrettyStringImpl(bool insertComments = false, int indentationLevel = 0, string indentWith = "\t") const { 5744 string s; 5745 5746 string contents = this.contents; 5747 // we will first collapse the whitespace per html 5748 // sort of. note this can break stuff yo!!!! 5749 if(this.parentNode is null || this.parentNode.tagName != "pre") { 5750 string n = ""; 5751 bool lastWasWhitespace = indentationLevel > 0; 5752 foreach(char c; contents) { 5753 if(c.isSimpleWhite) { 5754 if(!lastWasWhitespace) 5755 n ~= ' '; 5756 lastWasWhitespace = true; 5757 } else { 5758 n ~= c; 5759 lastWasWhitespace = false; 5760 } 5761 } 5762 5763 contents = n; 5764 } 5765 5766 if(this.parentNode !is null && this.parentNode.tagName != "p") { 5767 contents = contents.strip; 5768 } 5769 5770 auto e = htmlEntitiesEncode(contents); 5771 import std.algorithm.iteration : splitter; 5772 bool first = true; 5773 foreach(line; splitter(e, "\n")) { 5774 if(first) { 5775 s ~= toPrettyStringIndent(insertComments, indentationLevel, indentWith); 5776 first = false; 5777 } else { 5778 s ~= "\n"; 5779 if(insertComments) 5780 s ~= "<!--"; 5781 foreach(i; 0 .. indentationLevel) 5782 s ~= "\t"; 5783 if(insertComments) 5784 s ~= "-->"; 5785 } 5786 s ~= line.stripRight; 5787 } 5788 return s; 5789 } 5790 5791 ///. 5792 override Element appendChild(Element e) { 5793 assert(0, "Cannot append to a text node"); 5794 } 5795 5796 ///. 5797 string contents; 5798 // alias contents content; // I just mistype this a lot, 5799 } 5800 5801 /** 5802 There are subclasses of Element offering improved helper 5803 functions for the element in HTML. 5804 */ 5805 5806 /++ 5807 Represents a HTML link. This provides some convenience methods for manipulating query strings, but otherwise is sthe same Element interface. 5808 5809 Please note this object may not be used for all `<a>` tags. 5810 +/ 5811 /// Group: implementations 5812 class Link : Element { 5813 5814 /++ 5815 Constructs `<a href="that href">that text</a>`. 5816 +/ 5817 this(string href, string text) { 5818 super("a"); 5819 setAttribute("href", href); 5820 innerText = text; 5821 } 5822 5823 /// ditto 5824 this(Document _parentDocument) { 5825 super(_parentDocument); 5826 this.tagName = "a"; 5827 } 5828 5829 /+ 5830 /// Returns everything in the href EXCEPT the query string 5831 @property string targetSansQuery() { 5832 5833 } 5834 5835 ///. 5836 @property string domainName() { 5837 5838 } 5839 5840 ///. 5841 @property string path 5842 +/ 5843 /// This gets a variable from the URL's query string. 5844 string getValue(string name) { 5845 auto vars = variablesHash(); 5846 if(name in vars) 5847 return vars[name]; 5848 return null; 5849 } 5850 5851 private string[string] variablesHash() { 5852 string href = getAttribute("href"); 5853 if(href is null) 5854 return null; 5855 5856 auto ques = href.indexOf("?"); 5857 string str = ""; 5858 if(ques != -1) { 5859 str = href[ques+1..$]; 5860 5861 auto fragment = str.indexOf("#"); 5862 if(fragment != -1) 5863 str = str[0..fragment]; 5864 } 5865 5866 string[] variables = str.split("&"); 5867 5868 string[string] hash; 5869 5870 foreach(var; variables) { 5871 auto index = var.indexOf("="); 5872 if(index == -1) 5873 hash[var] = ""; 5874 else { 5875 hash[decodeUriComponent(var[0..index])] = decodeUriComponent(var[index + 1 .. $]); 5876 } 5877 } 5878 5879 return hash; 5880 } 5881 5882 /// Replaces all the stuff after a ? in the link at once with the given assoc array values. 5883 /*private*/ void updateQueryString(string[string] vars) { 5884 string href = getAttribute("href"); 5885 5886 auto question = href.indexOf("?"); 5887 if(question != -1) 5888 href = href[0..question]; 5889 5890 string frag = ""; 5891 auto fragment = href.indexOf("#"); 5892 if(fragment != -1) { 5893 frag = href[fragment..$]; 5894 href = href[0..fragment]; 5895 } 5896 5897 string query = "?"; 5898 bool first = true; 5899 foreach(name, value; vars) { 5900 if(!first) 5901 query ~= "&"; 5902 else 5903 first = false; 5904 5905 query ~= encodeUriComponent(name); 5906 if(value.length) 5907 query ~= "=" ~ encodeUriComponent(value); 5908 } 5909 5910 if(query != "?") 5911 href ~= query; 5912 5913 href ~= frag; 5914 5915 setAttribute("href", href); 5916 } 5917 5918 /// Sets or adds the variable with the given name to the given value 5919 /// It automatically URI encodes the values and takes care of the ? and &. 5920 override void setValue(string name, string variable) { 5921 auto vars = variablesHash(); 5922 vars[name] = variable; 5923 5924 updateQueryString(vars); 5925 } 5926 5927 override void setValue(string name, string[] variable) { 5928 assert(0, "not implemented FIXME"); 5929 } 5930 5931 /// Removes the given variable from the query string 5932 void removeValue(string name) { 5933 auto vars = variablesHash(); 5934 vars.remove(name); 5935 5936 updateQueryString(vars); 5937 } 5938 5939 /* 5940 ///. 5941 override string toString() { 5942 5943 } 5944 5945 ///. 5946 override string getAttribute(string name) { 5947 if(name == "href") { 5948 5949 } else 5950 return super.getAttribute(name); 5951 } 5952 */ 5953 } 5954 5955 /++ 5956 Represents a HTML form. This slightly specializes Element to add a few more convenience methods for adding and extracting form data. 5957 5958 Please note this object may not be used for all `<form>` tags. 5959 +/ 5960 /// Group: implementations 5961 class Form : Element { 5962 5963 ///. 5964 this(Document _parentDocument) { 5965 super(_parentDocument); 5966 tagName = "form"; 5967 } 5968 5969 /// Overrides of the base class implementations that more confirm to *my* conventions when writing form html. 5970 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 5971 auto t = this.querySelector("fieldset div"); 5972 if(t is null) 5973 return super.addField(label, name, type, fieldOptions); 5974 else 5975 return t.addField(label, name, type, fieldOptions); 5976 } 5977 5978 /// ditto 5979 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 5980 auto type = "text"; 5981 auto t = this.querySelector("fieldset div"); 5982 if(t is null) 5983 return super.addField(label, name, type, fieldOptions); 5984 else 5985 return t.addField(label, name, type, fieldOptions); 5986 } 5987 5988 /// ditto 5989 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 5990 auto t = this.querySelector("fieldset div"); 5991 if(t is null) 5992 return super.addField(label, name, options, fieldOptions); 5993 else 5994 return t.addField(label, name, options, fieldOptions); 5995 } 5996 5997 /// ditto 5998 override void setValue(string field, string value) { 5999 setValue(field, value, true); 6000 } 6001 6002 override void setValue(string name, string[] variable) { 6003 assert(0, "not implemented FIXME"); 6004 } 6005 6006 // FIXME: doesn't handle arrays; multiple fields can have the same name 6007 6008 /// Set's the form field's value. For input boxes, this sets the value attribute. For 6009 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 6010 /// the checked/selected attribute from all, and adds it to the one matching the value. 6011 /// For checkboxes, if the value is non-null and not empty, it checks the box. 6012 6013 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 6014 /// Otherwise, it makes a new input with type=hidden to keep the value. 6015 void setValue(string field, string value, bool makeNew) { 6016 auto eles = getField(field); 6017 if(eles.length == 0) { 6018 if(makeNew) { 6019 addInput(field, value); 6020 return; 6021 } else 6022 throw new Exception("form field does not exist"); 6023 } 6024 6025 if(eles.length == 1) { 6026 auto e = eles[0]; 6027 switch(e.tagName) { 6028 default: assert(0); 6029 case "textarea": 6030 e.innerText = value; 6031 break; 6032 case "input": 6033 string type = e.getAttribute("type"); 6034 if(type is null) { 6035 e.value = value; 6036 return; 6037 } 6038 switch(type) { 6039 case "checkbox": 6040 case "radio": 6041 if(value.length && value != "false") 6042 e.setAttribute("checked", "checked"); 6043 else 6044 e.removeAttribute("checked"); 6045 break; 6046 default: 6047 e.value = value; 6048 return; 6049 } 6050 break; 6051 case "select": 6052 bool found = false; 6053 foreach(child; e.tree) { 6054 if(child.tagName != "option") 6055 continue; 6056 string val = child.getAttribute("value"); 6057 if(val is null) 6058 val = child.innerText; 6059 if(val == value) { 6060 child.setAttribute("selected", "selected"); 6061 found = true; 6062 } else 6063 child.removeAttribute("selected"); 6064 } 6065 6066 if(!found) { 6067 e.addChild("option", value) 6068 .setAttribute("selected", "selected"); 6069 } 6070 break; 6071 } 6072 } else { 6073 // assume radio boxes 6074 foreach(e; eles) { 6075 string val = e.getAttribute("value"); 6076 //if(val is null) 6077 // throw new Exception("don't know what to do with radio boxes with null value"); 6078 if(val == value) 6079 e.setAttribute("checked", "checked"); 6080 else 6081 e.removeAttribute("checked"); 6082 } 6083 } 6084 } 6085 6086 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 6087 /// it makes no attempt to find and modify existing elements in the form to the new values. 6088 void addValueArray(string key, string[] arrayOfValues) { 6089 foreach(arr; arrayOfValues) 6090 addChild("input", key, arr); 6091 } 6092 6093 /// Gets the value of the field; what would be given if it submitted right now. (so 6094 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 6095 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 6096 string getValue(string field) { 6097 auto eles = getField(field); 6098 if(eles.length == 0) 6099 return ""; 6100 if(eles.length == 1) { 6101 auto e = eles[0]; 6102 switch(e.tagName) { 6103 default: assert(0); 6104 case "input": 6105 if(e.type == "checkbox") { 6106 if(e.checked) 6107 return e.value.length ? e.value : "checked"; 6108 return ""; 6109 } else 6110 return e.value; 6111 case "textarea": 6112 return e.innerText; 6113 case "select": 6114 foreach(child; e.tree) { 6115 if(child.tagName != "option") 6116 continue; 6117 if(child.selected) 6118 return child.value; 6119 } 6120 break; 6121 } 6122 } else { 6123 // assuming radio 6124 foreach(e; eles) { 6125 if(e.checked) 6126 return e.value; 6127 } 6128 } 6129 6130 return ""; 6131 } 6132 6133 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 6134 /++ 6135 Returns the form's contents in application/x-www-form-urlencoded format. 6136 6137 Bugs: 6138 Doesn't handle repeated elements of the same name nor files. 6139 +/ 6140 string getPostableData() { 6141 bool[string] namesDone; 6142 6143 string ret; 6144 bool outputted = false; 6145 6146 foreach(e; getElementsBySelector("[name]")) { 6147 if(e.name in namesDone) 6148 continue; 6149 6150 if(outputted) 6151 ret ~= "&"; 6152 else 6153 outputted = true; 6154 6155 ret ~= encodeUriComponent(e.name) ~ "=" ~ encodeUriComponent(getValue(e.name)); 6156 6157 namesDone[e.name] = true; 6158 } 6159 6160 return ret; 6161 } 6162 6163 /// Gets the actual elements with the given name 6164 Element[] getField(string name) { 6165 Element[] ret; 6166 foreach(e; tree) { 6167 if(e.name == name) 6168 ret ~= e; 6169 } 6170 return ret; 6171 } 6172 6173 /// Grabs the <label> with the given for tag, if there is one. 6174 Element getLabel(string forId) { 6175 foreach(e; tree) 6176 if(e.tagName == "label" && e.getAttribute("for") == forId) 6177 return e; 6178 return null; 6179 } 6180 6181 /// Adds a new INPUT field to the end of the form with the given attributes. 6182 Element addInput(string name, string value, string type = "hidden") { 6183 auto e = new Element(parentDocument, "input", null, true); 6184 e.name = name; 6185 e.value = value; 6186 e.type = type; 6187 6188 appendChild(e); 6189 6190 return e; 6191 } 6192 6193 /// Removes the given field from the form. It finds the element and knocks it right out. 6194 void removeField(string name) { 6195 foreach(e; getField(name)) 6196 e.parentNode.removeChild(e); 6197 } 6198 6199 /+ 6200 /// Returns all form members. 6201 @property Element[] elements() { 6202 6203 } 6204 6205 ///. 6206 string opDispatch(string name)(string v = null) 6207 // filter things that should actually be attributes on the form 6208 if( name != "method" && name != "action" && name != "enctype" 6209 && name != "style" && name != "name" && name != "id" && name != "class") 6210 { 6211 6212 } 6213 +/ 6214 /+ 6215 void submit() { 6216 // take its elements and submit them through http 6217 } 6218 +/ 6219 } 6220 6221 import std.conv; 6222 6223 /++ 6224 Represents a HTML table. Has some convenience methods for working with tabular data. 6225 +/ 6226 /// Group: implementations 6227 class Table : Element { 6228 6229 /// You can make this yourself but you'd generally get one of these object out of a html parse or [Element.make] call. 6230 this(Document _parentDocument) { 6231 super(_parentDocument); 6232 tagName = "table"; 6233 } 6234 6235 /++ 6236 Creates an element with the given type and content. The argument can be an Element, Html, or other data which is converted to text with `to!string` 6237 6238 The element is $(I not) appended to the table. 6239 +/ 6240 Element th(T)(T t) { 6241 Element e; 6242 if(parentDocument !is null) 6243 e = parentDocument.createElement("th"); 6244 else 6245 e = Element.make("th"); 6246 static if(is(T == Html)) 6247 e.innerHTML = t; 6248 else static if(is(T : Element)) 6249 e.appendChild(t); 6250 else 6251 e.innerText = to!string(t); 6252 return e; 6253 } 6254 6255 /// ditto 6256 Element td(T)(T t) { 6257 Element e; 6258 if(parentDocument !is null) 6259 e = parentDocument.createElement("td"); 6260 else 6261 e = Element.make("td"); 6262 static if(is(T == Html)) 6263 e.innerHTML = t; 6264 else static if(is(T : Element)) 6265 e.appendChild(t); 6266 else 6267 e.innerText = to!string(t); 6268 return e; 6269 } 6270 6271 /++ 6272 Passes each argument to the [th] method for `appendHeaderRow` or [td] method for the others, appends them all to the `<tbody>` element for `appendRow`, `<thead>` element for `appendHeaderRow`, or a `<tfoot>` element for `appendFooterRow`, and ensures it is appended it to the table. 6273 +/ 6274 Element appendHeaderRow(T...)(T t) { 6275 return appendRowInternal("th", "thead", t); 6276 } 6277 6278 /// ditto 6279 Element appendFooterRow(T...)(T t) { 6280 return appendRowInternal("td", "tfoot", t); 6281 } 6282 6283 /// ditto 6284 Element appendRow(T...)(T t) { 6285 return appendRowInternal("td", "tbody", t); 6286 } 6287 6288 /++ 6289 Takes each argument as a class name and calls [Element.addClass] for each element in the column associated with that index. 6290 6291 Please note this does not use the html `<col>` element. 6292 +/ 6293 void addColumnClasses(string[] classes...) { 6294 auto grid = getGrid(); 6295 foreach(row; grid) 6296 foreach(i, cl; classes) { 6297 if(cl.length) 6298 if(i < row.length) 6299 row[i].addClass(cl); 6300 } 6301 } 6302 6303 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 6304 Element row = Element.make("tr"); 6305 6306 foreach(e; t) { 6307 static if(is(typeof(e) : Element)) { 6308 if(e.tagName == "td" || e.tagName == "th") 6309 row.appendChild(e); 6310 else { 6311 Element a = Element.make(innerType); 6312 6313 a.appendChild(e); 6314 6315 row.appendChild(a); 6316 } 6317 } else static if(is(typeof(e) == Html)) { 6318 Element a = Element.make(innerType); 6319 a.innerHTML = e.source; 6320 row.appendChild(a); 6321 } else static if(is(typeof(e) == Element[])) { 6322 Element a = Element.make(innerType); 6323 foreach(ele; e) 6324 a.appendChild(ele); 6325 row.appendChild(a); 6326 } else static if(is(typeof(e) == string[])) { 6327 foreach(ele; e) { 6328 Element a = Element.make(innerType); 6329 a.innerText = to!string(ele); 6330 row.appendChild(a); 6331 } 6332 } else { 6333 Element a = Element.make(innerType); 6334 a.innerText = to!string(e); 6335 row.appendChild(a); 6336 } 6337 } 6338 6339 foreach(e; children) { 6340 if(e.tagName == findType) { 6341 e.appendChild(row); 6342 return row; 6343 } 6344 } 6345 6346 // the type was not found if we are here... let's add it so it is well-formed 6347 auto lol = this.addChild(findType); 6348 lol.appendChild(row); 6349 6350 return row; 6351 } 6352 6353 /// Returns the `<caption>` element of the table, creating one if it isn't there. 6354 Element captionElement() { 6355 Element cap; 6356 foreach(c; children) { 6357 if(c.tagName == "caption") { 6358 cap = c; 6359 break; 6360 } 6361 } 6362 6363 if(cap is null) { 6364 cap = Element.make("caption"); 6365 appendChild(cap); 6366 } 6367 6368 return cap; 6369 } 6370 6371 /// Returns or sets the text inside the `<caption>` element, creating that element if it isnt' there. 6372 @property string caption() { 6373 return captionElement().innerText; 6374 } 6375 6376 /// ditto 6377 @property void caption(string text) { 6378 captionElement().innerText = text; 6379 } 6380 6381 /// Gets the logical layout of the table as a rectangular grid of 6382 /// cells. It considers rowspan and colspan. A cell with a large 6383 /// span is represented in the grid by being referenced several times. 6384 /// The tablePortition parameter can get just a <thead>, <tbody>, or 6385 /// <tfoot> portion if you pass one. 6386 /// 6387 /// Note: the rectangular grid might include null cells. 6388 /// 6389 /// This is kinda expensive so you should call once when you want the grid, 6390 /// then do lookups on the returned array. 6391 TableCell[][] getGrid(Element tablePortition = null) 6392 in { 6393 if(tablePortition is null) 6394 assert(tablePortition is null); 6395 else { 6396 assert(tablePortition !is null); 6397 assert(tablePortition.parentNode is this); 6398 assert( 6399 tablePortition.tagName == "tbody" 6400 || 6401 tablePortition.tagName == "tfoot" 6402 || 6403 tablePortition.tagName == "thead" 6404 ); 6405 } 6406 } 6407 do { 6408 if(tablePortition is null) 6409 tablePortition = this; 6410 6411 TableCell[][] ret; 6412 6413 // FIXME: will also return rows of sub tables! 6414 auto rows = tablePortition.getElementsByTagName("tr"); 6415 ret.length = rows.length; 6416 6417 int maxLength = 0; 6418 6419 int insertCell(int row, int position, TableCell cell) { 6420 if(row >= ret.length) 6421 return position; // not supposed to happen - a rowspan is prolly too big. 6422 6423 if(position == -1) { 6424 position++; 6425 foreach(item; ret[row]) { 6426 if(item is null) 6427 break; 6428 position++; 6429 } 6430 } 6431 6432 if(position < ret[row].length) 6433 ret[row][position] = cell; 6434 else 6435 foreach(i; ret[row].length .. position + 1) { 6436 if(i == position) 6437 ret[row] ~= cell; 6438 else 6439 ret[row] ~= null; 6440 } 6441 return position; 6442 } 6443 6444 foreach(i, rowElement; rows) { 6445 auto row = cast(TableRow) rowElement; 6446 assert(row !is null); 6447 assert(i < ret.length); 6448 6449 int position = 0; 6450 foreach(cellElement; rowElement.childNodes) { 6451 auto cell = cast(TableCell) cellElement; 6452 if(cell is null) 6453 continue; 6454 6455 // FIXME: colspan == 0 or rowspan == 0 6456 // is supposed to mean fill in the rest of 6457 // the table, not skip it 6458 foreach(int j; 0 .. cell.colspan) { 6459 foreach(int k; 0 .. cell.rowspan) 6460 // if the first row, always append. 6461 insertCell(k + cast(int) i, k == 0 ? -1 : position, cell); 6462 position++; 6463 } 6464 } 6465 6466 if(ret[i].length > maxLength) 6467 maxLength = cast(int) ret[i].length; 6468 } 6469 6470 // want to ensure it's rectangular 6471 foreach(ref r; ret) { 6472 foreach(i; r.length .. maxLength) 6473 r ~= null; 6474 } 6475 6476 return ret; 6477 } 6478 } 6479 6480 /// Represents a table row element - a <tr> 6481 /// Group: implementations 6482 class TableRow : Element { 6483 ///. 6484 this(Document _parentDocument) { 6485 super(_parentDocument); 6486 tagName = "tr"; 6487 } 6488 6489 // FIXME: the standard says there should be a lot more in here, 6490 // but meh, I never use it and it's a pain to implement. 6491 } 6492 6493 /// Represents anything that can be a table cell - <td> or <th> html. 6494 /// Group: implementations 6495 class TableCell : Element { 6496 ///. 6497 this(Document _parentDocument, string _tagName) { 6498 super(_parentDocument, _tagName); 6499 } 6500 6501 /// Gets and sets the row/colspan attributes as integers 6502 @property int rowspan() const { 6503 int ret = 1; 6504 auto it = getAttribute("rowspan"); 6505 if(it.length) 6506 ret = to!int(it); 6507 return ret; 6508 } 6509 6510 /// ditto 6511 @property int colspan() const { 6512 int ret = 1; 6513 auto it = getAttribute("colspan"); 6514 if(it.length) 6515 ret = to!int(it); 6516 return ret; 6517 } 6518 6519 /// ditto 6520 @property int rowspan(int i) { 6521 setAttribute("rowspan", to!string(i)); 6522 return i; 6523 } 6524 6525 /// ditto 6526 @property int colspan(int i) { 6527 setAttribute("colspan", to!string(i)); 6528 return i; 6529 } 6530 6531 } 6532 6533 6534 /// This is thrown on parse errors. 6535 /// Group: implementations 6536 class MarkupException : Exception { 6537 6538 ///. 6539 this(string message, string file = __FILE__, size_t line = __LINE__) { 6540 super(message, file, line); 6541 } 6542 } 6543 6544 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 6545 /// Group: implementations 6546 class ElementNotFoundException : Exception { 6547 6548 /// type == kind of element you were looking for and search == a selector describing the search. 6549 this(string type, string search, Element searchContext, string file = __FILE__, size_t line = __LINE__) { 6550 this.searchContext = searchContext; 6551 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 6552 } 6553 6554 Element searchContext; 6555 } 6556 6557 /// The html struct is used to differentiate between regular text nodes and html in certain functions 6558 /// 6559 /// Easiest way to construct it is like this: `auto html = Html("<p>hello</p>");` 6560 /// Group: core_functionality 6561 struct Html { 6562 /// This string holds the actual html. Use it to retrieve the contents. 6563 string source; 6564 } 6565 6566 // for the observers 6567 enum DomMutationOperations { 6568 setAttribute, 6569 removeAttribute, 6570 appendChild, // tagname, attributes[], innerHTML 6571 insertBefore, 6572 truncateChildren, 6573 removeChild, 6574 appendHtml, 6575 replaceHtml, 6576 appendText, 6577 replaceText, 6578 replaceTextOnly 6579 } 6580 6581 // and for observers too 6582 struct DomMutationEvent { 6583 DomMutationOperations operation; 6584 Element target; 6585 Element related; // what this means differs with the operation 6586 Element related2; 6587 string relatedString; 6588 string relatedString2; 6589 } 6590 6591 6592 private immutable static string[] htmlSelfClosedElements = [ 6593 // html 4 6594 "area","base","br","col","hr","img","input","link","meta","param", 6595 6596 // html 5 6597 "embed","source","track","wbr" 6598 ]; 6599 6600 private immutable static string[] htmlRawSourceElements = [ 6601 "script", "style" 6602 ]; 6603 6604 private immutable static string[] htmlInlineElements = [ 6605 "span", "strong", "em", "b", "i", "a" 6606 ]; 6607 6608 6609 static import std.conv; 6610 6611 /// helper function for decoding html entities 6612 int intFromHex(string hex) { 6613 int place = 1; 6614 int value = 0; 6615 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 6616 int v; 6617 char q = hex[a]; 6618 if( q >= '0' && q <= '9') 6619 v = q - '0'; 6620 else if (q >= 'a' && q <= 'f') 6621 v = q - 'a' + 10; 6622 else if (q >= 'A' && q <= 'F') 6623 v = q - 'A' + 10; 6624 else throw new Exception("Illegal hex character: " ~ q); 6625 6626 value += v * place; 6627 6628 place *= 16; 6629 } 6630 6631 return value; 6632 } 6633 6634 6635 // CSS selector handling 6636 6637 // EXTENSIONS 6638 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 6639 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 6640 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 6641 // dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 6642 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 6643 6644 6645 6646 // CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it. 6647 // That might be useful to implement, though I do have parent selectors too. 6648 6649 ///. 6650 static immutable string[] selectorTokens = [ 6651 // It is important that the 2 character possibilities go first here for accurate lexing 6652 "~=", "*=", "|=", "^=", "$=", "!=", 6653 "::", ">>", 6654 "<<", // my any-parent extension (reciprocal of whitespace) 6655 // " - ", // previous-sibling extension (whitespace required to disambiguate tag-names) 6656 ".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<", "(", ")" 6657 ]; // other is white space or a name. 6658 6659 ///. 6660 sizediff_t idToken(string str, sizediff_t position) { 6661 sizediff_t tid = -1; 6662 char c = str[position]; 6663 foreach(a, token; selectorTokens) 6664 6665 if(c == token[0]) { 6666 if(token.length > 1) { 6667 if(position + 1 >= str.length || str[position+1] != token[1]) 6668 continue; // not this token 6669 } 6670 tid = a; 6671 break; 6672 } 6673 return tid; 6674 } 6675 6676 /// Parts of the CSS selector implementation 6677 // look, ma, no phobos! 6678 // new lexer by ketmar 6679 string[] lexSelector (string selstr) { 6680 6681 static sizediff_t idToken (string str, size_t stpos) { 6682 char c = str[stpos]; 6683 foreach (sizediff_t tidx, immutable token; selectorTokens) { 6684 if (c == token[0]) { 6685 if (token.length > 1) { 6686 assert(token.length == 2, token); // we don't have 3-char tokens yet 6687 if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue; 6688 } 6689 return tidx; 6690 } 6691 } 6692 return -1; 6693 } 6694 6695 // skip spaces and comments 6696 static string removeLeadingBlanks (string str) { 6697 size_t curpos = 0; 6698 while (curpos < str.length) { 6699 immutable char ch = str[curpos]; 6700 // this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares! 6701 if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') { 6702 // comment 6703 curpos += 2; 6704 while (curpos < str.length) { 6705 if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') { 6706 curpos += 2; 6707 break; 6708 } 6709 ++curpos; 6710 } 6711 } else if (ch < 32) { // The < instead of <= is INTENTIONAL. See note from adr below. 6712 ++curpos; 6713 6714 // FROM ADR: This does NOT catch ' '! Spaces have semantic meaning in CSS! While 6715 // "foo bar" is clear, and can only have one meaning, consider ".foo .bar". 6716 // That is not the same as ".foo.bar". If the space is stripped, important 6717 // information is lost, despite the tokens being separatable anyway. 6718 // 6719 // The parser really needs to be aware of the presence of a space. 6720 } else { 6721 break; 6722 } 6723 } 6724 return str[curpos..$]; 6725 } 6726 6727 static bool isBlankAt() (string str, size_t pos) { 6728 // we should consider unicode spaces too, but... unicode sux anyway. 6729 return 6730 (pos < str.length && // in string 6731 (str[pos] <= 32 || // space 6732 (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment 6733 } 6734 6735 string[] tokens; 6736 // lexx it! 6737 while ((selstr = removeLeadingBlanks(selstr)).length > 0) { 6738 if(selstr[0] == '\"' || selstr[0] == '\'') { 6739 auto end = selstr[0]; 6740 auto pos = 1; 6741 bool escaping; 6742 while(pos < selstr.length && !escaping && selstr[pos] != end) { 6743 if(escaping) 6744 escaping = false; 6745 else if(selstr[pos] == '\\') 6746 escaping = true; 6747 pos++; 6748 } 6749 6750 // FIXME: do better unescaping 6751 tokens ~= selstr[1 .. pos].replace(`\"`, `"`).replace(`\'`, `'`).replace(`\\`, `\`); 6752 if(pos+1 >= selstr.length) 6753 assert(0, selstr); 6754 selstr = selstr[pos + 1.. $]; 6755 continue; 6756 } 6757 6758 6759 // no tokens starts with escape 6760 immutable tid = idToken(selstr, 0); 6761 if (tid >= 0) { 6762 // special token 6763 tokens ~= selectorTokens[tid]; // it's funnier this way 6764 selstr = selstr[selectorTokens[tid].length..$]; 6765 continue; 6766 } 6767 // from start to space or special token 6768 size_t escapePos = size_t.max; 6769 size_t curpos = 0; // i can has chizburger^w escape at the start 6770 while (curpos < selstr.length) { 6771 if (selstr[curpos] == '\\') { 6772 // this is escape, just skip it and next char 6773 if (escapePos == size_t.max) escapePos = curpos; 6774 curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length); 6775 } else { 6776 if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break; 6777 ++curpos; 6778 } 6779 } 6780 // identifier 6781 if (escapePos != size_t.max) { 6782 // i hate it when it happens 6783 string id = selstr[0..escapePos]; 6784 while (escapePos < curpos) { 6785 if (curpos-escapePos < 2) break; 6786 id ~= selstr[escapePos+1]; // escaped char 6787 escapePos += 2; 6788 immutable stp = escapePos; 6789 while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos; 6790 if (escapePos > stp) id ~= selstr[stp..escapePos]; 6791 } 6792 if (id.length > 0) tokens ~= id; 6793 } else { 6794 tokens ~= selstr[0..curpos]; 6795 } 6796 selstr = selstr[curpos..$]; 6797 } 6798 return tokens; 6799 } 6800 version(unittest_domd_lexer) unittest { 6801 assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]); 6802 assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]); 6803 assert(lexSelector(r" < <") == ["<", "<"]); 6804 assert(lexSelector(r" <<") == ["<<"]); 6805 assert(lexSelector(r" <</") == ["<<", "/"]); 6806 assert(lexSelector(r" <</*") == ["<<"]); 6807 assert(lexSelector(r" <\</*") == ["<", "<"]); 6808 assert(lexSelector(r"heh\") == ["heh"]); 6809 assert(lexSelector(r"alice \") == ["alice"]); 6810 assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]); 6811 } 6812 6813 /// ditto 6814 struct SelectorPart { 6815 string tagNameFilter; ///. 6816 string[] attributesPresent; /// [attr] 6817 string[2][] attributesEqual; /// [attr=value] 6818 string[2][] attributesStartsWith; /// [attr^=value] 6819 string[2][] attributesEndsWith; /// [attr$=value] 6820 // split it on space, then match to these 6821 string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value] 6822 // split it on dash, then match to these 6823 string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value] 6824 string[2][] attributesInclude; /// [attr*=value] 6825 string[2][] attributesNotEqual; /// [attr!=value] -- extension by me 6826 6827 string[] hasSelectors; /// :has(this) 6828 string[] notSelectors; /// :not(this) 6829 6830 string[] isSelectors; /// :is(this) 6831 string[] whereSelectors; /// :where(this) 6832 6833 ParsedNth[] nthOfType; /// . 6834 ParsedNth[] nthLastOfType; /// . 6835 ParsedNth[] nthChild; /// . 6836 6837 bool firstChild; ///. 6838 bool lastChild; ///. 6839 6840 bool firstOfType; /// . 6841 bool lastOfType; /// . 6842 6843 bool emptyElement; ///. 6844 bool whitespaceOnly; /// 6845 bool oddChild; ///. 6846 bool evenChild; ///. 6847 6848 bool scopeElement; /// the css :scope thing; matches just the `this` element. NOT IMPLEMENTED 6849 6850 bool rootElement; ///. 6851 6852 int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf 6853 6854 bool isCleanSlateExceptSeparation() { 6855 auto cp = this; 6856 cp.separation = -1; 6857 return cp is SelectorPart.init; 6858 } 6859 6860 ///. 6861 string toString() { 6862 string ret; 6863 switch(separation) { 6864 default: assert(0); 6865 case -1: break; 6866 case 0: ret ~= " "; break; 6867 case 1: ret ~= " > "; break; 6868 case 2: ret ~= " + "; break; 6869 case 3: ret ~= " ~ "; break; 6870 case 4: ret ~= " < "; break; 6871 } 6872 ret ~= tagNameFilter; 6873 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 6874 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 6875 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 6876 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 6877 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 6878 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 6879 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 6880 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 6881 6882 foreach(a; notSelectors) ret ~= ":not(" ~ a ~ ")"; 6883 foreach(a; hasSelectors) ret ~= ":has(" ~ a ~ ")"; 6884 6885 foreach(a; isSelectors) ret ~= ":is(" ~ a ~ ")"; 6886 foreach(a; whereSelectors) ret ~= ":where(" ~ a ~ ")"; 6887 6888 foreach(a; nthChild) ret ~= ":nth-child(" ~ a.toString ~ ")"; 6889 foreach(a; nthOfType) ret ~= ":nth-of-type(" ~ a.toString ~ ")"; 6890 foreach(a; nthLastOfType) ret ~= ":nth-last-of-type(" ~ a.toString ~ ")"; 6891 6892 if(firstChild) ret ~= ":first-child"; 6893 if(lastChild) ret ~= ":last-child"; 6894 if(firstOfType) ret ~= ":first-of-type"; 6895 if(lastOfType) ret ~= ":last-of-type"; 6896 if(emptyElement) ret ~= ":empty"; 6897 if(whitespaceOnly) ret ~= ":whitespace-only"; 6898 if(oddChild) ret ~= ":odd-child"; 6899 if(evenChild) ret ~= ":even-child"; 6900 if(rootElement) ret ~= ":root"; 6901 if(scopeElement) ret ~= ":scope"; 6902 6903 return ret; 6904 } 6905 6906 // USEFUL 6907 /// Returns true if the given element matches this part 6908 bool matchElement(Element e, Element scopeElementNow = null) { 6909 // FIXME: this can be called a lot of times, and really add up in times according to the profiler. 6910 // Each individual call is reasonably fast already, but it adds up. 6911 if(e is null) return false; 6912 if(e.nodeType != 1) return false; 6913 6914 if(tagNameFilter != "" && tagNameFilter != "*") 6915 if(e.tagName != tagNameFilter) 6916 return false; 6917 if(firstChild) { 6918 if(e.parentNode is null) 6919 return false; 6920 if(e.parentNode.childElements[0] !is e) 6921 return false; 6922 } 6923 if(lastChild) { 6924 if(e.parentNode is null) 6925 return false; 6926 auto ce = e.parentNode.childElements; 6927 if(ce[$-1] !is e) 6928 return false; 6929 } 6930 if(firstOfType) { 6931 if(e.parentNode is null) 6932 return false; 6933 auto ce = e.parentNode.childElements; 6934 foreach(c; ce) { 6935 if(c.tagName == e.tagName) { 6936 if(c is e) 6937 return true; 6938 else 6939 return false; 6940 } 6941 } 6942 } 6943 if(lastOfType) { 6944 if(e.parentNode is null) 6945 return false; 6946 auto ce = e.parentNode.childElements; 6947 foreach_reverse(c; ce) { 6948 if(c.tagName == e.tagName) { 6949 if(c is e) 6950 return true; 6951 else 6952 return false; 6953 } 6954 } 6955 } 6956 if(scopeElement) { 6957 if(e !is scopeElementNow) 6958 return false; 6959 } 6960 if(emptyElement) { 6961 if(e.isEmpty()) 6962 return false; 6963 } 6964 if(whitespaceOnly) { 6965 if(e.innerText.strip.length) 6966 return false; 6967 } 6968 if(rootElement) { 6969 if(e.parentNode !is null) 6970 return false; 6971 } 6972 if(oddChild || evenChild) { 6973 if(e.parentNode is null) 6974 return false; 6975 foreach(i, child; e.parentNode.childElements) { 6976 if(child is e) { 6977 if(oddChild && !(i&1)) 6978 return false; 6979 if(evenChild && (i&1)) 6980 return false; 6981 break; 6982 } 6983 } 6984 } 6985 6986 bool matchWithSeparator(string attr, string value, string separator) { 6987 foreach(s; attr.split(separator)) 6988 if(s == value) 6989 return true; 6990 return false; 6991 } 6992 6993 foreach(a; attributesPresent) 6994 if(a !in e.attributes) 6995 return false; 6996 foreach(a; attributesEqual) 6997 if(a[0] !in e.attributes || e.attributes[a[0]] != a[1]) 6998 return false; 6999 foreach(a; attributesNotEqual) 7000 // FIXME: maybe it should say null counts... this just bit me. 7001 // I did [attr][attr!=value] to work around. 7002 // 7003 // if it's null, it's not equal, right? 7004 //if(a[0] !in e.attributes || e.attributes[a[0]] == a[1]) 7005 if(e.getAttribute(a[0]) == a[1]) 7006 return false; 7007 foreach(a; attributesInclude) 7008 if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1)) 7009 return false; 7010 foreach(a; attributesStartsWith) 7011 if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1])) 7012 return false; 7013 foreach(a; attributesEndsWith) 7014 if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1])) 7015 return false; 7016 foreach(a; attributesIncludesSeparatedBySpaces) 7017 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " ")) 7018 return false; 7019 foreach(a; attributesIncludesSeparatedByDashes) 7020 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-")) 7021 return false; 7022 foreach(a; hasSelectors) { 7023 if(e.querySelector(a) is null) 7024 return false; 7025 } 7026 foreach(a; notSelectors) { 7027 auto sel = Selector(a); 7028 if(sel.matchesElement(e)) 7029 return false; 7030 } 7031 foreach(a; isSelectors) { 7032 auto sel = Selector(a); 7033 if(!sel.matchesElement(e)) 7034 return false; 7035 } 7036 foreach(a; whereSelectors) { 7037 auto sel = Selector(a); 7038 if(!sel.matchesElement(e)) 7039 return false; 7040 } 7041 7042 foreach(a; nthChild) { 7043 if(e.parentNode is null) 7044 return false; 7045 7046 auto among = e.parentNode.childElements; 7047 7048 if(!a.solvesFor(among, e)) 7049 return false; 7050 } 7051 foreach(a; nthOfType) { 7052 if(e.parentNode is null) 7053 return false; 7054 7055 auto among = e.parentNode.childElements(e.tagName); 7056 7057 if(!a.solvesFor(among, e)) 7058 return false; 7059 } 7060 foreach(a; nthLastOfType) { 7061 if(e.parentNode is null) 7062 return false; 7063 7064 auto among = retro(e.parentNode.childElements(e.tagName)); 7065 7066 if(!a.solvesFor(among, e)) 7067 return false; 7068 } 7069 7070 return true; 7071 } 7072 } 7073 7074 struct ParsedNth { 7075 int multiplier; 7076 int adder; 7077 7078 string of; 7079 7080 this(string text) { 7081 auto original = text; 7082 consumeWhitespace(text); 7083 if(text.startsWith("odd")) { 7084 multiplier = 2; 7085 adder = 1; 7086 7087 text = text[3 .. $]; 7088 } else if(text.startsWith("even")) { 7089 multiplier = 2; 7090 adder = 1; 7091 7092 text = text[4 .. $]; 7093 } else { 7094 int n = (text.length && text[0] == 'n') ? 1 : parseNumber(text); 7095 consumeWhitespace(text); 7096 if(text.length && text[0] == 'n') { 7097 multiplier = n; 7098 text = text[1 .. $]; 7099 consumeWhitespace(text); 7100 if(text.length) { 7101 if(text[0] == '+') { 7102 text = text[1 .. $]; 7103 adder = parseNumber(text); 7104 } else if(text[0] == '-') { 7105 text = text[1 .. $]; 7106 adder = -parseNumber(text); 7107 } else if(text[0] == 'o') { 7108 // continue, this is handled below 7109 } else 7110 throw new Exception("invalid css string at " ~ text ~ " in " ~ original); 7111 } 7112 } else { 7113 adder = n; 7114 } 7115 } 7116 7117 consumeWhitespace(text); 7118 if(text.startsWith("of")) { 7119 text = text[2 .. $]; 7120 consumeWhitespace(text); 7121 of = text[0 .. $]; 7122 } 7123 } 7124 7125 string toString() { 7126 return format("%dn%s%d%s%s", multiplier, adder >= 0 ? "+" : "", adder, of.length ? " of " : "", of); 7127 } 7128 7129 bool solvesFor(R)(R elements, Element e) { 7130 int idx = 1; 7131 bool found = false; 7132 foreach(ele; elements) { 7133 if(of.length) { 7134 auto sel = Selector(of); 7135 if(!sel.matchesElement(ele)) 7136 continue; 7137 } 7138 if(ele is e) { 7139 found = true; 7140 break; 7141 } 7142 idx++; 7143 } 7144 if(!found) return false; 7145 7146 // multiplier* n + adder = idx 7147 // if there is a solution for integral n, it matches 7148 7149 idx -= adder; 7150 if(multiplier) { 7151 if(idx % multiplier == 0) 7152 return true; 7153 } else { 7154 return idx == 0; 7155 } 7156 return false; 7157 } 7158 7159 private void consumeWhitespace(ref string text) { 7160 while(text.length && text[0] == ' ') 7161 text = text[1 .. $]; 7162 } 7163 7164 private int parseNumber(ref string text) { 7165 consumeWhitespace(text); 7166 if(text.length == 0) return 0; 7167 bool negative = text[0] == '-'; 7168 if(text[0] == '+') 7169 text = text[1 .. $]; 7170 if(negative) text = text[1 .. $]; 7171 int i = 0; 7172 while(i < text.length && (text[i] >= '0' && text[i] <= '9')) 7173 i++; 7174 if(i == 0) 7175 return 0; 7176 int cool = to!int(text[0 .. i]); 7177 text = text[i .. $]; 7178 return negative ? -cool : cool; 7179 } 7180 } 7181 7182 // USEFUL 7183 /// ditto 7184 Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts, Element scopeElementNow = null) { 7185 Element[] ret; 7186 if(!parts.length) { 7187 return [start]; // the null selector only matches the start point; it 7188 // is what terminates the recursion 7189 } 7190 7191 auto part = parts[0]; 7192 //writeln("checking ", part, " against ", start, " with ", part.separation); 7193 switch(part.separation) { 7194 default: assert(0); 7195 case -1: 7196 case 0: // tree 7197 foreach(e; start.tree) { 7198 if(part.separation == 0 && start is e) 7199 continue; // space doesn't match itself! 7200 if(part.matchElement(e, scopeElementNow)) { 7201 ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow); 7202 } 7203 } 7204 break; 7205 case 1: // children 7206 foreach(e; start.childNodes) { 7207 if(part.matchElement(e, scopeElementNow)) { 7208 ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow); 7209 } 7210 } 7211 break; 7212 case 2: // next-sibling 7213 auto e = start.nextSibling("*"); 7214 if(part.matchElement(e, scopeElementNow)) 7215 ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow); 7216 break; 7217 case 3: // younger sibling 7218 auto tmp = start.parentNode; 7219 if(tmp !is null) { 7220 sizediff_t pos = -1; 7221 auto children = tmp.childElements; 7222 foreach(i, child; children) { 7223 if(child is start) { 7224 pos = i; 7225 break; 7226 } 7227 } 7228 assert(pos != -1); 7229 foreach(e; children[pos+1..$]) { 7230 if(part.matchElement(e, scopeElementNow)) 7231 ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow); 7232 } 7233 } 7234 break; 7235 case 4: // immediate parent node, an extension of mine to walk back up the tree 7236 auto e = start.parentNode; 7237 if(part.matchElement(e, scopeElementNow)) { 7238 ret ~= getElementsBySelectorParts(e, parts[1..$], scopeElementNow); 7239 } 7240 /* 7241 Example of usefulness: 7242 7243 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 7244 7245 table th < tr 7246 7247 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 7248 */ 7249 break; 7250 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 7251 /* 7252 Like with the < operator, this is best used to find some parent of a particular known element. 7253 7254 Say you have an anchor inside a 7255 */ 7256 } 7257 7258 return ret; 7259 } 7260 7261 /++ 7262 Represents a parsed CSS selector. You never have to use this directly, but you can if you know it is going to be reused a lot to avoid a bit of repeat parsing. 7263 7264 See_Also: 7265 $(LIST 7266 * [Element.querySelector] 7267 * [Element.querySelectorAll] 7268 * [Element.matches] 7269 * [Element.closest] 7270 * [Document.querySelector] 7271 * [Document.querySelectorAll] 7272 ) 7273 +/ 7274 /// Group: core_functionality 7275 struct Selector { 7276 SelectorComponent[] components; 7277 string original; 7278 /++ 7279 Parses the selector string and constructs the usable structure. 7280 +/ 7281 this(string cssSelector) { 7282 components = parseSelectorString(cssSelector); 7283 original = cssSelector; 7284 } 7285 7286 /++ 7287 Returns true if the given element matches this selector, 7288 considered relative to an arbitrary element. 7289 7290 You can do a form of lazy [Element.querySelectorAll|querySelectorAll] by using this 7291 with [std.algorithm.iteration.filter]: 7292 7293 --- 7294 Selector sel = Selector("foo > bar"); 7295 auto lazySelectorRange = element.tree.filter!(e => sel.matchElement(e))(document.root); 7296 --- 7297 +/ 7298 bool matchesElement(Element e, Element relativeTo = null) { 7299 foreach(component; components) 7300 if(component.matchElement(e, relativeTo)) 7301 return true; 7302 7303 return false; 7304 } 7305 7306 /++ 7307 Reciprocal of [Element.querySelectorAll] 7308 +/ 7309 Element[] getMatchingElements(Element start, Element relativeTo = null) { 7310 Element[] ret; 7311 foreach(component; components) 7312 ret ~= getElementsBySelectorParts(start, component.parts, relativeTo); 7313 return removeDuplicates(ret); 7314 } 7315 7316 /++ 7317 Like [getMatchingElements], but returns a lazy range. Be careful 7318 about mutating the dom as you iterate through this. 7319 +/ 7320 auto getMatchingElementsLazy(Element start, Element relativeTo = null) { 7321 import std.algorithm.iteration; 7322 return start.tree.filter!(a => this.matchesElement(a, relativeTo)); 7323 } 7324 7325 7326 /// Returns the string this was built from 7327 string toString() { 7328 return original; 7329 } 7330 7331 /++ 7332 Returns a string from the parsed result 7333 7334 7335 (may not match the original, this is mostly for debugging right now but in the future might be useful for pretty-printing) 7336 +/ 7337 string parsedToString() { 7338 string ret; 7339 7340 foreach(idx, component; components) { 7341 if(idx) ret ~= ", "; 7342 ret ~= component.toString(); 7343 } 7344 7345 return ret; 7346 } 7347 } 7348 7349 ///. 7350 struct SelectorComponent { 7351 ///. 7352 SelectorPart[] parts; 7353 7354 ///. 7355 string toString() { 7356 string ret; 7357 foreach(part; parts) 7358 ret ~= part.toString(); 7359 return ret; 7360 } 7361 7362 // USEFUL 7363 ///. 7364 Element[] getElements(Element start, Element relativeTo = null) { 7365 return removeDuplicates(getElementsBySelectorParts(start, parts, relativeTo)); 7366 } 7367 7368 // USEFUL (but not implemented) 7369 /// If relativeTo == null, it assumes the root of the parent document. 7370 bool matchElement(Element e, Element relativeTo = null) { 7371 if(e is null) return false; 7372 Element where = e; 7373 int lastSeparation = -1; 7374 7375 auto lparts = parts; 7376 7377 if(parts.length && parts[0].separation > 0) { 7378 throw new Exception("invalid selector"); 7379 /+ 7380 // if it starts with a non-trivial separator, inject 7381 // a "*" matcher to act as a root. for cases like document.querySelector("> body") 7382 // which implies html 7383 7384 // however, if it is a child-matching selector and there are no children, 7385 // bail out early as it obviously cannot match. 7386 bool hasNonTextChildren = false; 7387 foreach(c; e.children) 7388 if(c.nodeType != 3) { 7389 hasNonTextChildren = true; 7390 break; 7391 } 7392 if(!hasNonTextChildren) 7393 return false; 7394 7395 // there is probably a MUCH better way to do this. 7396 auto dummy = SelectorPart.init; 7397 dummy.tagNameFilter = "*"; 7398 dummy.separation = 0; 7399 lparts = dummy ~ lparts; 7400 +/ 7401 } 7402 7403 foreach(part; retro(lparts)) { 7404 7405 // writeln("matching ", where, " with ", part, " via ", lastSeparation); 7406 // writeln(parts); 7407 7408 if(lastSeparation == -1) { 7409 if(!part.matchElement(where, relativeTo)) 7410 return false; 7411 } else if(lastSeparation == 0) { // generic parent 7412 // need to go up the whole chain 7413 where = where.parentNode; 7414 7415 while(where !is null) { 7416 if(part.matchElement(where, relativeTo)) 7417 break; 7418 7419 if(where is relativeTo) 7420 return false; 7421 7422 where = where.parentNode; 7423 } 7424 7425 if(where is null) 7426 return false; 7427 } else if(lastSeparation == 1) { // the > operator 7428 where = where.parentNode; 7429 7430 if(!part.matchElement(where, relativeTo)) 7431 return false; 7432 } else if(lastSeparation == 2) { // the + operator 7433 //writeln("WHERE", where, " ", part); 7434 where = where.previousSibling("*"); 7435 7436 if(!part.matchElement(where, relativeTo)) 7437 return false; 7438 } else if(lastSeparation == 3) { // the ~ operator 7439 where = where.previousSibling("*"); 7440 while(where !is null) { 7441 if(part.matchElement(where, relativeTo)) 7442 break; 7443 7444 if(where is relativeTo) 7445 return false; 7446 7447 where = where.previousSibling("*"); 7448 } 7449 7450 if(where is null) 7451 return false; 7452 } else if(lastSeparation == 4) { // my bad idea extension < operator, don't use this anymore 7453 // FIXME 7454 } 7455 7456 lastSeparation = part.separation; 7457 7458 /* 7459 /+ 7460 I commented this to magically make unittest pass and I think the reason it works 7461 when commented is that I inject a :scope iff there's a selector at top level now 7462 and if not, it follows the (frankly stupid) w3c standard behavior at arbitrary id 7463 asduiwh . but me injecting the :scope also acts as a terminating condition. 7464 7465 tbh this prolly needs like a trillion more tests. 7466 +/ 7467 if(where is relativeTo) 7468 return false; // at end of line, if we aren't done by now, the match fails 7469 */ 7470 } 7471 return true; // if we got here, it is a success 7472 } 7473 7474 // the string should NOT have commas. Use parseSelectorString for that instead 7475 ///. 7476 static SelectorComponent fromString(string selector) { 7477 return parseSelector(lexSelector(selector)); 7478 } 7479 } 7480 7481 ///. 7482 SelectorComponent[] parseSelectorString(string selector, bool caseSensitiveTags = true) { 7483 SelectorComponent[] ret; 7484 auto tokens = lexSelector(selector); // this will parse commas too 7485 // and now do comma-separated slices (i haz phobosophobia!) 7486 int parensCount = 0; 7487 while (tokens.length > 0) { 7488 size_t end = 0; 7489 while (end < tokens.length && (parensCount > 0 || tokens[end] != ",")) { 7490 if(tokens[end] == "(") parensCount++; 7491 if(tokens[end] == ")") parensCount--; 7492 ++end; 7493 } 7494 if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags); 7495 if (tokens.length-end < 2) break; 7496 tokens = tokens[end+1..$]; 7497 } 7498 return ret; 7499 } 7500 7501 ///. 7502 SelectorComponent parseSelector(string[] tokens, bool caseSensitiveTags = true) { 7503 SelectorComponent s; 7504 7505 SelectorPart current; 7506 void commit() { 7507 // might as well skip null items 7508 if(!current.isCleanSlateExceptSeparation()) { 7509 s.parts ~= current; 7510 current = current.init; // start right over 7511 } 7512 } 7513 enum State { 7514 Starting, 7515 ReadingClass, 7516 ReadingId, 7517 ReadingAttributeSelector, 7518 ReadingAttributeComparison, 7519 ExpectingAttributeCloser, 7520 ReadingPseudoClass, 7521 ReadingAttributeValue, 7522 7523 SkippingFunctionalSelector, 7524 } 7525 State state = State.Starting; 7526 string attributeName, attributeValue, attributeComparison; 7527 int parensCount; 7528 foreach(idx, token; tokens) { 7529 string readFunctionalSelector() { 7530 string s; 7531 if(tokens[idx + 1] != "(") 7532 throw new Exception("parse error"); 7533 int pc = 1; 7534 foreach(t; tokens[idx + 2 .. $]) { 7535 if(t == "(") 7536 pc++; 7537 if(t == ")") 7538 pc--; 7539 if(pc == 0) 7540 break; 7541 s ~= t; 7542 } 7543 7544 return s; 7545 } 7546 7547 sizediff_t tid = -1; 7548 foreach(i, item; selectorTokens) 7549 if(token == item) { 7550 tid = i; 7551 break; 7552 } 7553 final switch(state) { 7554 case State.Starting: // fresh, might be reading an operator or a tagname 7555 if(tid == -1) { 7556 if(!caseSensitiveTags) 7557 token = token.toLower(); 7558 7559 if(current.isCleanSlateExceptSeparation()) { 7560 current.tagNameFilter = token; 7561 // default thing, see comment under "*" below 7562 if(current.separation == -1) current.separation = 0; 7563 } else { 7564 // if it was already set, we must see two thingies 7565 // separated by whitespace... 7566 commit(); 7567 current.separation = 0; // tree 7568 current.tagNameFilter = token; 7569 } 7570 } else { 7571 // Selector operators 7572 switch(token) { 7573 case "*": 7574 current.tagNameFilter = "*"; 7575 // the idea here is if we haven't actually set a separation 7576 // yet (e.g. the > operator), it should assume the generic 7577 // whitespace (descendant) mode to avoid matching self with -1 7578 if(current.separation == -1) current.separation = 0; 7579 break; 7580 case " ": 7581 // If some other separation has already been set, 7582 // this is irrelevant whitespace, so we should skip it. 7583 // this happens in the case of "foo > bar" for example. 7584 if(current.isCleanSlateExceptSeparation() && current.separation > 0) 7585 continue; 7586 commit(); 7587 current.separation = 0; // tree 7588 break; 7589 case ">>": 7590 commit(); 7591 current.separation = 0; // alternate syntax for tree from html5 css 7592 break; 7593 case ">": 7594 commit(); 7595 current.separation = 1; // child 7596 break; 7597 case "+": 7598 commit(); 7599 current.separation = 2; // sibling directly after 7600 break; 7601 case "~": 7602 commit(); 7603 current.separation = 3; // any sibling after 7604 break; 7605 case "<": 7606 commit(); 7607 current.separation = 4; // immediate parent of 7608 break; 7609 case "[": 7610 state = State.ReadingAttributeSelector; 7611 if(current.separation == -1) current.separation = 0; 7612 break; 7613 case ".": 7614 state = State.ReadingClass; 7615 if(current.separation == -1) current.separation = 0; 7616 break; 7617 case "#": 7618 state = State.ReadingId; 7619 if(current.separation == -1) current.separation = 0; 7620 break; 7621 case ":": 7622 case "::": 7623 state = State.ReadingPseudoClass; 7624 if(current.separation == -1) current.separation = 0; 7625 break; 7626 7627 default: 7628 import arsd.core; 7629 throw ArsdException!"CSS Selector Problem"(token, tokens, cast(int) state); 7630 } 7631 } 7632 break; 7633 case State.ReadingClass: 7634 current.attributesIncludesSeparatedBySpaces ~= ["class", token]; 7635 state = State.Starting; 7636 break; 7637 case State.ReadingId: 7638 current.attributesEqual ~= ["id", token]; 7639 state = State.Starting; 7640 break; 7641 case State.ReadingPseudoClass: 7642 switch(token) { 7643 case "first-of-type": 7644 current.firstOfType = true; 7645 break; 7646 case "last-of-type": 7647 current.lastOfType = true; 7648 break; 7649 case "only-of-type": 7650 current.firstOfType = true; 7651 current.lastOfType = true; 7652 break; 7653 case "first-child": 7654 current.firstChild = true; 7655 break; 7656 case "last-child": 7657 current.lastChild = true; 7658 break; 7659 case "only-child": 7660 current.firstChild = true; 7661 current.lastChild = true; 7662 break; 7663 case "scope": 7664 current.scopeElement = true; 7665 break; 7666 case "empty": 7667 // one with no children 7668 current.emptyElement = true; 7669 break; 7670 case "whitespace-only": 7671 current.whitespaceOnly = true; 7672 break; 7673 case "link": 7674 current.attributesPresent ~= "href"; 7675 break; 7676 case "root": 7677 current.rootElement = true; 7678 break; 7679 case "lang": 7680 state = State.SkippingFunctionalSelector; 7681 continue; 7682 case "nth-child": 7683 current.nthChild ~= ParsedNth(readFunctionalSelector()); 7684 state = State.SkippingFunctionalSelector; 7685 continue; 7686 case "nth-of-type": 7687 current.nthOfType ~= ParsedNth(readFunctionalSelector()); 7688 state = State.SkippingFunctionalSelector; 7689 continue; 7690 case "nth-last-of-type": 7691 current.nthLastOfType ~= ParsedNth(readFunctionalSelector()); 7692 state = State.SkippingFunctionalSelector; 7693 continue; 7694 case "nth-last-child": 7695 // FIXME 7696 //current.nthLastOfType ~= ParsedNth(readFunctionalSelector()); 7697 state = State.SkippingFunctionalSelector; 7698 continue; 7699 case "is": 7700 state = State.SkippingFunctionalSelector; 7701 current.isSelectors ~= readFunctionalSelector(); 7702 continue; // now the rest of the parser skips past the parens we just handled 7703 case "where": 7704 state = State.SkippingFunctionalSelector; 7705 current.whereSelectors ~= readFunctionalSelector(); 7706 continue; // now the rest of the parser skips past the parens we just handled 7707 case "not": 7708 state = State.SkippingFunctionalSelector; 7709 current.notSelectors ~= readFunctionalSelector(); 7710 continue; // now the rest of the parser skips past the parens we just handled 7711 case "has": 7712 state = State.SkippingFunctionalSelector; 7713 current.hasSelectors ~= readFunctionalSelector(); 7714 continue; // now the rest of the parser skips past the parens we just handled 7715 // back to standards though not quite right lol 7716 case "disabled": 7717 current.attributesPresent ~= "disabled"; 7718 break; 7719 case "checked": 7720 current.attributesPresent ~= "checked"; 7721 break; 7722 7723 case "visited", "active", "hover", "target", "focus", "selected": 7724 current.attributesPresent ~= "nothing"; 7725 // FIXME 7726 /+ 7727 // extensions not implemented 7728 //case "text": // takes the text in the element and wraps it in an element, returning it 7729 +/ 7730 goto case; 7731 case "before", "after": 7732 current.attributesPresent ~= "FIXME"; 7733 7734 break; 7735 // My extensions 7736 case "odd-child": 7737 current.oddChild = true; 7738 break; 7739 case "even-child": 7740 current.evenChild = true; 7741 break; 7742 default: 7743 //if(token.indexOf("lang") == -1) 7744 //assert(0, token); 7745 break; 7746 } 7747 state = State.Starting; 7748 break; 7749 case State.SkippingFunctionalSelector: 7750 if(token == "(") { 7751 parensCount++; 7752 } else if(token == ")") { 7753 parensCount--; 7754 } 7755 7756 if(parensCount == 0) 7757 state = State.Starting; 7758 break; 7759 case State.ReadingAttributeSelector: 7760 attributeName = token; 7761 attributeComparison = null; 7762 attributeValue = null; 7763 state = State.ReadingAttributeComparison; 7764 break; 7765 case State.ReadingAttributeComparison: 7766 // FIXME: these things really should be quotable in the proper lexer... 7767 if(token != "]") { 7768 if(token.indexOf("=") == -1) { 7769 // not a comparison; consider it 7770 // part of the attribute 7771 attributeValue ~= token; 7772 } else { 7773 attributeComparison = token; 7774 state = State.ReadingAttributeValue; 7775 } 7776 break; 7777 } 7778 goto case; 7779 case State.ExpectingAttributeCloser: 7780 if(token != "]") { 7781 // not the closer; consider it part of comparison 7782 if(attributeComparison == "") 7783 attributeName ~= token; 7784 else 7785 attributeValue ~= token; 7786 break; 7787 } 7788 7789 // Selector operators 7790 switch(attributeComparison) { 7791 default: assert(0); 7792 case "": 7793 current.attributesPresent ~= attributeName; 7794 break; 7795 case "=": 7796 current.attributesEqual ~= [attributeName, attributeValue]; 7797 break; 7798 case "|=": 7799 current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue]; 7800 break; 7801 case "~=": 7802 current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue]; 7803 break; 7804 case "$=": 7805 current.attributesEndsWith ~= [attributeName, attributeValue]; 7806 break; 7807 case "^=": 7808 current.attributesStartsWith ~= [attributeName, attributeValue]; 7809 break; 7810 case "*=": 7811 current.attributesInclude ~= [attributeName, attributeValue]; 7812 break; 7813 case "!=": 7814 current.attributesNotEqual ~= [attributeName, attributeValue]; 7815 break; 7816 } 7817 7818 state = State.Starting; 7819 break; 7820 case State.ReadingAttributeValue: 7821 attributeValue = token; 7822 state = State.ExpectingAttributeCloser; 7823 break; 7824 } 7825 } 7826 7827 commit(); 7828 7829 return s; 7830 } 7831 7832 ///. 7833 Element[] removeDuplicates(Element[] input) { 7834 Element[] ret; 7835 7836 bool[Element] already; 7837 foreach(e; input) { 7838 if(e in already) continue; 7839 already[e] = true; 7840 ret ~= e; 7841 } 7842 7843 return ret; 7844 } 7845 7846 // done with CSS selector handling 7847 7848 /++ 7849 This delegate is called if you call [Element.computedStyle] to attach an object to the element 7850 that holds stylesheet information. You can rebind it to something else to return a subclass 7851 if you want to hold more per-element extension data than the normal computed style object holds 7852 (e.g. layout info as well). 7853 7854 The default is `return new CssStyle(null, element.style);` 7855 7856 History: 7857 Added September 13, 2024 (dub v11.6) 7858 +/ 7859 CssStyle function(Element e) computedStyleFactory = &defaultComputedStyleFactory; 7860 7861 /// ditto 7862 CssStyle defaultComputedStyleFactory(Element e) { 7863 return new CssStyle(null, e.style); // gives at least something to work with 7864 } 7865 7866 7867 // FIXME: use the better parser from html.d 7868 /// This is probably not useful to you unless you're writing a browser or something like that. 7869 /// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes. 7870 /// From here, you can start to make a layout engine for the box model and have a css aware browser. 7871 class CssStyle { 7872 ///. 7873 this(string rule, string content) { 7874 rule = rule.strip(); 7875 content = content.strip(); 7876 7877 if(content.length == 0) 7878 return; 7879 7880 originatingRule = rule; 7881 originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work! 7882 7883 foreach(part; content.split(";")) { 7884 part = part.strip(); 7885 if(part.length == 0) 7886 continue; 7887 auto idx = part.indexOf(":"); 7888 if(idx == -1) 7889 continue; 7890 //throw new Exception("Bad css rule (no colon): " ~ part); 7891 7892 Property p; 7893 7894 p.name = part[0 .. idx].strip(); 7895 p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important 7896 p.givenExplicitly = true; 7897 p.specificity = originatingSpecificity; 7898 7899 properties ~= p; 7900 7901 } 7902 7903 foreach(property; properties) 7904 expandShortForm(property, originatingSpecificity); 7905 } 7906 7907 ///. 7908 Specificity getSpecificityOfRule(string rule) { 7909 Specificity s; 7910 if(rule.length == 0) { // inline 7911 s.important = 2; 7912 } else { 7913 // SO. WRONG. 7914 foreach(ch; rule) { 7915 if(ch == '.') 7916 s.classes++; 7917 if(ch == '#') 7918 s.ids++; 7919 if(ch == ' ') 7920 s.tags++; 7921 if(ch == ',') 7922 break; 7923 } 7924 // FIXME 7925 } 7926 7927 return s; 7928 } 7929 7930 string originatingRule; ///. 7931 Specificity originatingSpecificity; ///. 7932 7933 ///. 7934 union Specificity { 7935 uint score; ///. 7936 // version(little_endian) 7937 ///. 7938 struct { 7939 ubyte tags; ///. 7940 ubyte classes; ///. 7941 ubyte ids; ///. 7942 ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important 7943 } 7944 } 7945 7946 ///. 7947 struct Property { 7948 bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left" 7949 string name; ///. 7950 string value; ///. 7951 Specificity specificity; ///. 7952 // do we care about the original source rule? 7953 } 7954 7955 ///. 7956 Property[] properties; 7957 7958 ///. 7959 string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") { 7960 string name = unCamelCase(nameGiven); 7961 if(value is null) 7962 return getValue(name); 7963 else 7964 return setValue(name, value, Specificity(0x02000000) /* inline specificity */); 7965 } 7966 7967 /// takes dash style name 7968 string getValue(string name) { 7969 foreach(property; properties) 7970 if(property.name == name) 7971 return property.value; 7972 return null; 7973 } 7974 7975 /// takes dash style name 7976 string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) { 7977 value = value.replace("! important", "!important"); 7978 if(value.indexOf("!important") != -1) { 7979 newSpecificity.important = 1; // FIXME 7980 value = value.replace("!important", "").strip(); 7981 } 7982 7983 foreach(ref property; properties) 7984 if(property.name == name) { 7985 if(newSpecificity.score >= property.specificity.score) { 7986 property.givenExplicitly = explicit; 7987 expandShortForm(property, newSpecificity); 7988 property.specificity = newSpecificity; 7989 return (property.value = value); 7990 } else { 7991 if(name == "display") 7992 {}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score); 7993 return value; // do nothing - the specificity is too low 7994 } 7995 } 7996 7997 // it's not here... 7998 7999 Property p; 8000 p.givenExplicitly = true; 8001 p.name = name; 8002 p.value = value; 8003 p.specificity = originatingSpecificity; 8004 8005 properties ~= p; 8006 expandShortForm(p, originatingSpecificity); 8007 8008 return value; 8009 } 8010 8011 private void expandQuadShort(string name, string value, Specificity specificity) { 8012 auto parts = value.split(" "); 8013 switch(parts.length) { 8014 case 1: 8015 setValue(name ~"-left", parts[0], specificity, false); 8016 setValue(name ~"-right", parts[0], specificity, false); 8017 setValue(name ~"-top", parts[0], specificity, false); 8018 setValue(name ~"-bottom", parts[0], specificity, false); 8019 break; 8020 case 2: 8021 setValue(name ~"-left", parts[1], specificity, false); 8022 setValue(name ~"-right", parts[1], specificity, false); 8023 setValue(name ~"-top", parts[0], specificity, false); 8024 setValue(name ~"-bottom", parts[0], specificity, false); 8025 break; 8026 case 3: 8027 setValue(name ~"-top", parts[0], specificity, false); 8028 setValue(name ~"-right", parts[1], specificity, false); 8029 setValue(name ~"-bottom", parts[2], specificity, false); 8030 setValue(name ~"-left", parts[2], specificity, false); 8031 8032 break; 8033 case 4: 8034 setValue(name ~"-top", parts[0], specificity, false); 8035 setValue(name ~"-right", parts[1], specificity, false); 8036 setValue(name ~"-bottom", parts[2], specificity, false); 8037 setValue(name ~"-left", parts[3], specificity, false); 8038 break; 8039 default: 8040 // assert(0, value); 8041 } 8042 } 8043 8044 ///. 8045 void expandShortForm(Property p, Specificity specificity) { 8046 switch(p.name) { 8047 case "margin": 8048 case "padding": 8049 expandQuadShort(p.name, p.value, specificity); 8050 break; 8051 case "border": 8052 case "outline": 8053 setValue(p.name ~ "-left", p.value, specificity, false); 8054 setValue(p.name ~ "-right", p.value, specificity, false); 8055 setValue(p.name ~ "-top", p.value, specificity, false); 8056 setValue(p.name ~ "-bottom", p.value, specificity, false); 8057 break; 8058 8059 case "border-top": 8060 case "border-bottom": 8061 case "border-left": 8062 case "border-right": 8063 case "outline-top": 8064 case "outline-bottom": 8065 case "outline-left": 8066 case "outline-right": 8067 8068 default: {} 8069 } 8070 } 8071 8072 ///. 8073 override string toString() { 8074 string ret; 8075 if(originatingRule.length) 8076 ret = originatingRule ~ " {"; 8077 8078 foreach(property; properties) { 8079 if(!property.givenExplicitly) 8080 continue; // skip the inferred shit 8081 8082 if(originatingRule.length) 8083 ret ~= "\n\t"; 8084 else 8085 ret ~= " "; 8086 8087 ret ~= property.name ~ ": " ~ property.value ~ ";"; 8088 } 8089 8090 if(originatingRule.length) 8091 ret ~= "\n}\n"; 8092 8093 return ret; 8094 } 8095 } 8096 8097 string cssUrl(string url) { 8098 return "url(\"" ~ url ~ "\")"; 8099 } 8100 8101 /// This probably isn't useful, unless you're writing a browser or something like that. 8102 /// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css 8103 /// as text. 8104 /// 8105 /// The idea, however, is to represent a kind of CSS object model, complete with specificity, 8106 /// that you can apply to your documents to build the complete computedStyle object. 8107 class StyleSheet { 8108 ///. 8109 CssStyle[] rules; 8110 8111 ///. 8112 this(string source) { 8113 // FIXME: handle @ rules and probably could improve lexer 8114 // add nesting? 8115 int state; 8116 string currentRule; 8117 string currentValue; 8118 8119 string* currentThing = ¤tRule; 8120 foreach(c; source) { 8121 handle: switch(state) { 8122 default: assert(0); 8123 case 0: // starting - we assume we're reading a rule 8124 switch(c) { 8125 case '@': 8126 state = 4; 8127 break; 8128 case '/': 8129 state = 1; 8130 break; 8131 case '{': 8132 currentThing = ¤tValue; 8133 break; 8134 case '}': 8135 if(currentThing is ¤tValue) { 8136 rules ~= new CssStyle(currentRule, currentValue); 8137 8138 currentRule = ""; 8139 currentValue = ""; 8140 8141 currentThing = ¤tRule; 8142 } else { 8143 // idk what is going on here. 8144 // check sveit.com to reproduce 8145 currentRule = ""; 8146 currentValue = ""; 8147 } 8148 break; 8149 default: 8150 (*currentThing) ~= c; 8151 } 8152 break; 8153 case 1: // expecting * 8154 if(c == '*') 8155 state = 2; 8156 else { 8157 state = 0; 8158 (*currentThing) ~= "/" ~ c; 8159 } 8160 break; 8161 case 2: // inside comment 8162 if(c == '*') 8163 state = 3; 8164 break; 8165 case 3: // expecting / to end comment 8166 if(c == '/') 8167 state = 0; 8168 else 8169 state = 2; // it's just a comment so no need to append 8170 break; 8171 case 4: 8172 if(c == '{') 8173 state = 5; 8174 if(c == ';') 8175 state = 0; // just skipping import 8176 break; 8177 case 5: 8178 if(c == '}') 8179 state = 0; // skipping font face probably 8180 } 8181 } 8182 } 8183 8184 /// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call 8185 void apply(Document document) { 8186 foreach(rule; rules) { 8187 if(rule.originatingRule.length == 0) 8188 continue; // this shouldn't happen here in a stylesheet 8189 foreach(element; document.querySelectorAll(rule.originatingRule)) { 8190 // note: this should be a different object than the inline style 8191 // since givenExplicitly is likely destroyed here 8192 auto current = element.computedStyle; 8193 8194 foreach(item; rule.properties) 8195 current.setValue(item.name, item.value, item.specificity); 8196 } 8197 } 8198 } 8199 } 8200 8201 8202 /// This is kinda private; just a little utility container for use by the ElementStream class. 8203 final class Stack(T) { 8204 this() { 8205 internalLength = 0; 8206 arr = initialBuffer[]; 8207 } 8208 8209 ///. 8210 void push(T t) { 8211 if(internalLength >= arr.length) { 8212 auto oldarr = arr; 8213 if(arr.length < 4096) 8214 arr = new T[arr.length * 2]; 8215 else 8216 arr = new T[arr.length + 4096]; 8217 arr[0 .. oldarr.length] = oldarr[]; 8218 } 8219 8220 arr[internalLength] = t; 8221 internalLength++; 8222 } 8223 8224 ///. 8225 T pop() { 8226 assert(internalLength); 8227 internalLength--; 8228 return arr[internalLength]; 8229 } 8230 8231 ///. 8232 T peek() { 8233 assert(internalLength); 8234 return arr[internalLength - 1]; 8235 } 8236 8237 ///. 8238 @property bool empty() { 8239 return internalLength ? false : true; 8240 } 8241 8242 ///. 8243 private T[] arr; 8244 private size_t internalLength; 8245 private T[64] initialBuffer; 8246 // the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep), 8247 // using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push() 8248 // function thanks to this, and push() was actually one of the slowest individual functions in the code! 8249 } 8250 8251 /// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively. 8252 final class ElementStream { 8253 8254 ///. 8255 @property Element front() { 8256 return current.element; 8257 } 8258 8259 /// Use Element.tree instead. 8260 this(Element start) { 8261 current.element = start; 8262 current.childPosition = -1; 8263 isEmpty = false; 8264 stack = new Stack!(Current); 8265 } 8266 8267 /* 8268 Handle it 8269 handle its children 8270 8271 */ 8272 8273 ///. 8274 void popFront() { 8275 more: 8276 if(isEmpty) return; 8277 8278 // FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times) 8279 8280 current.childPosition++; 8281 if(current.childPosition >= current.element.children.length) { 8282 if(stack.empty()) 8283 isEmpty = true; 8284 else { 8285 current = stack.pop(); 8286 goto more; 8287 } 8288 } else { 8289 stack.push(current); 8290 current.element = current.element.children[current.childPosition]; 8291 current.childPosition = -1; 8292 } 8293 } 8294 8295 /// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable. 8296 void currentKilled() { 8297 if(stack.empty) // should never happen 8298 isEmpty = true; 8299 else { 8300 current = stack.pop(); 8301 current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right 8302 } 8303 } 8304 8305 ///. 8306 @property bool empty() { 8307 return isEmpty; 8308 } 8309 8310 private: 8311 8312 struct Current { 8313 Element element; 8314 int childPosition; 8315 } 8316 8317 Current current; 8318 8319 Stack!(Current) stack; 8320 8321 bool isEmpty; 8322 } 8323 8324 8325 8326 // unbelievable. 8327 // Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time. 8328 sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) { 8329 static import std.algorithm; 8330 auto found = std.algorithm.find(haystack, needle); 8331 if(found.length == 0) 8332 return -1; 8333 return haystack.length - found.length; 8334 } 8335 8336 private T[] insertAfter(T)(T[] arr, int position, T[] what) { 8337 assert(position < arr.length); 8338 T[] ret; 8339 ret.length = arr.length + what.length; 8340 int a = 0; 8341 foreach(i; arr[0..position+1]) 8342 ret[a++] = i; 8343 8344 foreach(i; what) 8345 ret[a++] = i; 8346 8347 foreach(i; arr[position+1..$]) 8348 ret[a++] = i; 8349 8350 return ret; 8351 } 8352 8353 package bool isInArray(T)(T item, T[] arr) { 8354 foreach(i; arr) 8355 if(item == i) 8356 return true; 8357 return false; 8358 } 8359 8360 private string[string] aadup(in string[string] arr) { 8361 string[string] ret; 8362 foreach(k, v; arr) 8363 ret[k] = v; 8364 return ret; 8365 } 8366 8367 private AttributesHolder aadup(const AttributesHolder arr) { 8368 AttributesHolder ret; 8369 foreach(k, v; arr) 8370 ret[k] = v; 8371 return ret; 8372 } 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 // These MUST be sorted. See generatedomcases.d for a program to generate it if you need to add more than a few (otherwise maybe you can work it in yourself but yikes) 8389 8390 immutable string[] availableEntities = 8391 ["AElig", "AElig", "AMP", "AMP", "Aacute", "Aacute", "Abreve", "Abreve", "Acirc", "Acirc", "Acy", "Acy", "Afr", "Afr", "Agrave", "Agrave", "Alpha", "Alpha", "Amacr", "Amacr", "And", "And", "Aogon", "Aogon", "Aopf", "Aopf", "ApplyFunction", "ApplyFunction", "Aring", "Aring", "Ascr", "Ascr", "Assign", "Assign", "Atilde", 8392 "Atilde", "Auml", "Auml", "Backslash", "Backslash", "Barv", "Barv", "Barwed", "Barwed", "Bcy", "Bcy", "Because", "Because", "Bernoullis", "Bernoullis", "Beta", "Beta", "Bfr", "Bfr", "Bopf", "Bopf", "Breve", "Breve", "Bscr", "Bscr", "Bumpeq", "Bumpeq", "CHcy", "CHcy", "COPY", "COPY", "Cacute", "Cacute", "Cap", "Cap", "CapitalDifferentialD", 8393 "CapitalDifferentialD", "Cayleys", "Cayleys", "Ccaron", "Ccaron", "Ccedil", "Ccedil", "Ccirc", "Ccirc", "Cconint", "Cconint", "Cdot", "Cdot", "Cedilla", "Cedilla", "CenterDot", "CenterDot", "Cfr", "Cfr", "Chi", "Chi", "CircleDot", "CircleDot", "CircleMinus", "CircleMinus", "CirclePlus", "CirclePlus", "CircleTimes", "CircleTimes", 8394 "ClockwiseContourIntegral", "ClockwiseContourIntegral", "CloseCurlyDoubleQuote", "CloseCurlyDoubleQuote", "CloseCurlyQuote", "CloseCurlyQuote", "Colon", "Colon", "Colone", "Colone", "Congruent", "Congruent", "Conint", "Conint", "ContourIntegral", "ContourIntegral", "Copf", "Copf", "Coproduct", "Coproduct", "CounterClockwiseContourIntegral", 8395 "CounterClockwiseContourIntegral", "Cross", "Cross", "Cscr", "Cscr", "Cup", "Cup", "CupCap", "CupCap", "DD", "DD", "DDotrahd", "DDotrahd", "DJcy", "DJcy", "DScy", "DScy", "DZcy", "DZcy", "Dagger", "Dagger", "Darr", "Darr", "Dashv", "Dashv", "Dcaron", "Dcaron", "Dcy", "Dcy", "Del", "Del", "Delta", "Delta", "Dfr", "Dfr", 8396 "DiacriticalAcute", "DiacriticalAcute", "DiacriticalDot", "DiacriticalDot", "DiacriticalDoubleAcute", "DiacriticalDoubleAcute", "DiacriticalGrave", "DiacriticalGrave", "DiacriticalTilde", "DiacriticalTilde", "Diamond", "Diamond", "DifferentialD", "DifferentialD", "Dopf", "Dopf", "Dot", "Dot", "DotDot", "DotDot", "DotEqual", 8397 "DotEqual", "DoubleContourIntegral", "DoubleContourIntegral", "DoubleDot", "DoubleDot", "DoubleDownArrow", "DoubleDownArrow", "DoubleLeftArrow", "DoubleLeftArrow", "DoubleLeftRightArrow", "DoubleLeftRightArrow", "DoubleLeftTee", "DoubleLeftTee", "DoubleLongLeftArrow", "DoubleLongLeftArrow", "DoubleLongLeftRightArrow", 8398 "DoubleLongLeftRightArrow", "DoubleLongRightArrow", "DoubleLongRightArrow", "DoubleRightArrow", "DoubleRightArrow", "DoubleRightTee", "DoubleRightTee", "DoubleUpArrow", "DoubleUpArrow", "DoubleUpDownArrow", "DoubleUpDownArrow", "DoubleVerticalBar", "DoubleVerticalBar", "DownArrow", "DownArrow", "DownArrowBar", "DownArrowBar", 8399 "DownArrowUpArrow", "DownArrowUpArrow", "DownBreve", "DownBreve", "DownLeftRightVector", "DownLeftRightVector", "DownLeftTeeVector", "DownLeftTeeVector", "DownLeftVector", "DownLeftVector", "DownLeftVectorBar", "DownLeftVectorBar", "DownRightTeeVector", "DownRightTeeVector", "DownRightVector", "DownRightVector", "DownRightVectorBar", 8400 "DownRightVectorBar", "DownTee", "DownTee", "DownTeeArrow", "DownTeeArrow", "Downarrow", "Downarrow", "Dscr", "Dscr", "Dstrok", "Dstrok", "ENG", "ENG", "ETH", "ETH", "Eacute", "Eacute", "Ecaron", "Ecaron", "Ecirc", "Ecirc", "Ecy", "Ecy", "Edot", "Edot", "Efr", "Efr", "Egrave", "Egrave", "Element", "Element", "Emacr", "Emacr", 8401 "EmptySmallSquare", "EmptySmallSquare", "EmptyVerySmallSquare", "EmptyVerySmallSquare", "Eogon", "Eogon", "Eopf", "Eopf", "Epsilon", "Epsilon", "Equal", "Equal", "EqualTilde", "EqualTilde", "Equilibrium", "Equilibrium", "Escr", "Escr", "Esim", "Esim", "Eta", "Eta", "Euml", "Euml", "Exists", "Exists", "ExponentialE", "ExponentialE", 8402 "Fcy", "Fcy", "Ffr", "Ffr", "FilledSmallSquare", "FilledSmallSquare", "FilledVerySmallSquare", "FilledVerySmallSquare", "Fopf", "Fopf", "ForAll", "ForAll", "Fouriertrf", "Fouriertrf", "Fscr", "Fscr", "GJcy", "GJcy", "GT", "GT", "Gamma", "Gamma", "Gammad", "Gammad", "Gbreve", "Gbreve", "Gcedil", "Gcedil", "Gcirc", "Gcirc", 8403 "Gcy", "Gcy", "Gdot", "Gdot", "Gfr", "Gfr", "Gg", "Gg", "Gopf", "Gopf", "GreaterEqual", "GreaterEqual", "GreaterEqualLess", "GreaterEqualLess", "GreaterFullEqual", "GreaterFullEqual", "GreaterGreater", "GreaterGreater", "GreaterLess", "GreaterLess", "GreaterSlantEqual", "GreaterSlantEqual", "GreaterTilde", "GreaterTilde", 8404 "Gscr", "Gscr", "Gt", "Gt", "HARDcy", "HARDcy", "Hacek", "Hacek", "Hat", "Hat", "Hcirc", "Hcirc", "Hfr", "Hfr", "HilbertSpace", "HilbertSpace", "Hopf", "Hopf", "HorizontalLine", "HorizontalLine", "Hscr", "Hscr", "Hstrok", "Hstrok", "HumpDownHump", "HumpDownHump", "HumpEqual", "HumpEqual", "IEcy", "IEcy", "IJlig", "IJlig", 8405 "IOcy", "IOcy", "Iacute", "Iacute", "Icirc", "Icirc", "Icy", "Icy", "Idot", "Idot", "Ifr", "Ifr", "Igrave", "Igrave", "Im", "Im", "Imacr", "Imacr", "ImaginaryI", "ImaginaryI", "Implies", "Implies", "Int", "Int", "Integral", "Integral", "Intersection", "Intersection", "InvisibleComma", "InvisibleComma", "InvisibleTimes", 8406 "InvisibleTimes", "Iogon", "Iogon", "Iopf", "Iopf", "Iota", "Iota", "Iscr", "Iscr", "Itilde", "Itilde", "Iukcy", "Iukcy", "Iuml", "Iuml", "Jcirc", "Jcirc", "Jcy", "Jcy", "Jfr", "Jfr", "Jopf", "Jopf", "Jscr", "Jscr", "Jsercy", "Jsercy", "Jukcy", "Jukcy", "KHcy", "KHcy", "KJcy", "KJcy", "Kappa", "Kappa", "Kcedil", "Kcedil", 8407 "Kcy", "Kcy", "Kfr", "Kfr", "Kopf", "Kopf", "Kscr", "Kscr", "LJcy", "LJcy", "LT", "LT", "Lacute", "Lacute", "Lambda", "Lambda", "Lang", "Lang", "Laplacetrf", "Laplacetrf", "Larr", "Larr", "Lcaron", "Lcaron", "Lcedil", "Lcedil", "Lcy", "Lcy", "LeftAngleBracket", "LeftAngleBracket", "LeftArrow", "LeftArrow", "LeftArrowBar", 8408 "LeftArrowBar", "LeftArrowRightArrow", "LeftArrowRightArrow", "LeftCeiling", "LeftCeiling", "LeftDoubleBracket", "LeftDoubleBracket", "LeftDownTeeVector", "LeftDownTeeVector", "LeftDownVector", "LeftDownVector", "LeftDownVectorBar", "LeftDownVectorBar", "LeftFloor", "LeftFloor", "LeftRightArrow", "LeftRightArrow", "LeftRightVector", 8409 "LeftRightVector", "LeftTee", "LeftTee", "LeftTeeArrow", "LeftTeeArrow", "LeftTeeVector", "LeftTeeVector", "LeftTriangle", "LeftTriangle", "LeftTriangleBar", "LeftTriangleBar", "LeftTriangleEqual", "LeftTriangleEqual", "LeftUpDownVector", "LeftUpDownVector", "LeftUpTeeVector", "LeftUpTeeVector", "LeftUpVector", "LeftUpVector", 8410 "LeftUpVectorBar", "LeftUpVectorBar", "LeftVector", "LeftVector", "LeftVectorBar", "LeftVectorBar", "Leftarrow", "Leftarrow", "Leftrightarrow", "Leftrightarrow", "LessEqualGreater", "LessEqualGreater", "LessFullEqual", "LessFullEqual", "LessGreater", "LessGreater", "LessLess", "LessLess", "LessSlantEqual", "LessSlantEqual", 8411 "LessTilde", "LessTilde", "Lfr", "Lfr", "Ll", "Ll", "Lleftarrow", "Lleftarrow", "Lmidot", "Lmidot", "LongLeftArrow", "LongLeftArrow", "LongLeftRightArrow", "LongLeftRightArrow", "LongRightArrow", "LongRightArrow", "Longleftarrow", "Longleftarrow", "Longleftrightarrow", "Longleftrightarrow", "Longrightarrow", "Longrightarrow", 8412 "Lopf", "Lopf", "LowerLeftArrow", "LowerLeftArrow", "LowerRightArrow", "LowerRightArrow", "Lscr", "Lscr", "Lsh", "Lsh", "Lstrok", "Lstrok", "Lt", "Lt", "Map", "Map", "Mcy", "Mcy", "MediumSpace", "MediumSpace", "Mellintrf", "Mellintrf", "Mfr", "Mfr", "MinusPlus", "MinusPlus", "Mopf", "Mopf", "Mscr", "Mscr", "Mu", "Mu", 8413 "NJcy", "NJcy", "Nacute", "Nacute", "Ncaron", "Ncaron", "Ncedil", "Ncedil", "Ncy", "Ncy", "NegativeMediumSpace", "NegativeMediumSpace", "NegativeThickSpace", "NegativeThickSpace", "NegativeThinSpace", "NegativeThinSpace", "NegativeVeryThinSpace", "NegativeVeryThinSpace", "NestedGreaterGreater", "NestedGreaterGreater", 8414 "NestedLessLess", "NestedLessLess", "NewLine", "NewLine", "Nfr", "Nfr", "NoBreak", "NoBreak", "NonBreakingSpace", "NonBreakingSpace", "Nopf", "Nopf", "Not", "Not", "NotCongruent", "NotCongruent", "NotCupCap", "NotCupCap", "NotDoubleVerticalBar", "NotDoubleVerticalBar", "NotElement", "NotElement", "NotEqual", "NotEqual", 8415 "NotExists", "NotExists", "NotGreater", "NotGreater", "NotGreaterEqual", "NotGreaterEqual", "NotGreaterLess", "NotGreaterLess", "NotGreaterTilde", "NotGreaterTilde", "NotLeftTriangle", "NotLeftTriangle", "NotLeftTriangleEqual", "NotLeftTriangleEqual", "NotLess", "NotLess", "NotLessEqual", "NotLessEqual", "NotLessGreater", 8416 "NotLessGreater", "NotLessTilde", "NotLessTilde", "NotPrecedes", "NotPrecedes", "NotPrecedesSlantEqual", "NotPrecedesSlantEqual", "NotReverseElement", "NotReverseElement", "NotRightTriangle", "NotRightTriangle", "NotRightTriangleEqual", "NotRightTriangleEqual", "NotSquareSubsetEqual", "NotSquareSubsetEqual", "NotSquareSupersetEqual", 8417 "NotSquareSupersetEqual", "NotSubsetEqual", "NotSubsetEqual", "NotSucceeds", "NotSucceeds", "NotSucceedsSlantEqual", "NotSucceedsSlantEqual", "NotSupersetEqual", "NotSupersetEqual", "NotTilde", "NotTilde", "NotTildeEqual", "NotTildeEqual", "NotTildeFullEqual", "NotTildeFullEqual", "NotTildeTilde", "NotTildeTilde", "NotVerticalBar", 8418 "NotVerticalBar", "Nscr", "Nscr", "Ntilde", "Ntilde", "Nu", "Nu", "OElig", "OElig", "Oacute", "Oacute", "Ocirc", "Ocirc", "Ocy", "Ocy", "Odblac", "Odblac", "Ofr", "Ofr", "Ograve", "Ograve", "Omacr", "Omacr", "Omega", "Omega", "Omicron", "Omicron", "Oopf", "Oopf", "OpenCurlyDoubleQuote", "OpenCurlyDoubleQuote", "OpenCurlyQuote", 8419 "OpenCurlyQuote", "Or", "Or", "Oscr", "Oscr", "Oslash", "Oslash", "Otilde", "Otilde", "Otimes", "Otimes", "Ouml", "Ouml", "OverBar", "OverBar", "OverBrace", "OverBrace", "OverBracket", "OverBracket", "OverParenthesis", "OverParenthesis", "PartialD", "PartialD", "Pcy", "Pcy", "Pfr", "Pfr", "Phi", "Phi", "Pi", "Pi", "PlusMinus", 8420 "PlusMinus", "Poincareplane", "Poincareplane", "Popf", "Popf", "Pr", "Pr", "Precedes", "Precedes", "PrecedesEqual", "PrecedesEqual", "PrecedesSlantEqual", "PrecedesSlantEqual", "PrecedesTilde", "PrecedesTilde", "Prime", "Prime", "Product", "Product", "Proportion", "Proportion", "Proportional", "Proportional", "Pscr", "Pscr", 8421 "Psi", "Psi", "QUOT", "QUOT", "Qfr", "Qfr", "Qopf", "Qopf", "Qscr", "Qscr", "RBarr", "RBarr", "REG", "REG", "Racute", "Racute", "Rang", "Rang", "Rarr", "Rarr", "Rarrtl", "Rarrtl", "Rcaron", "Rcaron", "Rcedil", "Rcedil", "Rcy", "Rcy", "Re", "Re", "ReverseElement", "ReverseElement", "ReverseEquilibrium", "ReverseEquilibrium", 8422 "ReverseUpEquilibrium", "ReverseUpEquilibrium", "Rfr", "Rfr", "Rho", "Rho", "RightAngleBracket", "RightAngleBracket", "RightArrow", "RightArrow", "RightArrowBar", "RightArrowBar", "RightArrowLeftArrow", "RightArrowLeftArrow", "RightCeiling", "RightCeiling", "RightDoubleBracket", "RightDoubleBracket", "RightDownTeeVector", 8423 "RightDownTeeVector", "RightDownVector", "RightDownVector", "RightDownVectorBar", "RightDownVectorBar", "RightFloor", "RightFloor", "RightTee", "RightTee", "RightTeeArrow", "RightTeeArrow", "RightTeeVector", "RightTeeVector", "RightTriangle", "RightTriangle", "RightTriangleBar", "RightTriangleBar", "RightTriangleEqual", 8424 "RightTriangleEqual", "RightUpDownVector", "RightUpDownVector", "RightUpTeeVector", "RightUpTeeVector", "RightUpVector", "RightUpVector", "RightUpVectorBar", "RightUpVectorBar", "RightVector", "RightVector", "RightVectorBar", "RightVectorBar", "Rightarrow", "Rightarrow", "Ropf", "Ropf", "RoundImplies", "RoundImplies", 8425 "Rrightarrow", "Rrightarrow", "Rscr", "Rscr", "Rsh", "Rsh", "RuleDelayed", "RuleDelayed", "SHCHcy", "SHCHcy", "SHcy", "SHcy", "SOFTcy", "SOFTcy", "Sacute", "Sacute", "Sc", "Sc", "Scaron", "Scaron", "Scedil", "Scedil", "Scirc", "Scirc", "Scy", "Scy", "Sfr", "Sfr", "ShortDownArrow", "ShortDownArrow", "ShortLeftArrow", "ShortLeftArrow", 8426 "ShortRightArrow", "ShortRightArrow", "ShortUpArrow", "ShortUpArrow", "Sigma", "Sigma", "SmallCircle", "SmallCircle", "Sopf", "Sopf", "Sqrt", "Sqrt", "Square", "Square", "SquareIntersection", "SquareIntersection", "SquareSubset", "SquareSubset", "SquareSubsetEqual", "SquareSubsetEqual", "SquareSuperset", "SquareSuperset", 8427 "SquareSupersetEqual", "SquareSupersetEqual", "SquareUnion", "SquareUnion", "Sscr", "Sscr", "Star", "Star", "Sub", "Sub", "Subset", "Subset", "SubsetEqual", "SubsetEqual", "Succeeds", "Succeeds", "SucceedsEqual", "SucceedsEqual", "SucceedsSlantEqual", "SucceedsSlantEqual", "SucceedsTilde", "SucceedsTilde", "SuchThat", 8428 "SuchThat", "Sum", "Sum", "Sup", "Sup", "Superset", "Superset", "SupersetEqual", "SupersetEqual", "Supset", "Supset", "THORN", "THORN", "TRADE", "TRADE", "TSHcy", "TSHcy", "TScy", "TScy", "Tab", "Tab", "Tau", "Tau", "Tcaron", "Tcaron", "Tcedil", "Tcedil", "Tcy", "Tcy", "Tfr", "Tfr", "Therefore", "Therefore", "Theta", "Theta", 8429 "ThinSpace", "ThinSpace", "Tilde", "Tilde", "TildeEqual", "TildeEqual", "TildeFullEqual", "TildeFullEqual", "TildeTilde", "TildeTilde", "Topf", "Topf", "TripleDot", "TripleDot", "Tscr", "Tscr", "Tstrok", "Tstrok", "Uacute", "Uacute", "Uarr", "Uarr", "Uarrocir", "Uarrocir", "Ubrcy", "Ubrcy", "Ubreve", "Ubreve", "Ucirc", 8430 "Ucirc", "Ucy", "Ucy", "Udblac", "Udblac", "Ufr", "Ufr", "Ugrave", "Ugrave", "Umacr", "Umacr", "UnderBar", "UnderBar", "UnderBrace", "UnderBrace", "UnderBracket", "UnderBracket", "UnderParenthesis", "UnderParenthesis", "Union", "Union", "UnionPlus", "UnionPlus", "Uogon", "Uogon", "Uopf", "Uopf", "UpArrow", "UpArrow", "UpArrowBar", 8431 "UpArrowBar", "UpArrowDownArrow", "UpArrowDownArrow", "UpDownArrow", "UpDownArrow", "UpEquilibrium", "UpEquilibrium", "UpTee", "UpTee", "UpTeeArrow", "UpTeeArrow", "Uparrow", "Uparrow", "Updownarrow", "Updownarrow", "UpperLeftArrow", "UpperLeftArrow", "UpperRightArrow", "UpperRightArrow", "Upsi", "Upsi", "Upsilon", "Upsilon", 8432 "Uring", "Uring", "Uscr", "Uscr", "Utilde", "Utilde", "Uuml", "Uuml", "VDash", "VDash", "Vbar", "Vbar", "Vcy", "Vcy", "Vdash", "Vdash", "Vdashl", "Vdashl", "Vee", "Vee", "Verbar", "Verbar", "Vert", "Vert", "VerticalBar", "VerticalBar", "VerticalLine", "VerticalLine", "VerticalSeparator", "VerticalSeparator", "VerticalTilde", 8433 "VerticalTilde", "VeryThinSpace", "VeryThinSpace", "Vfr", "Vfr", "Vopf", "Vopf", "Vscr", "Vscr", "Vvdash", "Vvdash", "Wcirc", "Wcirc", "Wedge", "Wedge", "Wfr", "Wfr", "Wopf", "Wopf", "Wscr", "Wscr", "Xfr", "Xfr", "Xi", "Xi", "Xopf", "Xopf", "Xscr", "Xscr", "YAcy", "YAcy", "YIcy", "YIcy", "YUcy", "YUcy", "Yacute", "Yacute", 8434 "Ycirc", "Ycirc", "Ycy", "Ycy", "Yfr", "Yfr", "Yopf", "Yopf", "Yscr", "Yscr", "Yuml", "Yuml", "ZHcy", "ZHcy", "Zacute", "Zacute", "Zcaron", "Zcaron", "Zcy", "Zcy", "Zdot", "Zdot", "ZeroWidthSpace", "ZeroWidthSpace", "Zeta", "Zeta", "Zfr", "Zfr", "Zopf", "Zopf", "Zscr", "Zscr", "aacute", "aacute", "abreve", "abreve", "ac", 8435 "ac", "acd", "acd", "acirc", "acirc", "acute", "acute", "acy", "acy", "aelig", "aelig", "af", "af", "afr", "afr", "agrave", "agrave", "alefsym", "alefsym", "aleph", "aleph", "alpha", "alpha", "amacr", "amacr", "amalg", "amalg", "and", "and", "andand", "andand", "andd", "andd", "andslope", "andslope", "andv", "andv", "ang", 8436 "ang", "ange", "ange", "angle", "angle", "angmsd", "angmsd", "angmsdaa", "angmsdaa", "angmsdab", "angmsdab", "angmsdac", "angmsdac", "angmsdad", "angmsdad", "angmsdae", "angmsdae", "angmsdaf", "angmsdaf", "angmsdag", "angmsdag", "angmsdah", "angmsdah", "angrt", "angrt", "angrtvb", "angrtvb", "angrtvbd", "angrtvbd", "angsph", 8437 "angsph", "angst", "angst", "angzarr", "angzarr", "aogon", "aogon", "aopf", "aopf", "ap", "ap", "apE", "apE", "apacir", "apacir", "ape", "ape", "apid", "apid", "approx", "approx", "approxeq", "approxeq", "aring", "aring", "ascr", "ascr", "ast", "ast", "asymp", "asymp", "asympeq", "asympeq", "atilde", "atilde", "auml", 8438 "auml", "awconint", "awconint", "awint", "awint", "bNot", "bNot", "backcong", "backcong", "backepsilon", "backepsilon", "backprime", "backprime", "backsim", "backsim", "backsimeq", "backsimeq", "barvee", "barvee", "barwed", "barwed", "barwedge", "barwedge", "bbrk", "bbrk", "bbrktbrk", "bbrktbrk", "bcong", "bcong", "bcy", 8439 "bcy", "bdquo", "bdquo", "becaus", "becaus", "because", "because", "bemptyv", "bemptyv", "bepsi", "bepsi", "bernou", "bernou", "beta", "beta", "beth", "beth", "between", "between", "bfr", "bfr", "bigcap", "bigcap", "bigcirc", "bigcirc", "bigcup", "bigcup", "bigodot", "bigodot", "bigoplus", "bigoplus", "bigotimes", "bigotimes", 8440 "bigsqcup", "bigsqcup", "bigstar", "bigstar", "bigtriangledown", "bigtriangledown", "bigtriangleup", "bigtriangleup", "biguplus", "biguplus", "bigvee", "bigvee", "bigwedge", "bigwedge", "bkarow", "bkarow", "blacklozenge", "blacklozenge", "blacksquare", "blacksquare", "blacktriangle", "blacktriangle", "blacktriangledown", 8441 "blacktriangledown", "blacktriangleleft", "blacktriangleleft", "blacktriangleright", "blacktriangleright", "blank", "blank", "blk12", "blk12", "blk14", "blk14", "blk34", "blk34", "block", "block", "bnot", "bnot", "bopf", "bopf", "bot", "bot", "bottom", "bottom", "bowtie", "bowtie", "boxDL", "boxDL", "boxDR", "boxDR", "boxDl", 8442 "boxDl", "boxDr", "boxDr", "boxH", "boxH", "boxHD", "boxHD", "boxHU", "boxHU", "boxHd", "boxHd", "boxHu", "boxHu", "boxUL", "boxUL", "boxUR", "boxUR", "boxUl", "boxUl", "boxUr", "boxUr", "boxV", "boxV", "boxVH", "boxVH", "boxVL", "boxVL", "boxVR", "boxVR", "boxVh", "boxVh", "boxVl", "boxVl", "boxVr", "boxVr", "boxbox", 8443 "boxbox", "boxdL", "boxdL", "boxdR", "boxdR", "boxdl", "boxdl", "boxdr", "boxdr", "boxh", "boxh", "boxhD", "boxhD", "boxhU", "boxhU", "boxhd", "boxhd", "boxhu", "boxhu", "boxminus", "boxminus", "boxplus", "boxplus", "boxtimes", "boxtimes", "boxuL", "boxuL", "boxuR", "boxuR", "boxul", "boxul", "boxur", "boxur", "boxv", 8444 "boxv", "boxvH", "boxvH", "boxvL", "boxvL", "boxvR", "boxvR", "boxvh", "boxvh", "boxvl", "boxvl", "boxvr", "boxvr", "bprime", "bprime", "breve", "breve", "brvbar", "brvbar", "bscr", "bscr", "bsemi", "bsemi", "bsim", "bsim", "bsime", "bsime", "bsol", "bsol", "bsolb", "bsolb", "bsolhsub", "bsolhsub", "bull", "bull", "bullet", 8445 "bullet", "bump", "bump", "bumpE", "bumpE", "bumpe", "bumpe", "bumpeq", "bumpeq", "cacute", "cacute", "cap", "cap", "capand", "capand", "capbrcup", "capbrcup", "capcap", "capcap", "capcup", "capcup", "capdot", "capdot", "caret", "caret", "caron", "caron", "ccaps", "ccaps", "ccaron", "ccaron", "ccedil", "ccedil", "ccirc", 8446 "ccirc", "ccups", "ccups", "ccupssm", "ccupssm", "cdot", "cdot", "cedil", "cedil", "cemptyv", "cemptyv", "cent", "cent", "centerdot", "centerdot", "cfr", "cfr", "chcy", "chcy", "check", "check", "checkmark", "checkmark", "chi", "chi", "cir", "cir", "cirE", "cirE", "circ", "circ", "circeq", "circeq", "circlearrowleft", 8447 "circlearrowleft", "circlearrowright", "circlearrowright", "circledR", "circledR", "circledS", "circledS", "circledast", "circledast", "circledcirc", "circledcirc", "circleddash", "circleddash", "cire", "cire", "cirfnint", "cirfnint", "cirmid", "cirmid", "cirscir", "cirscir", "clubs", "clubs", "clubsuit", "clubsuit", "colon", 8448 "colon", "colone", "colone", "coloneq", "coloneq", "comma", "comma", "commat", "commat", "comp", "comp", "compfn", "compfn", "complement", "complement", "complexes", "complexes", "cong", "cong", "congdot", "congdot", "conint", "conint", "copf", "copf", "coprod", "coprod", "copy", "copy", "copysr", "copysr", "crarr", "crarr", 8449 "cross", "cross", "cscr", "cscr", "csub", "csub", "csube", "csube", "csup", "csup", "csupe", "csupe", "ctdot", "ctdot", "cudarrl", "cudarrl", "cudarrr", "cudarrr", "cuepr", "cuepr", "cuesc", "cuesc", "cularr", "cularr", "cularrp", "cularrp", "cup", "cup", "cupbrcap", "cupbrcap", "cupcap", "cupcap", "cupcup", "cupcup", 8450 "cupdot", "cupdot", "cupor", "cupor", "curarr", "curarr", "curarrm", "curarrm", "curlyeqprec", "curlyeqprec", "curlyeqsucc", "curlyeqsucc", "curlyvee", "curlyvee", "curlywedge", "curlywedge", "curren", "curren", "curvearrowleft", "curvearrowleft", "curvearrowright", "curvearrowright", "cuvee", "cuvee", "cuwed", "cuwed", 8451 "cwconint", "cwconint", "cwint", "cwint", "cylcty", "cylcty", "dArr", "dArr", "dHar", "dHar", "dagger", "dagger", "daleth", "daleth", "darr", "darr", "dash", "dash", "dashv", "dashv", "dbkarow", "dbkarow", "dblac", "dblac", "dcaron", "dcaron", "dcy", "dcy", "dd", "dd", "ddagger", "ddagger", "ddarr", "ddarr", "ddotseq", 8452 "ddotseq", "deg", "deg", "delta", "delta", "demptyv", "demptyv", "dfisht", "dfisht", "dfr", "dfr", "dharl", "dharl", "dharr", "dharr", "diam", "diam", "diamond", "diamond", "diamondsuit", "diamondsuit", "diams", "diams", "die", "die", "digamma", "digamma", "disin", "disin", "div", "div", "divide", "divide", "divideontimes", 8453 "divideontimes", "divonx", "divonx", "djcy", "djcy", "dlcorn", "dlcorn", "dlcrop", "dlcrop", "dollar", "dollar", "dopf", "dopf", "dot", "dot", "doteq", "doteq", "doteqdot", "doteqdot", "dotminus", "dotminus", "dotplus", "dotplus", "dotsquare", "dotsquare", "doublebarwedge", "doublebarwedge", "downarrow", "downarrow", "downdownarrows", 8454 "downdownarrows", "downharpoonleft", "downharpoonleft", "downharpoonright", "downharpoonright", "drbkarow", "drbkarow", "drcorn", "drcorn", "drcrop", "drcrop", "dscr", "dscr", "dscy", "dscy", "dsol", "dsol", "dstrok", "dstrok", "dtdot", "dtdot", "dtri", "dtri", "dtrif", "dtrif", "duarr", "duarr", "duhar", "duhar", "dwangle", 8455 "dwangle", "dzcy", "dzcy", "dzigrarr", "dzigrarr", "eDDot", "eDDot", "eDot", "eDot", "eacute", "eacute", "easter", "easter", "ecaron", "ecaron", "ecir", "ecir", "ecirc", "ecirc", "ecolon", "ecolon", "ecy", "ecy", "edot", "edot", "ee", "ee", "efDot", "efDot", "efr", "efr", "eg", "eg", "egrave", "egrave", "egs", "egs", "egsdot", 8456 "egsdot", "el", "el", "elinters", "elinters", "ell", "ell", "els", "els", "elsdot", "elsdot", "emacr", "emacr", "empty", "empty", "emptyset", "emptyset", "emptyv", "emptyv", "emsp", "emsp", "emsp13", "emsp13", "emsp14", "emsp14", "eng", "eng", "ensp", "ensp", "eogon", "eogon", "eopf", "eopf", "epar", "epar", "eparsl", 8457 "eparsl", "eplus", "eplus", "epsi", "epsi", "epsilon", "epsilon", "epsiv", "epsiv", "eqcirc", "eqcirc", "eqcolon", "eqcolon", "eqsim", "eqsim", "eqslantgtr", "eqslantgtr", "eqslantless", "eqslantless", "equals", "equals", "equest", "equest", "equiv", "equiv", "equivDD", "equivDD", "eqvparsl", "eqvparsl", "erDot", "erDot", 8458 "erarr", "erarr", "escr", "escr", "esdot", "esdot", "esim", "esim", "eta", "eta", "eth", "eth", "euml", "euml", "euro", "euro", "excl", "excl", "exist", "exist", "expectation", "expectation", "exponentiale", "exponentiale", "fallingdotseq", "fallingdotseq", "fcy", "fcy", "female", "female", "ffilig", "ffilig", "fflig", 8459 "fflig", "ffllig", "ffllig", "ffr", "ffr", "filig", "filig", "flat", "flat", "fllig", "fllig", "fltns", "fltns", "fnof", "fnof", "fopf", "fopf", "forall", "forall", "fork", "fork", "forkv", "forkv", "fpartint", "fpartint", "frac12", "frac12", "frac13", "frac13", "frac14", "frac14", "frac15", "frac15", "frac16", "frac16", 8460 "frac18", "frac18", "frac23", "frac23", "frac25", "frac25", "frac34", "frac34", "frac35", "frac35", "frac38", "frac38", "frac45", "frac45", "frac56", "frac56", "frac58", "frac58", "frac78", "frac78", "frasl", "frasl", "frown", "frown", "fscr", "fscr", "gE", "gE", "gEl", "gEl", "gacute", "gacute", "gamma", "gamma", "gammad", 8461 "gammad", "gap", "gap", "gbreve", "gbreve", "gcirc", "gcirc", "gcy", "gcy", "gdot", "gdot", "ge", "ge", "gel", "gel", "geq", "geq", "geqq", "geqq", "geqslant", "geqslant", "ges", "ges", "gescc", "gescc", "gesdot", "gesdot", "gesdoto", "gesdoto", "gesdotol", "gesdotol", "gesles", "gesles", "gfr", "gfr", "gg", "gg", "ggg", 8462 "ggg", "gimel", "gimel", "gjcy", "gjcy", "gl", "gl", "glE", "glE", "gla", "gla", "glj", "glj", "gnE", "gnE", "gnap", "gnap", "gnapprox", "gnapprox", "gne", "gne", "gneq", "gneq", "gneqq", "gneqq", "gnsim", "gnsim", "gopf", "gopf", "grave", "grave", "gscr", "gscr", "gsim", "gsim", "gsime", "gsime", "gsiml", "gsiml", "gtcc", 8463 "gtcc", "gtcir", "gtcir", "gtdot", "gtdot", "gtlPar", "gtlPar", "gtquest", "gtquest", "gtrapprox", "gtrapprox", "gtrarr", "gtrarr", "gtrdot", "gtrdot", "gtreqless", "gtreqless", "gtreqqless", "gtreqqless", "gtrless", "gtrless", "gtrsim", "gtrsim", "hArr", "hArr", "hairsp", "hairsp", "half", "half", "hamilt", "hamilt", 8464 "hardcy", "hardcy", "harr", "harr", "harrcir", "harrcir", "harrw", "harrw", "hbar", "hbar", "hcirc", "hcirc", "hearts", "hearts", "heartsuit", "heartsuit", "hellip", "hellip", "hercon", "hercon", "hfr", "hfr", "hksearow", "hksearow", "hkswarow", "hkswarow", "hoarr", "hoarr", "homtht", "homtht", "hookleftarrow", "hookleftarrow", 8465 "hookrightarrow", "hookrightarrow", "hopf", "hopf", "horbar", "horbar", "hscr", "hscr", "hslash", "hslash", "hstrok", "hstrok", "hybull", "hybull", "hyphen", "hyphen", "iacute", "iacute", "ic", "ic", "icirc", "icirc", "icy", "icy", "iecy", "iecy", "iexcl", "iexcl", "iff", "iff", "ifr", "ifr", "igrave", "igrave", "ii", 8466 "ii", "iiiint", "iiiint", "iiint", "iiint", "iinfin", "iinfin", "iiota", "iiota", "ijlig", "ijlig", "imacr", "imacr", "image", "image", "imagline", "imagline", "imagpart", "imagpart", "imath", "imath", "imof", "imof", "imped", "imped", "in", "in", "incare", "incare", "infin", "infin", "infintie", "infintie", "inodot", 8467 "inodot", "int", "int", "intcal", "intcal", "integers", "integers", "intercal", "intercal", "intlarhk", "intlarhk", "intprod", "intprod", "iocy", "iocy", "iogon", "iogon", "iopf", "iopf", "iota", "iota", "iprod", "iprod", "iquest", "iquest", "iscr", "iscr", "isin", "isin", "isinE", "isinE", "isindot", "isindot", "isins", 8468 "isins", "isinsv", "isinsv", "isinv", "isinv", "it", "it", "itilde", "itilde", "iukcy", "iukcy", "iuml", "iuml", "jcirc", "jcirc", "jcy", "jcy", "jfr", "jfr", "jmath", "jmath", "jopf", "jopf", "jscr", "jscr", "jsercy", "jsercy", "jukcy", "jukcy", "kappa", "kappa", "kappav", "kappav", "kcedil", "kcedil", "kcy", "kcy", "kfr", 8469 "kfr", "kgreen", "kgreen", "khcy", "khcy", "kjcy", "kjcy", "kopf", "kopf", "kscr", "kscr", "lAarr", "lAarr", "lArr", "lArr", "lAtail", "lAtail", "lBarr", "lBarr", "lE", "lE", "lEg", "lEg", "lHar", "lHar", "lacute", "lacute", "laemptyv", "laemptyv", "lagran", "lagran", "lambda", "lambda", "lang", "lang", "langd", "langd", 8470 "langle", "langle", "lap", "lap", "laquo", "laquo", "larr", "larr", "larrb", "larrb", "larrbfs", "larrbfs", "larrfs", "larrfs", "larrhk", "larrhk", "larrlp", "larrlp", "larrpl", "larrpl", "larrsim", "larrsim", "larrtl", "larrtl", "lat", "lat", "latail", "latail", "late", "late", "lbarr", "lbarr", "lbbrk", "lbbrk", "lbrace", 8471 "lbrace", "lbrack", "lbrack", "lbrke", "lbrke", "lbrksld", "lbrksld", "lbrkslu", "lbrkslu", "lcaron", "lcaron", "lcedil", "lcedil", "lceil", "lceil", "lcub", "lcub", "lcy", "lcy", "ldca", "ldca", "ldquo", "ldquo", "ldquor", "ldquor", "ldrdhar", "ldrdhar", "ldrushar", "ldrushar", "ldsh", "ldsh", "le", "le", "leftarrow", 8472 "leftarrow", "leftarrowtail", "leftarrowtail", "leftharpoondown", "leftharpoondown", "leftharpoonup", "leftharpoonup", "leftleftarrows", "leftleftarrows", "leftrightarrow", "leftrightarrow", "leftrightarrows", "leftrightarrows", "leftrightharpoons", "leftrightharpoons", "leftrightsquigarrow", "leftrightsquigarrow", "leftthreetimes", 8473 "leftthreetimes", "leg", "leg", "leq", "leq", "leqq", "leqq", "leqslant", "leqslant", "les", "les", "lescc", "lescc", "lesdot", "lesdot", "lesdoto", "lesdoto", "lesdotor", "lesdotor", "lesges", "lesges", "lessapprox", "lessapprox", "lessdot", "lessdot", "lesseqgtr", "lesseqgtr", "lesseqqgtr", "lesseqqgtr", "lessgtr", "lessgtr", 8474 "lesssim", "lesssim", "lfisht", "lfisht", "lfloor", "lfloor", "lfr", "lfr", "lg", "lg", "lgE", "lgE", "lhard", "lhard", "lharu", "lharu", "lharul", "lharul", "lhblk", "lhblk", "ljcy", "ljcy", "ll", "ll", "llarr", "llarr", "llcorner", "llcorner", "llhard", "llhard", "lltri", "lltri", "lmidot", "lmidot", "lmoust", "lmoust", 8475 "lmoustache", "lmoustache", "lnE", "lnE", "lnap", "lnap", "lnapprox", "lnapprox", "lne", "lne", "lneq", "lneq", "lneqq", "lneqq", "lnsim", "lnsim", "loang", "loang", "loarr", "loarr", "lobrk", "lobrk", "longleftarrow", "longleftarrow", "longleftrightarrow", "longleftrightarrow", "longmapsto", "longmapsto", "longrightarrow", 8476 "longrightarrow", "looparrowleft", "looparrowleft", "looparrowright", "looparrowright", "lopar", "lopar", "lopf", "lopf", "loplus", "loplus", "lotimes", "lotimes", "lowast", "lowast", "lowbar", "lowbar", "loz", "loz", "lozenge", "lozenge", "lozf", "lozf", "lpar", "lpar", "lparlt", "lparlt", "lrarr", "lrarr", "lrcorner", 8477 "lrcorner", "lrhar", "lrhar", "lrhard", "lrhard", "lrm", "lrm", "lrtri", "lrtri", "lsaquo", "lsaquo", "lscr", "lscr", "lsh", "lsh", "lsim", "lsim", "lsime", "lsime", "lsimg", "lsimg", "lsqb", "lsqb", "lsquo", "lsquo", "lsquor", "lsquor", "lstrok", "lstrok", "ltcc", "ltcc", "ltcir", "ltcir", "ltdot", "ltdot", "lthree", 8478 "lthree", "ltimes", "ltimes", "ltlarr", "ltlarr", "ltquest", "ltquest", "ltrPar", "ltrPar", "ltri", "ltri", "ltrie", "ltrie", "ltrif", "ltrif", "lurdshar", "lurdshar", "luruhar", "luruhar", "mDDot", "mDDot", "macr", "macr", "male", "male", "malt", "malt", "maltese", "maltese", "map", "map", "mapsto", "mapsto", "mapstodown", 8479 "mapstodown", "mapstoleft", "mapstoleft", "mapstoup", "mapstoup", "marker", "marker", "mcomma", "mcomma", "mcy", "mcy", "mdash", "mdash", "measuredangle", "measuredangle", "mfr", "mfr", "mho", "mho", "micro", "micro", "mid", "mid", "midast", "midast", "midcir", "midcir", "middot", "middot", "minus", "minus", "minusb", 8480 "minusb", "minusd", "minusd", "minusdu", "minusdu", "mlcp", "mlcp", "mldr", "mldr", "mnplus", "mnplus", "models", "models", "mopf", "mopf", "mp", "mp", "mscr", "mscr", "mstpos", "mstpos", "mu", "mu", "multimap", "multimap", "mumap", "mumap", "nLeftarrow", "nLeftarrow", "nLeftrightarrow", "nLeftrightarrow", "nRightarrow", 8481 "nRightarrow", "nVDash", "nVDash", "nVdash", "nVdash", "nabla", "nabla", "nacute", "nacute", "nap", "nap", "napos", "napos", "napprox", "napprox", "natur", "natur", "natural", "natural", "naturals", "naturals", "nbsp", "nbsp", "ncap", "ncap", "ncaron", "ncaron", "ncedil", "ncedil", "ncong", "ncong", "ncup", "ncup", "ncy", 8482 "ncy", "ndash", "ndash", "ne", "ne", "neArr", "neArr", "nearhk", "nearhk", "nearr", "nearr", "nearrow", "nearrow", "nequiv", "nequiv", "nesear", "nesear", "nexist", "nexist", "nexists", "nexists", "nfr", "nfr", "nge", "nge", "ngeq", "ngeq", "ngsim", "ngsim", "ngt", "ngt", "ngtr", "ngtr", "nhArr", "nhArr", "nharr", "nharr", 8483 "nhpar", "nhpar", "ni", "ni", "nis", "nis", "nisd", "nisd", "niv", "niv", "njcy", "njcy", "nlArr", "nlArr", "nlarr", "nlarr", "nldr", "nldr", "nle", "nle", "nleftarrow", "nleftarrow", "nleftrightarrow", "nleftrightarrow", "nleq", "nleq", "nless", "nless", "nlsim", "nlsim", "nlt", "nlt", "nltri", "nltri", "nltrie", "nltrie", 8484 "nmid", "nmid", "nopf", "nopf", "not", "not", "notin", "notin", "notinva", "notinva", "notinvb", "notinvb", "notinvc", "notinvc", "notni", "notni", "notniva", "notniva", "notnivb", "notnivb", "notnivc", "notnivc", "npar", "npar", "nparallel", "nparallel", "npolint", "npolint", "npr", "npr", "nprcue", "nprcue", "nprec", 8485 "nprec", "nrArr", "nrArr", "nrarr", "nrarr", "nrightarrow", "nrightarrow", "nrtri", "nrtri", "nrtrie", "nrtrie", "nsc", "nsc", "nsccue", "nsccue", "nscr", "nscr", "nshortmid", "nshortmid", "nshortparallel", "nshortparallel", "nsim", "nsim", "nsime", "nsime", "nsimeq", "nsimeq", "nsmid", "nsmid", "nspar", "nspar", "nsqsube", 8486 "nsqsube", "nsqsupe", "nsqsupe", "nsub", "nsub", "nsube", "nsube", "nsubseteq", "nsubseteq", "nsucc", "nsucc", "nsup", "nsup", "nsupe", "nsupe", "nsupseteq", "nsupseteq", "ntgl", "ntgl", "ntilde", "ntilde", "ntlg", "ntlg", "ntriangleleft", "ntriangleleft", "ntrianglelefteq", "ntrianglelefteq", "ntriangleright", "ntriangleright", 8487 "ntrianglerighteq", "ntrianglerighteq", "nu", "nu", "num", "num", "numero", "numero", "numsp", "numsp", "nvDash", "nvDash", "nvHarr", "nvHarr", "nvdash", "nvdash", "nvinfin", "nvinfin", "nvlArr", "nvlArr", "nvrArr", "nvrArr", "nwArr", "nwArr", "nwarhk", "nwarhk", "nwarr", "nwarr", "nwarrow", "nwarrow", "nwnear", "nwnear", 8488 "oS", "oS", "oacute", "oacute", "oast", "oast", "ocir", "ocir", "ocirc", "ocirc", "ocy", "ocy", "odash", "odash", "odblac", "odblac", "odiv", "odiv", "odot", "odot", "odsold", "odsold", "oelig", "oelig", "ofcir", "ofcir", "ofr", "ofr", "ogon", "ogon", "ograve", "ograve", "ogt", "ogt", "ohbar", "ohbar", "ohm", "ohm", "oint", 8489 "oint", "olarr", "olarr", "olcir", "olcir", "olcross", "olcross", "oline", "oline", "olt", "olt", "omacr", "omacr", "omega", "omega", "omicron", "omicron", "omid", "omid", "ominus", "ominus", "oopf", "oopf", "opar", "opar", "operp", "operp", "oplus", "oplus", "or", "or", "orarr", "orarr", "ord", "ord", "order", "order", 8490 "orderof", "orderof", "ordf", "ordf", "ordm", "ordm", "origof", "origof", "oror", "oror", "orslope", "orslope", "orv", "orv", "oscr", "oscr", "oslash", "oslash", "osol", "osol", "otilde", "otilde", "otimes", "otimes", "otimesas", "otimesas", "ouml", "ouml", "ovbar", "ovbar", "par", "par", "para", "para", "parallel", "parallel", 8491 "parsim", "parsim", "parsl", "parsl", "part", "part", "pcy", "pcy", "percnt", "percnt", "period", "period", "permil", "permil", "perp", "perp", "pertenk", "pertenk", "pfr", "pfr", "phi", "phi", "phiv", "phiv", "phmmat", "phmmat", "phone", "phone", "pi", "pi", "pitchfork", "pitchfork", "piv", "piv", "planck", "planck", 8492 "planckh", "planckh", "plankv", "plankv", "plus", "plus", "plusacir", "plusacir", "plusb", "plusb", "pluscir", "pluscir", "plusdo", "plusdo", "plusdu", "plusdu", "pluse", "pluse", "plusmn", "plusmn", "plussim", "plussim", "plustwo", "plustwo", "pm", "pm", "pointint", "pointint", "popf", "popf", "pound", "pound", "pr", 8493 "pr", "prE", "prE", "prap", "prap", "prcue", "prcue", "pre", "pre", "prec", "prec", "precapprox", "precapprox", "preccurlyeq", "preccurlyeq", "preceq", "preceq", "precnapprox", "precnapprox", "precneqq", "precneqq", "precnsim", "precnsim", "precsim", "precsim", "prime", "prime", "primes", "primes", "prnE", "prnE", "prnap", 8494 "prnap", "prnsim", "prnsim", "prod", "prod", "profalar", "profalar", "profline", "profline", "profsurf", "profsurf", "prop", "prop", "propto", "propto", "prsim", "prsim", "prurel", "prurel", "pscr", "pscr", "psi", "psi", "puncsp", "puncsp", "qfr", "qfr", "qint", "qint", "qopf", "qopf", "qprime", "qprime", "qscr", "qscr", 8495 "quaternions", "quaternions", "quatint", "quatint", "quest", "quest", "questeq", "questeq", "rAarr", "rAarr", "rArr", "rArr", "rAtail", "rAtail", "rBarr", "rBarr", "rHar", "rHar", "racute", "racute", "radic", "radic", "raemptyv", "raemptyv", "rang", "rang", "rangd", "rangd", "range", "range", "rangle", "rangle", "raquo", 8496 "raquo", "rarr", "rarr", "rarrap", "rarrap", "rarrb", "rarrb", "rarrbfs", "rarrbfs", "rarrc", "rarrc", "rarrfs", "rarrfs", "rarrhk", "rarrhk", "rarrlp", "rarrlp", "rarrpl", "rarrpl", "rarrsim", "rarrsim", "rarrtl", "rarrtl", "rarrw", "rarrw", "ratail", "ratail", "ratio", "ratio", "rationals", "rationals", "rbarr", "rbarr", 8497 "rbbrk", "rbbrk", "rbrace", "rbrace", "rbrack", "rbrack", "rbrke", "rbrke", "rbrksld", "rbrksld", "rbrkslu", "rbrkslu", "rcaron", "rcaron", "rcedil", "rcedil", "rceil", "rceil", "rcub", "rcub", "rcy", "rcy", "rdca", "rdca", "rdldhar", "rdldhar", "rdquo", "rdquo", "rdquor", "rdquor", "rdsh", "rdsh", "real", "real", "realine", 8498 "realine", "realpart", "realpart", "reals", "reals", "rect", "rect", "reg", "reg", "rfisht", "rfisht", "rfloor", "rfloor", "rfr", "rfr", "rhard", "rhard", "rharu", "rharu", "rharul", "rharul", "rho", "rho", "rhov", "rhov", "rightarrow", "rightarrow", "rightarrowtail", "rightarrowtail", "rightharpoondown", "rightharpoondown", 8499 "rightharpoonup", "rightharpoonup", "rightleftarrows", "rightleftarrows", "rightleftharpoons", "rightleftharpoons", "rightrightarrows", "rightrightarrows", "rightsquigarrow", "rightsquigarrow", "rightthreetimes", "rightthreetimes", "ring", "ring", "risingdotseq", "risingdotseq", "rlarr", "rlarr", "rlhar", "rlhar", "rlm", 8500 "rlm", "rmoust", "rmoust", "rmoustache", "rmoustache", "rnmid", "rnmid", "roang", "roang", "roarr", "roarr", "robrk", "robrk", "ropar", "ropar", "ropf", "ropf", "roplus", "roplus", "rotimes", "rotimes", "rpar", "rpar", "rpargt", "rpargt", "rppolint", "rppolint", "rrarr", "rrarr", "rsaquo", "rsaquo", "rscr", "rscr", "rsh", 8501 "rsh", "rsqb", "rsqb", "rsquo", "rsquo", "rsquor", "rsquor", "rthree", "rthree", "rtimes", "rtimes", "rtri", "rtri", "rtrie", "rtrie", "rtrif", "rtrif", "rtriltri", "rtriltri", "ruluhar", "ruluhar", "rx", "rx", "sacute", "sacute", "sbquo", "sbquo", "sc", "sc", "scE", "scE", "scap", "scap", "scaron", "scaron", "sccue", 8502 "sccue", "sce", "sce", "scedil", "scedil", "scirc", "scirc", "scnE", "scnE", "scnap", "scnap", "scnsim", "scnsim", "scpolint", "scpolint", "scsim", "scsim", "scy", "scy", "sdot", "sdot", "sdotb", "sdotb", "sdote", "sdote", "seArr", "seArr", "searhk", "searhk", "searr", "searr", "searrow", "searrow", "sect", "sect", "semi", 8503 "semi", "seswar", "seswar", "setminus", "setminus", "setmn", "setmn", "sext", "sext", "sfr", "sfr", "sfrown", "sfrown", "sharp", "sharp", "shchcy", "shchcy", "shcy", "shcy", "shortmid", "shortmid", "shortparallel", "shortparallel", "shy", "shy", "sigma", "sigma", "sigmaf", "sigmaf", "sigmav", "sigmav", "sim", "sim", "simdot", 8504 "simdot", "sime", "sime", "simeq", "simeq", "simg", "simg", "simgE", "simgE", "siml", "siml", "simlE", "simlE", "simne", "simne", "simplus", "simplus", "simrarr", "simrarr", "slarr", "slarr", "smallsetminus", "smallsetminus", "smashp", "smashp", "smeparsl", "smeparsl", "smid", "smid", "smile", "smile", "smt", "smt", "smte", 8505 "smte", "softcy", "softcy", "sol", "sol", "solb", "solb", "solbar", "solbar", "sopf", "sopf", "spades", "spades", "spadesuit", "spadesuit", "spar", "spar", "sqcap", "sqcap", "sqcup", "sqcup", "sqsub", "sqsub", "sqsube", "sqsube", "sqsubset", "sqsubset", "sqsubseteq", "sqsubseteq", "sqsup", "sqsup", "sqsupe", "sqsupe", 8506 "sqsupset", "sqsupset", "sqsupseteq", "sqsupseteq", "squ", "squ", "square", "square", "squarf", "squarf", "squf", "squf", "srarr", "srarr", "sscr", "sscr", "ssetmn", "ssetmn", "ssmile", "ssmile", "sstarf", "sstarf", "star", "star", "starf", "starf", "straightepsilon", "straightepsilon", "straightphi", "straightphi", "strns", 8507 "strns", "sub", "sub", "subE", "subE", "subdot", "subdot", "sube", "sube", "subedot", "subedot", "submult", "submult", "subnE", "subnE", "subne", "subne", "subplus", "subplus", "subrarr", "subrarr", "subset", "subset", "subseteq", "subseteq", "subseteqq", "subseteqq", "subsetneq", "subsetneq", "subsetneqq", "subsetneqq", 8508 "subsim", "subsim", "subsub", "subsub", "subsup", "subsup", "succ", "succ", "succapprox", "succapprox", "succcurlyeq", "succcurlyeq", "succeq", "succeq", "succnapprox", "succnapprox", "succneqq", "succneqq", "succnsim", "succnsim", "succsim", "succsim", "sum", "sum", "sung", "sung", "sup", "sup", "sup1", "sup1", "sup2", 8509 "sup2", "sup3", "sup3", "supE", "supE", "supdot", "supdot", "supdsub", "supdsub", "supe", "supe", "supedot", "supedot", "suphsol", "suphsol", "suphsub", "suphsub", "suplarr", "suplarr", "supmult", "supmult", "supnE", "supnE", "supne", "supne", "supplus", "supplus", "supset", "supset", "supseteq", "supseteq", "supseteqq", 8510 "supseteqq", "supsetneq", "supsetneq", "supsetneqq", "supsetneqq", "supsim", "supsim", "supsub", "supsub", "supsup", "supsup", "swArr", "swArr", "swarhk", "swarhk", "swarr", "swarr", "swarrow", "swarrow", "swnwar", "swnwar", "szlig", "szlig", "target", "target", "tau", "tau", "tbrk", "tbrk", "tcaron", "tcaron", "tcedil", 8511 "tcedil", "tcy", "tcy", "tdot", "tdot", "telrec", "telrec", "tfr", "tfr", "there4", "there4", "therefore", "therefore", "theta", "theta", "thetasym", "thetasym", "thetav", "thetav", "thickapprox", "thickapprox", "thicksim", "thicksim", "thinsp", "thinsp", "thkap", "thkap", "thksim", "thksim", "thorn", "thorn", "tilde", 8512 "tilde", "times", "times", "timesb", "timesb", "timesbar", "timesbar", "timesd", "timesd", "tint", "tint", "toea", "toea", "top", "top", "topbot", "topbot", "topcir", "topcir", "topf", "topf", "topfork", "topfork", "tosa", "tosa", "tprime", "tprime", "trade", "trade", "triangle", "triangle", "triangledown", "triangledown", 8513 "triangleleft", "triangleleft", "trianglelefteq", "trianglelefteq", "triangleq", "triangleq", "triangleright", "triangleright", "trianglerighteq", "trianglerighteq", "tridot", "tridot", "trie", "trie", "triminus", "triminus", "triplus", "triplus", "trisb", "trisb", "tritime", "tritime", "trpezium", "trpezium", "tscr", 8514 "tscr", "tscy", "tscy", "tshcy", "tshcy", "tstrok", "tstrok", "twixt", "twixt", "twoheadleftarrow", "twoheadleftarrow", "twoheadrightarrow", "twoheadrightarrow", "uArr", "uArr", "uHar", "uHar", "uacute", "uacute", "uarr", "uarr", "ubrcy", "ubrcy", "ubreve", "ubreve", "ucirc", "ucirc", "ucy", "ucy", "udarr", "udarr", "udblac", 8515 "udblac", "udhar", "udhar", "ufisht", "ufisht", "ufr", "ufr", "ugrave", "ugrave", "uharl", "uharl", "uharr", "uharr", "uhblk", "uhblk", "ulcorn", "ulcorn", "ulcorner", "ulcorner", "ulcrop", "ulcrop", "ultri", "ultri", "umacr", "umacr", "uml", "uml", "uogon", "uogon", "uopf", "uopf", "uparrow", "uparrow", "updownarrow", 8516 "updownarrow", "upharpoonleft", "upharpoonleft", "upharpoonright", "upharpoonright", "uplus", "uplus", "upsi", "upsi", "upsih", "upsih", "upsilon", "upsilon", "upuparrows", "upuparrows", "urcorn", "urcorn", "urcorner", "urcorner", "urcrop", "urcrop", "uring", "uring", "urtri", "urtri", "uscr", "uscr", "utdot", "utdot", 8517 "utilde", "utilde", "utri", "utri", "utrif", "utrif", "uuarr", "uuarr", "uuml", "uuml", "uwangle", "uwangle", "vArr", "vArr", "vBar", "vBar", "vBarv", "vBarv", "vDash", "vDash", "vangrt", "vangrt", "varepsilon", "varepsilon", "varkappa", "varkappa", "varnothing", "varnothing", "varphi", "varphi", "varpi", "varpi", "varpropto", 8518 "varpropto", "varr", "varr", "varrho", "varrho", "varsigma", "varsigma", "vartheta", "vartheta", "vartriangleleft", "vartriangleleft", "vartriangleright", "vartriangleright", "vcy", "vcy", "vdash", "vdash", "vee", "vee", "veebar", "veebar", "veeeq", "veeeq", "vellip", "vellip", "verbar", "verbar", "vert", "vert", "vfr", 8519 "vfr", "vltri", "vltri", "vopf", "vopf", "vprop", "vprop", "vrtri", "vrtri", "vscr", "vscr", "vzigzag", "vzigzag", "wcirc", "wcirc", "wedbar", "wedbar", "wedge", "wedge", "wedgeq", "wedgeq", "weierp", "weierp", "wfr", "wfr", "wopf", "wopf", "wp", "wp", "wr", "wr", "wreath", "wreath", "wscr", "wscr", "xcap", "xcap", "xcirc", 8520 "xcirc", "xcup", "xcup", "xdtri", "xdtri", "xfr", "xfr", "xhArr", "xhArr", "xharr", "xharr", "xi", "xi", "xlArr", "xlArr", "xlarr", "xlarr", "xmap", "xmap", "xnis", "xnis", "xodot", "xodot", "xopf", "xopf", "xoplus", "xoplus", "xotime", "xotime", "xrArr", "xrArr", "xrarr", "xrarr", "xscr", "xscr", "xsqcup", "xsqcup", "xuplus", 8521 "xuplus", "xutri", "xutri", "xvee", "xvee", "xwedge", "xwedge", "yacute", "yacute", "yacy", "yacy", "ycirc", "ycirc", "ycy", "ycy", "yen", "yen", "yfr", "yfr", "yicy", "yicy", "yopf", "yopf", "yscr", "yscr", "yucy", "yucy", "yuml", "yuml", "zacute", "zacute", "zcaron", "zcaron", "zcy", "zcy", "zdot", "zdot", "zeetrf", 8522 "zeetrf", "zeta", "zeta", "zfr", "zfr", "zhcy", "zhcy", "zigrarr", "zigrarr", "zopf", "zopf", "zscr", "zscr", "zwj", "zwj", "zwnj", "zwnj", ]; 8523 8524 immutable dchar[] availableEntitiesValues = 8525 ['\u00c6', '\u00c6', '\u0026', '\u0026', '\u00c1', '\u00c1', '\u0102', '\u0102', '\u00c2', '\u00c2', '\u0410', '\u0410', '\U0001d504', '\U0001d504', '\u00c0', '\u00c0', '\u0391', '\u0391', '\u0100', '\u0100', '\u2a53', '\u2a53', '\u0104', '\u0104', '\U0001d538', '\U0001d538', '\u2061', '\u2061', '\u00c5', '\u00c5', '\U0001d49c', '\U0001d49c', '\u2254', '\u2254', '\u00c3', 8526 '\u00c3', '\u00c4', '\u00c4', '\u2216', '\u2216', '\u2ae7', '\u2ae7', '\u2306', '\u2306', '\u0411', '\u0411', '\u2235', '\u2235', '\u212c', '\u212c', '\u0392', '\u0392', '\U0001d505', '\U0001d505', '\U0001d539', '\U0001d539', '\u02d8', '\u02d8', '\u212c', '\u212c', '\u224e', '\u224e', '\u0427', '\u0427', '\u00a9', '\u00a9', '\u0106', '\u0106', '\u22d2', '\u22d2', '\u2145', 8527 '\u2145', '\u212d', '\u212d', '\u010c', '\u010c', '\u00c7', '\u00c7', '\u0108', '\u0108', '\u2230', '\u2230', '\u010a', '\u010a', '\u00b8', '\u00b8', '\u00b7', '\u00b7', '\u212d', '\u212d', '\u03a7', '\u03a7', '\u2299', '\u2299', '\u2296', '\u2296', '\u2295', '\u2295', '\u2297', '\u2297', 8528 '\u2232', '\u2232', '\u201d', '\u201d', '\u2019', '\u2019', '\u2237', '\u2237', '\u2a74', '\u2a74', '\u2261', '\u2261', '\u222f', '\u222f', '\u222e', '\u222e', '\u2102', '\u2102', '\u2210', '\u2210', '\u2233', 8529 '\u2233', '\u2a2f', '\u2a2f', '\U0001d49e', '\U0001d49e', '\u22d3', '\u22d3', '\u224d', '\u224d', '\u2145', '\u2145', '\u2911', '\u2911', '\u0402', '\u0402', '\u0405', '\u0405', '\u040f', '\u040f', '\u2021', '\u2021', '\u21a1', '\u21a1', '\u2ae4', '\u2ae4', '\u010e', '\u010e', '\u0414', '\u0414', '\u2207', '\u2207', '\u0394', '\u0394', '\U0001d507', '\U0001d507', 8530 '\u00b4', '\u00b4', '\u02d9', '\u02d9', '\u02dd', '\u02dd', '\u0060', '\u0060', '\u02dc', '\u02dc', '\u22c4', '\u22c4', '\u2146', '\u2146', '\U0001d53b', '\U0001d53b', '\u00a8', '\u00a8', '\u20dc', '\u20dc', '\u2250', 8531 '\u2250', '\u222f', '\u222f', '\u00a8', '\u00a8', '\u21d3', '\u21d3', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u2ae4', '\u2ae4', '\u27f8', '\u27f8', '\u27fa', 8532 '\u27fa', '\u27f9', '\u27f9', '\u21d2', '\u21d2', '\u22a8', '\u22a8', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2225', '\u2225', '\u2193', '\u2193', '\u2913', '\u2913', 8533 '\u21f5', '\u21f5', '\u0311', '\u0311', '\u2950', '\u2950', '\u295e', '\u295e', '\u21bd', '\u21bd', '\u2956', '\u2956', '\u295f', '\u295f', '\u21c1', '\u21c1', '\u2957', 8534 '\u2957', '\u22a4', '\u22a4', '\u21a7', '\u21a7', '\u21d3', '\u21d3', '\U0001d49f', '\U0001d49f', '\u0110', '\u0110', '\u014a', '\u014a', '\u00d0', '\u00d0', '\u00c9', '\u00c9', '\u011a', '\u011a', '\u00ca', '\u00ca', '\u042d', '\u042d', '\u0116', '\u0116', '\U0001d508', '\U0001d508', '\u00c8', '\u00c8', '\u2208', '\u2208', '\u0112', '\u0112', 8535 '\u25fb', '\u25fb', '\u25ab', '\u25ab', '\u0118', '\u0118', '\U0001d53c', '\U0001d53c', '\u0395', '\u0395', '\u2a75', '\u2a75', '\u2242', '\u2242', '\u21cc', '\u21cc', '\u2130', '\u2130', '\u2a73', '\u2a73', '\u0397', '\u0397', '\u00cb', '\u00cb', '\u2203', '\u2203', '\u2147', '\u2147', 8536 '\u0424', '\u0424', '\U0001d509', '\U0001d509', '\u25fc', '\u25fc', '\u25aa', '\u25aa', '\U0001d53d', '\U0001d53d', '\u2200', '\u2200', '\u2131', '\u2131', '\u2131', '\u2131', '\u0403', '\u0403', '\u003e', '\u003e', '\u0393', '\u0393', '\u03dc', '\u03dc', '\u011e', '\u011e', '\u0122', '\u0122', '\u011c', '\u011c', 8537 '\u0413', '\u0413', '\u0120', '\u0120', '\U0001d50a', '\U0001d50a', '\u22d9', '\u22d9', '\U0001d53e', '\U0001d53e', '\u2265', '\u2265', '\u22db', '\u22db', '\u2267', '\u2267', '\u2aa2', '\u2aa2', '\u2277', '\u2277', '\u2a7e', '\u2a7e', '\u2273', '\u2273', 8538 '\U0001d4a2', '\U0001d4a2', '\u226b', '\u226b', '\u042a', '\u042a', '\u02c7', '\u02c7', '\u005e', '\u005e', '\u0124', '\u0124', '\u210c', '\u210c', '\u210b', '\u210b', '\u210d', '\u210d', '\u2500', '\u2500', '\u210b', '\u210b', '\u0126', '\u0126', '\u224e', '\u224e', '\u224f', '\u224f', '\u0415', '\u0415', '\u0132', '\u0132', 8539 '\u0401', '\u0401', '\u00cd', '\u00cd', '\u00ce', '\u00ce', '\u0418', '\u0418', '\u0130', '\u0130', '\u2111', '\u2111', '\u00cc', '\u00cc', '\u2111', '\u2111', '\u012a', '\u012a', '\u2148', '\u2148', '\u21d2', '\u21d2', '\u222c', '\u222c', '\u222b', '\u222b', '\u22c2', '\u22c2', '\u2063', '\u2063', '\u2062', 8540 '\u2062', '\u012e', '\u012e', '\U0001d540', '\U0001d540', '\u0399', '\u0399', '\u2110', '\u2110', '\u0128', '\u0128', '\u0406', '\u0406', '\u00cf', '\u00cf', '\u0134', '\u0134', '\u0419', '\u0419', '\U0001d50d', '\U0001d50d', '\U0001d541', '\U0001d541', '\U0001d4a5', '\U0001d4a5', '\u0408', '\u0408', '\u0404', '\u0404', '\u0425', '\u0425', '\u040c', '\u040c', '\u039a', '\u039a', '\u0136', '\u0136', 8541 '\u041a', '\u041a', '\U0001d50e', '\U0001d50e', '\U0001d542', '\U0001d542', '\U0001d4a6', '\U0001d4a6', '\u0409', '\u0409', '\u003c', '\u003c', '\u0139', '\u0139', '\u039b', '\u039b', '\u27ea', '\u27ea', '\u2112', '\u2112', '\u219e', '\u219e', '\u013d', '\u013d', '\u013b', '\u013b', '\u041b', '\u041b', '\u27e8', '\u27e8', '\u2190', '\u2190', '\u21e4', 8542 '\u21e4', '\u21c6', '\u21c6', '\u2308', '\u2308', '\u27e6', '\u27e6', '\u2961', '\u2961', '\u21c3', '\u21c3', '\u2959', '\u2959', '\u230a', '\u230a', '\u2194', '\u2194', '\u294e', 8543 '\u294e', '\u22a3', '\u22a3', '\u21a4', '\u21a4', '\u295a', '\u295a', '\u22b2', '\u22b2', '\u29cf', '\u29cf', '\u22b4', '\u22b4', '\u2951', '\u2951', '\u2960', '\u2960', '\u21bf', '\u21bf', 8544 '\u2958', '\u2958', '\u21bc', '\u21bc', '\u2952', '\u2952', '\u21d0', '\u21d0', '\u21d4', '\u21d4', '\u22da', '\u22da', '\u2266', '\u2266', '\u2276', '\u2276', '\u2aa1', '\u2aa1', '\u2a7d', '\u2a7d', 8545 '\u2272', '\u2272', '\U0001d50f', '\U0001d50f', '\u22d8', '\u22d8', '\u21da', '\u21da', '\u013f', '\u013f', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27f6', '\u27f6', '\u27f8', '\u27f8', '\u27fa', '\u27fa', '\u27f9', '\u27f9', 8546 '\U0001d543', '\U0001d543', '\u2199', '\u2199', '\u2198', '\u2198', '\u2112', '\u2112', '\u21b0', '\u21b0', '\u0141', '\u0141', '\u226a', '\u226a', '\u2905', '\u2905', '\u041c', '\u041c', '\u205f', '\u205f', '\u2133', '\u2133', '\U0001d510', '\U0001d510', '\u2213', '\u2213', '\U0001d544', '\U0001d544', '\u2133', '\u2133', '\u039c', '\u039c', 8547 '\u040a', '\u040a', '\u0143', '\u0143', '\u0147', '\u0147', '\u0145', '\u0145', '\u041d', '\u041d', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u200b', '\u226b', '\u226b', 8548 '\u226a', '\u226a', '\u000a', '\u000a', '\U0001d511', '\U0001d511', '\u2060', '\u2060', '\u00a0', '\u00a0', '\u2115', '\u2115', '\u2aec', '\u2aec', '\u2262', '\u2262', '\u226d', '\u226d', '\u2226', '\u2226', '\u2209', '\u2209', '\u2260', '\u2260', 8549 '\u2204', '\u2204', '\u226f', '\u226f', '\u2271', '\u2271', '\u2279', '\u2279', '\u2275', '\u2275', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u226e', '\u226e', '\u2270', '\u2270', '\u2278', 8550 '\u2278', '\u2274', '\u2274', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u220c', '\u220c', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u22e2', '\u22e2', '\u22e3', 8551 '\u22e3', '\u2288', '\u2288', '\u2281', '\u2281', '\u22e1', '\u22e1', '\u2289', '\u2289', '\u2241', '\u2241', '\u2244', '\u2244', '\u2247', '\u2247', '\u2249', '\u2249', '\u2224', 8552 '\u2224', '\U0001d4a9', '\U0001d4a9', '\u00d1', '\u00d1', '\u039d', '\u039d', '\u0152', '\u0152', '\u00d3', '\u00d3', '\u00d4', '\u00d4', '\u041e', '\u041e', '\u0150', '\u0150', '\U0001d512', '\U0001d512', '\u00d2', '\u00d2', '\u014c', '\u014c', '\u03a9', '\u03a9', '\u039f', '\u039f', '\U0001d546', '\U0001d546', '\u201c', '\u201c', '\u2018', 8553 '\u2018', '\u2a54', '\u2a54', '\U0001d4aa', '\U0001d4aa', '\u00d8', '\u00d8', '\u00d5', '\u00d5', '\u2a37', '\u2a37', '\u00d6', '\u00d6', '\u203e', '\u203e', '\u23de', '\u23de', '\u23b4', '\u23b4', '\u23dc', '\u23dc', '\u2202', '\u2202', '\u041f', '\u041f', '\U0001d513', '\U0001d513', '\u03a6', '\u03a6', '\u03a0', '\u03a0', '\u00b1', 8554 '\u00b1', '\u210c', '\u210c', '\u2119', '\u2119', '\u2abb', '\u2abb', '\u227a', '\u227a', '\u2aaf', '\u2aaf', '\u227c', '\u227c', '\u227e', '\u227e', '\u2033', '\u2033', '\u220f', '\u220f', '\u2237', '\u2237', '\u221d', '\u221d', '\U0001d4ab', '\U0001d4ab', 8555 '\u03a8', '\u03a8', '\u0022', '\u0022', '\U0001d514', '\U0001d514', '\u211a', '\u211a', '\U0001d4ac', '\U0001d4ac', '\u2910', '\u2910', '\u00ae', '\u00ae', '\u0154', '\u0154', '\u27eb', '\u27eb', '\u21a0', '\u21a0', '\u2916', '\u2916', '\u0158', '\u0158', '\u0156', '\u0156', '\u0420', '\u0420', '\u211c', '\u211c', '\u220b', '\u220b', '\u21cb', '\u21cb', 8556 '\u296f', '\u296f', '\u211c', '\u211c', '\u03a1', '\u03a1', '\u27e9', '\u27e9', '\u2192', '\u2192', '\u21e5', '\u21e5', '\u21c4', '\u21c4', '\u2309', '\u2309', '\u27e7', '\u27e7', '\u295d', 8557 '\u295d', '\u21c2', '\u21c2', '\u2955', '\u2955', '\u230b', '\u230b', '\u22a2', '\u22a2', '\u21a6', '\u21a6', '\u295b', '\u295b', '\u22b3', '\u22b3', '\u29d0', '\u29d0', '\u22b5', 8558 '\u22b5', '\u294f', '\u294f', '\u295c', '\u295c', '\u21be', '\u21be', '\u2954', '\u2954', '\u21c0', '\u21c0', '\u2953', '\u2953', '\u21d2', '\u21d2', '\u211d', '\u211d', '\u2970', '\u2970', 8559 '\u21db', '\u21db', '\u211b', '\u211b', '\u21b1', '\u21b1', '\u29f4', '\u29f4', '\u0429', '\u0429', '\u0428', '\u0428', '\u042c', '\u042c', '\u015a', '\u015a', '\u2abc', '\u2abc', '\u0160', '\u0160', '\u015e', '\u015e', '\u015c', '\u015c', '\u0421', '\u0421', '\U0001d516', '\U0001d516', '\u2193', '\u2193', '\u2190', '\u2190', 8560 '\u2192', '\u2192', '\u2191', '\u2191', '\u03a3', '\u03a3', '\u2218', '\u2218', '\U0001d54a', '\U0001d54a', '\u221a', '\u221a', '\u25a1', '\u25a1', '\u2293', '\u2293', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', 8561 '\u2292', '\u2292', '\u2294', '\u2294', '\U0001d4ae', '\U0001d4ae', '\u22c6', '\u22c6', '\u22d0', '\u22d0', '\u22d0', '\u22d0', '\u2286', '\u2286', '\u227b', '\u227b', '\u2ab0', '\u2ab0', '\u227d', '\u227d', '\u227f', '\u227f', '\u220b', 8562 '\u220b', '\u2211', '\u2211', '\u22d1', '\u22d1', '\u2283', '\u2283', '\u2287', '\u2287', '\u22d1', '\u22d1', '\u00de', '\u00de', '\u2122', '\u2122', '\u040b', '\u040b', '\u0426', '\u0426', '\u0009', '\u0009', '\u03a4', '\u03a4', '\u0164', '\u0164', '\u0162', '\u0162', '\u0422', '\u0422', '\U0001d517', '\U0001d517', '\u2234', '\u2234', '\u0398', '\u0398', 8563 '\u2009', '\u2009', '\u223c', '\u223c', '\u2243', '\u2243', '\u2245', '\u2245', '\u2248', '\u2248', '\U0001d54b', '\U0001d54b', '\u20db', '\u20db', '\U0001d4af', '\U0001d4af', '\u0166', '\u0166', '\u00da', '\u00da', '\u219f', '\u219f', '\u2949', '\u2949', '\u040e', '\u040e', '\u016c', '\u016c', '\u00db', 8564 '\u00db', '\u0423', '\u0423', '\u0170', '\u0170', '\U0001d518', '\U0001d518', '\u00d9', '\u00d9', '\u016a', '\u016a', '\u005f', '\u005f', '\u23df', '\u23df', '\u23b5', '\u23b5', '\u23dd', '\u23dd', '\u22c3', '\u22c3', '\u228e', '\u228e', '\u0172', '\u0172', '\U0001d54c', '\U0001d54c', '\u2191', '\u2191', '\u2912', 8565 '\u2912', '\u21c5', '\u21c5', '\u2195', '\u2195', '\u296e', '\u296e', '\u22a5', '\u22a5', '\u21a5', '\u21a5', '\u21d1', '\u21d1', '\u21d5', '\u21d5', '\u2196', '\u2196', '\u2197', '\u2197', '\u03d2', '\u03d2', '\u03a5', '\u03a5', 8566 '\u016e', '\u016e', '\U0001d4b0', '\U0001d4b0', '\u0168', '\u0168', '\u00dc', '\u00dc', '\u22ab', '\u22ab', '\u2aeb', '\u2aeb', '\u0412', '\u0412', '\u22a9', '\u22a9', '\u2ae6', '\u2ae6', '\u22c1', '\u22c1', '\u2016', '\u2016', '\u2016', '\u2016', '\u2223', '\u2223', '\u007c', '\u007c', '\u2758', '\u2758', '\u2240', 8567 '\u2240', '\u200a', '\u200a', '\U0001d519', '\U0001d519', '\U0001d54d', '\U0001d54d', '\U0001d4b1', '\U0001d4b1', '\u22aa', '\u22aa', '\u0174', '\u0174', '\u22c0', '\u22c0', '\U0001d51a', '\U0001d51a', '\U0001d54e', '\U0001d54e', '\U0001d4b2', '\U0001d4b2', '\U0001d51b', '\U0001d51b', '\u039e', '\u039e', '\U0001d54f', '\U0001d54f', '\U0001d4b3', '\U0001d4b3', '\u042f', '\u042f', '\u0407', '\u0407', '\u042e', '\u042e', '\u00dd', '\u00dd', 8568 '\u0176', '\u0176', '\u042b', '\u042b', '\U0001d51c', '\U0001d51c', '\U0001d550', '\U0001d550', '\U0001d4b4', '\U0001d4b4', '\u0178', '\u0178', '\u0416', '\u0416', '\u0179', '\u0179', '\u017d', '\u017d', '\u0417', '\u0417', '\u017b', '\u017b', '\u200b', '\u200b', '\u0396', '\u0396', '\u2128', '\u2128', '\u2124', '\u2124', '\U0001d4b5', '\U0001d4b5', '\u00e1', '\u00e1', '\u0103', '\u0103', '\u223e', 8569 '\u223e', '\u223f', '\u223f', '\u00e2', '\u00e2', '\u00b4', '\u00b4', '\u0430', '\u0430', '\u00e6', '\u00e6', '\u2061', '\u2061', '\U0001d51e', '\U0001d51e', '\u00e0', '\u00e0', '\u2135', '\u2135', '\u2135', '\u2135', '\u03b1', '\u03b1', '\u0101', '\u0101', '\u2a3f', '\u2a3f', '\u2227', '\u2227', '\u2a55', '\u2a55', '\u2a5c', '\u2a5c', '\u2a58', '\u2a58', '\u2a5a', '\u2a5a', '\u2220', 8570 '\u2220', '\u29a4', '\u29a4', '\u2220', '\u2220', '\u2221', '\u2221', '\u29a8', '\u29a8', '\u29a9', '\u29a9', '\u29aa', '\u29aa', '\u29ab', '\u29ab', '\u29ac', '\u29ac', '\u29ad', '\u29ad', '\u29ae', '\u29ae', '\u29af', '\u29af', '\u221f', '\u221f', '\u22be', '\u22be', '\u299d', '\u299d', '\u2222', 8571 '\u2222', '\u00c5', '\u00c5', '\u237c', '\u237c', '\u0105', '\u0105', '\U0001d552', '\U0001d552', '\u2248', '\u2248', '\u2a70', '\u2a70', '\u2a6f', '\u2a6f', '\u224a', '\u224a', '\u224b', '\u224b', '\u2248', '\u2248', '\u224a', '\u224a', '\u00e5', '\u00e5', '\U0001d4b6', '\U0001d4b6', '\u002a', '\u002a', '\u2248', '\u2248', '\u224d', '\u224d', '\u00e3', '\u00e3', '\u00e4', 8572 '\u00e4', '\u2233', '\u2233', '\u2a11', '\u2a11', '\u2aed', '\u2aed', '\u224c', '\u224c', '\u03f6', '\u03f6', '\u2035', '\u2035', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u22bd', '\u22bd', '\u2305', '\u2305', '\u2305', '\u2305', '\u23b5', '\u23b5', '\u23b6', '\u23b6', '\u224c', '\u224c', '\u0431', 8573 '\u0431', '\u201e', '\u201e', '\u2235', '\u2235', '\u2235', '\u2235', '\u29b0', '\u29b0', '\u03f6', '\u03f6', '\u212c', '\u212c', '\u03b2', '\u03b2', '\u2136', '\u2136', '\u226c', '\u226c', '\U0001d51f', '\U0001d51f', '\u22c2', '\u22c2', '\u25ef', '\u25ef', '\u22c3', '\u22c3', '\u2a00', '\u2a00', '\u2a01', '\u2a01', '\u2a02', '\u2a02', 8574 '\u2a06', '\u2a06', '\u2605', '\u2605', '\u25bd', '\u25bd', '\u25b3', '\u25b3', '\u2a04', '\u2a04', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u290d', '\u290d', '\u29eb', '\u29eb', '\u25aa', '\u25aa', '\u25b4', '\u25b4', '\u25be', 8575 '\u25be', '\u25c2', '\u25c2', '\u25b8', '\u25b8', '\u2423', '\u2423', '\u2592', '\u2592', '\u2591', '\u2591', '\u2593', '\u2593', '\u2588', '\u2588', '\u2310', '\u2310', '\U0001d553', '\U0001d553', '\u22a5', '\u22a5', '\u22a5', '\u22a5', '\u22c8', '\u22c8', '\u2557', '\u2557', '\u2554', '\u2554', '\u2556', 8576 '\u2556', '\u2553', '\u2553', '\u2550', '\u2550', '\u2566', '\u2566', '\u2569', '\u2569', '\u2564', '\u2564', '\u2567', '\u2567', '\u255d', '\u255d', '\u255a', '\u255a', '\u255c', '\u255c', '\u2559', '\u2559', '\u2551', '\u2551', '\u256c', '\u256c', '\u2563', '\u2563', '\u2560', '\u2560', '\u256b', '\u256b', '\u2562', '\u2562', '\u255f', '\u255f', '\u29c9', 8577 '\u29c9', '\u2555', '\u2555', '\u2552', '\u2552', '\u2510', '\u2510', '\u250c', '\u250c', '\u2500', '\u2500', '\u2565', '\u2565', '\u2568', '\u2568', '\u252c', '\u252c', '\u2534', '\u2534', '\u229f', '\u229f', '\u229e', '\u229e', '\u22a0', '\u22a0', '\u255b', '\u255b', '\u2558', '\u2558', '\u2518', '\u2518', '\u2514', '\u2514', '\u2502', 8578 '\u2502', '\u256a', '\u256a', '\u2561', '\u2561', '\u255e', '\u255e', '\u253c', '\u253c', '\u2524', '\u2524', '\u251c', '\u251c', '\u2035', '\u2035', '\u02d8', '\u02d8', '\u00a6', '\u00a6', '\U0001d4b7', '\U0001d4b7', '\u204f', '\u204f', '\u223d', '\u223d', '\u22cd', '\u22cd', '\u005c', '\u005c', '\u29c5', '\u29c5', '\u27c8', '\u27c8', '\u2022', '\u2022', '\u2022', 8579 '\u2022', '\u224e', '\u224e', '\u2aae', '\u2aae', '\u224f', '\u224f', '\u224f', '\u224f', '\u0107', '\u0107', '\u2229', '\u2229', '\u2a44', '\u2a44', '\u2a49', '\u2a49', '\u2a4b', '\u2a4b', '\u2a47', '\u2a47', '\u2a40', '\u2a40', '\u2041', '\u2041', '\u02c7', '\u02c7', '\u2a4d', '\u2a4d', '\u010d', '\u010d', '\u00e7', '\u00e7', '\u0109', 8580 '\u0109', '\u2a4c', '\u2a4c', '\u2a50', '\u2a50', '\u010b', '\u010b', '\u00b8', '\u00b8', '\u29b2', '\u29b2', '\u00a2', '\u00a2', '\u00b7', '\u00b7', '\U0001d520', '\U0001d520', '\u0447', '\u0447', '\u2713', '\u2713', '\u2713', '\u2713', '\u03c7', '\u03c7', '\u25cb', '\u25cb', '\u29c3', '\u29c3', '\u02c6', '\u02c6', '\u2257', '\u2257', '\u21ba', 8581 '\u21ba', '\u21bb', '\u21bb', '\u00ae', '\u00ae', '\u24c8', '\u24c8', '\u229b', '\u229b', '\u229a', '\u229a', '\u229d', '\u229d', '\u2257', '\u2257', '\u2a10', '\u2a10', '\u2aef', '\u2aef', '\u29c2', '\u29c2', '\u2663', '\u2663', '\u2663', '\u2663', '\u003a', 8582 '\u003a', '\u2254', '\u2254', '\u2254', '\u2254', '\u002c', '\u002c', '\u0040', '\u0040', '\u2201', '\u2201', '\u2218', '\u2218', '\u2201', '\u2201', '\u2102', '\u2102', '\u2245', '\u2245', '\u2a6d', '\u2a6d', '\u222e', '\u222e', '\U0001d554', '\U0001d554', '\u2210', '\u2210', '\u00a9', '\u00a9', '\u2117', '\u2117', '\u21b5', '\u21b5', 8583 '\u2717', '\u2717', '\U0001d4b8', '\U0001d4b8', '\u2acf', '\u2acf', '\u2ad1', '\u2ad1', '\u2ad0', '\u2ad0', '\u2ad2', '\u2ad2', '\u22ef', '\u22ef', '\u2938', '\u2938', '\u2935', '\u2935', '\u22de', '\u22de', '\u22df', '\u22df', '\u21b6', '\u21b6', '\u293d', '\u293d', '\u222a', '\u222a', '\u2a48', '\u2a48', '\u2a46', '\u2a46', '\u2a4a', '\u2a4a', 8584 '\u228d', '\u228d', '\u2a45', '\u2a45', '\u21b7', '\u21b7', '\u293c', '\u293c', '\u22de', '\u22de', '\u22df', '\u22df', '\u22ce', '\u22ce', '\u22cf', '\u22cf', '\u00a4', '\u00a4', '\u21b6', '\u21b6', '\u21b7', '\u21b7', '\u22ce', '\u22ce', '\u22cf', '\u22cf', 8585 '\u2232', '\u2232', '\u2231', '\u2231', '\u232d', '\u232d', '\u21d3', '\u21d3', '\u2965', '\u2965', '\u2020', '\u2020', '\u2138', '\u2138', '\u2193', '\u2193', '\u2010', '\u2010', '\u22a3', '\u22a3', '\u290f', '\u290f', '\u02dd', '\u02dd', '\u010f', '\u010f', '\u0434', '\u0434', '\u2146', '\u2146', '\u2021', '\u2021', '\u21ca', '\u21ca', '\u2a77', 8586 '\u2a77', '\u00b0', '\u00b0', '\u03b4', '\u03b4', '\u29b1', '\u29b1', '\u297f', '\u297f', '\U0001d521', '\U0001d521', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u22c4', '\u22c4', '\u22c4', '\u22c4', '\u2666', '\u2666', '\u2666', '\u2666', '\u00a8', '\u00a8', '\u03dd', '\u03dd', '\u22f2', '\u22f2', '\u00f7', '\u00f7', '\u00f7', '\u00f7', '\u22c7', 8587 '\u22c7', '\u22c7', '\u22c7', '\u0452', '\u0452', '\u231e', '\u231e', '\u230d', '\u230d', '\u0024', '\u0024', '\U0001d555', '\U0001d555', '\u02d9', '\u02d9', '\u2250', '\u2250', '\u2251', '\u2251', '\u2238', '\u2238', '\u2214', '\u2214', '\u22a1', '\u22a1', '\u2306', '\u2306', '\u2193', '\u2193', '\u21ca', 8588 '\u21ca', '\u21c3', '\u21c3', '\u21c2', '\u21c2', '\u2910', '\u2910', '\u231f', '\u231f', '\u230c', '\u230c', '\U0001d4b9', '\U0001d4b9', '\u0455', '\u0455', '\u29f6', '\u29f6', '\u0111', '\u0111', '\u22f1', '\u22f1', '\u25bf', '\u25bf', '\u25be', '\u25be', '\u21f5', '\u21f5', '\u296f', '\u296f', '\u29a6', 8589 '\u29a6', '\u045f', '\u045f', '\u27ff', '\u27ff', '\u2a77', '\u2a77', '\u2251', '\u2251', '\u00e9', '\u00e9', '\u2a6e', '\u2a6e', '\u011b', '\u011b', '\u2256', '\u2256', '\u00ea', '\u00ea', '\u2255', '\u2255', '\u044d', '\u044d', '\u0117', '\u0117', '\u2147', '\u2147', '\u2252', '\u2252', '\U0001d522', '\U0001d522', '\u2a9a', '\u2a9a', '\u00e8', '\u00e8', '\u2a96', '\u2a96', '\u2a98', 8590 '\u2a98', '\u2a99', '\u2a99', '\u23e7', '\u23e7', '\u2113', '\u2113', '\u2a95', '\u2a95', '\u2a97', '\u2a97', '\u0113', '\u0113', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2205', '\u2003', '\u2003', '\u2004', '\u2004', '\u2005', '\u2005', '\u014b', '\u014b', '\u2002', '\u2002', '\u0119', '\u0119', '\U0001d556', '\U0001d556', '\u22d5', '\u22d5', '\u29e3', 8591 '\u29e3', '\u2a71', '\u2a71', '\u03b5', '\u03b5', '\u03b5', '\u03b5', '\u03f5', '\u03f5', '\u2256', '\u2256', '\u2255', '\u2255', '\u2242', '\u2242', '\u2a96', '\u2a96', '\u2a95', '\u2a95', '\u003d', '\u003d', '\u225f', '\u225f', '\u2261', '\u2261', '\u2a78', '\u2a78', '\u29e5', '\u29e5', '\u2253', '\u2253', 8592 '\u2971', '\u2971', '\u212f', '\u212f', '\u2250', '\u2250', '\u2242', '\u2242', '\u03b7', '\u03b7', '\u00f0', '\u00f0', '\u00eb', '\u00eb', '\u20ac', '\u20ac', '\u0021', '\u0021', '\u2203', '\u2203', '\u2130', '\u2130', '\u2147', '\u2147', '\u2252', '\u2252', '\u0444', '\u0444', '\u2640', '\u2640', '\ufb03', '\ufb03', '\ufb00', 8593 '\ufb00', '\ufb04', '\ufb04', '\U0001d523', '\U0001d523', '\ufb01', '\ufb01', '\u266d', '\u266d', '\ufb02', '\ufb02', '\u25b1', '\u25b1', '\u0192', '\u0192', '\U0001d557', '\U0001d557', '\u2200', '\u2200', '\u22d4', '\u22d4', '\u2ad9', '\u2ad9', '\u2a0d', '\u2a0d', '\u00bd', '\u00bd', '\u2153', '\u2153', '\u00bc', '\u00bc', '\u2155', '\u2155', '\u2159', '\u2159', 8594 '\u215b', '\u215b', '\u2154', '\u2154', '\u2156', '\u2156', '\u00be', '\u00be', '\u2157', '\u2157', '\u215c', '\u215c', '\u2158', '\u2158', '\u215a', '\u215a', '\u215d', '\u215d', '\u215e', '\u215e', '\u2044', '\u2044', '\u2322', '\u2322', '\U0001d4bb', '\U0001d4bb', '\u2267', '\u2267', '\u2a8c', '\u2a8c', '\u01f5', '\u01f5', '\u03b3', '\u03b3', '\u03dd', 8595 '\u03dd', '\u2a86', '\u2a86', '\u011f', '\u011f', '\u011d', '\u011d', '\u0433', '\u0433', '\u0121', '\u0121', '\u2265', '\u2265', '\u22db', '\u22db', '\u2265', '\u2265', '\u2267', '\u2267', '\u2a7e', '\u2a7e', '\u2a7e', '\u2a7e', '\u2aa9', '\u2aa9', '\u2a80', '\u2a80', '\u2a82', '\u2a82', '\u2a84', '\u2a84', '\u2a94', '\u2a94', '\U0001d524', '\U0001d524', '\u226b', '\u226b', '\u22d9', 8596 '\u22d9', '\u2137', '\u2137', '\u0453', '\u0453', '\u2277', '\u2277', '\u2a92', '\u2a92', '\u2aa5', '\u2aa5', '\u2aa4', '\u2aa4', '\u2269', '\u2269', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a8a', '\u2a88', '\u2a88', '\u2a88', '\u2a88', '\u2269', '\u2269', '\u22e7', '\u22e7', '\U0001d558', '\U0001d558', '\u0060', '\u0060', '\u210a', '\u210a', '\u2273', '\u2273', '\u2a8e', '\u2a8e', '\u2a90', '\u2a90', '\u2aa7', 8597 '\u2aa7', '\u2a7a', '\u2a7a', '\u22d7', '\u22d7', '\u2995', '\u2995', '\u2a7c', '\u2a7c', '\u2a86', '\u2a86', '\u2978', '\u2978', '\u22d7', '\u22d7', '\u22db', '\u22db', '\u2a8c', '\u2a8c', '\u2277', '\u2277', '\u2273', '\u2273', '\u21d4', '\u21d4', '\u200a', '\u200a', '\u00bd', '\u00bd', '\u210b', '\u210b', 8598 '\u044a', '\u044a', '\u2194', '\u2194', '\u2948', '\u2948', '\u21ad', '\u21ad', '\u210f', '\u210f', '\u0125', '\u0125', '\u2665', '\u2665', '\u2665', '\u2665', '\u2026', '\u2026', '\u22b9', '\u22b9', '\U0001d525', '\U0001d525', '\u2925', '\u2925', '\u2926', '\u2926', '\u21ff', '\u21ff', '\u223b', '\u223b', '\u21a9', '\u21a9', 8599 '\u21aa', '\u21aa', '\U0001d559', '\U0001d559', '\u2015', '\u2015', '\U0001d4bd', '\U0001d4bd', '\u210f', '\u210f', '\u0127', '\u0127', '\u2043', '\u2043', '\u2010', '\u2010', '\u00ed', '\u00ed', '\u2063', '\u2063', '\u00ee', '\u00ee', '\u0438', '\u0438', '\u0435', '\u0435', '\u00a1', '\u00a1', '\u21d4', '\u21d4', '\U0001d526', '\U0001d526', '\u00ec', '\u00ec', '\u2148', 8600 '\u2148', '\u2a0c', '\u2a0c', '\u222d', '\u222d', '\u29dc', '\u29dc', '\u2129', '\u2129', '\u0133', '\u0133', '\u012b', '\u012b', '\u2111', '\u2111', '\u2110', '\u2110', '\u2111', '\u2111', '\u0131', '\u0131', '\u22b7', '\u22b7', '\u01b5', '\u01b5', '\u2208', '\u2208', '\u2105', '\u2105', '\u221e', '\u221e', '\u29dd', '\u29dd', '\u0131', 8601 '\u0131', '\u222b', '\u222b', '\u22ba', '\u22ba', '\u2124', '\u2124', '\u22ba', '\u22ba', '\u2a17', '\u2a17', '\u2a3c', '\u2a3c', '\u0451', '\u0451', '\u012f', '\u012f', '\U0001d55a', '\U0001d55a', '\u03b9', '\u03b9', '\u2a3c', '\u2a3c', '\u00bf', '\u00bf', '\U0001d4be', '\U0001d4be', '\u2208', '\u2208', '\u22f9', '\u22f9', '\u22f5', '\u22f5', '\u22f4', 8602 '\u22f4', '\u22f3', '\u22f3', '\u2208', '\u2208', '\u2062', '\u2062', '\u0129', '\u0129', '\u0456', '\u0456', '\u00ef', '\u00ef', '\u0135', '\u0135', '\u0439', '\u0439', '\U0001d527', '\U0001d527', '\u0237', '\u0237', '\U0001d55b', '\U0001d55b', '\U0001d4bf', '\U0001d4bf', '\u0458', '\u0458', '\u0454', '\u0454', '\u03ba', '\u03ba', '\u03f0', '\u03f0', '\u0137', '\u0137', '\u043a', '\u043a', '\U0001d528', 8603 '\U0001d528', '\u0138', '\u0138', '\u0445', '\u0445', '\u045c', '\u045c', '\U0001d55c', '\U0001d55c', '\U0001d4c0', '\U0001d4c0', '\u21da', '\u21da', '\u21d0', '\u21d0', '\u291b', '\u291b', '\u290e', '\u290e', '\u2266', '\u2266', '\u2a8b', '\u2a8b', '\u2962', '\u2962', '\u013a', '\u013a', '\u29b4', '\u29b4', '\u2112', '\u2112', '\u03bb', '\u03bb', '\u27e8', '\u27e8', '\u2991', '\u2991', 8604 '\u27e8', '\u27e8', '\u2a85', '\u2a85', '\u00ab', '\u00ab', '\u2190', '\u2190', '\u21e4', '\u21e4', '\u291f', '\u291f', '\u291d', '\u291d', '\u21a9', '\u21a9', '\u21ab', '\u21ab', '\u2939', '\u2939', '\u2973', '\u2973', '\u21a2', '\u21a2', '\u2aab', '\u2aab', '\u2919', '\u2919', '\u2aad', '\u2aad', '\u290c', '\u290c', '\u2772', '\u2772', '\u007b', 8605 '\u007b', '\u005b', '\u005b', '\u298b', '\u298b', '\u298f', '\u298f', '\u298d', '\u298d', '\u013e', '\u013e', '\u013c', '\u013c', '\u2308', '\u2308', '\u007b', '\u007b', '\u043b', '\u043b', '\u2936', '\u2936', '\u201c', '\u201c', '\u201e', '\u201e', '\u2967', '\u2967', '\u294b', '\u294b', '\u21b2', '\u21b2', '\u2264', '\u2264', '\u2190', 8606 '\u2190', '\u21a2', '\u21a2', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u21c7', '\u21c7', '\u2194', '\u2194', '\u21c6', '\u21c6', '\u21cb', '\u21cb', '\u21ad', '\u21ad', '\u22cb', 8607 '\u22cb', '\u22da', '\u22da', '\u2264', '\u2264', '\u2266', '\u2266', '\u2a7d', '\u2a7d', '\u2a7d', '\u2a7d', '\u2aa8', '\u2aa8', '\u2a7f', '\u2a7f', '\u2a81', '\u2a81', '\u2a83', '\u2a83', '\u2a93', '\u2a93', '\u2a85', '\u2a85', '\u22d6', '\u22d6', '\u22da', '\u22da', '\u2a8b', '\u2a8b', '\u2276', '\u2276', 8608 '\u2272', '\u2272', '\u297c', '\u297c', '\u230a', '\u230a', '\U0001d529', '\U0001d529', '\u2276', '\u2276', '\u2a91', '\u2a91', '\u21bd', '\u21bd', '\u21bc', '\u21bc', '\u296a', '\u296a', '\u2584', '\u2584', '\u0459', '\u0459', '\u226a', '\u226a', '\u21c7', '\u21c7', '\u231e', '\u231e', '\u296b', '\u296b', '\u25fa', '\u25fa', '\u0140', '\u0140', '\u23b0', '\u23b0', 8609 '\u23b0', '\u23b0', '\u2268', '\u2268', '\u2a89', '\u2a89', '\u2a89', '\u2a89', '\u2a87', '\u2a87', '\u2a87', '\u2a87', '\u2268', '\u2268', '\u22e6', '\u22e6', '\u27ec', '\u27ec', '\u21fd', '\u21fd', '\u27e6', '\u27e6', '\u27f5', '\u27f5', '\u27f7', '\u27f7', '\u27fc', '\u27fc', '\u27f6', 8610 '\u27f6', '\u21ab', '\u21ab', '\u21ac', '\u21ac', '\u2985', '\u2985', '\U0001d55d', '\U0001d55d', '\u2a2d', '\u2a2d', '\u2a34', '\u2a34', '\u2217', '\u2217', '\u005f', '\u005f', '\u25ca', '\u25ca', '\u25ca', '\u25ca', '\u29eb', '\u29eb', '\u0028', '\u0028', '\u2993', '\u2993', '\u21c6', '\u21c6', '\u231f', 8611 '\u231f', '\u21cb', '\u21cb', '\u296d', '\u296d', '\u200e', '\u200e', '\u22bf', '\u22bf', '\u2039', '\u2039', '\U0001d4c1', '\U0001d4c1', '\u21b0', '\u21b0', '\u2272', '\u2272', '\u2a8d', '\u2a8d', '\u2a8f', '\u2a8f', '\u005b', '\u005b', '\u2018', '\u2018', '\u201a', '\u201a', '\u0142', '\u0142', '\u2aa6', '\u2aa6', '\u2a79', '\u2a79', '\u22d6', '\u22d6', '\u22cb', 8612 '\u22cb', '\u22c9', '\u22c9', '\u2976', '\u2976', '\u2a7b', '\u2a7b', '\u2996', '\u2996', '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u25c2', '\u25c2', '\u294a', '\u294a', '\u2966', '\u2966', '\u223a', '\u223a', '\u00af', '\u00af', '\u2642', '\u2642', '\u2720', '\u2720', '\u2720', '\u2720', '\u21a6', '\u21a6', '\u21a6', '\u21a6', '\u21a7', 8613 '\u21a7', '\u21a4', '\u21a4', '\u21a5', '\u21a5', '\u25ae', '\u25ae', '\u2a29', '\u2a29', '\u043c', '\u043c', '\u2014', '\u2014', '\u2221', '\u2221', '\U0001d52a', '\U0001d52a', '\u2127', '\u2127', '\u00b5', '\u00b5', '\u2223', '\u2223', '\u002a', '\u002a', '\u2af0', '\u2af0', '\u00b7', '\u00b7', '\u2212', '\u2212', '\u229f', 8614 '\u229f', '\u2238', '\u2238', '\u2a2a', '\u2a2a', '\u2adb', '\u2adb', '\u2026', '\u2026', '\u2213', '\u2213', '\u22a7', '\u22a7', '\U0001d55e', '\U0001d55e', '\u2213', '\u2213', '\U0001d4c2', '\U0001d4c2', '\u223e', '\u223e', '\u03bc', '\u03bc', '\u22b8', '\u22b8', '\u22b8', '\u22b8', '\u21cd', '\u21cd', '\u21ce', '\u21ce', '\u21cf', 8615 '\u21cf', '\u22af', '\u22af', '\u22ae', '\u22ae', '\u2207', '\u2207', '\u0144', '\u0144', '\u2249', '\u2249', '\u0149', '\u0149', '\u2249', '\u2249', '\u266e', '\u266e', '\u266e', '\u266e', '\u2115', '\u2115', '\u00a0', '\u00a0', '\u2a43', '\u2a43', '\u0148', '\u0148', '\u0146', '\u0146', '\u2247', '\u2247', '\u2a42', '\u2a42', '\u043d', 8616 '\u043d', '\u2013', '\u2013', '\u2260', '\u2260', '\u21d7', '\u21d7', '\u2924', '\u2924', '\u2197', '\u2197', '\u2197', '\u2197', '\u2262', '\u2262', '\u2928', '\u2928', '\u2204', '\u2204', '\u2204', '\u2204', '\U0001d52b', '\U0001d52b', '\u2271', '\u2271', '\u2271', '\u2271', '\u2275', '\u2275', '\u226f', '\u226f', '\u226f', '\u226f', '\u21ce', '\u21ce', '\u21ae', '\u21ae', 8617 '\u2af2', '\u2af2', '\u220b', '\u220b', '\u22fc', '\u22fc', '\u22fa', '\u22fa', '\u220b', '\u220b', '\u045a', '\u045a', '\u21cd', '\u21cd', '\u219a', '\u219a', '\u2025', '\u2025', '\u2270', '\u2270', '\u219a', '\u219a', '\u21ae', '\u21ae', '\u2270', '\u2270', '\u226e', '\u226e', '\u2274', '\u2274', '\u226e', '\u226e', '\u22ea', '\u22ea', '\u22ec', '\u22ec', 8618 '\u2224', '\u2224', '\U0001d55f', '\U0001d55f', '\u00ac', '\u00ac', '\u2209', '\u2209', '\u2209', '\u2209', '\u22f7', '\u22f7', '\u22f6', '\u22f6', '\u220c', '\u220c', '\u220c', '\u220c', '\u22fe', '\u22fe', '\u22fd', '\u22fd', '\u2226', '\u2226', '\u2226', '\u2226', '\u2a14', '\u2a14', '\u2280', '\u2280', '\u22e0', '\u22e0', '\u2280', 8619 '\u2280', '\u21cf', '\u21cf', '\u219b', '\u219b', '\u219b', '\u219b', '\u22eb', '\u22eb', '\u22ed', '\u22ed', '\u2281', '\u2281', '\u22e1', '\u22e1', '\U0001d4c3', '\U0001d4c3', '\u2224', '\u2224', '\u2226', '\u2226', '\u2241', '\u2241', '\u2244', '\u2244', '\u2244', '\u2244', '\u2224', '\u2224', '\u2226', '\u2226', '\u22e2', 8620 '\u22e2', '\u22e3', '\u22e3', '\u2284', '\u2284', '\u2288', '\u2288', '\u2288', '\u2288', '\u2281', '\u2281', '\u2285', '\u2285', '\u2289', '\u2289', '\u2289', '\u2289', '\u2279', '\u2279', '\u00f1', '\u00f1', '\u2278', '\u2278', '\u22ea', '\u22ea', '\u22ec', '\u22ec', '\u22eb', '\u22eb', 8621 '\u22ed', '\u22ed', '\u03bd', '\u03bd', '\u0023', '\u0023', '\u2116', '\u2116', '\u2007', '\u2007', '\u22ad', '\u22ad', '\u2904', '\u2904', '\u22ac', '\u22ac', '\u29de', '\u29de', '\u2902', '\u2902', '\u2903', '\u2903', '\u21d6', '\u21d6', '\u2923', '\u2923', '\u2196', '\u2196', '\u2196', '\u2196', '\u2927', '\u2927', 8622 '\u24c8', '\u24c8', '\u00f3', '\u00f3', '\u229b', '\u229b', '\u229a', '\u229a', '\u00f4', '\u00f4', '\u043e', '\u043e', '\u229d', '\u229d', '\u0151', '\u0151', '\u2a38', '\u2a38', '\u2299', '\u2299', '\u29bc', '\u29bc', '\u0153', '\u0153', '\u29bf', '\u29bf', '\U0001d52c', '\U0001d52c', '\u02db', '\u02db', '\u00f2', '\u00f2', '\u29c1', '\u29c1', '\u29b5', '\u29b5', '\u03a9', '\u03a9', '\u222e', 8623 '\u222e', '\u21ba', '\u21ba', '\u29be', '\u29be', '\u29bb', '\u29bb', '\u203e', '\u203e', '\u29c0', '\u29c0', '\u014d', '\u014d', '\u03c9', '\u03c9', '\u03bf', '\u03bf', '\u29b6', '\u29b6', '\u2296', '\u2296', '\U0001d560', '\U0001d560', '\u29b7', '\u29b7', '\u29b9', '\u29b9', '\u2295', '\u2295', '\u2228', '\u2228', '\u21bb', '\u21bb', '\u2a5d', '\u2a5d', '\u2134', '\u2134', 8624 '\u2134', '\u2134', '\u00aa', '\u00aa', '\u00ba', '\u00ba', '\u22b6', '\u22b6', '\u2a56', '\u2a56', '\u2a57', '\u2a57', '\u2a5b', '\u2a5b', '\u2134', '\u2134', '\u00f8', '\u00f8', '\u2298', '\u2298', '\u00f5', '\u00f5', '\u2297', '\u2297', '\u2a36', '\u2a36', '\u00f6', '\u00f6', '\u233d', '\u233d', '\u2225', '\u2225', '\u00b6', '\u00b6', '\u2225', '\u2225', 8625 '\u2af3', '\u2af3', '\u2afd', '\u2afd', '\u2202', '\u2202', '\u043f', '\u043f', '\u0025', '\u0025', '\u002e', '\u002e', '\u2030', '\u2030', '\u22a5', '\u22a5', '\u2031', '\u2031', '\U0001d52d', '\U0001d52d', '\u03c6', '\u03c6', '\u03d5', '\u03d5', '\u2133', '\u2133', '\u260e', '\u260e', '\u03c0', '\u03c0', '\u22d4', '\u22d4', '\u03d6', '\u03d6', '\u210f', '\u210f', 8626 '\u210e', '\u210e', '\u210f', '\u210f', '\u002b', '\u002b', '\u2a23', '\u2a23', '\u229e', '\u229e', '\u2a22', '\u2a22', '\u2214', '\u2214', '\u2a25', '\u2a25', '\u2a72', '\u2a72', '\u00b1', '\u00b1', '\u2a26', '\u2a26', '\u2a27', '\u2a27', '\u00b1', '\u00b1', '\u2a15', '\u2a15', '\U0001d561', '\U0001d561', '\u00a3', '\u00a3', '\u227a', 8627 '\u227a', '\u2ab3', '\u2ab3', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u227a', '\u227a', '\u2ab7', '\u2ab7', '\u227c', '\u227c', '\u2aaf', '\u2aaf', '\u2ab9', '\u2ab9', '\u2ab5', '\u2ab5', '\u22e8', '\u22e8', '\u227e', '\u227e', '\u2032', '\u2032', '\u2119', '\u2119', '\u2ab5', '\u2ab5', '\u2ab9', 8628 '\u2ab9', '\u22e8', '\u22e8', '\u220f', '\u220f', '\u232e', '\u232e', '\u2312', '\u2312', '\u2313', '\u2313', '\u221d', '\u221d', '\u221d', '\u221d', '\u227e', '\u227e', '\u22b0', '\u22b0', '\U0001d4c5', '\U0001d4c5', '\u03c8', '\u03c8', '\u2008', '\u2008', '\U0001d52e', '\U0001d52e', '\u2a0c', '\u2a0c', '\U0001d562', '\U0001d562', '\u2057', '\u2057', '\U0001d4c6', '\U0001d4c6', 8629 '\u210d', '\u210d', '\u2a16', '\u2a16', '\u003f', '\u003f', '\u225f', '\u225f', '\u21db', '\u21db', '\u21d2', '\u21d2', '\u291c', '\u291c', '\u290f', '\u290f', '\u2964', '\u2964', '\u0155', '\u0155', '\u221a', '\u221a', '\u29b3', '\u29b3', '\u27e9', '\u27e9', '\u2992', '\u2992', '\u29a5', '\u29a5', '\u27e9', '\u27e9', '\u00bb', 8630 '\u00bb', '\u2192', '\u2192', '\u2975', '\u2975', '\u21e5', '\u21e5', '\u2920', '\u2920', '\u2933', '\u2933', '\u291e', '\u291e', '\u21aa', '\u21aa', '\u21ac', '\u21ac', '\u2945', '\u2945', '\u2974', '\u2974', '\u21a3', '\u21a3', '\u219d', '\u219d', '\u291a', '\u291a', '\u2236', '\u2236', '\u211a', '\u211a', '\u290d', '\u290d', 8631 '\u2773', '\u2773', '\u007d', '\u007d', '\u005d', '\u005d', '\u298c', '\u298c', '\u298e', '\u298e', '\u2990', '\u2990', '\u0159', '\u0159', '\u0157', '\u0157', '\u2309', '\u2309', '\u007d', '\u007d', '\u0440', '\u0440', '\u2937', '\u2937', '\u2969', '\u2969', '\u201d', '\u201d', '\u201d', '\u201d', '\u21b3', '\u21b3', '\u211c', '\u211c', '\u211b', 8632 '\u211b', '\u211c', '\u211c', '\u211d', '\u211d', '\u25ad', '\u25ad', '\u00ae', '\u00ae', '\u297d', '\u297d', '\u230b', '\u230b', '\U0001d52f', '\U0001d52f', '\u21c1', '\u21c1', '\u21c0', '\u21c0', '\u296c', '\u296c', '\u03c1', '\u03c1', '\u03f1', '\u03f1', '\u2192', '\u2192', '\u21a3', '\u21a3', '\u21c1', '\u21c1', 8633 '\u21c0', '\u21c0', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u21c9', '\u21c9', '\u219d', '\u219d', '\u22cc', '\u22cc', '\u02da', '\u02da', '\u2253', '\u2253', '\u21c4', '\u21c4', '\u21cc', '\u21cc', '\u200f', 8634 '\u200f', '\u23b1', '\u23b1', '\u23b1', '\u23b1', '\u2aee', '\u2aee', '\u27ed', '\u27ed', '\u21fe', '\u21fe', '\u27e7', '\u27e7', '\u2986', '\u2986', '\U0001d563', '\U0001d563', '\u2a2e', '\u2a2e', '\u2a35', '\u2a35', '\u0029', '\u0029', '\u2994', '\u2994', '\u2a12', '\u2a12', '\u21c9', '\u21c9', '\u203a', '\u203a', '\U0001d4c7', '\U0001d4c7', '\u21b1', 8635 '\u21b1', '\u005d', '\u005d', '\u2019', '\u2019', '\u2019', '\u2019', '\u22cc', '\u22cc', '\u22ca', '\u22ca', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25b8', '\u25b8', '\u29ce', '\u29ce', '\u2968', '\u2968', '\u211e', '\u211e', '\u015b', '\u015b', '\u201a', '\u201a', '\u227b', '\u227b', '\u2ab4', '\u2ab4', '\u2ab8', '\u2ab8', '\u0161', '\u0161', '\u227d', 8636 '\u227d', '\u2ab0', '\u2ab0', '\u015f', '\u015f', '\u015d', '\u015d', '\u2ab6', '\u2ab6', '\u2aba', '\u2aba', '\u22e9', '\u22e9', '\u2a13', '\u2a13', '\u227f', '\u227f', '\u0441', '\u0441', '\u22c5', '\u22c5', '\u22a1', '\u22a1', '\u2a66', '\u2a66', '\u21d8', '\u21d8', '\u2925', '\u2925', '\u2198', '\u2198', '\u2198', '\u2198', '\u00a7', '\u00a7', '\u003b', 8637 '\u003b', '\u2929', '\u2929', '\u2216', '\u2216', '\u2216', '\u2216', '\u2736', '\u2736', '\U0001d530', '\U0001d530', '\u2322', '\u2322', '\u266f', '\u266f', '\u0449', '\u0449', '\u0448', '\u0448', '\u2223', '\u2223', '\u2225', '\u2225', '\u00ad', '\u00ad', '\u03c3', '\u03c3', '\u03c2', '\u03c2', '\u03c2', '\u03c2', '\u223c', '\u223c', '\u2a6a', 8638 '\u2a6a', '\u2243', '\u2243', '\u2243', '\u2243', '\u2a9e', '\u2a9e', '\u2aa0', '\u2aa0', '\u2a9d', '\u2a9d', '\u2a9f', '\u2a9f', '\u2246', '\u2246', '\u2a24', '\u2a24', '\u2972', '\u2972', '\u2190', '\u2190', '\u2216', '\u2216', '\u2a33', '\u2a33', '\u29e4', '\u29e4', '\u2223', '\u2223', '\u2323', '\u2323', '\u2aaa', '\u2aaa', '\u2aac', 8639 '\u2aac', '\u044c', '\u044c', '\u002f', '\u002f', '\u29c4', '\u29c4', '\u233f', '\u233f', '\U0001d564', '\U0001d564', '\u2660', '\u2660', '\u2660', '\u2660', '\u2225', '\u2225', '\u2293', '\u2293', '\u2294', '\u2294', '\u228f', '\u228f', '\u2291', '\u2291', '\u228f', '\u228f', '\u2291', '\u2291', '\u2290', '\u2290', '\u2292', '\u2292', 8640 '\u2290', '\u2290', '\u2292', '\u2292', '\u25a1', '\u25a1', '\u25a1', '\u25a1', '\u25aa', '\u25aa', '\u25aa', '\u25aa', '\u2192', '\u2192', '\U0001d4c8', '\U0001d4c8', '\u2216', '\u2216', '\u2323', '\u2323', '\u22c6', '\u22c6', '\u2606', '\u2606', '\u2605', '\u2605', '\u03f5', '\u03f5', '\u03d5', '\u03d5', '\u00af', 8641 '\u00af', '\u2282', '\u2282', '\u2ac5', '\u2ac5', '\u2abd', '\u2abd', '\u2286', '\u2286', '\u2ac3', '\u2ac3', '\u2ac1', '\u2ac1', '\u2acb', '\u2acb', '\u228a', '\u228a', '\u2abf', '\u2abf', '\u2979', '\u2979', '\u2282', '\u2282', '\u2286', '\u2286', '\u2ac5', '\u2ac5', '\u228a', '\u228a', '\u2acb', '\u2acb', 8642 '\u2ac7', '\u2ac7', '\u2ad5', '\u2ad5', '\u2ad3', '\u2ad3', '\u227b', '\u227b', '\u2ab8', '\u2ab8', '\u227d', '\u227d', '\u2ab0', '\u2ab0', '\u2aba', '\u2aba', '\u2ab6', '\u2ab6', '\u22e9', '\u22e9', '\u227f', '\u227f', '\u2211', '\u2211', '\u266a', '\u266a', '\u2283', '\u2283', '\u00b9', '\u00b9', '\u00b2', 8643 '\u00b2', '\u00b3', '\u00b3', '\u2ac6', '\u2ac6', '\u2abe', '\u2abe', '\u2ad8', '\u2ad8', '\u2287', '\u2287', '\u2ac4', '\u2ac4', '\u27c9', '\u27c9', '\u2ad7', '\u2ad7', '\u297b', '\u297b', '\u2ac2', '\u2ac2', '\u2acc', '\u2acc', '\u228b', '\u228b', '\u2ac0', '\u2ac0', '\u2283', '\u2283', '\u2287', '\u2287', '\u2ac6', 8644 '\u2ac6', '\u228b', '\u228b', '\u2acc', '\u2acc', '\u2ac8', '\u2ac8', '\u2ad4', '\u2ad4', '\u2ad6', '\u2ad6', '\u21d9', '\u21d9', '\u2926', '\u2926', '\u2199', '\u2199', '\u2199', '\u2199', '\u292a', '\u292a', '\u00df', '\u00df', '\u2316', '\u2316', '\u03c4', '\u03c4', '\u23b4', '\u23b4', '\u0165', '\u0165', '\u0163', 8645 '\u0163', '\u0442', '\u0442', '\u20db', '\u20db', '\u2315', '\u2315', '\U0001d531', '\U0001d531', '\u2234', '\u2234', '\u2234', '\u2234', '\u03b8', '\u03b8', '\u03d1', '\u03d1', '\u03d1', '\u03d1', '\u2248', '\u2248', '\u223c', '\u223c', '\u2009', '\u2009', '\u2248', '\u2248', '\u223c', '\u223c', '\u00fe', '\u00fe', '\u02dc', 8646 '\u02dc', '\u00d7', '\u00d7', '\u22a0', '\u22a0', '\u2a31', '\u2a31', '\u2a30', '\u2a30', '\u222d', '\u222d', '\u2928', '\u2928', '\u22a4', '\u22a4', '\u2336', '\u2336', '\u2af1', '\u2af1', '\U0001d565', '\U0001d565', '\u2ada', '\u2ada', '\u2929', '\u2929', '\u2034', '\u2034', '\u2122', '\u2122', '\u25b5', '\u25b5', '\u25bf', '\u25bf', 8647 '\u25c3', '\u25c3', '\u22b4', '\u22b4', '\u225c', '\u225c', '\u25b9', '\u25b9', '\u22b5', '\u22b5', '\u25ec', '\u25ec', '\u225c', '\u225c', '\u2a3a', '\u2a3a', '\u2a39', '\u2a39', '\u29cd', '\u29cd', '\u2a3b', '\u2a3b', '\u23e2', '\u23e2', '\U0001d4c9', 8648 '\U0001d4c9', '\u0446', '\u0446', '\u045b', '\u045b', '\u0167', '\u0167', '\u226c', '\u226c', '\u219e', '\u219e', '\u21a0', '\u21a0', '\u21d1', '\u21d1', '\u2963', '\u2963', '\u00fa', '\u00fa', '\u2191', '\u2191', '\u045e', '\u045e', '\u016d', '\u016d', '\u00fb', '\u00fb', '\u0443', '\u0443', '\u21c5', '\u21c5', '\u0171', 8649 '\u0171', '\u296e', '\u296e', '\u297e', '\u297e', '\U0001d532', '\U0001d532', '\u00f9', '\u00f9', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u2580', '\u2580', '\u231c', '\u231c', '\u231c', '\u231c', '\u230f', '\u230f', '\u25f8', '\u25f8', '\u016b', '\u016b', '\u00a8', '\u00a8', '\u0173', '\u0173', '\U0001d566', '\U0001d566', '\u2191', '\u2191', '\u2195', 8650 '\u2195', '\u21bf', '\u21bf', '\u21be', '\u21be', '\u228e', '\u228e', '\u03c5', '\u03c5', '\u03d2', '\u03d2', '\u03c5', '\u03c5', '\u21c8', '\u21c8', '\u231d', '\u231d', '\u231d', '\u231d', '\u230e', '\u230e', '\u016f', '\u016f', '\u25f9', '\u25f9', '\U0001d4ca', '\U0001d4ca', '\u22f0', '\u22f0', 8651 '\u0169', '\u0169', '\u25b5', '\u25b5', '\u25b4', '\u25b4', '\u21c8', '\u21c8', '\u00fc', '\u00fc', '\u29a7', '\u29a7', '\u21d5', '\u21d5', '\u2ae8', '\u2ae8', '\u2ae9', '\u2ae9', '\u22a8', '\u22a8', '\u299c', '\u299c', '\u03f5', '\u03f5', '\u03f0', '\u03f0', '\u2205', '\u2205', '\u03d5', '\u03d5', '\u03d6', '\u03d6', '\u221d', 8652 '\u221d', '\u2195', '\u2195', '\u03f1', '\u03f1', '\u03c2', '\u03c2', '\u03d1', '\u03d1', '\u22b2', '\u22b2', '\u22b3', '\u22b3', '\u0432', '\u0432', '\u22a2', '\u22a2', '\u2228', '\u2228', '\u22bb', '\u22bb', '\u225a', '\u225a', '\u22ee', '\u22ee', '\u007c', '\u007c', '\u007c', '\u007c', '\U0001d533', 8653 '\U0001d533', '\u22b2', '\u22b2', '\U0001d567', '\U0001d567', '\u221d', '\u221d', '\u22b3', '\u22b3', '\U0001d4cb', '\U0001d4cb', '\u299a', '\u299a', '\u0175', '\u0175', '\u2a5f', '\u2a5f', '\u2227', '\u2227', '\u2259', '\u2259', '\u2118', '\u2118', '\U0001d534', '\U0001d534', '\U0001d568', '\U0001d568', '\u2118', '\u2118', '\u2240', '\u2240', '\u2240', '\u2240', '\U0001d4cc', '\U0001d4cc', '\u22c2', '\u22c2', '\u25ef', 8654 '\u25ef', '\u22c3', '\u22c3', '\u25bd', '\u25bd', '\U0001d535', '\U0001d535', '\u27fa', '\u27fa', '\u27f7', '\u27f7', '\u03be', '\u03be', '\u27f8', '\u27f8', '\u27f5', '\u27f5', '\u27fc', '\u27fc', '\u22fb', '\u22fb', '\u2a00', '\u2a00', '\U0001d569', '\U0001d569', '\u2a01', '\u2a01', '\u2a02', '\u2a02', '\u27f9', '\u27f9', '\u27f6', '\u27f6', '\U0001d4cd', '\U0001d4cd', '\u2a06', '\u2a06', '\u2a04', 8655 '\u2a04', '\u25b3', '\u25b3', '\u22c1', '\u22c1', '\u22c0', '\u22c0', '\u00fd', '\u00fd', '\u044f', '\u044f', '\u0177', '\u0177', '\u044b', '\u044b', '\u00a5', '\u00a5', '\U0001d536', '\U0001d536', '\u0457', '\u0457', '\U0001d56a', '\U0001d56a', '\U0001d4ce', '\U0001d4ce', '\u044e', '\u044e', '\u00ff', '\u00ff', '\u017a', '\u017a', '\u017e', '\u017e', '\u0437', '\u0437', '\u017c', '\u017c', '\u2128', 8656 '\u2128', '\u03b6', '\u03b6', '\U0001d537', '\U0001d537', '\u0436', '\u0436', '\u21dd', '\u21dd', '\U0001d56b', '\U0001d56b', '\U0001d4cf', '\U0001d4cf', '\u200d', '\u200d', '\u200c', '\u200c', ]; 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 // dom event support, if you want to use it 8681 8682 /// used for DOM events 8683 version(dom_with_events) 8684 alias EventHandler = void delegate(Element handlerAttachedTo, Event event); 8685 8686 /// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it. 8687 version(dom_with_events) 8688 class Event { 8689 this(string eventName, Element target) { 8690 this.eventName = eventName; 8691 this.srcElement = target; 8692 } 8693 8694 /// Prevents the default event handler (if there is one) from being called 8695 void preventDefault() { 8696 defaultPrevented = true; 8697 } 8698 8699 /// Stops the event propagation immediately. 8700 void stopPropagation() { 8701 propagationStopped = true; 8702 } 8703 8704 bool defaultPrevented; 8705 bool propagationStopped; 8706 string eventName; 8707 8708 Element srcElement; 8709 alias srcElement target; 8710 8711 Element relatedTarget; 8712 8713 int clientX; 8714 int clientY; 8715 8716 int button; 8717 8718 bool isBubbling; 8719 8720 /// this sends it only to the target. If you want propagation, use dispatch() instead. 8721 void send() { 8722 if(srcElement is null) 8723 return; 8724 8725 auto e = srcElement; 8726 8727 if(eventName in e.bubblingEventHandlers) 8728 foreach(handler; e.bubblingEventHandlers[eventName]) 8729 handler(e, this); 8730 8731 if(!defaultPrevented) 8732 if(eventName in e.defaultEventHandlers) 8733 e.defaultEventHandlers[eventName](e, this); 8734 } 8735 8736 /// this dispatches the element using the capture -> target -> bubble process 8737 void dispatch() { 8738 if(srcElement is null) 8739 return; 8740 8741 // first capture, then bubble 8742 8743 Element[] chain; 8744 Element curr = srcElement; 8745 while(curr) { 8746 auto l = curr; 8747 chain ~= l; 8748 curr = curr.parentNode; 8749 8750 } 8751 8752 isBubbling = false; 8753 8754 foreach(e; chain.retro()) { 8755 if(eventName in e.capturingEventHandlers) 8756 foreach(handler; e.capturingEventHandlers[eventName]) 8757 handler(e, this); 8758 8759 // the default on capture should really be to always do nothing 8760 8761 //if(!defaultPrevented) 8762 // if(eventName in e.defaultEventHandlers) 8763 // e.defaultEventHandlers[eventName](e.element, this); 8764 8765 if(propagationStopped) 8766 break; 8767 } 8768 8769 isBubbling = true; 8770 if(!propagationStopped) 8771 foreach(e; chain) { 8772 if(eventName in e.bubblingEventHandlers) 8773 foreach(handler; e.bubblingEventHandlers[eventName]) 8774 handler(e, this); 8775 8776 if(propagationStopped) 8777 break; 8778 } 8779 8780 if(!defaultPrevented) 8781 foreach(e; chain) { 8782 if(eventName in e.defaultEventHandlers) 8783 e.defaultEventHandlers[eventName](e, this); 8784 } 8785 } 8786 } 8787 8788 struct FormFieldOptions { 8789 // usable for any 8790 8791 /// this is a regex pattern used to validate the field 8792 string pattern; 8793 /// must the field be filled in? Even with a regex, it can be submitted blank if this is false. 8794 bool isRequired; 8795 /// this is displayed as an example to the user 8796 string placeholder; 8797 8798 // usable for numeric ones 8799 8800 8801 // convenience methods to quickly get some options 8802 @property static FormFieldOptions none() { 8803 FormFieldOptions f; 8804 return f; 8805 } 8806 8807 static FormFieldOptions required() { 8808 FormFieldOptions f; 8809 f.isRequired = true; 8810 return f; 8811 } 8812 8813 static FormFieldOptions regex(string pattern, bool required = false) { 8814 FormFieldOptions f; 8815 f.pattern = pattern; 8816 f.isRequired = required; 8817 return f; 8818 } 8819 8820 static FormFieldOptions fromElement(Element e) { 8821 FormFieldOptions f; 8822 if(e.hasAttribute("required")) 8823 f.isRequired = true; 8824 if(e.hasAttribute("pattern")) 8825 f.pattern = e.pattern; 8826 if(e.hasAttribute("placeholder")) 8827 f.placeholder = e.placeholder; 8828 return f; 8829 } 8830 8831 Element applyToElement(Element e) { 8832 if(this.isRequired) 8833 e.required = "required"; 8834 if(this.pattern.length) 8835 e.pattern = this.pattern; 8836 if(this.placeholder.length) 8837 e.placeholder = this.placeholder; 8838 return e; 8839 } 8840 } 8841 8842 // this needs to look just like a string, but can expand as needed 8843 version(no_dom_stream) 8844 alias string Utf8Stream; 8845 else 8846 class Utf8Stream { 8847 protected: 8848 // these two should be overridden in subclasses to actually do the stream magic 8849 string getMore() { 8850 if(getMoreHelper !is null) 8851 return getMoreHelper(); 8852 return null; 8853 } 8854 8855 bool hasMore() { 8856 if(hasMoreHelper !is null) 8857 return hasMoreHelper(); 8858 return false; 8859 } 8860 // the rest should be ok 8861 8862 public: 8863 this(string d) { 8864 this.data = d; 8865 } 8866 8867 this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) { 8868 this.getMoreHelper = getMoreHelper; 8869 this.hasMoreHelper = hasMoreHelper; 8870 8871 if(hasMore()) 8872 this.data ~= getMore(); 8873 8874 // stdout.flush(); 8875 } 8876 8877 enum contextToKeep = 100; 8878 8879 void markDataDiscardable(size_t p) { 8880 8881 if(p < contextToKeep) 8882 return; 8883 p -= contextToKeep; 8884 8885 // pretends data[0 .. p] is gone and adjusts future things as if it was still there 8886 startingLineNumber = getLineNumber(p); 8887 assert(p >= virtualStartIndex); 8888 data = data[p - virtualStartIndex .. $]; 8889 virtualStartIndex = p; 8890 } 8891 8892 int getLineNumber(size_t p) { 8893 int line = startingLineNumber; 8894 assert(p >= virtualStartIndex); 8895 foreach(c; data[0 .. p - virtualStartIndex]) 8896 if(c == '\n') 8897 line++; 8898 return line; 8899 } 8900 8901 8902 @property final size_t length() { 8903 // the parser checks length primarily directly before accessing the next character 8904 // so this is the place we'll hook to append more if possible and needed. 8905 if(lastIdx + 1 >= (data.length + virtualStartIndex) && hasMore()) { 8906 data ~= getMore(); 8907 } 8908 return data.length + virtualStartIndex; 8909 } 8910 8911 final char opIndex(size_t idx) { 8912 if(idx > lastIdx) 8913 lastIdx = idx; 8914 return data[idx - virtualStartIndex]; 8915 } 8916 8917 final string opSlice(size_t start, size_t end) { 8918 if(end > lastIdx) 8919 lastIdx = end; 8920 // writeln(virtualStartIndex, " " , start, " ", end); 8921 assert(start >= virtualStartIndex); 8922 assert(end >= virtualStartIndex); 8923 return data[start - virtualStartIndex .. end - virtualStartIndex]; 8924 } 8925 8926 final size_t opDollar() { 8927 return length(); 8928 } 8929 8930 final Utf8Stream opBinary(string op : "~")(string s) { 8931 this.data ~= s; 8932 return this; 8933 } 8934 8935 final Utf8Stream opOpAssign(string op : "~")(string s) { 8936 this.data ~= s; 8937 return this; 8938 } 8939 8940 final Utf8Stream opAssign(string rhs) { 8941 this.data = rhs; 8942 return this; 8943 } 8944 private: 8945 string data; 8946 8947 size_t lastIdx; 8948 8949 bool delegate() hasMoreHelper; 8950 string delegate() getMoreHelper; 8951 8952 int startingLineNumber = 1; 8953 size_t virtualStartIndex = 0; 8954 8955 8956 /+ 8957 // used to maybe clear some old stuff 8958 // you might have to remove elements parsed with it too since they can hold slices into the 8959 // old stuff, preventing gc 8960 void dropFront(int bytes) { 8961 posAdjustment += bytes; 8962 data = data[bytes .. $]; 8963 } 8964 8965 int posAdjustment; 8966 +/ 8967 } 8968 8969 void fillForm(T)(Form form, T obj, string name) { 8970 import arsd.database; 8971 fillData((k, v) => form.setValue(k, v), obj, name); 8972 } 8973 8974 /++ 8975 Normalizes the whitespace in the given text according to HTML rules. 8976 8977 History: 8978 Added March 25, 2022 (dub v10.8) 8979 8980 The `stripLeadingAndTrailing` argument was added September 13, 2024 (dub v11.6). 8981 +/ 8982 string normalizeWhitespace(string text, bool stripLeadingAndTrailing = true) { 8983 string ret; 8984 ret.reserve(text.length); 8985 bool lastWasWhite = stripLeadingAndTrailing; 8986 foreach(char ch; text) { 8987 if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') { 8988 if(lastWasWhite) 8989 continue; 8990 lastWasWhite = true; 8991 ch = ' '; 8992 } else { 8993 lastWasWhite = false; 8994 } 8995 8996 ret ~= ch; 8997 } 8998 8999 if(stripLeadingAndTrailing) 9000 return ret.stripRight; 9001 else { 9002 /+ 9003 if(lastWasWhite && (ret.length == 0 || ret[$-1] != ' ')) 9004 ret ~= ' '; 9005 +/ 9006 return ret; 9007 } 9008 } 9009 9010 unittest { 9011 assert(normalizeWhitespace(" foo ") == "foo"); 9012 assert(normalizeWhitespace(" f\n \t oo ") == "f oo"); 9013 assert(normalizeWhitespace(" foo ", false) == " foo "); 9014 assert(normalizeWhitespace(" foo ", false) == " foo "); 9015 assert(normalizeWhitespace("\nfoo", false) == " foo"); 9016 } 9017 9018 unittest { 9019 Document document; 9020 9021 document = new Document("<test> foo \r </test>"); 9022 assert(document.root.visibleText == "foo"); 9023 9024 document = new Document("<test> foo \r <br>hi</test>"); 9025 assert(document.root.visibleText == "foo\nhi"); 9026 9027 document = new Document("<test> foo \r <br>hi<pre>hi\nthere\n indent<br />line</pre></test>"); 9028 assert(document.root.visibleText == "foo\nhihi\nthere\n indent\nline", document.root.visibleText); 9029 } 9030 9031 /+ 9032 /+ 9033 Syntax: 9034 9035 Tag: tagname#id.class 9036 Tree: Tag(Children, comma, separated...) 9037 Children: Tee or Variable 9038 Variable: $varname with optional |funcname following. 9039 9040 If a variable has a tree after it, it breaks the variable down: 9041 * if array, foreach it does the tree 9042 * if struct, it breaks down the member variables 9043 9044 stolen from georgy on irc, see: https://github.com/georgy7/stringplate 9045 +/ 9046 struct Stringplate { 9047 /++ 9048 9049 +/ 9050 this(string s) { 9051 9052 } 9053 9054 /++ 9055 9056 +/ 9057 Element expand(T...)(T vars) { 9058 return null; 9059 } 9060 } 9061 /// 9062 unittest { 9063 auto stringplate = Stringplate("#bar(.foo($foo), .baz($baz))"); 9064 assert(stringplate.expand.innerHTML == `<div id="bar"><div class="foo">$foo</div><div class="baz">$baz</div></div>`); 9065 } 9066 +/ 9067 9068 bool allAreInlineHtml(const(Element)[] children, const string[] inlineElements) { 9069 foreach(child; children) { 9070 if(child.nodeType == NodeType.Text && child.nodeValue.strip.length) { 9071 // cool 9072 } else if(child.tagName.isInArray(inlineElements) && allAreInlineHtml(child.children, inlineElements)) { 9073 // cool, this is an inline element and none of its children contradict that 9074 } else { 9075 // prolly block 9076 return false; 9077 } 9078 } 9079 return true; 9080 } 9081 9082 private bool isSimpleWhite(dchar c) { 9083 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 9084 } 9085 9086 unittest { 9087 // Test for issue #120 9088 string s = `<html> 9089 <body> 9090 <P>AN 9091 <P>bubbles</P> 9092 <P>giggles</P> 9093 </body> 9094 </html>`; 9095 auto doc = new Document(); 9096 doc.parseUtf8(s, false, false); 9097 auto s2 = doc.toString(); 9098 assert( 9099 s2.indexOf("bubbles") < s2.indexOf("giggles"), 9100 "paragraph order incorrect:\n" ~ s2); 9101 } 9102 9103 unittest { 9104 // test for suncarpet email dec 24 2019 9105 // arbitrary id asduiwh 9106 auto document = new Document("<html> 9107 <head> 9108 <meta charset=\"utf-8\"></meta> 9109 <title>Element.querySelector Test</title> 9110 </head> 9111 <body> 9112 <div id=\"foo\"> 9113 <div>Foo</div> 9114 <div>Bar</div> 9115 </div> 9116 <div id=\"empty\"></div> 9117 <div id=\"empty-but-text\">test</div> 9118 </body> 9119 </html>"); 9120 9121 auto doc = document; 9122 9123 { 9124 auto empty = doc.requireElementById("empty"); 9125 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 9126 } 9127 { 9128 auto empty = doc.requireElementById("empty-but-text"); 9129 assert(empty.querySelector(" > *") is null, empty.querySelector(" > *").toString); 9130 } 9131 9132 assert(doc.querySelectorAll("div div").length == 2); 9133 assert(doc.querySelector("div").querySelectorAll("div").length == 2); 9134 assert(doc.querySelectorAll("> html").length == 0); 9135 assert(doc.querySelector("head").querySelectorAll("> title").length == 1); 9136 assert(doc.querySelector("head").querySelectorAll("> meta[charset]").length == 1); 9137 9138 9139 assert(doc.root.matches("html")); 9140 assert(!doc.root.matches("nothtml")); 9141 assert(doc.querySelector("#foo > div").matches("div")); 9142 assert(doc.querySelector("body > #foo").matches("#foo")); 9143 9144 assert(doc.root.querySelectorAll(":root > body").length == 0); // the root has no CHILD root! 9145 assert(doc.querySelectorAll(":root > body").length == 1); // but the DOCUMENT does 9146 assert(doc.querySelectorAll(" > body").length == 1); // should mean the same thing 9147 assert(doc.root.querySelectorAll(" > body").length == 1); // the root of HTML has this 9148 assert(doc.root.querySelectorAll(" > html").length == 0); // but not this 9149 9150 // also confirming the querySelector works via the mdn definition 9151 auto foo = doc.requireSelector("#foo"); 9152 assert(foo.querySelector("#foo > div") !is null); 9153 assert(foo.querySelector("body #foo > div") !is null); 9154 9155 // this is SUPPOSED to work according to the spec but never has in dom.d since it limits the scope. 9156 // the new css :scope thing is designed to bring this in. and meh idk if i even care. 9157 //assert(foo.querySelectorAll("#foo > div").length == 2); 9158 } 9159 9160 unittest { 9161 // based on https://developer.mozilla.org/en-US/docs/Web/API/Element/closest example 9162 auto document = new Document(`<article> 9163 <div id="div-01">Here is div-01 9164 <div id="div-02">Here is div-02 9165 <div id="div-03">Here is div-03</div> 9166 </div> 9167 </div> 9168 </article>`, true, true); 9169 9170 auto el = document.getElementById("div-03"); 9171 assert(el.closest("#div-02").id == "div-02"); 9172 assert(el.closest("div div").id == "div-03"); 9173 assert(el.closest("article > div").id == "div-01"); 9174 assert(el.closest(":not(div)").tagName == "article"); 9175 9176 assert(el.closest("p") is null); 9177 assert(el.closest("p, div") is el); 9178 } 9179 9180 unittest { 9181 // https://developer.mozilla.org/en-US/docs/Web/CSS/:is 9182 auto document = new Document(`<test> 9183 <div class="foo"><p>cool</p><span>bar</span></div> 9184 <main><p>two</p></main> 9185 </test>`); 9186 9187 assert(document.querySelectorAll(":is(.foo, main) p").length == 2); 9188 assert(document.querySelector("div:where(.foo)") !is null); 9189 } 9190 9191 unittest { 9192 immutable string html = q{ 9193 <root> 9194 <div class="roundedbox"> 9195 <table> 9196 <caption class="boxheader">Recent Reviews</caption> 9197 <tr> 9198 <th>Game</th> 9199 <th>User</th> 9200 <th>Rating</th> 9201 <th>Created</th> 9202 </tr> 9203 9204 <tr> 9205 <td>June 13, 2020 15:10</td> 9206 <td><a href="/reviews/8833">[Show]</a></td> 9207 </tr> 9208 9209 <tr> 9210 <td>June 13, 2020 15:02</td> 9211 <td><a href="/reviews/8832">[Show]</a></td> 9212 </tr> 9213 9214 <tr> 9215 <td>June 13, 2020 14:41</td> 9216 <td><a href="/reviews/8831">[Show]</a></td> 9217 </tr> 9218 </table> 9219 </div> 9220 </root> 9221 }; 9222 9223 auto doc = new Document(cast(string)html); 9224 // this should select the second table row, but... 9225 auto rd = doc.root.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9226 assert(rd !is null); 9227 assert(rd.href == "/reviews/8832"); 9228 9229 rd = doc.querySelector(`div.roundedbox > table > caption.boxheader + tr + tr + tr > td > a[href^=/reviews/]`); 9230 assert(rd !is null); 9231 assert(rd.href == "/reviews/8832"); 9232 } 9233 9234 unittest { 9235 try { 9236 auto doc = new XmlDocument("<testxmlns:foo=\"/\"></test>"); 9237 assert(0); 9238 } catch(Exception e) { 9239 // good; it should throw an exception, not an error. 9240 } 9241 } 9242 9243 unittest { 9244 // toPrettyString is not stable, but these are some best-effort attempts 9245 // despite these being in a test, I might change these anyway! 9246 assert(Element.make("a").toPrettyString == "<a></a>"); 9247 assert(Element.make("a", "").toPrettyString(false, 0, " ") == "<a></a>"); 9248 assert(Element.make("a", " ").toPrettyString(false, 0, " ") == "<a> </a>");//, Element.make("a", " ").toPrettyString(false, 0, " ")); 9249 assert(Element.make("a", "b").toPrettyString == "<a>b</a>"); 9250 assert(Element.make("a", "b").toPrettyString(false, 0, "") == "<a>b</a>"); 9251 9252 { 9253 auto document = new Document("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9254 auto pretty = document.toPrettyString(false, 0, " "); 9255 assert(pretty == 9256 `<!DOCTYPE html> 9257 <html> 9258 <body> 9259 <p>hello <a href="world">world</a></p> 9260 </body> 9261 </html>`, pretty); 9262 } 9263 9264 { 9265 auto document = new XmlDocument("<html><body><p>hello <a href=\"world\">world</a></p></body></html>"); 9266 assert(document.toPrettyString(false, 0, " ") == 9267 `<?xml version="1.0" encoding="UTF-8"?> 9268 <html> 9269 <body> 9270 <p> 9271 hello 9272 <a href="world">world</a> 9273 </p> 9274 </body> 9275 </html>`); 9276 } 9277 9278 foreach(test; [ 9279 "<a att=\"http://ele\"><b><ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>", 9280 "<a att=\"http://ele\"><b><ele1>Hello</ele1><c><d><ele2>How are you?</ele2></d><e><ele3>Good & you?</ele3></e></c></b></a>", 9281 ] ) 9282 { 9283 auto document = new XmlDocument(test); 9284 assert(document.root.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9285 assert(document.toPrettyString(false, 0, " ") == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9286 auto omg = document.root; 9287 omg.parent_ = null; 9288 assert(omg.toPrettyString(false, 0, " ") == "<a att=\"http://ele\">\n <b>\n <ele1>Hello</ele1>\n <c>\n <d>\n <ele2>How are you?</ele2>\n </d>\n <e>\n <ele3>Good & you?</ele3>\n </e>\n </c>\n </b>\n</a>"); 9289 } 9290 9291 { 9292 auto document = new XmlDocument(`<a><b>toto</b><c></c></a>`); 9293 assert(document.root.toPrettyString(false, 0, null) == `<a><b>toto</b><c></c></a>`); 9294 assert(document.root.toPrettyString(false, 0, " ") == `<a> 9295 <b>toto</b> 9296 <c></c> 9297 </a>`); 9298 } 9299 9300 { 9301 auto str = `<!DOCTYPE html> 9302 <html> 9303 <head> 9304 <title>Test</title> 9305 </head> 9306 <body> 9307 <p>Hello there</p> 9308 <p>I like <a href="">Links</a></p> 9309 <div> 9310 this is indented since there's a block inside 9311 <p>this is the block</p> 9312 and this gets its own line 9313 </div> 9314 </body> 9315 </html>`; 9316 auto doc = new Document(str, true, true); 9317 assert(doc.toPrettyString == str); 9318 } 9319 } 9320 9321 unittest { 9322 auto document = new Document("<foo><items><item><title>test</title><desc>desc</desc></item></items></foo>"); 9323 auto items = document.root.requireSelector("> items"); 9324 auto item = items.requireSelector("> item"); 9325 auto title = item.requireSelector("> title"); 9326 9327 // this not actually implemented at this point but i might want to later. it prolly should work as an extension of the standard behavior 9328 // assert(title.requireSelector("~ desc").innerText == "desc"); 9329 9330 assert(item.requireSelector("title ~ desc").innerText == "desc"); 9331 9332 assert(items.querySelector("item:has(title)") !is null); 9333 assert(items.querySelector("item:has(nothing)") is null); 9334 9335 assert(title.innerText == "test"); 9336 } 9337 9338 unittest { 9339 auto document = new Document("broken"); // just ensuring it doesn't crash 9340 } 9341 9342 9343 /* 9344 Copyright: Adam D. Ruppe, 2010 - 2023 9345 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 9346 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky, Trass3r, and ketmar among others 9347 */