1 /** 2 This is an html DOM implementation, started with cloning 3 what the browser offers in Javascript, but going well beyond 4 it in convenience. 5 6 If you can do it in Javascript, you can probably do it with 7 this module. 8 9 And much more. 10 11 12 Note: some of the documentation here writes html with added 13 spaces. That's because ddoc doesn't bother encoding html output, 14 and adding spaces is easier than using LT macros everywhere. 15 16 17 BTW: this file depends on arsd.characterencodings, so help it 18 correctly read files from the internet. You should be able to 19 get characterencodings.d from the same place you got this file. 20 */ 21 module arsd.dom; 22 23 // FIXME: do parent selector picking in get selector 24 // FIXME: do :has too... or instead, :has is quite nice. 25 26 version(with_arsd_jsvar) 27 import arsd.jsvar; 28 else { 29 enum Scriptable; 30 } 31 32 // this is only meant to be used at compile time, as a filter for opDispatch 33 // lists the attributes we want to allow without the use of .attr 34 bool isConvenientAttribute(string name) { 35 static immutable list = [ 36 "name", "id", "href", "value", 37 "checked", "selected", "type", 38 "src", "content", "pattern", 39 "placeholder", "required", "alt", 40 "rel", 41 ]; 42 foreach(l; list) 43 if(name == l) return true; 44 return false; 45 } 46 47 // FIXME: might be worth doing Element.attrs and taking opDispatch off that 48 // so more UFCS works. 49 50 51 // FIXME: something like <ol>spam <ol> with no closing </ol> should read the second tag as the closer in garbage mode 52 // FIXME: failing to close a paragraph sometimes messes things up too 53 54 // FIXME: it would be kinda cool to have some support for internal DTDs 55 // and maybe XPath as well, to some extent 56 /* 57 we could do 58 meh this sux 59 60 auto xpath = XPath(element); 61 62 // get the first p 63 xpath.p[0].a["href"] 64 */ 65 66 // public import arsd.domconvenience; // merged for now 67 68 /* domconvenience follows { */ 69 70 71 import std.string; 72 73 // the reason this is separated is so I can plug it into D->JS as well, which uses a different base Element class 74 75 import arsd.dom; 76 77 mixin template DomConvenienceFunctions() { 78 79 /// Calls getElementById, but throws instead of returning null if the element is not found. You can also ask for a specific subclass of Element to dynamically cast to, which also throws if it cannot be done. 80 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 81 if( 82 is(SomeElementType : Element) 83 ) 84 out(ret) { 85 assert(ret !is null); 86 } 87 do { 88 auto e = cast(SomeElementType) getElementById(id); 89 if(e is null) 90 throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, file, line); 91 return e; 92 } 93 94 /// ditto but with selectors instead of ids 95 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 96 if( 97 is(SomeElementType : Element) 98 ) 99 out(ret) { 100 assert(ret !is null); 101 } 102 do { 103 auto e = cast(SomeElementType) querySelector(selector); 104 if(e is null) 105 throw new ElementNotFoundException(SomeElementType.stringof, selector, file, line); 106 return e; 107 } 108 109 110 111 112 /// get all the classes on this element 113 @property string[] classes() { 114 return split(className, " "); 115 } 116 117 /// Adds a string to the class attribute. The class attribute is used a lot in CSS. 118 Element addClass(string c) { 119 if(hasClass(c)) 120 return this; // don't add it twice 121 122 string cn = getAttribute("class"); 123 if(cn.length == 0) { 124 setAttribute("class", c); 125 return this; 126 } else { 127 setAttribute("class", cn ~ " " ~ c); 128 } 129 130 return this; 131 } 132 133 /// Removes a particular class name. 134 Element removeClass(string c) { 135 if(!hasClass(c)) 136 return this; 137 string n; 138 foreach(name; classes) { 139 if(c == name) 140 continue; // cut it out 141 if(n.length) 142 n ~= " "; 143 n ~= name; 144 } 145 146 className = n.strip(); 147 148 return this; 149 } 150 151 /// Returns whether the given class appears in this element. 152 bool hasClass(string c) { 153 string cn = className; 154 155 auto idx = cn.indexOf(c); 156 if(idx == -1) 157 return false; 158 159 foreach(cla; cn.split(" ")) 160 if(cla == c) 161 return true; 162 return false; 163 164 /* 165 int rightSide = idx + c.length; 166 167 bool checkRight() { 168 if(rightSide == cn.length) 169 return true; // it's the only class 170 else if(iswhite(cn[rightSide])) 171 return true; 172 return false; // this is a substring of something else.. 173 } 174 175 if(idx == 0) { 176 return checkRight(); 177 } else { 178 if(!iswhite(cn[idx - 1])) 179 return false; // substring 180 return checkRight(); 181 } 182 183 assert(0); 184 */ 185 } 186 187 188 /* ******************************* 189 DOM Mutation 190 *********************************/ 191 192 /// Removes all inner content from the tag; all child text and elements are gone. 193 void removeAllChildren() 194 out { 195 assert(this.children.length == 0); 196 } 197 do { 198 children = null; 199 } 200 /// convenience function to quickly add a tag with some text or 201 /// other relevant info (for example, it's a src for an <img> element 202 /// instead of inner text) 203 Element addChild(string tagName, string childInfo = null, string childInfo2 = null) 204 in { 205 assert(tagName !is null); 206 } 207 out(e) { 208 assert(e.parentNode is this); 209 assert(e.parentDocument is this.parentDocument); 210 } 211 do { 212 auto e = Element.make(tagName, childInfo, childInfo2); 213 // FIXME (maybe): if the thing is self closed, we might want to go ahead and 214 // return the parent. That will break existing code though. 215 return appendChild(e); 216 } 217 218 /// Another convenience function. Adds a child directly after the current one, returning 219 /// the new child. 220 /// 221 /// Between this, addChild, and parentNode, you can build a tree as a single expression. 222 Element addSibling(string tagName, string childInfo = null, string childInfo2 = null) 223 in { 224 assert(tagName !is null); 225 assert(parentNode !is null); 226 } 227 out(e) { 228 assert(e.parentNode is this.parentNode); 229 assert(e.parentDocument is this.parentDocument); 230 } 231 do { 232 auto e = Element.make(tagName, childInfo, childInfo2); 233 return parentNode.insertAfter(this, e); 234 } 235 236 Element addSibling(Element e) { 237 return parentNode.insertAfter(this, e); 238 } 239 240 Element addChild(Element e) { 241 return this.appendChild(e); 242 } 243 244 /// Convenience function to append text intermixed with other children. 245 /// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), "."); 246 /// or div.addChildren("Hello, ", user.name, "!"); 247 248 /// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping. 249 void addChildren(T...)(T t) { 250 foreach(item; t) { 251 static if(is(item : Element)) 252 appendChild(item); 253 else static if (is(isSomeString!(item))) 254 appendText(to!string(item)); 255 else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren"); 256 } 257 } 258 259 ///. 260 Element addChild(string tagName, Element firstChild, string info2 = null) 261 in { 262 assert(firstChild !is null); 263 } 264 out(ret) { 265 assert(ret !is null); 266 assert(ret.parentNode is this); 267 assert(firstChild.parentNode is ret); 268 269 assert(ret.parentDocument is this.parentDocument); 270 //assert(firstChild.parentDocument is this.parentDocument); 271 } 272 do { 273 auto e = Element.make(tagName, "", info2); 274 e.appendChild(firstChild); 275 this.appendChild(e); 276 return e; 277 } 278 279 Element addChild(string tagName, in Html innerHtml, string info2 = null) 280 in { 281 } 282 out(ret) { 283 assert(ret !is null); 284 assert(ret.parentNode is this); 285 assert(ret.parentDocument is this.parentDocument); 286 } 287 do { 288 auto e = Element.make(tagName, "", info2); 289 this.appendChild(e); 290 e.innerHTML = innerHtml.source; 291 return e; 292 } 293 294 295 /// . 296 void appendChildren(Element[] children) { 297 foreach(ele; children) 298 appendChild(ele); 299 } 300 301 ///. 302 void reparent(Element newParent) 303 in { 304 assert(newParent !is null); 305 assert(parentNode !is null); 306 } 307 out { 308 assert(this.parentNode is newParent); 309 //assert(isInArray(this, newParent.children)); 310 } 311 do { 312 parentNode.removeChild(this); 313 newParent.appendChild(this); 314 } 315 316 /** 317 Strips this tag out of the document, putting its inner html 318 as children of the parent. 319 320 For example, given: <p>hello <b>there</b></p>, if you 321 call stripOut() on the b element, you'll be left with 322 <p>hello there<p>. 323 324 The idea here is to make it easy to get rid of garbage 325 markup you aren't interested in. 326 */ 327 void stripOut() 328 in { 329 assert(parentNode !is null); 330 } 331 out { 332 assert(parentNode is null); 333 assert(children.length == 0); 334 } 335 do { 336 foreach(c; children) 337 c.parentNode = null; // remove the parent 338 if(children.length) 339 parentNode.replaceChild(this, this.children); 340 else 341 parentNode.removeChild(this); 342 this.children.length = 0; // we reparented them all above 343 } 344 345 /// shorthand for this.parentNode.removeChild(this) with parentNode null check 346 /// if the element already isn't in a tree, it does nothing. 347 Element removeFromTree() 348 in { 349 350 } 351 out(var) { 352 assert(this.parentNode is null); 353 assert(var is this); 354 } 355 do { 356 if(this.parentNode is null) 357 return this; 358 359 this.parentNode.removeChild(this); 360 361 return this; 362 } 363 364 /// Wraps this element inside the given element. 365 /// It's like this.replaceWith(what); what.appendchild(this); 366 /// 367 /// Given: < b >cool</ b >, if you call b.wrapIn(new Link("site.com", "my site is ")); 368 /// you'll end up with: < a href="site.com">my site is < b >cool< /b ></ a >. 369 Element wrapIn(Element what) 370 in { 371 assert(what !is null); 372 } 373 out(ret) { 374 assert(this.parentNode is what); 375 assert(ret is what); 376 } 377 do { 378 this.replaceWith(what); 379 what.appendChild(this); 380 381 return what; 382 } 383 384 /// Replaces this element with something else in the tree. 385 Element replaceWith(Element e) 386 in { 387 assert(this.parentNode !is null); 388 } 389 do { 390 e.removeFromTree(); 391 this.parentNode.replaceChild(this, e); 392 return e; 393 } 394 395 /** 396 Splits the className into an array of each class given 397 */ 398 string[] classNames() const { 399 return className().split(" "); 400 } 401 402 /** 403 Fetches the first consecutive nodes, if text nodes, concatenated together 404 405 If the first node is not text, returns null. 406 407 See also: directText, innerText 408 */ 409 string firstInnerText() const { 410 string s; 411 foreach(child; children) { 412 if(child.nodeType != NodeType.Text) 413 break; 414 415 s ~= child.nodeValue(); 416 } 417 return s; 418 } 419 420 421 /** 422 Returns the text directly under this element, 423 not recursively like innerText. 424 425 See also: firstInnerText 426 */ 427 @property string directText() { 428 string ret; 429 foreach(e; children) { 430 if(e.nodeType == NodeType.Text) 431 ret ~= e.nodeValue(); 432 } 433 434 return ret; 435 } 436 437 /** 438 Sets the direct text, keeping the same place. 439 440 Unlike innerText, this does *not* remove existing 441 elements in the element. 442 443 It only replaces the first text node it sees. 444 445 If there are no text nodes, it calls appendText 446 447 So, given (ignore the spaces in the tags): 448 < div > < img > text here < /div > 449 450 it will keep the img, and replace the "text here". 451 */ 452 @property void directText(string text) { 453 foreach(e; children) { 454 if(e.nodeType == NodeType.Text) { 455 auto it = cast(TextNode) e; 456 it.contents = text; 457 return; 458 } 459 } 460 461 appendText(text); 462 } 463 } 464 465 /// finds comments that match the given txt. Case insensitive, strips whitespace. 466 Element[] findComments(Document document, string txt) { 467 return findComments(document.root, txt); 468 } 469 470 /// ditto 471 Element[] findComments(Element element, string txt) { 472 txt = txt.strip().toLower(); 473 Element[] ret; 474 475 foreach(comment; element.getElementsByTagName("#comment")) { 476 string t = comment.nodeValue().strip().toLower(); 477 if(t == txt) 478 ret ~= comment; 479 } 480 481 return ret; 482 } 483 484 // I'm just dicking around with this 485 struct ElementCollection { 486 this(Element e) { 487 elements = [e]; 488 } 489 490 this(Element e, string selector) { 491 elements = e.querySelectorAll(selector); 492 } 493 494 this(Element[] e) { 495 elements = e; 496 } 497 498 Element[] elements; 499 //alias elements this; // let it implicitly convert to the underlying array 500 501 ElementCollection opIndex(string selector) { 502 ElementCollection ec; 503 foreach(e; elements) 504 ec.elements ~= e.getElementsBySelector(selector); 505 return ec; 506 } 507 508 /// if you slice it, give the underlying array for easy forwarding of the 509 /// collection to range expecting algorithms or looping over. 510 Element[] opSlice() { 511 return elements; 512 } 513 514 /// And input range primitives so we can foreach over this 515 void popFront() { 516 elements = elements[1..$]; 517 } 518 519 /// ditto 520 Element front() { 521 return elements[0]; 522 } 523 524 /// ditto 525 bool empty() { 526 return !elements.length; 527 } 528 529 /// Forward method calls to each individual element of the collection 530 /// returns this so it can be chained. 531 ElementCollection opDispatch(string name, T...)(T t) { 532 foreach(e; elements) { 533 mixin("e." ~ name)(t); 534 } 535 return this; 536 } 537 538 ElementCollection opBinary(string op : "~")(ElementCollection rhs) { 539 return ElementCollection(this.elements ~ rhs.elements); 540 } 541 } 542 543 544 // this puts in operators and opDispatch to handle string indexes and properties, forwarding to get and set functions. 545 mixin template JavascriptStyleDispatch() { 546 string opDispatch(string name)(string v = null) if(name != "popFront") { // popFront will make this look like a range. Do not want. 547 if(v !is null) 548 return set(name, v); 549 return get(name); 550 } 551 552 string opIndex(string key) const { 553 return get(key); 554 } 555 556 string opIndexAssign(string value, string field) { 557 return set(field, value); 558 } 559 560 // FIXME: doesn't seem to work 561 string* opBinary(string op)(string key) if(op == "in") { 562 return key in fields; 563 } 564 } 565 566 /// A proxy object to do the Element class' dataset property. See Element.dataset for more info. 567 /// 568 /// Do not create this object directly. 569 struct DataSet { 570 this(Element e) { 571 this._element = e; 572 } 573 574 private Element _element; 575 string set(string name, string value) { 576 _element.setAttribute("data-" ~ unCamelCase(name), value); 577 return value; 578 } 579 580 string get(string name) const { 581 return _element.getAttribute("data-" ~ unCamelCase(name)); 582 } 583 584 mixin JavascriptStyleDispatch!(); 585 } 586 587 /// Proxy object for attributes which will replace the main opDispatch eventually 588 struct AttributeSet { 589 this(Element e) { 590 this._element = e; 591 } 592 593 private Element _element; 594 string set(string name, string value) { 595 _element.setAttribute(name, value); 596 return value; 597 } 598 599 string get(string name) const { 600 return _element.getAttribute(name); 601 } 602 603 mixin JavascriptStyleDispatch!(); 604 } 605 606 607 608 /// for style, i want to be able to set it with a string like a plain attribute, 609 /// but also be able to do properties Javascript style. 610 611 struct ElementStyle { 612 this(Element parent) { 613 _element = parent; 614 } 615 616 Element _element; 617 618 @property ref inout(string) _attribute() inout { 619 auto s = "style" in _element.attributes; 620 if(s is null) { 621 auto e = cast() _element; // const_cast 622 e.attributes["style"] = ""; // we need something to reference 623 s = cast(inout) ("style" in e.attributes); 624 } 625 626 assert(s !is null); 627 return *s; 628 } 629 630 alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work. 631 632 string set(string name, string value) { 633 if(name.length == 0) 634 return value; 635 if(name == "cssFloat") 636 name = "float"; 637 else 638 name = unCamelCase(name); 639 auto r = rules(); 640 r[name] = value; 641 642 _attribute = ""; 643 foreach(k, v; r) { 644 if(v is null || v.length == 0) /* css can't do empty rules anyway so we'll use that to remove */ 645 continue; 646 if(_attribute.length) 647 _attribute ~= " "; 648 _attribute ~= k ~ ": " ~ v ~ ";"; 649 } 650 651 _element.setAttribute("style", _attribute); // this is to trigger the observer call 652 653 return value; 654 } 655 string get(string name) const { 656 if(name == "cssFloat") 657 name = "float"; 658 else 659 name = unCamelCase(name); 660 auto r = rules(); 661 if(name in r) 662 return r[name]; 663 return null; 664 } 665 666 string[string] rules() const { 667 string[string] ret; 668 foreach(rule; _attribute.split(";")) { 669 rule = rule.strip(); 670 if(rule.length == 0) 671 continue; 672 auto idx = rule.indexOf(":"); 673 if(idx == -1) 674 ret[rule] = ""; 675 else { 676 auto name = rule[0 .. idx].strip(); 677 auto value = rule[idx + 1 .. $].strip(); 678 679 ret[name] = value; 680 } 681 } 682 683 return ret; 684 } 685 686 mixin JavascriptStyleDispatch!(); 687 } 688 689 /// Converts a camel cased propertyName to a css style dashed property-name 690 string unCamelCase(string a) { 691 string ret; 692 foreach(c; a) 693 if((c >= 'A' && c <= 'Z')) 694 ret ~= "-" ~ toLower("" ~ c)[0]; 695 else 696 ret ~= c; 697 return ret; 698 } 699 700 /// Translates a css style property-name to a camel cased propertyName 701 string camelCase(string a) { 702 string ret; 703 bool justSawDash = false; 704 foreach(c; a) 705 if(c == '-') { 706 justSawDash = true; 707 } else { 708 if(justSawDash) { 709 justSawDash = false; 710 ret ~= toUpper("" ~ c); 711 } else 712 ret ~= c; 713 } 714 return ret; 715 } 716 717 718 719 720 721 722 723 724 725 // domconvenience ends } 726 727 728 729 730 731 732 733 734 735 736 737 // @safe: 738 739 // NOTE: do *NOT* override toString on Element subclasses. It won't work. 740 // Instead, override writeToAppender(); 741 742 // FIXME: should I keep processing instructions like <?blah ?> and <!-- blah --> (comments too lol)? I *want* them stripped out of most my output, but I want to be able to parse and create them too. 743 744 // Stripping them is useful for reading php as html.... but adding them 745 // is good for building php. 746 747 // I need to maintain compatibility with the way it is now too. 748 749 import std.string; 750 import std.exception; 751 import std.uri; 752 import std.array; 753 import std.range; 754 755 //import std.stdio; 756 757 // tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh 758 // that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's 759 // most likely a typo so I say kill kill kill. 760 761 762 /// This might belong in another module, but it represents a file with a mime type and some data. 763 /// Document implements this interface with type = text/html (see Document.contentType for more info) 764 /// and data = document.toString, so you can return Documents anywhere web.d expects FileResources. 765 interface FileResource { 766 @property string contentType() const; /// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png" 767 immutable(ubyte)[] getData() const; /// the data 768 } 769 770 771 772 773 ///. 774 enum NodeType { Text = 3 } 775 776 777 /// You can use this to do an easy null check or a dynamic cast+null check on any element. 778 T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element)) 779 in {} 780 out(ret) { assert(ret !is null); } 781 do { 782 auto ret = cast(T) e; 783 if(ret is null) 784 throw new ElementNotFoundException(T.stringof, "passed value", file, line); 785 return ret; 786 } 787 788 /// This represents almost everything in the DOM. 789 class Element { 790 mixin DomConvenienceFunctions!(); 791 792 // do nothing, this is primarily a virtual hook 793 // for links and forms 794 void setValue(string field, string value) { } 795 796 797 // this is a thing so i can remove observer support if it gets slow 798 // I have not implemented all these yet 799 private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) { 800 if(parentDocument is null) return; 801 DomMutationEvent me; 802 me.operation = operation; 803 me.target = this; 804 me.relatedString = s1; 805 me.relatedString2 = s2; 806 me.related = r; 807 me.related2 = r2; 808 parentDocument.dispatchMutationEvent(me); 809 } 810 811 // putting all the members up front 812 813 // this ought to be private. don't use it directly. 814 Element[] children; 815 816 /// The name of the tag. Remember, changing this doesn't change the dynamic type of the object. 817 string tagName; 818 819 /// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead. 820 string[string] attributes; 821 822 /// In XML, it is valid to write <tag /> for all elements with no children, but that breaks HTML, so I don't do it here. 823 /// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list. 824 private bool selfClosed; 825 826 /// Get the parent Document object that contains this element. 827 /// It may be null, so remember to check for that. 828 Document parentDocument; 829 830 ///. 831 Element parentNode; 832 833 // the next few methods are for implementing interactive kind of things 834 private CssStyle _computedStyle; 835 836 // these are here for event handlers. Don't forget that this library never fires events. 837 // (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.) 838 EventHandler[][string] bubblingEventHandlers; 839 EventHandler[][string] capturingEventHandlers; 840 EventHandler[string] defaultEventHandlers; 841 842 void addEventListener(string event, EventHandler handler, bool useCapture = false) { 843 if(event.length > 2 && event[0..2] == "on") 844 event = event[2 .. $]; 845 846 if(useCapture) 847 capturingEventHandlers[event] ~= handler; 848 else 849 bubblingEventHandlers[event] ~= handler; 850 } 851 852 853 // and now methods 854 855 /// Convenience function to try to do the right thing for HTML. This is the main 856 /// way I create elements. 857 static Element make(string tagName, string childInfo = null, string childInfo2 = null) { 858 bool selfClosed = tagName.isInArray(selfClosedElements); 859 860 Element e; 861 // want to create the right kind of object for the given tag... 862 switch(tagName) { 863 case "#text": 864 e = new TextNode(null, childInfo); 865 return e; 866 // break; 867 case "table": 868 e = new Table(null); 869 break; 870 case "a": 871 e = new Link(null); 872 break; 873 case "form": 874 e = new Form(null); 875 break; 876 case "tr": 877 e = new TableRow(null); 878 break; 879 case "td", "th": 880 e = new TableCell(null, tagName); 881 break; 882 default: 883 e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere 884 } 885 886 // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too 887 e.tagName = tagName; 888 e.selfClosed = selfClosed; 889 890 if(childInfo !is null) 891 switch(tagName) { 892 /* html5 convenience tags */ 893 case "audio": 894 if(childInfo.length) 895 e.addChild("source", childInfo); 896 if(childInfo2 !is null) 897 e.appendText(childInfo2); 898 break; 899 case "source": 900 e.src = childInfo; 901 if(childInfo2 !is null) 902 e.type = childInfo2; 903 break; 904 /* regular html 4 stuff */ 905 case "img": 906 e.src = childInfo; 907 if(childInfo2 !is null) 908 e.alt = childInfo2; 909 break; 910 case "link": 911 e.href = childInfo; 912 if(childInfo2 !is null) 913 e.rel = childInfo2; 914 break; 915 case "option": 916 e.innerText = childInfo; 917 if(childInfo2 !is null) 918 e.value = childInfo2; 919 break; 920 case "input": 921 e.type = "hidden"; 922 e.name = childInfo; 923 if(childInfo2 !is null) 924 e.value = childInfo2; 925 break; 926 case "button": 927 e.innerText = childInfo; 928 if(childInfo2 !is null) 929 e.type = childInfo2; 930 break; 931 case "a": 932 e.innerText = childInfo; 933 if(childInfo2 !is null) 934 e.href = childInfo2; 935 break; 936 case "script": 937 case "style": 938 e.innerRawSource = childInfo; 939 break; 940 case "meta": 941 e.name = childInfo; 942 if(childInfo2 !is null) 943 e.content = childInfo2; 944 break; 945 /* generically, assume we were passed text and perhaps class */ 946 default: 947 e.innerText = childInfo; 948 if(childInfo2.length) 949 e.className = childInfo2; 950 } 951 952 return e; 953 } 954 955 static Element make(string tagName, in Html innerHtml, string childInfo2 = null) { 956 // FIXME: childInfo2 is ignored when info1 is null 957 auto m = Element.make(tagName, cast(string) null, childInfo2); 958 m.innerHTML = innerHtml.source; 959 return m; 960 } 961 962 static Element make(string tagName, Element child, string childInfo2 = null) { 963 auto m = Element.make(tagName, cast(string) null, childInfo2); 964 m.appendChild(child); 965 return m; 966 } 967 968 969 /// Generally, you don't want to call this yourself - use Element.make or document.createElement instead. 970 this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { 971 parentDocument = _parentDocument; 972 tagName = _tagName; 973 if(_attributes !is null) 974 attributes = _attributes; 975 selfClosed = _selfClosed; 976 977 version(dom_node_indexes) 978 this.dataset.nodeIndex = to!string(&(this.attributes)); 979 980 assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid"); 981 } 982 983 /// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document. 984 /// Note also that without a parent document, elements are always in strict, case-sensitive mode. 985 this(string _tagName, string[string] _attributes = null) { 986 tagName = _tagName; 987 if(_attributes !is null) 988 attributes = _attributes; 989 selfClosed = tagName.isInArray(selfClosedElements); 990 991 // this is meant to reserve some memory. It makes a small, but consistent improvement. 992 //children.length = 8; 993 //children.length = 0; 994 995 version(dom_node_indexes) 996 this.dataset.nodeIndex = to!string(&(this.attributes)); 997 } 998 999 private this(Document _parentDocument) { 1000 parentDocument = _parentDocument; 1001 1002 version(dom_node_indexes) 1003 this.dataset.nodeIndex = to!string(&(this.attributes)); 1004 } 1005 1006 1007 /* ******************************* 1008 Navigating the DOM 1009 *********************************/ 1010 1011 /// Returns the first child of this element. If it has no children, returns null. 1012 /// Remember, text nodes are children too. 1013 @property Element firstChild() { 1014 return children.length ? children[0] : null; 1015 } 1016 1017 /// 1018 @property Element lastChild() { 1019 return children.length ? children[$ - 1] : null; 1020 } 1021 1022 1023 ///. 1024 @property Element previousSibling(string tagName = null) { 1025 if(this.parentNode is null) 1026 return null; 1027 Element ps = null; 1028 foreach(e; this.parentNode.childNodes) { 1029 if(e is this) 1030 break; 1031 if(tagName == "*" && e.nodeType != NodeType.Text) { 1032 ps = e; 1033 break; 1034 } 1035 if(tagName is null || e.tagName == tagName) 1036 ps = e; 1037 } 1038 1039 return ps; 1040 } 1041 1042 ///. 1043 @property Element nextSibling(string tagName = null) { 1044 if(this.parentNode is null) 1045 return null; 1046 Element ns = null; 1047 bool mightBe = false; 1048 foreach(e; this.parentNode.childNodes) { 1049 if(e is this) { 1050 mightBe = true; 1051 continue; 1052 } 1053 if(mightBe) { 1054 if(tagName == "*" && e.nodeType != NodeType.Text) { 1055 ns = e; 1056 break; 1057 } 1058 if(tagName is null || e.tagName == tagName) { 1059 ns = e; 1060 break; 1061 } 1062 } 1063 } 1064 1065 return ns; 1066 } 1067 1068 1069 /// Gets the nearest node, going up the chain, with the given tagName 1070 /// May return null or throw. 1071 T getParent(T = Element)(string tagName = null) if(is(T : Element)) { 1072 if(tagName is null) { 1073 static if(is(T == Form)) 1074 tagName = "form"; 1075 else static if(is(T == Table)) 1076 tagName = "table"; 1077 else static if(is(T == Link)) 1078 tagName == "a"; 1079 } 1080 1081 auto par = this.parentNode; 1082 while(par !is null) { 1083 if(tagName is null || par.tagName == tagName) 1084 break; 1085 par = par.parentNode; 1086 } 1087 1088 static if(!is(T == Element)) { 1089 auto t = cast(T) par; 1090 if(t is null) 1091 throw new ElementNotFoundException("", tagName ~ " parent not found"); 1092 } else 1093 auto t = par; 1094 1095 return t; 1096 } 1097 1098 ///. 1099 Element getElementById(string id) { 1100 // FIXME: I use this function a lot, and it's kinda slow 1101 // not terribly slow, but not great. 1102 foreach(e; tree) 1103 if(e.id == id) 1104 return e; 1105 return null; 1106 } 1107 1108 /// Note: you can give multiple selectors, separated by commas. 1109 /// It will return the first match it finds. 1110 Element querySelector(string selector) { 1111 // FIXME: inefficient; it gets all results just to discard most of them 1112 auto list = getElementsBySelector(selector); 1113 if(list.length == 0) 1114 return null; 1115 return list[0]; 1116 } 1117 1118 /// a more standards-compliant alias for getElementsBySelector 1119 Element[] querySelectorAll(string selector) { 1120 return getElementsBySelector(selector); 1121 } 1122 1123 /** 1124 Does a CSS selector 1125 1126 * -- all, default if nothing else is there 1127 1128 tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector 1129 1130 It is all additive 1131 1132 OP 1133 1134 space = descendant 1135 > = direct descendant 1136 + = sibling (E+F Matches any F element immediately preceded by a sibling element E) 1137 1138 [foo] Foo is present as an attribute 1139 [foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning". 1140 E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" 1141 E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". 1142 1143 [item$=sdas] ends with 1144 [item^-sdsad] begins with 1145 1146 Quotes are optional here. 1147 1148 Pseudos: 1149 :first-child 1150 :last-child 1151 :link (same as a[href] for our purposes here) 1152 1153 1154 There can be commas separating the selector. A comma separated list result is OR'd onto the main. 1155 1156 1157 1158 This ONLY cares about elements. text, etc, are ignored 1159 1160 1161 There should be two functions: given element, does it match the selector? and given a selector, give me all the elements 1162 */ 1163 Element[] getElementsBySelector(string selector) { 1164 // FIXME: this function could probably use some performance attention 1165 // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. 1166 1167 1168 bool caseSensitiveTags = true; 1169 if(parentDocument && parentDocument.loose) 1170 caseSensitiveTags = false; 1171 1172 Element[] ret; 1173 foreach(sel; parseSelectorString(selector, caseSensitiveTags)) 1174 ret ~= sel.getElements(this); 1175 return ret; 1176 } 1177 1178 /// . 1179 Element[] getElementsByClassName(string cn) { 1180 // is this correct? 1181 return getElementsBySelector("." ~ cn); 1182 } 1183 1184 ///. 1185 Element[] getElementsByTagName(string tag) { 1186 if(parentDocument && parentDocument.loose) 1187 tag = tag.toLower(); 1188 Element[] ret; 1189 foreach(e; tree) 1190 if(e.tagName == tag) 1191 ret ~= e; 1192 return ret; 1193 } 1194 1195 1196 /* ******************************* 1197 Attributes 1198 *********************************/ 1199 1200 /** 1201 Gets the given attribute value, or null if the 1202 attribute is not set. 1203 1204 Note that the returned string is decoded, so it no longer contains any xml entities. 1205 */ 1206 string getAttribute(string name) const { 1207 if(parentDocument && parentDocument.loose) 1208 name = name.toLower(); 1209 auto e = name in attributes; 1210 if(e) 1211 return *e; 1212 else 1213 return null; 1214 } 1215 1216 /** 1217 Sets an attribute. Returns this for easy chaining 1218 */ 1219 Element setAttribute(string name, string value) { 1220 if(parentDocument && parentDocument.loose) 1221 name = name.toLower(); 1222 1223 // I never use this shit legitimately and neither should you 1224 auto it = name.toLower(); 1225 if(it == "href" || it == "src") { 1226 auto v = value.strip().toLower(); 1227 if(v.startsWith("vbscript:")) 1228 value = value[9..$]; 1229 if(v.startsWith("javascript:")) 1230 value = value[11..$]; 1231 } 1232 1233 attributes[name] = value; 1234 1235 sendObserverEvent(DomMutationOperations.setAttribute, name, value); 1236 1237 return this; 1238 } 1239 1240 /** 1241 Returns if the attribute exists. 1242 */ 1243 bool hasAttribute(string name) { 1244 if(parentDocument && parentDocument.loose) 1245 name = name.toLower(); 1246 1247 if(name in attributes) 1248 return true; 1249 else 1250 return false; 1251 } 1252 1253 /** 1254 Removes the given attribute from the element. 1255 */ 1256 Element removeAttribute(string name) 1257 out(ret) { 1258 assert(ret is this); 1259 } 1260 do { 1261 if(parentDocument && parentDocument.loose) 1262 name = name.toLower(); 1263 if(name in attributes) 1264 attributes.remove(name); 1265 1266 sendObserverEvent(DomMutationOperations.removeAttribute, name); 1267 return this; 1268 } 1269 1270 /** 1271 Gets the class attribute's contents. Returns 1272 an empty string if it has no class. 1273 */ 1274 @property string className() const { 1275 auto c = getAttribute("class"); 1276 if(c is null) 1277 return ""; 1278 return c; 1279 } 1280 1281 ///. 1282 @property Element className(string c) { 1283 setAttribute("class", c); 1284 return this; 1285 } 1286 1287 /** 1288 Provides easy access to attributes, object style. 1289 1290 auto element = Element.make("a"); 1291 a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html"); 1292 string where = a.href; // same as a.getAttribute("href"); 1293 1294 */ 1295 @property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) { 1296 if(v !is null) 1297 setAttribute(name, v); 1298 return getAttribute(name); 1299 } 1300 1301 /** 1302 DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions. 1303 so I want to remove it. A small whitelist of attributes is still allowed, but others are not. 1304 1305 Instead, use element.attrs.attribute, element.attrs["attribute"], 1306 or element.getAttribute("attribute")/element.setAttribute("attribute"). 1307 */ 1308 @property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) { 1309 static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName"); 1310 } 1311 1312 /* 1313 // this would be nice for convenience, but it broke the getter above. 1314 @property void opDispatch(string name)(bool boolean) if(name != "popFront") { 1315 if(boolean) 1316 setAttribute(name, name); 1317 else 1318 removeAttribute(name); 1319 } 1320 */ 1321 1322 /** 1323 Returns the element's children. 1324 */ 1325 @property const(Element[]) childNodes() const { 1326 return children; 1327 } 1328 1329 /// Mutable version of the same 1330 @property Element[] childNodes() { // FIXME: the above should be inout 1331 return children; 1332 } 1333 1334 /// HTML5's dataset property. It is an alternate view into attributes with the data- prefix. 1335 /// 1336 /// Given: <a data-my-property="cool" /> 1337 /// 1338 /// We get: assert(a.dataset.myProperty == "cool"); 1339 @property DataSet dataset() { 1340 return DataSet(this); 1341 } 1342 1343 /// Gives dot/opIndex access to attributes 1344 /// ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo") 1345 @property AttributeSet attrs() { 1346 return AttributeSet(this); 1347 } 1348 1349 /// Provides both string and object style (like in Javascript) access to the style attribute. 1350 @property ElementStyle style() { 1351 return ElementStyle(this); 1352 } 1353 1354 /// This sets the style attribute with a string. 1355 @property ElementStyle style(string s) { 1356 this.setAttribute("style", s); 1357 return this.style; 1358 } 1359 1360 private void parseAttributes(string[] whichOnes = null) { 1361 /+ 1362 if(whichOnes is null) 1363 whichOnes = attributes.keys; 1364 foreach(attr; whichOnes) { 1365 switch(attr) { 1366 case "id": 1367 1368 break; 1369 case "class": 1370 1371 break; 1372 case "style": 1373 1374 break; 1375 default: 1376 // we don't care about it 1377 } 1378 } 1379 +/ 1380 } 1381 1382 1383 // if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there. 1384 ///. 1385 @property CssStyle computedStyle() { 1386 if(_computedStyle is null) { 1387 auto style = this.getAttribute("style"); 1388 /* we'll treat shitty old html attributes as css here */ 1389 if(this.hasAttribute("width")) 1390 style ~= "; width: " ~ this.attrs.width; 1391 if(this.hasAttribute("height")) 1392 style ~= "; height: " ~ this.attrs.height; 1393 if(this.hasAttribute("bgcolor")) 1394 style ~= "; background-color: " ~ this.attrs.bgcolor; 1395 if(this.tagName == "body" && this.hasAttribute("text")) 1396 style ~= "; color: " ~ this.attrs.text; 1397 if(this.hasAttribute("color")) 1398 style ~= "; color: " ~ this.attrs.color; 1399 /* done */ 1400 1401 1402 _computedStyle = new CssStyle(null, style); // gives at least something to work with 1403 } 1404 return _computedStyle; 1405 } 1406 1407 /// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good 1408 version(browser) { 1409 void* expansionHook; ///ditto 1410 int offsetWidth; ///ditto 1411 int offsetHeight; ///ditto 1412 int offsetLeft; ///ditto 1413 int offsetTop; ///ditto 1414 Element offsetParent; ///ditto 1415 bool hasLayout; ///ditto 1416 int zIndex; ///ditto 1417 1418 ///ditto 1419 int absoluteLeft() { 1420 int a = offsetLeft; 1421 auto p = offsetParent; 1422 while(p) { 1423 a += p.offsetLeft; 1424 p = p.offsetParent; 1425 } 1426 1427 return a; 1428 } 1429 1430 ///ditto 1431 int absoluteTop() { 1432 int a = offsetTop; 1433 auto p = offsetParent; 1434 while(p) { 1435 a += p.offsetTop; 1436 p = p.offsetParent; 1437 } 1438 1439 return a; 1440 } 1441 } 1442 1443 // Back to the regular dom functions 1444 1445 public: 1446 1447 1448 /* ******************************* 1449 DOM Mutation 1450 *********************************/ 1451 1452 /// Removes all inner content from the tag; all child text and elements are gone. 1453 void removeAllChildren() 1454 out { 1455 assert(this.children.length == 0); 1456 } 1457 do { 1458 children = null; 1459 } 1460 1461 1462 /// Appends the given element to this one. The given element must not have a parent already. 1463 Element appendChild(Element e) 1464 in { 1465 assert(e !is null); 1466 assert(e.parentNode is null); 1467 } 1468 out (ret) { 1469 assert(e.parentNode is this); 1470 assert(e.parentDocument is this.parentDocument); 1471 assert(e is ret); 1472 } 1473 do { 1474 selfClosed = false; 1475 e.parentNode = this; 1476 e.parentDocument = this.parentDocument; 1477 children ~= e; 1478 1479 sendObserverEvent(DomMutationOperations.appendChild, null, null, e); 1480 1481 return e; 1482 } 1483 1484 /// Inserts the second element to this node, right before the first param 1485 Element insertBefore(in Element where, Element what) 1486 in { 1487 assert(where !is null); 1488 assert(where.parentNode is this); 1489 assert(what !is null); 1490 assert(what.parentNode is null); 1491 } 1492 out (ret) { 1493 assert(where.parentNode is this); 1494 assert(what.parentNode is this); 1495 1496 assert(what.parentDocument is this.parentDocument); 1497 assert(ret is what); 1498 } 1499 do { 1500 foreach(i, e; children) { 1501 if(e is where) { 1502 children = children[0..i] ~ what ~ children[i..$]; 1503 what.parentDocument = this.parentDocument; 1504 what.parentNode = this; 1505 return what; 1506 } 1507 } 1508 1509 return what; 1510 1511 assert(0); 1512 } 1513 1514 ///. 1515 Element insertAfter(in Element where, Element what) 1516 in { 1517 assert(where !is null); 1518 assert(where.parentNode is this); 1519 assert(what !is null); 1520 assert(what.parentNode is null); 1521 } 1522 out (ret) { 1523 assert(where.parentNode is this); 1524 assert(what.parentNode is this); 1525 assert(what.parentDocument is this.parentDocument); 1526 assert(ret is what); 1527 } 1528 do { 1529 foreach(i, e; children) { 1530 if(e is where) { 1531 children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $]; 1532 what.parentNode = this; 1533 what.parentDocument = this.parentDocument; 1534 return what; 1535 } 1536 } 1537 1538 return what; 1539 1540 assert(0); 1541 } 1542 1543 /// swaps one child for a new thing. Returns the old child which is now parentless. 1544 Element swapNode(Element child, Element replacement) 1545 in { 1546 assert(child !is null); 1547 assert(replacement !is null); 1548 assert(child.parentNode is this); 1549 } 1550 out(ret) { 1551 assert(ret is child); 1552 assert(ret.parentNode is null); 1553 assert(replacement.parentNode is this); 1554 assert(replacement.parentDocument is this.parentDocument); 1555 } 1556 do { 1557 foreach(ref c; this.children) 1558 if(c is child) { 1559 c.parentNode = null; 1560 c = replacement; 1561 c.parentNode = this; 1562 c.parentDocument = this.parentDocument; 1563 return child; 1564 } 1565 assert(0); 1566 } 1567 1568 1569 ///. 1570 Element appendText(string text) { 1571 Element e = new TextNode(parentDocument, text); 1572 appendChild(e); 1573 return this; 1574 } 1575 1576 ///. 1577 @property Element[] childElements() { 1578 Element[] ret; 1579 foreach(c; children) 1580 if(c.nodeType == 1) 1581 ret ~= c; 1582 return ret; 1583 } 1584 1585 /// Appends the given html to the element, returning the elements appended 1586 Element[] appendHtml(string html) { 1587 Document d = new Document("<root>" ~ html ~ "</root>"); 1588 return stealChildren(d.root); 1589 } 1590 1591 1592 ///. 1593 void insertChildAfter(Element child, Element where) 1594 in { 1595 assert(child !is null); 1596 assert(where !is null); 1597 assert(where.parentNode is this); 1598 assert(!selfClosed); 1599 //assert(isInArray(where, children)); 1600 } 1601 out { 1602 assert(child.parentNode is this); 1603 assert(where.parentNode is this); 1604 //assert(isInArray(where, children)); 1605 //assert(isInArray(child, children)); 1606 } 1607 do { 1608 foreach(ref i, c; children) { 1609 if(c is where) { 1610 i++; 1611 children = children[0..i] ~ child ~ children[i..$]; 1612 child.parentNode = this; 1613 child.parentDocument = this.parentDocument; 1614 break; 1615 } 1616 } 1617 } 1618 1619 ///. 1620 Element[] stealChildren(Element e, Element position = null) 1621 in { 1622 assert(!selfClosed); 1623 assert(e !is null); 1624 //if(position !is null) 1625 //assert(isInArray(position, children)); 1626 } 1627 out (ret) { 1628 assert(e.children.length == 0); 1629 debug foreach(child; ret) { 1630 assert(child.parentNode is this); 1631 assert(child.parentDocument is this.parentDocument); 1632 } 1633 } 1634 do { 1635 foreach(c; e.children) { 1636 c.parentNode = this; 1637 c.parentDocument = this.parentDocument; 1638 } 1639 if(position is null) 1640 children ~= e.children; 1641 else { 1642 foreach(i, child; children) { 1643 if(child is position) { 1644 children = children[0..i] ~ 1645 e.children ~ 1646 children[i..$]; 1647 break; 1648 } 1649 } 1650 } 1651 1652 auto ret = e.children.dup; 1653 e.children.length = 0; 1654 1655 return ret; 1656 } 1657 1658 /// Puts the current element first in our children list. The given element must not have a parent already. 1659 Element prependChild(Element e) 1660 in { 1661 assert(e.parentNode is null); 1662 assert(!selfClosed); 1663 } 1664 out { 1665 assert(e.parentNode is this); 1666 assert(e.parentDocument is this.parentDocument); 1667 assert(children[0] is e); 1668 } 1669 do { 1670 e.parentNode = this; 1671 e.parentDocument = this.parentDocument; 1672 children = e ~ children; 1673 return e; 1674 } 1675 1676 1677 /** 1678 Returns a string containing all child elements, formatted such that it could be pasted into 1679 an XML file. 1680 */ 1681 @property string innerHTML(Appender!string where = appender!string()) const { 1682 if(children is null) 1683 return ""; 1684 1685 auto start = where.data.length; 1686 1687 foreach(child; children) { 1688 assert(child !is null); 1689 1690 child.writeToAppender(where); 1691 } 1692 1693 return where.data[start .. $]; 1694 } 1695 1696 /** 1697 Takes some html and replaces the element's children with the tree made from the string. 1698 */ 1699 @property Element innerHTML(string html, bool strict = false) { 1700 if(html.length) 1701 selfClosed = false; 1702 1703 if(html.length == 0) { 1704 // I often say innerHTML = ""; as a shortcut to clear it out, 1705 // so let's optimize that slightly. 1706 removeAllChildren(); 1707 return this; 1708 } 1709 1710 auto doc = new Document(); 1711 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document 1712 1713 children = doc.root.children; 1714 foreach(c; children) { 1715 c.parentNode = this; 1716 c.parentDocument = this.parentDocument; 1717 } 1718 1719 reparentTreeDocuments(); 1720 1721 doc.root.children = null; 1722 1723 return this; 1724 } 1725 1726 /// ditto 1727 @property Element innerHTML(Html html) { 1728 return this.innerHTML = html.source; 1729 } 1730 1731 private void reparentTreeDocuments() { 1732 foreach(c; this.tree) 1733 c.parentDocument = this.parentDocument; 1734 } 1735 1736 /** 1737 Replaces this node with the given html string, which is parsed 1738 1739 Note: this invalidates the this reference, since it is removed 1740 from the tree. 1741 1742 Returns the new children that replace this. 1743 */ 1744 @property Element[] outerHTML(string html) { 1745 auto doc = new Document(); 1746 doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness 1747 1748 children = doc.root.children; 1749 foreach(c; children) { 1750 c.parentNode = this; 1751 c.parentDocument = this.parentDocument; 1752 } 1753 1754 1755 reparentTreeDocuments(); 1756 1757 1758 stripOut(); 1759 1760 return doc.root.children; 1761 } 1762 1763 /// Returns all the html for this element, including the tag itself. 1764 /// This is equivalent to calling toString(). 1765 @property string outerHTML() { 1766 return this.toString(); 1767 } 1768 1769 /// This sets the inner content of the element *without* trying to parse it. 1770 /// You can inject any code in there; this serves as an escape hatch from the dom. 1771 /// 1772 /// The only times you might actually need it are for < style > and < script > tags in html. 1773 /// Other than that, innerHTML and/or innerText should do the job. 1774 @property void innerRawSource(string rawSource) { 1775 children.length = 0; 1776 auto rs = new RawSource(parentDocument, rawSource); 1777 rs.parentNode = this; 1778 1779 children ~= rs; 1780 } 1781 1782 ///. 1783 Element replaceChild(Element find, Element replace) 1784 in { 1785 assert(find !is null); 1786 assert(replace !is null); 1787 assert(replace.parentNode is null); 1788 } 1789 out(ret) { 1790 assert(ret is replace); 1791 assert(replace.parentNode is this); 1792 assert(replace.parentDocument is this.parentDocument); 1793 assert(find.parentNode is null); 1794 } 1795 do { 1796 for(int i = 0; i < children.length; i++) { 1797 if(children[i] is find) { 1798 replace.parentNode = this; 1799 children[i].parentNode = null; 1800 children[i] = replace; 1801 replace.parentDocument = this.parentDocument; 1802 return replace; 1803 } 1804 } 1805 1806 throw new Exception("no such child"); 1807 } 1808 1809 /** 1810 Replaces the given element with a whole group. 1811 */ 1812 void replaceChild(Element find, Element[] replace) 1813 in { 1814 assert(find !is null); 1815 assert(replace !is null); 1816 assert(find.parentNode is this); 1817 debug foreach(r; replace) 1818 assert(r.parentNode is null); 1819 } 1820 out { 1821 assert(find.parentNode is null); 1822 assert(children.length >= replace.length); 1823 debug foreach(child; children) 1824 assert(child !is find); 1825 debug foreach(r; replace) 1826 assert(r.parentNode is this); 1827 } 1828 do { 1829 if(replace.length == 0) { 1830 removeChild(find); 1831 return; 1832 } 1833 assert(replace.length); 1834 for(int i = 0; i < children.length; i++) { 1835 if(children[i] is find) { 1836 children[i].parentNode = null; // this element should now be dead 1837 children[i] = replace[0]; 1838 foreach(e; replace) { 1839 e.parentNode = this; 1840 e.parentDocument = this.parentDocument; 1841 } 1842 1843 children = .insertAfter(children, i, replace[1..$]); 1844 1845 return; 1846 } 1847 } 1848 1849 throw new Exception("no such child"); 1850 } 1851 1852 1853 /** 1854 Removes the given child from this list. 1855 1856 Returns the removed element. 1857 */ 1858 Element removeChild(Element c) 1859 in { 1860 assert(c !is null); 1861 assert(c.parentNode is this); 1862 } 1863 out { 1864 debug foreach(child; children) 1865 assert(child !is c); 1866 assert(c.parentNode is null); 1867 } 1868 do { 1869 foreach(i, e; children) { 1870 if(e is c) { 1871 children = children[0..i] ~ children [i+1..$]; 1872 c.parentNode = null; 1873 return c; 1874 } 1875 } 1876 1877 throw new Exception("no such child"); 1878 } 1879 1880 /// This removes all the children from this element, returning the old list. 1881 Element[] removeChildren() 1882 out (ret) { 1883 assert(children.length == 0); 1884 debug foreach(r; ret) 1885 assert(r.parentNode is null); 1886 } 1887 do { 1888 Element[] oldChildren = children.dup; 1889 foreach(c; oldChildren) 1890 c.parentNode = null; 1891 1892 children.length = 0; 1893 1894 return oldChildren; 1895 } 1896 1897 /** 1898 Fetch the inside text, with all tags stripped out. 1899 1900 <p>cool <b>api</b> & code dude<p> 1901 innerText of that is "cool api & code dude". 1902 */ 1903 @property string innerText() const { 1904 string s; 1905 foreach(child; children) { 1906 if(child.nodeType != NodeType.Text) 1907 s ~= child.innerText; 1908 else 1909 s ~= child.nodeValue(); 1910 } 1911 return s; 1912 } 1913 1914 /** 1915 Sets the inside text, replacing all children. You don't 1916 have to worry about entity encoding. 1917 */ 1918 @property void innerText(string text) { 1919 selfClosed = false; 1920 Element e = new TextNode(parentDocument, text); 1921 e.parentNode = this; 1922 children = [e]; 1923 } 1924 1925 /** 1926 Strips this node out of the document, replacing it with the given text 1927 */ 1928 @property void outerText(string text) { 1929 parentNode.replaceChild(this, new TextNode(parentDocument, text)); 1930 } 1931 1932 /** 1933 Same result as innerText; the tag with all inner tags stripped out 1934 */ 1935 string outerText() const { 1936 return innerText; 1937 } 1938 1939 1940 /* ******************************* 1941 Miscellaneous 1942 *********************************/ 1943 1944 /// This is a full clone of the element 1945 @property Element cloned() 1946 /+ 1947 out(ret) { 1948 // FIXME: not sure why these fail... 1949 assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length)); 1950 assert(ret.tagName == this.tagName); 1951 } 1952 do { 1953 +/ 1954 { 1955 auto e = Element.make(this.tagName); 1956 e.parentDocument = this.parentDocument; 1957 e.attributes = this.attributes.aadup; 1958 e.selfClosed = this.selfClosed; 1959 foreach(child; children) { 1960 e.appendChild(child.cloned); 1961 } 1962 1963 return e; 1964 } 1965 1966 /// Clones the node. If deepClone is true, clone all inner tags too. If false, only do this tag (and its attributes), but it will have no contents. 1967 Element cloneNode(bool deepClone) { 1968 if(deepClone) 1969 return this.cloned; 1970 1971 // shallow clone 1972 auto e = Element.make(this.tagName); 1973 e.parentDocument = this.parentDocument; 1974 e.attributes = this.attributes.aadup; 1975 e.selfClosed = this.selfClosed; 1976 return e; 1977 } 1978 1979 ///. 1980 string nodeValue() const { 1981 return ""; 1982 } 1983 1984 // should return int 1985 ///. 1986 @property int nodeType() const { 1987 return 1; 1988 } 1989 1990 1991 invariant () { 1992 assert(tagName.indexOf(" ") == -1); 1993 1994 if(children !is null) 1995 debug foreach(child; children) { 1996 // assert(parentNode !is null); 1997 assert(child !is null); 1998 assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName)); 1999 assert(child !is this); 2000 assert(child !is parentNode); 2001 } 2002 2003 /+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out 2004 if(parentNode !is null) { 2005 // if you have a parent, you should share the same parentDocument; this is appendChild()'s job 2006 auto lol = cast(TextNode) this; 2007 assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents); 2008 } 2009 +/ 2010 //assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required 2011 // reason is so you can create these without needing a reference to the document 2012 } 2013 2014 /** 2015 Turns the whole element, including tag, attributes, and children, into a string which could be pasted into 2016 an XML file. 2017 */ 2018 override string toString() const { 2019 return writeToAppender(); 2020 } 2021 2022 /// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time. 2023 /// Returns the string it creates. 2024 string writeToAppender(Appender!string where = appender!string()) const { 2025 assert(tagName !is null); 2026 2027 where.reserve((this.children.length + 1) * 512); 2028 2029 auto start = where.data.length; 2030 2031 where.put("<"); 2032 where.put(tagName); 2033 2034 foreach(n, v ; attributes) { 2035 assert(n !is null); 2036 //assert(v !is null); 2037 where.put(" "); 2038 where.put(n); 2039 where.put("=\""); 2040 htmlEntitiesEncode(v, where); 2041 where.put("\""); 2042 } 2043 2044 if(selfClosed){ 2045 where.put(" />"); 2046 return where.data[start .. $]; 2047 } 2048 2049 where.put('>'); 2050 2051 innerHTML(where); 2052 2053 where.put("</"); 2054 where.put(tagName); 2055 where.put('>'); 2056 2057 return where.data[start .. $]; 2058 } 2059 2060 /** 2061 Returns a lazy range of all its children, recursively. 2062 */ 2063 @property ElementStream tree() { 2064 return new ElementStream(this); 2065 } 2066 2067 // I moved these from Form because they are generally useful. 2068 // Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here. 2069 /// Tags: HTML, HTML5 2070 // FIXME: add overloads for other label types... 2071 Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 2072 auto fs = this; 2073 auto i = fs.addChild("label"); 2074 2075 if(!(type == "checkbox" || type == "radio")) 2076 i.addChild("span", label); 2077 2078 Element input; 2079 if(type == "textarea") 2080 input = i.addChild("textarea"). 2081 setAttribute("name", name). 2082 setAttribute("rows", "6"); 2083 else 2084 input = i.addChild("input"). 2085 setAttribute("name", name). 2086 setAttribute("type", type); 2087 2088 if(type == "checkbox" || type == "radio") 2089 i.addChild("span", label); 2090 2091 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 2092 fieldOptions.applyToElement(input); 2093 return i; 2094 } 2095 2096 Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 2097 auto fs = this; 2098 auto i = fs.addChild("label"); 2099 i.addChild(label); 2100 Element input; 2101 if(type == "textarea") 2102 input = i.addChild("textarea"). 2103 setAttribute("name", name). 2104 setAttribute("rows", "6"); 2105 else 2106 input = i.addChild("input"). 2107 setAttribute("name", name). 2108 setAttribute("type", type); 2109 2110 // these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later. 2111 fieldOptions.applyToElement(input); 2112 return i; 2113 } 2114 2115 Element addField(string label, string name, FormFieldOptions fieldOptions) { 2116 return addField(label, name, "text", fieldOptions); 2117 } 2118 2119 Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 2120 auto fs = this; 2121 auto i = fs.addChild("label"); 2122 i.addChild("span", label); 2123 auto sel = i.addChild("select").setAttribute("name", name); 2124 2125 foreach(k, opt; options) 2126 sel.addChild("option", opt, k); 2127 2128 // FIXME: implement requirements somehow 2129 2130 return i; 2131 } 2132 2133 Element addSubmitButton(string label = null) { 2134 auto t = this; 2135 auto holder = t.addChild("div"); 2136 holder.addClass("submit-holder"); 2137 auto i = holder.addChild("input"); 2138 i.type = "submit"; 2139 if(label.length) 2140 i.value = label; 2141 return holder; 2142 } 2143 2144 } 2145 2146 ///. 2147 class DocumentFragment : Element { 2148 ///. 2149 this(Document _parentDocument) { 2150 tagName = "#fragment"; 2151 super(_parentDocument); 2152 } 2153 2154 ///. 2155 override string writeToAppender(Appender!string where = appender!string()) const { 2156 return this.innerHTML(where); 2157 } 2158 } 2159 2160 /// Given text, encode all html entities on it - &, <, >, and ". This function also 2161 /// encodes all 8 bit characters as entities, thus ensuring the resultant text will work 2162 /// even if your charset isn't set right. 2163 /// 2164 /// The output parameter can be given to append to an existing buffer. You don't have to 2165 /// pass one; regardless, the return value will be usable for you, with just the data encoded. 2166 string htmlEntitiesEncode(string data, Appender!string output = appender!string()) { 2167 // if there's no entities, we can save a lot of time by not bothering with the 2168 // decoding loop. This check cuts the net toString time by better than half in my test. 2169 // let me know if it made your tests worse though, since if you use an entity in just about 2170 // every location, the check will add time... but I suspect the average experience is like mine 2171 // since the check gives up as soon as it can anyway. 2172 2173 bool shortcut = true; 2174 foreach(char c; data) { 2175 // non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it. 2176 if(c == '<' || c == '>' || c == '"' || c == '&' || cast(uint) c > 127) { 2177 shortcut = false; // there's actual work to be done 2178 break; 2179 } 2180 } 2181 2182 if(shortcut) { 2183 output.put(data); 2184 return data; 2185 } 2186 2187 auto start = output.data.length; 2188 2189 output.reserve(data.length + 64); // grab some extra space for the encoded entities 2190 2191 foreach(dchar d; data) { 2192 if(d == '&') 2193 output.put("&"); 2194 else if (d == '<') 2195 output.put("<"); 2196 else if (d == '>') 2197 output.put(">"); 2198 else if (d == '\"') 2199 output.put("""); 2200 // else if (d == '\'') 2201 // output.put("'"); // if you are in an attribute, it might be important to encode for the same reason as double quotes 2202 // FIXME: should I encode apostrophes too? as '... I could also do space but if your html is so bad that it doesn't 2203 // quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh 2204 // idk about apostrophes though. Might be worth it, might not. 2205 else if (d < 128 && d > 0) 2206 output.put(d); 2207 else 2208 output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";"); 2209 } 2210 2211 //assert(output !is null); // this fails on empty attributes..... 2212 return output.data[start .. $]; 2213 2214 // data = data.replace("\u00a0", " "); 2215 } 2216 2217 /// An alias for htmlEntitiesEncode; it works for xml too 2218 string xmlEntitiesEncode(string data) { 2219 return htmlEntitiesEncode(data); 2220 } 2221 2222 /// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters. 2223 dchar parseEntity(in dchar[] entity) { 2224 switch(entity[1..$-1]) { 2225 case "quot": 2226 return '"'; 2227 case "apos": 2228 return '\''; 2229 case "lt": 2230 return '<'; 2231 case "gt": 2232 return '>'; 2233 case "amp": 2234 return '&'; 2235 // the next are html rather than xml 2236 2237 case "Agrave": return '\u00C0'; 2238 case "Aacute": return '\u00C1'; 2239 case "Acirc": return '\u00C2'; 2240 case "Atilde": return '\u00C3'; 2241 case "Auml": return '\u00C4'; 2242 case "Aring": return '\u00C5'; 2243 case "AElig": return '\u00C6'; 2244 case "Ccedil": return '\u00C7'; 2245 case "Egrave": return '\u00C8'; 2246 case "Eacute": return '\u00C9'; 2247 case "Ecirc": return '\u00CA'; 2248 case "Euml": return '\u00CB'; 2249 case "Igrave": return '\u00CC'; 2250 case "Iacute": return '\u00CD'; 2251 case "Icirc": return '\u00CE'; 2252 case "Iuml": return '\u00CF'; 2253 case "ETH": return '\u00D0'; 2254 case "Ntilde": return '\u00D1'; 2255 case "Ograve": return '\u00D2'; 2256 case "Oacute": return '\u00D3'; 2257 case "Ocirc": return '\u00D4'; 2258 case "Otilde": return '\u00D5'; 2259 case "Ouml": return '\u00D6'; 2260 case "Oslash": return '\u00D8'; 2261 case "Ugrave": return '\u00D9'; 2262 case "Uacute": return '\u00DA'; 2263 case "Ucirc": return '\u00DB'; 2264 case "Uuml": return '\u00DC'; 2265 case "Yacute": return '\u00DD'; 2266 case "THORN": return '\u00DE'; 2267 case "szlig": return '\u00DF'; 2268 case "agrave": return '\u00E0'; 2269 case "aacute": return '\u00E1'; 2270 case "acirc": return '\u00E2'; 2271 case "atilde": return '\u00E3'; 2272 case "auml": return '\u00E4'; 2273 case "aring": return '\u00E5'; 2274 case "aelig": return '\u00E6'; 2275 case "ccedil": return '\u00E7'; 2276 case "egrave": return '\u00E8'; 2277 case "eacute": return '\u00E9'; 2278 case "ecirc": return '\u00EA'; 2279 case "euml": return '\u00EB'; 2280 case "igrave": return '\u00EC'; 2281 case "iacute": return '\u00ED'; 2282 case "icirc": return '\u00EE'; 2283 case "iuml": return '\u00EF'; 2284 case "eth": return '\u00F0'; 2285 case "ntilde": return '\u00F1'; 2286 case "ograve": return '\u00F2'; 2287 case "oacute": return '\u00F3'; 2288 case "ocirc": return '\u00F4'; 2289 case "otilde": return '\u00F5'; 2290 case "ouml": return '\u00F6'; 2291 case "oslash": return '\u00F8'; 2292 case "ugrave": return '\u00F9'; 2293 case "uacute": return '\u00FA'; 2294 case "ucirc": return '\u00FB'; 2295 case "uuml": return '\u00FC'; 2296 case "yacute": return '\u00FD'; 2297 case "thorn": return '\u00FE'; 2298 case "yuml": return '\u00FF'; 2299 case "nbsp": return '\u00A0'; 2300 case "iexcl": return '\u00A1'; 2301 case "cent": return '\u00A2'; 2302 case "pound": return '\u00A3'; 2303 case "curren": return '\u00A4'; 2304 case "yen": return '\u00A5'; 2305 case "brvbar": return '\u00A6'; 2306 case "sect": return '\u00A7'; 2307 case "uml": return '\u00A8'; 2308 case "copy": return '\u00A9'; 2309 case "ordf": return '\u00AA'; 2310 case "laquo": return '\u00AB'; 2311 case "not": return '\u00AC'; 2312 case "shy": return '\u00AD'; 2313 case "reg": return '\u00AE'; 2314 case "ldquo": return '\u201c'; 2315 case "rdquo": return '\u201d'; 2316 case "macr": return '\u00AF'; 2317 case "deg": return '\u00B0'; 2318 case "plusmn": return '\u00B1'; 2319 case "sup2": return '\u00B2'; 2320 case "sup3": return '\u00B3'; 2321 case "acute": return '\u00B4'; 2322 case "micro": return '\u00B5'; 2323 case "para": return '\u00B6'; 2324 case "middot": return '\u00B7'; 2325 case "cedil": return '\u00B8'; 2326 case "sup1": return '\u00B9'; 2327 case "ordm": return '\u00BA'; 2328 case "raquo": return '\u00BB'; 2329 case "frac14": return '\u00BC'; 2330 case "frac12": return '\u00BD'; 2331 case "frac34": return '\u00BE'; 2332 case "iquest": return '\u00BF'; 2333 case "times": return '\u00D7'; 2334 case "divide": return '\u00F7'; 2335 case "OElig": return '\u0152'; 2336 case "oelig": return '\u0153'; 2337 case "Scaron": return '\u0160'; 2338 case "scaron": return '\u0161'; 2339 case "Yuml": return '\u0178'; 2340 case "fnof": return '\u0192'; 2341 case "circ": return '\u02C6'; 2342 case "tilde": return '\u02DC'; 2343 case "trade": return '\u2122'; 2344 2345 case "hellip": return '\u2026'; 2346 case "ndash": return '\u2013'; 2347 case "mdash": return '\u2014'; 2348 case "lsquo": return '\u2018'; 2349 case "rsquo": return '\u2019'; 2350 2351 case "Omicron": return '\u039f'; 2352 case "omicron": return '\u03bf'; 2353 2354 // and handling numeric entities 2355 default: 2356 if(entity[1] == '#') { 2357 if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) { 2358 auto hex = entity[3..$-1]; 2359 2360 auto p = intFromHex(to!string(hex).toLower()); 2361 return cast(dchar) p; 2362 } else { 2363 auto decimal = entity[2..$-1]; 2364 2365 // dealing with broken html entities 2366 while(decimal.length && (decimal[0] < '0' || decimal[0] > '9')) 2367 decimal = decimal[1 .. $]; 2368 2369 if(decimal.length == 0) 2370 return ' '; // this is really broken html 2371 // done with dealing with broken stuff 2372 2373 auto p = std.conv.to!int(decimal); 2374 return cast(dchar) p; 2375 } 2376 } else 2377 return '\ufffd'; // replacement character diamond thing 2378 } 2379 2380 assert(0); 2381 } 2382 2383 import std.utf; 2384 import std.stdio; 2385 2386 /// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string. 2387 /// By default, it uses loose mode - it will try to return a useful string from garbage input too. 2388 /// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input. 2389 string htmlEntitiesDecode(string data, bool strict = false) { 2390 // this check makes a *big* difference; about a 50% improvement of parse speed on my test. 2391 if(data.indexOf("&") == -1) // all html entities begin with & 2392 return data; // if there are no entities in here, we can return the original slice and save some time 2393 2394 char[] a; // this seems to do a *better* job than appender! 2395 2396 char[4] buffer; 2397 2398 bool tryingEntity = false; 2399 dchar[] entityBeingTried; 2400 int entityAttemptIndex = 0; 2401 2402 foreach(dchar ch; data) { 2403 if(tryingEntity) { 2404 entityAttemptIndex++; 2405 entityBeingTried ~= ch; 2406 2407 // I saw some crappy html in the wild that looked like &0ї this tries to handle that. 2408 if(ch == '&') { 2409 if(strict) 2410 throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried)); 2411 2412 // if not strict, let's try to parse both. 2413 2414 if(entityBeingTried == "&&") 2415 a ~= "&"; // double amp means keep the first one, still try to parse the next one 2416 else 2417 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried))]; 2418 2419 // tryingEntity is still true 2420 entityBeingTried = entityBeingTried[0 .. 1]; // keep the & 2421 entityAttemptIndex = 0; // restarting o this 2422 } else 2423 if(ch == ';') { 2424 tryingEntity = false; 2425 a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried))]; 2426 } else if(ch == ' ') { 2427 // e.g. you & i 2428 if(strict) 2429 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried)); 2430 else { 2431 tryingEntity = false; 2432 a ~= to!(char[])(entityBeingTried); 2433 } 2434 } else { 2435 if(entityAttemptIndex >= 9) { 2436 if(strict) 2437 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried)); 2438 else { 2439 tryingEntity = false; 2440 a ~= to!(char[])(entityBeingTried); 2441 } 2442 } 2443 } 2444 } else { 2445 if(ch == '&') { 2446 tryingEntity = true; 2447 entityBeingTried = null; 2448 entityBeingTried ~= ch; 2449 entityAttemptIndex = 0; 2450 } else { 2451 a ~= buffer[0 .. std.utf.encode(buffer, ch)]; 2452 } 2453 } 2454 } 2455 2456 if(tryingEntity) { 2457 if(strict) 2458 throw new Exception("unterminated entity at " ~ to!string(entityBeingTried)); 2459 2460 // otherwise, let's try to recover, at least so we don't drop any data 2461 a ~= to!string(entityBeingTried); 2462 // FIXME: what if we have "cool &"? should we try to parse it? 2463 } 2464 2465 return cast(string) a; // assumeUnique is actually kinda slow, lol 2466 } 2467 2468 abstract class SpecialElement : Element { 2469 this(Document _parentDocument) { 2470 super(_parentDocument); 2471 } 2472 2473 ///. 2474 override Element appendChild(Element e) { 2475 assert(0, "Cannot append to a special node"); 2476 } 2477 2478 ///. 2479 @property override int nodeType() const { 2480 return 100; 2481 } 2482 } 2483 2484 ///. 2485 class RawSource : SpecialElement { 2486 ///. 2487 this(Document _parentDocument, string s) { 2488 super(_parentDocument); 2489 source = s; 2490 tagName = "#raw"; 2491 } 2492 2493 ///. 2494 override string nodeValue() const { 2495 return this.toString(); 2496 } 2497 2498 ///. 2499 override string writeToAppender(Appender!string where = appender!string()) const { 2500 where.put(source); 2501 return source; 2502 } 2503 2504 ///. 2505 string source; 2506 } 2507 2508 abstract class ServerSideCode : SpecialElement { 2509 this(Document _parentDocument, string type) { 2510 super(_parentDocument); 2511 tagName = "#" ~ type; 2512 } 2513 2514 ///. 2515 override string nodeValue() const { 2516 return this.source; 2517 } 2518 2519 ///. 2520 override string writeToAppender(Appender!string where = appender!string()) const { 2521 auto start = where.data.length; 2522 where.put("<"); 2523 where.put(source); 2524 where.put(">"); 2525 return where.data[start .. $]; 2526 } 2527 2528 ///. 2529 string source; 2530 } 2531 2532 ///. 2533 class PhpCode : ServerSideCode { 2534 ///. 2535 this(Document _parentDocument, string s) { 2536 super(_parentDocument, "php"); 2537 source = s; 2538 } 2539 } 2540 2541 ///. 2542 class AspCode : ServerSideCode { 2543 ///. 2544 this(Document _parentDocument, string s) { 2545 super(_parentDocument, "asp"); 2546 source = s; 2547 } 2548 } 2549 2550 ///. 2551 class BangInstruction : SpecialElement { 2552 ///. 2553 this(Document _parentDocument, string s) { 2554 super(_parentDocument); 2555 source = s; 2556 tagName = "#bpi"; 2557 } 2558 2559 ///. 2560 override string nodeValue() const { 2561 return this.source; 2562 } 2563 2564 ///. 2565 override string writeToAppender(Appender!string where = appender!string()) const { 2566 auto start = where.data.length; 2567 where.put("<!"); 2568 where.put(source); 2569 where.put(">"); 2570 return where.data[start .. $]; 2571 } 2572 2573 ///. 2574 string source; 2575 } 2576 2577 ///. 2578 class QuestionInstruction : SpecialElement { 2579 ///. 2580 this(Document _parentDocument, string s) { 2581 super(_parentDocument); 2582 source = s; 2583 tagName = "#qpi"; 2584 } 2585 2586 ///. 2587 override string nodeValue() const { 2588 return this.source; 2589 } 2590 2591 ///. 2592 override string writeToAppender(Appender!string where = appender!string()) const { 2593 auto start = where.data.length; 2594 where.put("<"); 2595 where.put(source); 2596 where.put(">"); 2597 return where.data[start .. $]; 2598 } 2599 2600 ///. 2601 string source; 2602 } 2603 2604 ///. 2605 class HtmlComment : SpecialElement { 2606 ///. 2607 this(Document _parentDocument, string s) { 2608 super(_parentDocument); 2609 source = s; 2610 tagName = "#comment"; 2611 } 2612 2613 ///. 2614 override string nodeValue() const { 2615 return this.source; 2616 } 2617 2618 ///. 2619 override string writeToAppender(Appender!string where = appender!string()) const { 2620 auto start = where.data.length; 2621 where.put("<!--"); 2622 where.put(source); 2623 where.put("-->"); 2624 return where.data[start .. $]; 2625 } 2626 2627 ///. 2628 string source; 2629 } 2630 2631 2632 2633 2634 ///. 2635 class TextNode : Element { 2636 public: 2637 ///. 2638 this(Document _parentDocument, string e) { 2639 super(_parentDocument); 2640 contents = e; 2641 tagName = "#text"; 2642 } 2643 2644 string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes 2645 2646 ///. 2647 static TextNode fromUndecodedString(Document _parentDocument, string html) { 2648 auto e = new TextNode(_parentDocument, ""); 2649 e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose); 2650 return e; 2651 } 2652 2653 ///. 2654 override @property Element cloned() { 2655 auto n = new TextNode(parentDocument, contents); 2656 return n; 2657 } 2658 2659 ///. 2660 override string nodeValue() const { 2661 return this.contents; //toString(); 2662 } 2663 2664 ///. 2665 @property override int nodeType() const { 2666 return NodeType.Text; 2667 } 2668 2669 ///. 2670 override string writeToAppender(Appender!string where = appender!string()) const { 2671 string s; 2672 if(contents.length) 2673 s = htmlEntitiesEncode(contents, where); 2674 else 2675 s = ""; 2676 2677 assert(s !is null); 2678 return s; 2679 } 2680 2681 ///. 2682 override Element appendChild(Element e) { 2683 assert(0, "Cannot append to a text node"); 2684 } 2685 2686 ///. 2687 string contents; 2688 // alias contents content; // I just mistype this a lot, 2689 } 2690 2691 /** 2692 There are subclasses of Element offering improved helper 2693 functions for the element in HTML. 2694 */ 2695 2696 ///. 2697 class Link : Element { 2698 2699 ///. 2700 this(Document _parentDocument) { 2701 super(_parentDocument); 2702 this.tagName = "a"; 2703 } 2704 2705 2706 ///. 2707 this(string href, string text) { 2708 super("a"); 2709 setAttribute("href", href); 2710 innerText = text; 2711 } 2712 /+ 2713 /// Returns everything in the href EXCEPT the query string 2714 @property string targetSansQuery() { 2715 2716 } 2717 2718 ///. 2719 @property string domainName() { 2720 2721 } 2722 2723 ///. 2724 @property string path 2725 +/ 2726 /// This gets a variable from the URL's query string. 2727 string getValue(string name) { 2728 auto vars = variablesHash(); 2729 if(name in vars) 2730 return vars[name]; 2731 return null; 2732 } 2733 2734 private string[string] variablesHash() { 2735 string href = getAttribute("href"); 2736 if(href is null) 2737 return null; 2738 2739 auto ques = href.indexOf("?"); 2740 string str = ""; 2741 if(ques != -1) { 2742 str = href[ques+1..$]; 2743 2744 auto fragment = str.indexOf("#"); 2745 if(fragment != -1) 2746 str = str[0..fragment]; 2747 } 2748 2749 string[] variables = str.split("&"); 2750 2751 string[string] hash; 2752 2753 foreach(var; variables) { 2754 auto index = var.indexOf("="); 2755 if(index == -1) 2756 hash[var] = ""; 2757 else { 2758 hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]); 2759 } 2760 } 2761 2762 return hash; 2763 } 2764 2765 ///. 2766 /*private*/ void updateQueryString(string[string] vars) { 2767 string href = getAttribute("href"); 2768 2769 auto question = href.indexOf("?"); 2770 if(question != -1) 2771 href = href[0..question]; 2772 2773 string frag = ""; 2774 auto fragment = href.indexOf("#"); 2775 if(fragment != -1) { 2776 frag = href[fragment..$]; 2777 href = href[0..fragment]; 2778 } 2779 2780 string query = "?"; 2781 bool first = true; 2782 foreach(name, value; vars) { 2783 if(!first) 2784 query ~= "&"; 2785 else 2786 first = false; 2787 2788 query ~= encodeComponent(name); 2789 if(value.length) 2790 query ~= "=" ~ encodeComponent(value); 2791 } 2792 2793 if(query != "?") 2794 href ~= query; 2795 2796 href ~= frag; 2797 2798 setAttribute("href", href); 2799 } 2800 2801 /// Sets or adds the variable with the given name to the given value 2802 /// It automatically URI encodes the values and takes care of the ? and &. 2803 override void setValue(string name, string variable) { 2804 auto vars = variablesHash(); 2805 vars[name] = variable; 2806 2807 updateQueryString(vars); 2808 } 2809 2810 /// Removes the given variable from the query string 2811 void removeValue(string name) { 2812 auto vars = variablesHash(); 2813 vars.remove(name); 2814 2815 updateQueryString(vars); 2816 } 2817 2818 /* 2819 ///. 2820 override string toString() { 2821 2822 } 2823 2824 ///. 2825 override string getAttribute(string name) { 2826 if(name == "href") { 2827 2828 } else 2829 return super.getAttribute(name); 2830 } 2831 */ 2832 } 2833 2834 ///. 2835 class Form : Element { 2836 2837 ///. 2838 this(Document _parentDocument) { 2839 super(_parentDocument); 2840 tagName = "form"; 2841 } 2842 2843 override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) { 2844 auto t = this.querySelector("fieldset div"); 2845 if(t is null) 2846 return super.addField(label, name, type, fieldOptions); 2847 else 2848 return t.addField(label, name, type, fieldOptions); 2849 } 2850 2851 override Element addField(string label, string name, FormFieldOptions fieldOptions) { 2852 auto type = "text"; 2853 auto t = this.querySelector("fieldset div"); 2854 if(t is null) 2855 return super.addField(label, name, type, fieldOptions); 2856 else 2857 return t.addField(label, name, type, fieldOptions); 2858 } 2859 2860 override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) { 2861 auto t = this.querySelector("fieldset div"); 2862 if(t is null) 2863 return super.addField(label, name, options, fieldOptions); 2864 else 2865 return t.addField(label, name, options, fieldOptions); 2866 } 2867 2868 override void setValue(string field, string value) { 2869 setValue(field, value, true); 2870 } 2871 2872 // FIXME: doesn't handle arrays; multiple fields can have the same name 2873 2874 /// Set's the form field's value. For input boxes, this sets the value attribute. For 2875 /// textareas, it sets the innerText. For radio boxes and select boxes, it removes 2876 /// the checked/selected attribute from all, and adds it to the one matching the value. 2877 /// For checkboxes, if the value is non-null and not empty, it checks the box. 2878 2879 /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 2880 /// Otherwise, it makes a new input with type=hidden to keep the value. 2881 void setValue(string field, string value, bool makeNew) { 2882 auto eles = getField(field); 2883 if(eles.length == 0) { 2884 if(makeNew) { 2885 addInput(field, value); 2886 return; 2887 } else 2888 throw new Exception("form field does not exist"); 2889 } 2890 2891 if(eles.length == 1) { 2892 auto e = eles[0]; 2893 switch(e.tagName) { 2894 default: assert(0); 2895 case "textarea": 2896 e.innerText = value; 2897 break; 2898 case "input": 2899 string type = e.getAttribute("type"); 2900 if(type is null) { 2901 e.value = value; 2902 return; 2903 } 2904 switch(type) { 2905 case "checkbox": 2906 case "radio": 2907 if(value.length) 2908 e.setAttribute("checked", "checked"); 2909 else 2910 e.removeAttribute("checked"); 2911 break; 2912 default: 2913 e.value = value; 2914 return; 2915 } 2916 break; 2917 case "select": 2918 bool found = false; 2919 foreach(child; e.tree) { 2920 if(child.tagName != "option") 2921 continue; 2922 string val = child.getAttribute("value"); 2923 if(val is null) 2924 val = child.innerText; 2925 if(val == value) { 2926 child.setAttribute("selected", "selected"); 2927 found = true; 2928 } else 2929 child.removeAttribute("selected"); 2930 } 2931 2932 if(!found) { 2933 e.addChild("option", value) 2934 .setAttribute("selected", "selected"); 2935 } 2936 break; 2937 } 2938 } else { 2939 // assume radio boxes 2940 foreach(e; eles) { 2941 string val = e.getAttribute("value"); 2942 //if(val is null) 2943 // throw new Exception("don't know what to do with radio boxes with null value"); 2944 if(val == value) 2945 e.setAttribute("checked", "checked"); 2946 else 2947 e.removeAttribute("checked"); 2948 } 2949 } 2950 } 2951 2952 /// This takes an array of strings and adds hidden <input> elements for each one of them. Unlike setValue, 2953 /// it makes no attempt to find and modify existing elements in the form to the new values. 2954 void addValueArray(string key, string[] arrayOfValues) { 2955 foreach(arr; arrayOfValues) 2956 addChild("input", key, arr); 2957 } 2958 2959 /// Gets the value of the field; what would be given if it submitted right now. (so 2960 /// it handles select boxes and radio buttons too). For checkboxes, if a value isn't 2961 /// given, but it is checked, it returns "checked", since null and "" are indistinguishable 2962 string getValue(string field) { 2963 auto eles = getField(field); 2964 if(eles.length == 0) 2965 return ""; 2966 if(eles.length == 1) { 2967 auto e = eles[0]; 2968 switch(e.tagName) { 2969 default: assert(0); 2970 case "input": 2971 if(e.type == "checkbox") { 2972 if(e.checked) 2973 return e.value.length ? e.value : "checked"; 2974 return ""; 2975 } else 2976 return e.value; 2977 case "textarea": 2978 return e.innerText; 2979 case "select": 2980 foreach(child; e.tree) { 2981 if(child.tagName != "option") 2982 continue; 2983 if(child.selected) 2984 return child.value; 2985 } 2986 break; 2987 } 2988 } else { 2989 // assuming radio 2990 foreach(e; eles) { 2991 if(e.checked) 2992 return e.value; 2993 } 2994 } 2995 2996 return ""; 2997 } 2998 2999 // FIXME: doesn't handle multiple elements with the same name (except radio buttons) 3000 ///. 3001 string getPostableData() { 3002 bool[string] namesDone; 3003 3004 string ret; 3005 bool outputted = false; 3006 3007 foreach(e; getElementsBySelector("[name]")) { 3008 if(e.name in namesDone) 3009 continue; 3010 3011 if(outputted) 3012 ret ~= "&"; 3013 else 3014 outputted = true; 3015 3016 ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); 3017 3018 namesDone[e.name] = true; 3019 } 3020 3021 return ret; 3022 } 3023 3024 /// Gets the actual elements with the given name 3025 Element[] getField(string name) { 3026 Element[] ret; 3027 foreach(e; tree) { 3028 if(e.name == name) 3029 ret ~= e; 3030 } 3031 return ret; 3032 } 3033 3034 /// Grabs the <label> with the given for tag, if there is one. 3035 Element getLabel(string forId) { 3036 foreach(e; tree) 3037 if(e.tagName == "label" && e.getAttribute("for") == forId) 3038 return e; 3039 return null; 3040 } 3041 3042 /// Adds a new INPUT field to the end of the form with the given attributes. 3043 Element addInput(string name, string value, string type = "hidden") { 3044 auto e = new Element(parentDocument, "input", null, true); 3045 e.name = name; 3046 e.value = value; 3047 e.type = type; 3048 3049 appendChild(e); 3050 3051 return e; 3052 } 3053 3054 /// Removes the given field from the form. It finds the element and knocks it right out. 3055 void removeField(string name) { 3056 foreach(e; getField(name)) 3057 e.parentNode.removeChild(e); 3058 } 3059 3060 /+ 3061 /// Returns all form members. 3062 @property Element[] elements() { 3063 3064 } 3065 3066 ///. 3067 string opDispatch(string name)(string v = null) 3068 // filter things that should actually be attributes on the form 3069 if( name != "method" && name != "action" && name != "enctype" 3070 && name != "style" && name != "name" && name != "id" && name != "class") 3071 { 3072 3073 } 3074 +/ 3075 /+ 3076 void submit() { 3077 // take its elements and submit them through http 3078 } 3079 +/ 3080 } 3081 3082 import std.conv; 3083 3084 ///. 3085 class Table : Element { 3086 3087 ///. 3088 this(Document _parentDocument) { 3089 super(_parentDocument); 3090 tagName = "table"; 3091 } 3092 3093 ///. 3094 Element th(T)(T t) { 3095 Element e; 3096 if(parentDocument !is null) 3097 e = parentDocument.createElement("th"); 3098 else 3099 e = Element.make("th"); 3100 static if(is(T == Html)) 3101 e.innerHTML = t; 3102 else 3103 e.innerText = to!string(t); 3104 return e; 3105 } 3106 3107 ///. 3108 Element td(T)(T t) { 3109 Element e; 3110 if(parentDocument !is null) 3111 e = parentDocument.createElement("td"); 3112 else 3113 e = Element.make("td"); 3114 static if(is(T == Html)) 3115 e.innerHTML = t; 3116 else 3117 e.innerText = to!string(t); 3118 return e; 3119 } 3120 3121 /// . 3122 Element appendHeaderRow(T...)(T t) { 3123 return appendRowInternal("th", "thead", t); 3124 } 3125 3126 /// . 3127 Element appendFooterRow(T...)(T t) { 3128 return appendRowInternal("td", "tfoot", t); 3129 } 3130 3131 /// . 3132 Element appendRow(T...)(T t) { 3133 return appendRowInternal("td", "tbody", t); 3134 } 3135 3136 void addColumnClasses(string[] classes...) { 3137 auto grid = getGrid(); 3138 foreach(row; grid) 3139 foreach(i, cl; classes) { 3140 if(cl.length) 3141 if(i < row.length) 3142 row[i].addClass(cl); 3143 } 3144 } 3145 3146 private Element appendRowInternal(T...)(string innerType, string findType, T t) { 3147 Element row = Element.make("tr"); 3148 3149 foreach(e; t) { 3150 static if(is(typeof(e) : Element)) { 3151 if(e.tagName == "td" || e.tagName == "th") 3152 row.appendChild(e); 3153 else { 3154 Element a = Element.make(innerType); 3155 3156 a.appendChild(e); 3157 3158 row.appendChild(a); 3159 } 3160 } else static if(is(typeof(e) == Html)) { 3161 Element a = Element.make(innerType); 3162 a.innerHTML = e.source; 3163 row.appendChild(a); 3164 } else static if(is(typeof(e) == Element[])) { 3165 Element a = Element.make(innerType); 3166 foreach(ele; e) 3167 a.appendChild(ele); 3168 row.appendChild(a); 3169 } else { 3170 Element a = Element.make(innerType); 3171 a.innerText = to!string(e); 3172 row.appendChild(a); 3173 } 3174 } 3175 3176 foreach(e; children) { 3177 if(e.tagName == findType) { 3178 e.appendChild(row); 3179 return row; 3180 } 3181 } 3182 3183 // the type was not found if we are here... let's add it so it is well-formed 3184 auto lol = this.addChild(findType); 3185 lol.appendChild(row); 3186 3187 return row; 3188 } 3189 3190 ///. 3191 Element captionElement() { 3192 Element cap; 3193 foreach(c; children) { 3194 if(c.tagName == "caption") { 3195 cap = c; 3196 break; 3197 } 3198 } 3199 3200 if(cap is null) { 3201 cap = Element.make("caption"); 3202 appendChild(cap); 3203 } 3204 3205 return cap; 3206 } 3207 3208 ///. 3209 @property string caption() { 3210 return captionElement().innerText; 3211 } 3212 3213 ///. 3214 @property void caption(string text) { 3215 captionElement().innerText = text; 3216 } 3217 3218 /// Gets the logical layout of the table as a rectangular grid of 3219 /// cells. It considers rowspan and colspan. A cell with a large 3220 /// span is represented in the grid by being referenced several times. 3221 /// The tablePortition parameter can get just a <thead>, <tbody>, or 3222 /// <tfoot> portion if you pass one. 3223 /// 3224 /// Note: the rectangular grid might include null cells. 3225 /// 3226 /// This is kinda expensive so you should call once when you want the grid, 3227 /// then do lookups on the returned array. 3228 TableCell[][] getGrid(Element tablePortition = null) 3229 in { 3230 if(tablePortition is null) 3231 assert(tablePortition is null); 3232 else { 3233 assert(tablePortition !is null); 3234 assert(tablePortition.parentNode is this); 3235 assert( 3236 tablePortition.tagName == "tbody" 3237 || 3238 tablePortition.tagName == "tfoot" 3239 || 3240 tablePortition.tagName == "thead" 3241 ); 3242 } 3243 } 3244 do { 3245 if(tablePortition is null) 3246 tablePortition = this; 3247 3248 TableCell[][] ret; 3249 3250 // FIXME: will also return rows of sub tables! 3251 auto rows = tablePortition.getElementsByTagName("tr"); 3252 ret.length = rows.length; 3253 3254 int maxLength = 0; 3255 3256 int insertCell(int row, int position, TableCell cell) { 3257 if(row >= ret.length) 3258 return position; // not supposed to happen - a rowspan is prolly too big. 3259 3260 if(position == -1) { 3261 position++; 3262 foreach(item; ret[row]) { 3263 if(item is null) 3264 break; 3265 position++; 3266 } 3267 } 3268 3269 if(position < ret[row].length) 3270 ret[row][position] = cell; 3271 else 3272 foreach(i; ret[row].length .. position + 1) { 3273 if(i == position) 3274 ret[row] ~= cell; 3275 else 3276 ret[row] ~= null; 3277 } 3278 return position; 3279 } 3280 3281 foreach(size_t i, rowElement; rows) { 3282 auto row = cast(TableRow) rowElement; 3283 assert(row !is null); 3284 assert(i < ret.length); 3285 3286 int position = 0; 3287 foreach(cellElement; rowElement.childNodes) { 3288 auto cell = cast(TableCell) cellElement; 3289 if(cell is null) 3290 continue; 3291 3292 // FIXME: colspan == 0 or rowspan == 0 3293 // is supposed to mean fill in the rest of 3294 // the table, not skip it 3295 foreach(int j; 0 .. cell.colspan) { 3296 foreach(int k; 0 .. cell.rowspan) 3297 // if the first row, always append. 3298 insertCell(cast(int)(k + i), k == 0 ? -1 : position, cell); 3299 position++; 3300 } 3301 } 3302 3303 if(ret[i].length > maxLength) 3304 maxLength = cast(int) ret[i].length; 3305 } 3306 3307 // want to ensure it's rectangular 3308 foreach(ref r; ret) { 3309 foreach(i; r.length .. maxLength) 3310 r ~= null; 3311 } 3312 3313 return ret; 3314 } 3315 } 3316 3317 /// Represents a table row element - a <tr> 3318 class TableRow : Element { 3319 ///. 3320 this(Document _parentDocument) { 3321 super(_parentDocument); 3322 tagName = "tr"; 3323 } 3324 3325 // FIXME: the standard says there should be a lot more in here, 3326 // but meh, I never use it and it's a pain to implement. 3327 } 3328 3329 /// Represents anything that can be a table cell - <td> or <th> html. 3330 class TableCell : Element { 3331 ///. 3332 this(Document _parentDocument, string _tagName) { 3333 super(_parentDocument, _tagName); 3334 } 3335 3336 @property int rowspan() const { 3337 int ret = 1; 3338 auto it = getAttribute("rowspan"); 3339 if(it.length) 3340 ret = to!int(it); 3341 return ret; 3342 } 3343 3344 @property int colspan() const { 3345 int ret = 1; 3346 auto it = getAttribute("colspan"); 3347 if(it.length) 3348 ret = to!int(it); 3349 return ret; 3350 } 3351 3352 @property int rowspan(int i) { 3353 setAttribute("rowspan", to!string(i)); 3354 return i; 3355 } 3356 3357 @property int colspan(int i) { 3358 setAttribute("colspan", to!string(i)); 3359 return i; 3360 } 3361 3362 } 3363 3364 3365 ///. 3366 class MarkupException : Exception { 3367 3368 ///. 3369 this(string message, string file = __FILE__, size_t line = __LINE__) { 3370 super(message, file, line); 3371 } 3372 } 3373 3374 /// This is used when you are using one of the require variants of navigation, and no matching element can be found in the tree. 3375 class ElementNotFoundException : Exception { 3376 3377 /// type == kind of element you were looking for and search == a selector describing the search. 3378 this(string type, string search, string file = __FILE__, size_t line = __LINE__) { 3379 super("Element of type '"~type~"' matching {"~search~"} not found.", file, line); 3380 } 3381 } 3382 3383 /// The html struct is used to differentiate between regular text nodes and html in certain functions 3384 /// 3385 /// Easiest way to construct it is like this: auto html = Html("<p>hello</p>"); 3386 struct Html { 3387 /// This string holds the actual html. Use it to retrieve the contents. 3388 string source; 3389 } 3390 3391 /// The main document interface, including a html parser. 3392 class Document : FileResource { 3393 ///. 3394 this(string data, bool caseSensitive = false, bool strict = false) { 3395 parseUtf8(data, caseSensitive, strict); 3396 } 3397 3398 /** 3399 Creates an empty document. It has *nothing* in it at all. 3400 */ 3401 this() { 3402 3403 } 3404 3405 /// This is just something I'm toying with. Right now, you use opIndex to put in css selectors. 3406 /// It returns a struct that forwards calls to all elements it holds, and returns itself so you 3407 /// can chain it. 3408 /// 3409 /// Example: document["p"].innerText("hello").addClass("modified"); 3410 /// 3411 /// Equivalent to: foreach(e; document.getElementsBySelector("p")) { e.innerText("hello"); e.addClas("modified"); } 3412 /// 3413 /// Note: always use function calls (not property syntax) and don't use toString in there for best results. 3414 /// 3415 /// You can also do things like: document["p"]["b"] though tbh I'm not sure why since the selector string can do all that anyway. Maybe 3416 /// you could put in some kind of custom filter function tho. 3417 ElementCollection opIndex(string selector) { 3418 auto e = ElementCollection(this.root); 3419 return e[selector]; 3420 } 3421 3422 string _contentType = "text/html; charset=utf-8"; 3423 3424 /// If you're using this for some other kind of XML, you can 3425 /// set the content type here. 3426 /// 3427 /// Note: this has no impact on the function of this class. 3428 /// It is only used if the document is sent via a protocol like HTTP. 3429 /// 3430 /// This may be called by parse() if it recognizes the data. Otherwise, 3431 /// if you don't set it, it assumes text/html; charset=utf-8. 3432 @property string contentType(string mimeType) { 3433 _contentType = mimeType; 3434 return _contentType; 3435 } 3436 3437 /// implementing the FileResource interface, useful for sending via 3438 /// http automatically. 3439 override @property string contentType() const { 3440 return _contentType; 3441 } 3442 3443 /// implementing the FileResource interface; it calls toString. 3444 override immutable(ubyte)[] getData() const { 3445 return cast(immutable(ubyte)[]) this.toString(); 3446 } 3447 3448 3449 /// Concatenates any consecutive text nodes 3450 /* 3451 void normalize() { 3452 3453 } 3454 */ 3455 3456 /// This will set delegates for parseSaw* (note: this overwrites anything else you set, and you setting subsequently will overwrite this) that add those things to the dom tree when it sees them. 3457 /// Call this before calling parse(). 3458 3459 /// Note this will also preserve the prolog and doctype from the original file, if there was one. 3460 void enableAddingSpecialTagsToDom() { 3461 parseSawComment = (string) => true; 3462 parseSawAspCode = (string) => true; 3463 parseSawPhpCode = (string) => true; 3464 parseSawQuestionInstruction = (string) => true; 3465 parseSawBangInstruction = (string) => true; 3466 } 3467 3468 /// If the parser sees a html comment, it will call this callback 3469 /// <!-- comment --> will call parseSawComment(" comment ") 3470 /// Return true if you want the node appended to the document. 3471 bool delegate(string) parseSawComment; 3472 3473 /// If the parser sees <% asp code... %>, it will call this callback. 3474 /// It will be passed "% asp code... %" or "%= asp code .. %" 3475 /// Return true if you want the node appended to the document. 3476 bool delegate(string) parseSawAspCode; 3477 3478 /// If the parser sees <?php php code... ?>, it will call this callback. 3479 /// It will be passed "?php php code... ?" or "?= asp code .. ?" 3480 /// Note: dom.d cannot identify the other php <? code ?> short format. 3481 /// Return true if you want the node appended to the document. 3482 bool delegate(string) parseSawPhpCode; 3483 3484 /// if it sees a <?xxx> that is not php or asp 3485 /// it calls this function with the contents. 3486 /// <?SOMETHING foo> calls parseSawQuestionInstruction("?SOMETHING foo") 3487 /// Unlike the php/asp ones, this ends on the first > it sees, without requiring ?>. 3488 /// Return true if you want the node appended to the document. 3489 bool delegate(string) parseSawQuestionInstruction; 3490 3491 /// if it sees a <! that is not CDATA or comment (CDATA is handled automatically and comments call parseSawComment), 3492 /// it calls this function with the contents. 3493 /// <!SOMETHING foo> calls parseSawBangInstruction("SOMETHING foo") 3494 /// Return true if you want the node appended to the document. 3495 bool delegate(string) parseSawBangInstruction; 3496 3497 /// Given the kind of garbage you find on the Internet, try to make sense of it. 3498 /// Equivalent to document.parse(data, false, false, null); 3499 /// (Case-insensitive, non-strict, determine character encoding from the data.) 3500 3501 /// NOTE: this makes no attempt at added security. 3502 /// 3503 /// It is a template so it lazily imports characterencodings. 3504 void parseGarbage()(string data) { 3505 parse(data, false, false, null); 3506 } 3507 3508 /// Parses well-formed UTF-8, case-sensitive, XML or XHTML 3509 /// Will throw exceptions on things like unclosed tags. 3510 void parseStrict(string data) { 3511 parseStream(toUtf8Stream(data), true, true); 3512 } 3513 3514 /// Parses well-formed UTF-8 in loose mode (by default). Tries to correct 3515 /// tag soup, but does NOT try to correct bad character encodings. 3516 /// 3517 /// They will still throw an exception. 3518 void parseUtf8(string data, bool caseSensitive = false, bool strict = false) { 3519 parseStream(toUtf8Stream(data), caseSensitive, strict); 3520 } 3521 3522 // this is a template so we get lazy import behavior 3523 Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) { 3524 import arsd.characterencodings; 3525 // gotta determine the data encoding. If you know it, pass it in above to skip all this. 3526 if(dataEncoding is null) { 3527 dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata); 3528 // it can't tell... probably a random 8 bit encoding. Let's check the document itself. 3529 // Now, XML and HTML can both list encoding in the document, but we can't really parse 3530 // it here without changing a lot of code until we know the encoding. So I'm going to 3531 // do some hackish string checking. 3532 if(dataEncoding is null) { 3533 auto dataAsBytes = cast(immutable(ubyte)[]) rawdata; 3534 // first, look for an XML prolog 3535 auto idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "encoding=\""); 3536 if(idx != -1) { 3537 idx += "encoding=\"".length; 3538 // we're probably past the prolog if it's this far in; we might be looking at 3539 // content. Forget about it. 3540 if(idx > 100) 3541 idx = -1; 3542 } 3543 // if that fails, we're looking for Content-Type http-equiv or a meta charset (see html5).. 3544 if(idx == -1) { 3545 idx = indexOfBytes(dataAsBytes, cast(immutable ubyte[]) "charset="); 3546 if(idx != -1) { 3547 idx += "charset=".length; 3548 if(dataAsBytes[idx] == '"') 3549 idx++; 3550 } 3551 } 3552 3553 // found something in either branch... 3554 if(idx != -1) { 3555 // read till a quote or about 12 chars, whichever comes first... 3556 auto end = idx; 3557 while(end < dataAsBytes.length && dataAsBytes[end] != '"' && end - idx < 12) 3558 end++; 3559 3560 dataEncoding = cast(string) dataAsBytes[idx .. end]; 3561 } 3562 // otherwise, we just don't know. 3563 } 3564 } 3565 3566 if(dataEncoding is null) { 3567 if(strict) 3568 throw new MarkupException("I couldn't figure out the encoding of this document."); 3569 else 3570 // if we really don't know by here, it means we already tried UTF-8, 3571 // looked for utf 16 and 32 byte order marks, and looked for xml or meta 3572 // tags... let's assume it's Windows-1252, since that's probably the most 3573 // common aside from utf that wouldn't be labeled. 3574 3575 dataEncoding = "Windows 1252"; 3576 } 3577 3578 // and now, go ahead and convert it. 3579 3580 string data; 3581 3582 if(!strict) { 3583 // if we're in non-strict mode, we need to check 3584 // the document for mislabeling too; sometimes 3585 // web documents will say they are utf-8, but aren't 3586 // actually properly encoded. If it fails to validate, 3587 // we'll assume it's actually Windows encoding - the most 3588 // likely candidate for mislabeled garbage. 3589 dataEncoding = dataEncoding.toLower(); 3590 dataEncoding = dataEncoding.replace(" ", ""); 3591 dataEncoding = dataEncoding.replace("-", ""); 3592 dataEncoding = dataEncoding.replace("_", ""); 3593 if(dataEncoding == "utf8") { 3594 try { 3595 validate(rawdata); 3596 } catch(UTFException e) { 3597 dataEncoding = "Windows 1252"; 3598 } 3599 } 3600 } 3601 3602 if(dataEncoding != "UTF-8") { 3603 if(strict) 3604 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 3605 else { 3606 try { 3607 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, dataEncoding); 3608 } catch(Exception e) { 3609 data = convertToUtf8(cast(immutable(ubyte)[]) rawdata, "Windows 1252"); 3610 } 3611 } 3612 } else 3613 data = rawdata; 3614 3615 return toUtf8Stream(data); 3616 } 3617 3618 private 3619 Utf8Stream toUtf8Stream(in string rawdata) { 3620 string data = rawdata; 3621 static if(is(Utf8Stream == string)) 3622 return data; 3623 else 3624 return new Utf8Stream(data); 3625 } 3626 3627 /** 3628 Take XMLish data and try to make the DOM tree out of it. 3629 3630 The goal isn't to be perfect, but to just be good enough to 3631 approximate Javascript's behavior. 3632 3633 If strict, it throws on something that doesn't make sense. 3634 (Examples: mismatched tags. It doesn't validate!) 3635 If not strict, it tries to recover anyway, and only throws 3636 when something is REALLY unworkable. 3637 3638 If strict is false, it uses a magic list of tags that needn't 3639 be closed. If you are writing a document specifically for this, 3640 try to avoid such - use self closed tags at least. Easier to parse. 3641 3642 The dataEncoding argument can be used to pass a specific 3643 charset encoding for automatic conversion. If null (which is NOT 3644 the default!), it tries to determine from the data itself, 3645 using the xml prolog or meta tags, and assumes UTF-8 if unsure. 3646 3647 If this assumption is wrong, it can throw on non-ascii 3648 characters! 3649 3650 3651 Note that it previously assumed the data was encoded as UTF-8, which 3652 is why the dataEncoding argument defaults to that. 3653 3654 So it shouldn't break backward compatibility. 3655 3656 But, if you want the best behavior on wild data - figuring it out from the document 3657 instead of assuming - you'll probably want to change that argument to null. 3658 3659 This is a template so it lazily imports arsd.characterencodings, which is required 3660 to fix up data encodings. 3661 3662 If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the 3663 dependency. If it is data from the Internet though, a random website, the encoding 3664 is often a lie. This function, if dataEncoding == null, can correct for that, or 3665 you can try parseGarbage. In those cases, arsd.characterencodings is required to 3666 compile. 3667 */ 3668 void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") { 3669 auto data = handleDataEncoding(rawdata, dataEncoding, strict); 3670 parseStream(data, caseSensitive, strict); 3671 } 3672 3673 // note: this work best in strict mode, unless data is just a simple string wrapper 3674 void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) { 3675 // FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler 3676 // of my big app. 3677 3678 assert(data !is null); 3679 3680 // go through character by character. 3681 // if you see a <, consider it a tag. 3682 // name goes until the first non tagname character 3683 // then see if it self closes or has an attribute 3684 3685 // if not in a tag, anything not a tag is a big text 3686 // node child. It ends as soon as it sees a < 3687 3688 // Whitespace in text or attributes is preserved, but not between attributes 3689 3690 // & and friends are converted when I know them, left the same otherwise 3691 3692 3693 // this it should already be done correctly.. so I'm leaving it off to net a ~10% speed boost on my typical test file (really) 3694 //validate(data); // it *must* be UTF-8 for this to work correctly 3695 3696 sizediff_t pos = 0; 3697 3698 clear(); 3699 3700 loose = !caseSensitive; 3701 3702 bool sawImproperNesting = false; 3703 bool paragraphHackfixRequired = false; 3704 3705 int getLineNumber(sizediff_t p) { 3706 int line = 1; 3707 foreach(c; data[0..p]) 3708 if(c == '\n') 3709 line++; 3710 return line; 3711 } 3712 3713 void parseError(string message) { 3714 throw new MarkupException(format("char %d (line %d): %s", pos, getLineNumber(pos), message)); 3715 } 3716 3717 void eatWhitespace() { 3718 while(pos < data.length && (data[pos] == ' ' || data[pos] == '\n' || data[pos] == '\t')) 3719 pos++; 3720 } 3721 3722 string readTagName() { 3723 // remember to include : for namespaces 3724 // basically just keep going until >, /, or whitespace 3725 auto start = pos; 3726 while( data[pos] != '>' && data[pos] != '/' && 3727 data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t') 3728 { 3729 pos++; 3730 if(pos == data.length) { 3731 if(strict) 3732 throw new Exception("tag name incomplete when file ended"); 3733 else 3734 break; 3735 } 3736 } 3737 3738 if(!caseSensitive) 3739 return toLower(data[start..pos]); 3740 else 3741 return data[start..pos]; 3742 } 3743 3744 string readAttributeName() { 3745 // remember to include : for namespaces 3746 // basically just keep going until >, /, or whitespace 3747 auto start = pos; 3748 while( data[pos] != '>' && data[pos] != '/' && data[pos] != '=' && 3749 data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t') 3750 { 3751 if(data[pos] == '<') { 3752 if(strict) 3753 throw new MarkupException("The character < can never appear in an attribute name. Line " ~ to!string(getLineNumber(pos))); 3754 else 3755 break; // e.g. <a href="something" <img src="poo" /></a>. The > should have been after the href, but some shitty files don't do that right and the browser handles it, so we will too, by pretending the > was indeed there 3756 } 3757 pos++; 3758 if(pos == data.length) { 3759 if(strict) 3760 throw new Exception("unterminated attribute name"); 3761 else 3762 break; 3763 } 3764 } 3765 3766 if(!caseSensitive) 3767 return toLower(data[start..pos]); 3768 else 3769 return data[start..pos]; 3770 } 3771 3772 string readAttributeValue() { 3773 if(pos >= data.length) { 3774 if(strict) 3775 throw new Exception("no attribute value before end of file"); 3776 else 3777 return null; 3778 } 3779 switch(data[pos]) { 3780 case '\'': 3781 case '"': 3782 auto started = pos; 3783 char end = data[pos]; 3784 pos++; 3785 auto start = pos; 3786 while(pos < data.length && data[pos] != end) 3787 pos++; 3788 if(strict && pos == data.length) 3789 throw new MarkupException("Unclosed attribute value, started on char " ~ to!string(started)); 3790 string v = htmlEntitiesDecode(data[start..pos], strict); 3791 pos++; // skip over the end 3792 return v; 3793 default: 3794 if(strict) 3795 parseError("Attributes must be quoted"); 3796 // read until whitespace or terminator (/ or >) 3797 auto start = pos; 3798 while( 3799 pos < data.length && 3800 data[pos] != '>' && 3801 // unquoted attributes might be urls, so gotta be careful with them and self-closed elements 3802 !(data[pos] == '/' && pos + 1 < data.length && data[pos+1] == '>') && 3803 data[pos] != ' ' && data[pos] != '\n' && data[pos] != '\t') 3804 pos++; 3805 3806 string v = htmlEntitiesDecode(data[start..pos], strict); 3807 // don't skip the end - we'll need it later 3808 return v; 3809 } 3810 } 3811 3812 TextNode readTextNode() { 3813 auto start = pos; 3814 while(pos < data.length && data[pos] != '<') { 3815 pos++; 3816 } 3817 3818 return TextNode.fromUndecodedString(this, data[start..pos]); 3819 } 3820 3821 // this is obsolete! 3822 RawSource readCDataNode() { 3823 auto start = pos; 3824 while(pos < data.length && data[pos] != '<') { 3825 pos++; 3826 } 3827 3828 return new RawSource(this, data[start..pos]); 3829 } 3830 3831 3832 struct Ele { 3833 int type; // element or closing tag or nothing 3834 /* 3835 type == 0 means regular node, self-closed (element is valid) 3836 type == 1 means closing tag (payload is the tag name, element may be valid) 3837 type == 2 means you should ignore it completely 3838 type == 3 means it is a special element that should be appended, if possible, e.g. a <!DOCTYPE> that was chosen to be kept, php code, or comment. It will be appended at the current element if inside the root, and to a special document area if not 3839 type == 4 means the document was totally empty 3840 */ 3841 Element element; // for type == 0 or type == 3 3842 string payload; // for type == 1 3843 } 3844 // recursively read a tag 3845 Ele readElement(string[] parentChain = null) { 3846 // FIXME: this is the slowest function in this module, by far, even in strict mode. 3847 // Loose mode should perform decently, but strict mode is the important one. 3848 if(!strict && parentChain is null) 3849 parentChain = []; 3850 3851 static string[] recentAutoClosedTags; 3852 3853 if(pos >= data.length) 3854 { 3855 if(strict) { 3856 throw new MarkupException("Gone over the input (is there no root element or did it never close?), chain: " ~ to!string(parentChain)); 3857 } else { 3858 if(parentChain.length) 3859 return Ele(1, null, parentChain[0]); // in loose mode, we just assume the document has ended 3860 else 3861 return Ele(4); // signal emptiness upstream 3862 } 3863 } 3864 3865 if(data[pos] != '<') { 3866 return Ele(0, readTextNode(), null); 3867 } 3868 3869 enforce(data[pos] == '<'); 3870 pos++; 3871 if(pos == data.length) { 3872 if(strict) 3873 throw new MarkupException("Found trailing < at end of file"); 3874 // if not strict, we'll just skip the switch 3875 } else 3876 switch(data[pos]) { 3877 // I don't care about these, so I just want to skip them 3878 case '!': // might be a comment, a doctype, or a special instruction 3879 pos++; 3880 3881 // FIXME: we should store these in the tree too 3882 // though I like having it stripped out tbh. 3883 3884 if(pos == data.length) { 3885 if(strict) 3886 throw new MarkupException("<! opened at end of file"); 3887 } else if(data[pos] == '-' && (pos + 1 < data.length) && data[pos+1] == '-') { 3888 // comment 3889 pos += 2; 3890 3891 // FIXME: technically, a comment is anything 3892 // between -- and -- inside a <!> block. 3893 // so in <!-- test -- lol> , the " lol" is NOT a comment 3894 // and should probably be handled differently in here, but for now 3895 // I'll just keep running until --> since that's the common way 3896 3897 auto commentStart = pos; 3898 while(pos+3 < data.length && data[pos..pos+3] != "-->") 3899 pos++; 3900 3901 auto end = commentStart; 3902 3903 if(pos + 3 >= data.length) { 3904 if(strict) 3905 throw new MarkupException("unclosed comment"); 3906 end = data.length; 3907 pos = data.length; 3908 } else { 3909 end = pos; 3910 assert(data[pos] == '-'); 3911 pos++; 3912 assert(data[pos] == '-'); 3913 pos++; 3914 assert(data[pos] == '>'); 3915 pos++; 3916 } 3917 3918 if(parseSawComment !is null) 3919 if(parseSawComment(data[commentStart .. end])) { 3920 return Ele(3, new HtmlComment(this, data[commentStart .. end]), null); 3921 } 3922 } else if(pos + 7 <= data.length && data[pos..pos + 7] == "[CDATA[") { 3923 pos += 7; 3924 3925 auto cdataStart = pos; 3926 3927 ptrdiff_t end = -1; 3928 typeof(end) cdataEnd; 3929 3930 if(pos < data.length) { 3931 // cdata isn't allowed to nest, so this should be generally ok, as long as it is found 3932 end = data[pos .. $].indexOf("]]>"); 3933 } 3934 3935 if(end == -1) { 3936 if(strict) 3937 throw new MarkupException("Unclosed CDATA section"); 3938 end = pos; 3939 cdataEnd = pos; 3940 } else { 3941 cdataEnd = pos + end; 3942 pos = cdataEnd + 3; 3943 } 3944 3945 return Ele(0, new TextNode(this, data[cdataStart .. cdataEnd]), null); 3946 } else { 3947 auto start = pos; 3948 while(pos < data.length && data[pos] != '>') 3949 pos++; 3950 3951 auto bangEnds = pos; 3952 if(pos == data.length) { 3953 if(strict) 3954 throw new MarkupException("unclosed processing instruction (<!xxx>)"); 3955 } else pos++; // skipping the > 3956 3957 if(parseSawBangInstruction !is null) 3958 if(parseSawBangInstruction(data[start .. bangEnds])) { 3959 // FIXME: these should be able to modify the parser state, 3960 // doing things like adding entities, somehow. 3961 3962 return Ele(3, new BangInstruction(this, data[start .. bangEnds]), null); 3963 } 3964 } 3965 3966 /* 3967 if(pos < data.length && data[pos] == '>') 3968 pos++; // skip the > 3969 else 3970 assert(!strict); 3971 */ 3972 break; 3973 case '%': 3974 case '?': 3975 /* 3976 Here's what we want to support: 3977 3978 <% asp code %> 3979 <%= asp code %> 3980 <?php php code ?> 3981 <?= php code ?> 3982 3983 The contents don't really matter, just if it opens with 3984 one of the above for, it ends on the two char terminator. 3985 3986 <?something> 3987 this is NOT php code 3988 because I've seen this in the wild: <?EM-dummyText> 3989 3990 This could be php with shorttags which would be cut off 3991 prematurely because if(a >) - that > counts as the close 3992 of the tag, but since dom.d can't tell the difference 3993 between that and the <?EM> real world example, it will 3994 not try to look for the ?> ending. 3995 3996 The difference between this and the asp/php stuff is that it 3997 ends on >, not ?>. ONLY <?php or <?= ends on ?>. The rest end 3998 on >. 3999 */ 4000 4001 char end = data[pos]; 4002 auto started = pos; 4003 bool isAsp = end == '%'; 4004 int currentIndex = 0; 4005 bool isPhp = false; 4006 bool isEqualTag = false; 4007 int phpCount = 0; 4008 4009 more: 4010 pos++; // skip the start 4011 if(pos == data.length) { 4012 if(strict) 4013 throw new MarkupException("Unclosed <"~end~" by end of file"); 4014 } else { 4015 currentIndex++; 4016 if(currentIndex == 1 && data[pos] == '=') { 4017 if(!isAsp) 4018 isPhp = true; 4019 isEqualTag = true; 4020 goto more; 4021 } 4022 if(currentIndex == 1 && data[pos] == 'p') 4023 phpCount++; 4024 if(currentIndex == 2 && data[pos] == 'h') 4025 phpCount++; 4026 if(currentIndex == 3 && data[pos] == 'p' && phpCount == 2) 4027 isPhp = true; 4028 4029 if(data[pos] == '>') { 4030 if((isAsp || isPhp) && data[pos - 1] != end) 4031 goto more; 4032 // otherwise we're done 4033 } else 4034 goto more; 4035 } 4036 4037 //writefln("%s: %s", isAsp ? "ASP" : isPhp ? "PHP" : "<? ", data[started .. pos]); 4038 auto code = data[started .. pos]; 4039 4040 4041 assert((pos < data.length && data[pos] == '>') || (!strict && pos == data.length)); 4042 if(pos < data.length) 4043 pos++; // get past the > 4044 4045 if(isAsp && parseSawAspCode !is null) { 4046 if(parseSawAspCode(code)) { 4047 return Ele(3, new AspCode(this, code), null); 4048 } 4049 } else if(isPhp && parseSawPhpCode !is null) { 4050 if(parseSawPhpCode(code)) { 4051 return Ele(3, new PhpCode(this, code), null); 4052 } 4053 } else if(!isAsp && !isPhp && parseSawQuestionInstruction !is null) { 4054 if(parseSawQuestionInstruction(code)) { 4055 return Ele(3, new QuestionInstruction(this, code), null); 4056 } 4057 } 4058 break; 4059 case '/': // closing an element 4060 pos++; // skip the start 4061 auto p = pos; 4062 while(pos < data.length && data[pos] != '>') 4063 pos++; 4064 //writefln("</%s>", data[p..pos]); 4065 if(pos == data.length && data[pos-1] != '>') { 4066 if(strict) 4067 throw new MarkupException("File ended before closing tag had a required >"); 4068 else 4069 data ~= ">"; // just hack it in 4070 } 4071 pos++; // skip the '>' 4072 4073 string tname = data[p..pos-1]; 4074 if(!caseSensitive) 4075 tname = tname.toLower(); 4076 4077 return Ele(1, null, tname); // closing tag reports itself here 4078 case ' ': // assume it isn't a real element... 4079 if(strict) 4080 parseError("bad markup - improperly placed <"); 4081 else 4082 return Ele(0, TextNode.fromUndecodedString(this, "<"), null); 4083 break; 4084 default: 4085 4086 if(!strict) { 4087 // what about something that kinda looks like a tag, but isn't? 4088 auto nextTag = data[pos .. $].indexOf("<"); 4089 auto closeTag = data[pos .. $].indexOf(">"); 4090 if(closeTag != -1 && nextTag != -1) 4091 if(nextTag < closeTag) { 4092 // since attribute names cannot possibly have a < in them, we'll look for an equal since it might be an attribute value... and even in garbage mode, it'd have to be a quoted one realistically 4093 4094 auto equal = data[pos .. $].indexOf("=\""); 4095 if(equal != -1 && equal < closeTag) { 4096 // this MIGHT be ok, soldier on 4097 } else { 4098 // definitely no good, this must be a (horribly distorted) text node 4099 pos++; // skip the < we're on - don't want text node to end prematurely 4100 auto node = readTextNode(); 4101 node.contents = "<" ~ node.contents; // put this back 4102 return Ele(0, node, null); 4103 } 4104 } 4105 } 4106 4107 string tagName = readTagName(); 4108 string[string] attributes; 4109 4110 Ele addTag(bool selfClosed) { 4111 if(selfClosed) 4112 pos++; 4113 else { 4114 if(!strict) 4115 if(tagName.isInArray(selfClosedElements)) 4116 // these are de-facto self closed 4117 selfClosed = true; 4118 } 4119 4120 if(strict) 4121 enforce(data[pos] == '>');//, format("got %s when expecting >\nContext:\n%s", data[pos], data[pos - 100 .. pos + 100])); 4122 else { 4123 // if we got here, it's probably because a slash was in an 4124 // unquoted attribute - don't trust the selfClosed value 4125 if(!selfClosed) 4126 selfClosed = tagName.isInArray(selfClosedElements); 4127 4128 while(pos < data.length && data[pos] != '>') 4129 pos++; 4130 } 4131 4132 auto whereThisTagStarted = pos; // for better error messages 4133 4134 pos++; 4135 4136 auto e = createElement(tagName); 4137 e.attributes = attributes; 4138 version(dom_node_indexes) { 4139 if(e.dataset.nodeIndex.length == 0) 4140 e.dataset.nodeIndex = to!string(&(e.attributes)); 4141 } 4142 e.selfClosed = selfClosed; 4143 e.parseAttributes(); 4144 4145 4146 // HACK to handle script and style as a raw data section as it is in HTML browsers 4147 if(tagName == "script" || tagName == "style") { 4148 if(!selfClosed) { 4149 string closer = "</" ~ tagName ~ ">"; 4150 ptrdiff_t ending; 4151 if(pos >= data.length) 4152 ending = -1; 4153 else 4154 ending = indexOf(data[pos..$], closer); 4155 4156 ending = indexOf(data[pos..$], closer, 0, (loose ? CaseSensitive.no : CaseSensitive.yes)); 4157 /* 4158 if(loose && ending == -1 && pos < data.length) 4159 ending = indexOf(data[pos..$], closer.toUpper()); 4160 */ 4161 if(ending == -1) { 4162 if(strict) 4163 throw new Exception("tag " ~ tagName ~ " never closed"); 4164 else { 4165 // let's call it totally empty and do the rest of the file as text. doing it as html could still result in some weird stuff like if(a<4) being read as <4 being a tag so it comes out if(a<4></4> and other weirdness) It is either a closed script tag or the rest of the file is forfeit. 4166 if(pos < data.length) { 4167 e = new TextNode(this, data[pos .. $]); 4168 pos = data.length; 4169 } 4170 } 4171 } else { 4172 ending += pos; 4173 e.innerRawSource = data[pos..ending]; 4174 pos = ending + closer.length; 4175 } 4176 } 4177 return Ele(0, e, null); 4178 } 4179 4180 bool closed = selfClosed; 4181 4182 void considerHtmlParagraphHack(Element n) { 4183 assert(!strict); 4184 if(e.tagName == "p" && e.tagName == n.tagName) { 4185 // html lets you write <p> para 1 <p> para 1 4186 // but in the dom tree, they should be siblings, not children. 4187 paragraphHackfixRequired = true; 4188 } 4189 } 4190 4191 //writef("<%s>", tagName); 4192 while(!closed) { 4193 Ele n; 4194 if(strict) 4195 n = readElement(); 4196 else 4197 n = readElement(parentChain ~ tagName); 4198 4199 if(n.type == 4) return n; // the document is empty 4200 4201 if(n.type == 3 && n.element !is null) { 4202 // special node, append if possible 4203 if(e !is null) 4204 e.appendChild(n.element); 4205 else 4206 piecesBeforeRoot ~= n.element; 4207 } else if(n.type == 0) { 4208 if(!strict) 4209 considerHtmlParagraphHack(n.element); 4210 e.appendChild(n.element); 4211 } else if(n.type == 1) { 4212 bool found = false; 4213 if(n.payload != tagName) { 4214 if(strict) 4215 parseError(format("mismatched tag: </%s> != <%s> (opened on line %d)", n.payload, tagName, getLineNumber(whereThisTagStarted))); 4216 else { 4217 sawImproperNesting = true; 4218 // this is so we don't drop several levels of awful markup 4219 if(n.element) { 4220 if(!strict) 4221 considerHtmlParagraphHack(n.element); 4222 e.appendChild(n.element); 4223 n.element = null; 4224 } 4225 4226 // is the element open somewhere up the chain? 4227 foreach(i, parent; parentChain) 4228 if(parent == n.payload) { 4229 recentAutoClosedTags ~= tagName; 4230 // just rotating it so we don't inadvertently break stuff with vile crap 4231 if(recentAutoClosedTags.length > 4) 4232 recentAutoClosedTags = recentAutoClosedTags[1 .. $]; 4233 4234 n.element = e; 4235 return n; 4236 } 4237 4238 // if not, this is a text node; we can't fix it up... 4239 4240 // If it's already in the tree somewhere, assume it is closed by algorithm 4241 // and we shouldn't output it - odds are the user just flipped a couple tags 4242 foreach(ele; e.tree) { 4243 if(ele.tagName == n.payload) { 4244 found = true; 4245 break; 4246 } 4247 } 4248 4249 foreach(ele; recentAutoClosedTags) { 4250 if(ele == n.payload) { 4251 found = true; 4252 break; 4253 } 4254 } 4255 4256 if(!found) // if not found in the tree though, it's probably just text 4257 e.appendChild(TextNode.fromUndecodedString(this, "</"~n.payload~">")); 4258 } 4259 } else { 4260 if(n.element) { 4261 if(!strict) 4262 considerHtmlParagraphHack(n.element); 4263 e.appendChild(n.element); 4264 } 4265 } 4266 4267 if(n.payload == tagName) // in strict mode, this is always true 4268 closed = true; 4269 } else { /*throw new Exception("wtf " ~ tagName);*/ } 4270 } 4271 //writef("</%s>\n", tagName); 4272 return Ele(0, e, null); 4273 } 4274 4275 // if a tag was opened but not closed by end of file, we can arrive here 4276 if(!strict && pos >= data.length) 4277 return addTag(false); 4278 //else if(strict) assert(0); // should be caught before 4279 4280 switch(data[pos]) { 4281 default: assert(0); 4282 case '/': // self closing tag 4283 return addTag(true); 4284 case '>': 4285 return addTag(false); 4286 case ' ': 4287 case '\t': 4288 case '\n': 4289 // there might be attributes... 4290 moreAttributes: 4291 eatWhitespace(); 4292 4293 // same deal as above the switch.... 4294 if(!strict && pos >= data.length) 4295 return addTag(false); 4296 4297 if(strict && pos >= data.length) 4298 throw new MarkupException("tag open, didn't find > before end of file"); 4299 4300 switch(data[pos]) { 4301 case '/': // self closing tag 4302 return addTag(true); 4303 case '>': // closed tag; open -- we now read the contents 4304 return addTag(false); 4305 default: // it is an attribute 4306 string attrName = readAttributeName(); 4307 string attrValue = attrName; 4308 if(pos >= data.length) { 4309 if(strict) 4310 assert(0, "this should have thrown in readAttributeName"); 4311 else { 4312 data ~= ">"; 4313 goto blankValue; 4314 } 4315 } 4316 if(data[pos] == '=') { 4317 pos++; 4318 attrValue = readAttributeValue(); 4319 } 4320 4321 blankValue: 4322 4323 if(strict && attrName in attributes) 4324 throw new MarkupException("Repeated attribute: " ~ attrName); 4325 4326 if(attrName.strip().length) 4327 attributes[attrName] = attrValue; 4328 else if(strict) throw new MarkupException("wtf, zero length attribute name"); 4329 4330 if(!strict && pos < data.length && data[pos] == '<') { 4331 // this is the broken tag that doesn't have a > at the end 4332 data = data[0 .. pos] ~ ">" ~ data[pos.. $]; 4333 // let's insert one as a hack 4334 goto case '>'; 4335 } 4336 4337 goto moreAttributes; 4338 } 4339 } 4340 } 4341 4342 return Ele(2, null, null); // this is a <! or <? thing that got ignored prolly. 4343 //assert(0); 4344 } 4345 4346 eatWhitespace(); 4347 Ele r; 4348 do { 4349 r = readElement(); // there SHOULD only be one element... 4350 4351 if(r.type == 3 && r.element !is null) 4352 piecesBeforeRoot ~= r.element; 4353 4354 if(r.type == 4) 4355 break; // the document is completely empty... 4356 } while (r.type != 0 || r.element.nodeType != 1); // we look past the xml prologue and doctype; root only begins on a regular node 4357 4358 root = r.element; 4359 4360 if(!strict) // in strict mode, we'll just ignore stuff after the xml 4361 while(r.type != 4) { 4362 r = readElement(); 4363 if(r.type != 4 && r.type != 2) { // if not empty and not ignored 4364 if(r.element !is null) 4365 piecesAfterRoot ~= r.element; 4366 } 4367 } 4368 4369 if(root is null) 4370 { 4371 if(strict) 4372 assert(0, "empty document should be impossible in strict mode"); 4373 else 4374 parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do 4375 } 4376 4377 if(paragraphHackfixRequired) { 4378 assert(!strict); // this should never happen in strict mode; it ought to never set the hack flag... 4379 4380 // in loose mode, we can see some "bad" nesting (it's valid html, but poorly formed xml). 4381 // It's hard to handle above though because my code sucks. So, we'll fix it here. 4382 4383 auto iterator = root.tree; 4384 foreach(ele; iterator) { 4385 if(ele.parentNode is null) 4386 continue; 4387 4388 if(ele.tagName == "p" && ele.parentNode.tagName == ele.tagName) { 4389 auto shouldBePreviousSibling = ele.parentNode; 4390 auto holder = shouldBePreviousSibling.parentNode; // this is the two element's mutual holder... 4391 holder.insertAfter(shouldBePreviousSibling, ele.removeFromTree()); 4392 iterator.currentKilled(); // the current branch can be skipped; we'll hit it soon anyway since it's now next up. 4393 } 4394 } 4395 } 4396 } 4397 4398 /* end massive parse function */ 4399 4400 /// Gets the <title> element's innerText, if one exists 4401 @property string title() { 4402 bool doesItMatch(Element e) { 4403 return (e.tagName == "title"); 4404 } 4405 4406 auto e = findFirst(&doesItMatch); 4407 if(e) 4408 return e.innerText(); 4409 return ""; 4410 } 4411 4412 /// Sets the title of the page, creating a <title> element if needed. 4413 @property void title(string t) { 4414 bool doesItMatch(Element e) { 4415 return (e.tagName == "title"); 4416 } 4417 4418 auto e = findFirst(&doesItMatch); 4419 4420 if(!e) { 4421 e = createElement("title"); 4422 auto heads = getElementsByTagName("head"); 4423 if(heads.length) 4424 heads[0].appendChild(e); 4425 } 4426 4427 if(e) 4428 e.innerText = t; 4429 } 4430 4431 // FIXME: would it work to alias root this; ???? might be a good idea 4432 /// These functions all forward to the root element. See the documentation in the Element class. 4433 Element getElementById(string id) { 4434 return root.getElementById(id); 4435 } 4436 4437 /// ditto 4438 final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__) 4439 if( is(SomeElementType : Element)) 4440 out(ret) { assert(ret !is null); } 4441 do { 4442 return root.requireElementById!(SomeElementType)(id, file, line); 4443 } 4444 4445 /// ditto 4446 final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__) 4447 if( is(SomeElementType : Element)) 4448 out(ret) { assert(ret !is null); } 4449 do { 4450 return root.requireSelector!(SomeElementType)(selector, file, line); 4451 } 4452 4453 4454 /// ditto 4455 Element querySelector(string selector) { 4456 return root.querySelector(selector); 4457 } 4458 4459 /// ditto 4460 Element[] querySelectorAll(string selector) { 4461 return root.querySelectorAll(selector); 4462 } 4463 4464 /// ditto 4465 Element[] getElementsBySelector(string selector) { 4466 return root.getElementsBySelector(selector); 4467 } 4468 4469 /// ditto 4470 Element[] getElementsByTagName(string tag) { 4471 return root.getElementsByTagName(tag); 4472 } 4473 4474 /** FIXME: btw, this could just be a lazy range...... */ 4475 Element getFirstElementByTagName(string tag) { 4476 if(loose) 4477 tag = tag.toLower(); 4478 bool doesItMatch(Element e) { 4479 return e.tagName == tag; 4480 } 4481 return findFirst(&doesItMatch); 4482 } 4483 4484 /// This returns the <body> element, if there is one. (It different than Javascript, where it is called 'body', because body is a keyword in D.) 4485 Element mainBody() { 4486 return getFirstElementByTagName("body"); 4487 } 4488 4489 /// this uses a weird thing... it's [name=] if no colon and 4490 /// [property=] if colon 4491 string getMeta(string name) { 4492 string thing = name.indexOf(":") == -1 ? "name" : "property"; 4493 auto e = querySelector("head meta["~thing~"="~name~"]"); 4494 if(e is null) 4495 return null; 4496 return e.content; 4497 } 4498 4499 /// Sets a meta tag in the document header. It is kinda hacky to work easily for both Facebook open graph and traditional html meta tags/ 4500 void setMeta(string name, string value) { 4501 string thing = name.indexOf(":") == -1 ? "name" : "property"; 4502 auto e = querySelector("head meta["~thing~"="~name~"]"); 4503 if(e is null) { 4504 e = requireSelector("head").addChild("meta"); 4505 e.setAttribute(thing, name); 4506 } 4507 4508 e.content = value; 4509 } 4510 4511 ///. 4512 Form[] forms() { 4513 return cast(Form[]) getElementsByTagName("form"); 4514 } 4515 4516 ///. 4517 Form createForm() 4518 out(ret) { 4519 assert(ret !is null); 4520 } 4521 do { 4522 return cast(Form) createElement("form"); 4523 } 4524 4525 ///. 4526 Element createElement(string name) { 4527 if(loose) 4528 name = name.toLower(); 4529 4530 auto e = Element.make(name); 4531 e.parentDocument = this; 4532 4533 return e; 4534 4535 // return new Element(this, name, null, selfClosed); 4536 } 4537 4538 ///. 4539 Element createFragment() { 4540 return new DocumentFragment(this); 4541 } 4542 4543 ///. 4544 Element createTextNode(string content) { 4545 return new TextNode(this, content); 4546 } 4547 4548 4549 ///. 4550 Element findFirst(bool delegate(Element) doesItMatch) { 4551 Element result; 4552 4553 bool goThroughElement(Element e) { 4554 if(doesItMatch(e)) { 4555 result = e; 4556 return true; 4557 } 4558 4559 foreach(child; e.children) { 4560 if(goThroughElement(child)) 4561 return true; 4562 } 4563 4564 return false; 4565 } 4566 4567 goThroughElement(root); 4568 4569 return result; 4570 } 4571 4572 ///. 4573 void clear() { 4574 root = null; 4575 loose = false; 4576 } 4577 4578 ///. 4579 void setProlog(string d) { 4580 _prolog = d; 4581 prologWasSet = true; 4582 } 4583 4584 ///. 4585 private string _prolog = "<!DOCTYPE html>\n"; 4586 private bool prologWasSet = false; // set to true if the user changed it 4587 4588 @property string prolog() const { 4589 // if the user explicitly changed it, do what they want 4590 // or if we didn't keep/find stuff from the document itself, 4591 // we'll use the builtin one as a default. 4592 if(prologWasSet || piecesBeforeRoot.length == 0) 4593 return _prolog; 4594 4595 string p; 4596 foreach(e; piecesBeforeRoot) 4597 p ~= e.toString() ~ "\n"; 4598 return p; 4599 } 4600 4601 ///. 4602 override string toString() const { 4603 return prolog ~ root.toString(); 4604 } 4605 4606 ///. 4607 Element root; 4608 4609 /// if these were kept, this is stuff that appeared before the root element, such as <?xml version ?> decls and <!DOCTYPE>s 4610 Element[] piecesBeforeRoot; 4611 4612 /// stuff after the root, only stored in non-strict mode and not used in toString, but available in case you want it 4613 Element[] piecesAfterRoot; 4614 4615 ///. 4616 bool loose; 4617 4618 4619 4620 // what follows are for mutation events that you can observe 4621 void delegate(DomMutationEvent)[] eventObservers; 4622 4623 void dispatchMutationEvent(DomMutationEvent e) { 4624 foreach(o; eventObservers) 4625 o(e); 4626 } 4627 } 4628 4629 4630 // FIXME: since Document loosens the input requirements, it should probably be the sub class... 4631 /// Specializes Document for handling generic XML. (always uses strict mode, uses xml mime type and file header) 4632 class XmlDocument : Document { 4633 this(string data) { 4634 contentType = "text/xml; charset=utf-8"; 4635 _prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n"; 4636 4637 parseStrict(data); 4638 } 4639 } 4640 4641 4642 4643 // for the observers 4644 enum DomMutationOperations { 4645 setAttribute, 4646 removeAttribute, 4647 appendChild, // tagname, attributes[], innerHTML 4648 insertBefore, 4649 truncateChildren, 4650 removeChild, 4651 appendHtml, 4652 replaceHtml, 4653 appendText, 4654 replaceText, 4655 replaceTextOnly 4656 } 4657 4658 // and for observers too 4659 struct DomMutationEvent { 4660 DomMutationOperations operation; 4661 Element target; 4662 Element related; // what this means differs with the operation 4663 Element related2; 4664 string relatedString; 4665 string relatedString2; 4666 } 4667 4668 4669 private enum static string[] selfClosedElements = [ 4670 // html 4 4671 "img", "hr", "input", "br", "col", "link", "meta", 4672 // html 5 4673 "source" ]; 4674 4675 static import std.conv; 4676 4677 ///. 4678 int intFromHex(string hex) { 4679 int place = 1; 4680 int value = 0; 4681 for(sizediff_t a = hex.length - 1; a >= 0; a--) { 4682 int v; 4683 char q = hex[a]; 4684 if( q >= '0' && q <= '9') 4685 v = q - '0'; 4686 else if (q >= 'a' && q <= 'f') 4687 v = q - 'a' + 10; 4688 else throw new Exception("Illegal hex character: " ~ q); 4689 4690 value += v * place; 4691 4692 place *= 16; 4693 } 4694 4695 return value; 4696 } 4697 4698 4699 // CSS selector handling 4700 4701 // EXTENSIONS 4702 // dd - dt means get the dt directly before that dd (opposite of +) NOT IMPLEMENTED 4703 // dd -- dt means rewind siblings until you hit a dt, go as far as you need to NOT IMPLEMENTED 4704 // dt < dl means get the parent of that dt iff it is a dl (usable for "get a dt that are direct children of dl") 4705 // dt << dl means go as far up as needed to find a dl (you have an element and want its containers) NOT IMPLEMENTED 4706 // :first means to stop at the first hit, don't do more (so p + p == p ~ p:first 4707 4708 4709 4710 // CSS4 draft currently says you can change the subject (the element actually returned) by putting a ! at the end of it. 4711 // That might be useful to implement, though I do have parent selectors too. 4712 4713 ///. 4714 static immutable string[] selectorTokens = [ 4715 // It is important that the 2 character possibilities go first here for accurate lexing 4716 "~=", "*=", "|=", "^=", "$=", "!=", // "::" should be there too for full standard 4717 "<<", // my any-parent extension (reciprocal of whitespace) 4718 " - ", // previous-sibling extension (whitespace required to disambiguate tag-names) 4719 ".", ">", "+", "*", ":", "[", "]", "=", "\"", "#", ",", " ", "~", "<" 4720 ]; // other is white space or a name. 4721 4722 ///. 4723 sizediff_t idToken(string str, sizediff_t position) { 4724 sizediff_t tid = -1; 4725 char c = str[position]; 4726 foreach(a, token; selectorTokens) 4727 4728 if(c == token[0]) { 4729 if(token.length > 1) { 4730 if(position + 1 >= str.length || str[position+1] != token[1]) 4731 continue; // not this token 4732 } 4733 tid = a; 4734 break; 4735 } 4736 return tid; 4737 } 4738 4739 ///. 4740 // look, ma, no phobos! 4741 // new lexer by ketmar 4742 string[] lexSelector (string selstr) { 4743 4744 static sizediff_t idToken (string str, size_t stpos) { 4745 char c = str[stpos]; 4746 foreach (sizediff_t tidx, immutable token; selectorTokens) { 4747 if (c == token[0]) { 4748 if (token.length > 1) { 4749 assert(token.length == 2); // we don't have 3-char tokens yet 4750 if (str.length-stpos < 2 || str[stpos+1] != token[1]) continue; 4751 } 4752 return tidx; 4753 } 4754 } 4755 return -1; 4756 } 4757 4758 // skip spaces and comments 4759 static string removeLeadingBlanks (string str) { 4760 size_t curpos = 0; 4761 while (curpos < str.length) { 4762 immutable char ch = str[curpos]; 4763 // this can overflow on 4GB strings on 32-bit; 'cmon, don't be silly, nobody cares! 4764 if (ch == '/' && str.length-curpos > 1 && str[curpos+1] == '*') { 4765 // comment 4766 curpos += 2; 4767 while (curpos < str.length) { 4768 if (str[curpos] == '*' && str.length-curpos > 1 && str[curpos+1] == '/') { 4769 curpos += 2; 4770 break; 4771 } 4772 ++curpos; 4773 } 4774 } else if (ch <= 32) { 4775 // we should consider unicode spaces too, but... unicode sux anyway. 4776 ++curpos; 4777 } else { 4778 break; 4779 } 4780 } 4781 return str[curpos..$]; 4782 } 4783 4784 static bool isBlankAt() (string str, size_t pos) { 4785 // we should consider unicode spaces too, but... unicode sux anyway. 4786 return 4787 (pos < str.length && // in string 4788 (str[pos] <= 32 || // space 4789 (str.length-pos > 1 && str[pos] == '/' && str[pos+1] == '*'))); // comment 4790 } 4791 4792 string[] tokens; 4793 // lexx it! 4794 while ((selstr = removeLeadingBlanks(selstr)).length > 0) { 4795 if(selstr[0] == '\"') { 4796 auto pos = 1; 4797 bool escaping; 4798 while(pos < selstr.length && !escaping && selstr[pos] != '"') { 4799 if(escaping) 4800 escaping = false; 4801 else if(selstr[pos] == '\\') 4802 escaping = true; 4803 pos++; 4804 } 4805 4806 // FIXME: do better unescaping 4807 tokens ~= selstr[1 .. pos].replace(`\"`, `"`); 4808 selstr = selstr[pos + 1.. $]; 4809 continue; 4810 } 4811 4812 4813 // no tokens starts with escape 4814 immutable tid = idToken(selstr, 0); 4815 if (tid >= 0) { 4816 // special token 4817 tokens ~= selectorTokens[tid]; // it's funnier this way 4818 selstr = selstr[selectorTokens[tid].length..$]; 4819 continue; 4820 } 4821 // from start to space or special token 4822 size_t escapePos = size_t.max; 4823 size_t curpos = 0; // i can has chizburger^w escape at the start 4824 while (curpos < selstr.length) { 4825 if (selstr[curpos] == '\\') { 4826 // this is escape, just skip it and next char 4827 if (escapePos == size_t.max) escapePos = curpos; 4828 curpos = (selstr.length-curpos >= 2 ? curpos+2 : selstr.length); 4829 } else { 4830 if (isBlankAt(selstr, curpos) || idToken(selstr, curpos) >= 0) break; 4831 ++curpos; 4832 } 4833 } 4834 // identifier 4835 if (escapePos != size_t.max) { 4836 // i hate it when it happens 4837 string id = selstr[0..escapePos]; 4838 while (escapePos < curpos) { 4839 if (curpos-escapePos < 2) break; 4840 id ~= selstr[escapePos+1]; // escaped char 4841 escapePos += 2; 4842 immutable stp = escapePos; 4843 while (escapePos < curpos && selstr[escapePos] != '\\') ++escapePos; 4844 if (escapePos > stp) id ~= selstr[stp..escapePos]; 4845 } 4846 if (id.length > 0) tokens ~= id; 4847 } else { 4848 tokens ~= selstr[0..curpos]; 4849 } 4850 selstr = selstr[curpos..$]; 4851 } 4852 return tokens; 4853 } 4854 version(unittest_domd_lexer) unittest { 4855 assert(lexSelector(r" test\=me /*d*/") == [r"test=me"]); 4856 assert(lexSelector(r"div/**/. id") == ["div", ".", "id"]); 4857 assert(lexSelector(r" < <") == ["<", "<"]); 4858 assert(lexSelector(r" <<") == ["<<"]); 4859 assert(lexSelector(r" <</") == ["<<", "/"]); 4860 assert(lexSelector(r" <</*") == ["<<"]); 4861 assert(lexSelector(r" <\</*") == ["<", "<"]); 4862 assert(lexSelector(r"heh\") == ["heh"]); 4863 assert(lexSelector(r"alice \") == ["alice"]); 4864 assert(lexSelector(r"alice,is#best") == ["alice", ",", "is", "#", "best"]); 4865 } 4866 4867 ///. 4868 struct SelectorPart { 4869 string tagNameFilter; ///. 4870 string[] attributesPresent; /// [attr] 4871 string[2][] attributesEqual; /// [attr=value] 4872 string[2][] attributesStartsWith; /// [attr^=value] 4873 string[2][] attributesEndsWith; /// [attr$=value] 4874 // split it on space, then match to these 4875 string[2][] attributesIncludesSeparatedBySpaces; /// [attr~=value] 4876 // split it on dash, then match to these 4877 string[2][] attributesIncludesSeparatedByDashes; /// [attr|=value] 4878 string[2][] attributesInclude; /// [attr*=value] 4879 string[2][] attributesNotEqual; /// [attr!=value] -- extension by me 4880 4881 bool firstChild; ///. 4882 bool lastChild; ///. 4883 4884 bool emptyElement; ///. 4885 bool oddChild; ///. 4886 bool evenChild; ///. 4887 4888 bool rootElement; ///. 4889 4890 int separation = -1; /// -1 == only itself; the null selector, 0 == tree, 1 == childNodes, 2 == childAfter, 3 == youngerSibling, 4 == parentOf 4891 4892 ///. 4893 string toString() { 4894 string ret; 4895 switch(separation) { 4896 default: assert(0); 4897 case -1: break; 4898 case 0: ret ~= " "; break; 4899 case 1: ret ~= ">"; break; 4900 case 2: ret ~= "+"; break; 4901 case 3: ret ~= "~"; break; 4902 case 4: ret ~= "<"; break; 4903 } 4904 ret ~= tagNameFilter; 4905 foreach(a; attributesPresent) ret ~= "[" ~ a ~ "]"; 4906 foreach(a; attributesEqual) ret ~= "[" ~ a[0] ~ "=\"" ~ a[1] ~ "\"]"; 4907 foreach(a; attributesEndsWith) ret ~= "[" ~ a[0] ~ "$=\"" ~ a[1] ~ "\"]"; 4908 foreach(a; attributesStartsWith) ret ~= "[" ~ a[0] ~ "^=\"" ~ a[1] ~ "\"]"; 4909 foreach(a; attributesNotEqual) ret ~= "[" ~ a[0] ~ "!=\"" ~ a[1] ~ "\"]"; 4910 foreach(a; attributesInclude) ret ~= "[" ~ a[0] ~ "*=\"" ~ a[1] ~ "\"]"; 4911 foreach(a; attributesIncludesSeparatedByDashes) ret ~= "[" ~ a[0] ~ "|=\"" ~ a[1] ~ "\"]"; 4912 foreach(a; attributesIncludesSeparatedBySpaces) ret ~= "[" ~ a[0] ~ "~=\"" ~ a[1] ~ "\"]"; 4913 4914 if(firstChild) ret ~= ":first-child"; 4915 if(lastChild) ret ~= ":last-child"; 4916 if(emptyElement) ret ~= ":empty"; 4917 if(oddChild) ret ~= ":odd-child"; 4918 if(evenChild) ret ~= ":even-child"; 4919 if(rootElement) ret ~= ":root"; 4920 4921 return ret; 4922 } 4923 4924 // USEFUL 4925 ///. 4926 bool matchElement(Element e) { 4927 // FIXME: this can be called a lot of times, and really add up in times according to the profiler. 4928 // Each individual call is reasonably fast already, but it adds up. 4929 if(e is null) return false; 4930 if(e.nodeType != 1) return false; 4931 4932 if(tagNameFilter != "" && tagNameFilter != "*") 4933 if(e.tagName != tagNameFilter) 4934 return false; 4935 if(firstChild) { 4936 if(e.parentNode is null) 4937 return false; 4938 if(e.parentNode.childElements[0] !is e) 4939 return false; 4940 } 4941 if(lastChild) { 4942 if(e.parentNode is null) 4943 return false; 4944 auto ce = e.parentNode.childElements; 4945 if(ce[$-1] !is e) 4946 return false; 4947 } 4948 if(emptyElement) { 4949 if(e.children.length) 4950 return false; 4951 } 4952 if(rootElement) { 4953 if(e.parentNode !is null) 4954 return false; 4955 } 4956 if(oddChild || evenChild) { 4957 if(e.parentNode is null) 4958 return false; 4959 foreach(i, child; e.parentNode.childElements) { 4960 if(child is e) { 4961 if(oddChild && !(i&1)) 4962 return false; 4963 if(evenChild && (i&1)) 4964 return false; 4965 break; 4966 } 4967 } 4968 } 4969 4970 bool matchWithSeparator(string attr, string value, string separator) { 4971 foreach(s; attr.split(separator)) 4972 if(s == value) 4973 return true; 4974 return false; 4975 } 4976 4977 foreach(a; attributesPresent) 4978 if(a !in e.attributes) 4979 return false; 4980 foreach(a; attributesEqual) 4981 if(a[0] !in e.attributes || e.attributes[a[0]] != a[1]) 4982 return false; 4983 foreach(a; attributesNotEqual) 4984 // FIXME: maybe it should say null counts... this just bit me. 4985 // I did [attr][attr!=value] to work around. 4986 // 4987 // if it's null, it's not equal, right? 4988 //if(a[0] !in e.attributes || e.attributes[a[0]] == a[1]) 4989 if(e.getAttribute(a[0]) == a[1]) 4990 return false; 4991 foreach(a; attributesInclude) 4992 if(a[0] !in e.attributes || (e.attributes[a[0]].indexOf(a[1]) == -1)) 4993 return false; 4994 foreach(a; attributesStartsWith) 4995 if(a[0] !in e.attributes || !e.attributes[a[0]].startsWith(a[1])) 4996 return false; 4997 foreach(a; attributesEndsWith) 4998 if(a[0] !in e.attributes || !e.attributes[a[0]].endsWith(a[1])) 4999 return false; 5000 foreach(a; attributesIncludesSeparatedBySpaces) 5001 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], " ")) 5002 return false; 5003 foreach(a; attributesIncludesSeparatedByDashes) 5004 if(a[0] !in e.attributes || !matchWithSeparator(e.attributes[a[0]], a[1], "-")) 5005 return false; 5006 5007 return true; 5008 } 5009 } 5010 5011 // USEFUL 5012 ///. 5013 Element[] getElementsBySelectorParts(Element start, SelectorPart[] parts) { 5014 Element[] ret; 5015 if(!parts.length) { 5016 return [start]; // the null selector only matches the start point; it 5017 // is what terminates the recursion 5018 } 5019 5020 auto part = parts[0]; 5021 switch(part.separation) { 5022 default: assert(0); 5023 case -1: 5024 case 0: // tree 5025 foreach(e; start.tree) { 5026 if(part.separation == 0 && start is e) 5027 continue; // space doesn't match itself! 5028 if(part.matchElement(e)) { 5029 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5030 } 5031 } 5032 break; 5033 case 1: // children 5034 foreach(e; start.childNodes) { 5035 if(part.matchElement(e)) { 5036 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5037 } 5038 } 5039 break; 5040 case 2: // next-sibling 5041 auto tmp = start.parentNode; 5042 if(tmp !is null) { 5043 sizediff_t pos = -1; 5044 auto children = tmp.childElements; 5045 foreach(i, child; children) { 5046 if(child is start) { 5047 pos = i; 5048 break; 5049 } 5050 } 5051 assert(pos != -1); 5052 if(pos + 1 < children.length) { 5053 auto e = children[pos+1]; 5054 if(part.matchElement(e)) 5055 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5056 } 5057 } 5058 break; 5059 case 3: // younger sibling 5060 auto tmp = start.parentNode; 5061 if(tmp !is null) { 5062 sizediff_t pos = -1; 5063 auto children = tmp.childElements; 5064 foreach(i, child; children) { 5065 if(child is start) { 5066 pos = i; 5067 break; 5068 } 5069 } 5070 assert(pos != -1); 5071 foreach(e; children[pos+1..$]) { 5072 if(part.matchElement(e)) 5073 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5074 } 5075 } 5076 break; 5077 case 4: // immediate parent node, an extension of mine to walk back up the tree 5078 auto e = start.parentNode; 5079 if(part.matchElement(e)) { 5080 ret ~= getElementsBySelectorParts(e, parts[1..$]); 5081 } 5082 /* 5083 Example of usefulness: 5084 5085 Consider you have an HTML table. If you want to get all rows that have a th, you can do: 5086 5087 table th < tr 5088 5089 Get all th descendants of the table, then walk back up the tree to fetch their parent tr nodes 5090 */ 5091 break; 5092 case 5: // any parent note, another extension of mine to go up the tree (backward of the whitespace operator) 5093 /* 5094 Like with the < operator, this is best used to find some parent of a particular known element. 5095 5096 Say you have an anchor inside a 5097 */ 5098 } 5099 5100 return ret; 5101 } 5102 5103 ///. 5104 struct Selector { 5105 ///. 5106 SelectorPart[] parts; 5107 5108 ///. 5109 string toString() { 5110 string ret; 5111 foreach(part; parts) 5112 ret ~= part.toString(); 5113 return ret; 5114 } 5115 5116 // USEFUL 5117 ///. 5118 Element[] getElements(Element start) { 5119 return removeDuplicates(getElementsBySelectorParts(start, parts)); 5120 } 5121 5122 // USEFUL (but not implemented) 5123 /// If relativeTo == null, it assumes the root of the parent document. 5124 bool matchElement(Element e, Element relativeTo = null) { 5125 // FIXME 5126 /+ 5127 Element where = e; 5128 foreach(part; retro(parts)) { 5129 if(where is relativeTo) 5130 return false; // at end of line, if we aren't done by now, the match fails 5131 if(!part.matchElement(where)) 5132 return false; // didn't match 5133 5134 if(part.selection == 1) // the > operator 5135 where = where.parentNode; 5136 else if(part.selection == 0) { // generic parent 5137 // need to go up the whole chain 5138 } 5139 } 5140 +/ 5141 return true; // if we got here, it is a success 5142 } 5143 5144 // the string should NOT have commas. Use parseSelectorString for that instead 5145 ///. 5146 static Selector fromString(string selector) { 5147 return parseSelector(lexSelector(selector)); 5148 } 5149 } 5150 5151 ///. 5152 Selector[] parseSelectorString(string selector, bool caseSensitiveTags = true) { 5153 Selector[] ret; 5154 auto tokens = lexSelector(selector); // this will parse commas too 5155 // and now do comma-separated slices (i haz phobosophobia!) 5156 while (tokens.length > 0) { 5157 size_t end = 0; 5158 while (end < tokens.length && tokens[end] != ",") ++end; 5159 if (end > 0) ret ~= parseSelector(tokens[0..end], caseSensitiveTags); 5160 if (tokens.length-end < 2) break; 5161 tokens = tokens[end+1..$]; 5162 } 5163 return ret; 5164 } 5165 5166 ///. 5167 Selector parseSelector(string[] tokens, bool caseSensitiveTags = true) { 5168 Selector s; 5169 5170 SelectorPart current; 5171 void commit() { 5172 // might as well skip null items 5173 if(current != current.init) { 5174 s.parts ~= current; 5175 5176 current = current.init; // start right over 5177 } 5178 } 5179 enum State { 5180 Starting, 5181 ReadingClass, 5182 ReadingId, 5183 ReadingAttributeSelector, 5184 ReadingAttributeComparison, 5185 ExpectingAttributeCloser, 5186 ReadingPseudoClass, 5187 ReadingAttributeValue 5188 } 5189 State state = State.Starting; 5190 string attributeName, attributeValue, attributeComparison; 5191 foreach(token; tokens) { 5192 sizediff_t tid = -1; 5193 foreach(i, item; selectorTokens) 5194 if(token == item) { 5195 tid = i; 5196 break; 5197 } 5198 final switch(state) { 5199 case State.Starting: // fresh, might be reading an operator or a tagname 5200 if(tid == -1) { 5201 if(!caseSensitiveTags) 5202 token = token.toLower(); 5203 if(current.tagNameFilter) { 5204 // if it was already set, we must see two thingies 5205 // separated by whitespace... 5206 commit(); 5207 current.separation = 0; // tree 5208 } 5209 current.tagNameFilter = token; 5210 } else { 5211 // Selector operators 5212 switch(token) { 5213 case "*": 5214 current.tagNameFilter = "*"; 5215 break; 5216 case " ": 5217 commit(); 5218 current.separation = 0; // tree 5219 break; 5220 case ">": 5221 commit(); 5222 current.separation = 1; // child 5223 break; 5224 case "+": 5225 commit(); 5226 current.separation = 2; // sibling directly after 5227 break; 5228 case "~": 5229 commit(); 5230 current.separation = 3; // any sibling after 5231 break; 5232 case "<": 5233 commit(); 5234 current.separation = 4; // immediate parent of 5235 break; 5236 case "[": 5237 state = State.ReadingAttributeSelector; 5238 break; 5239 case ".": 5240 state = State.ReadingClass; 5241 break; 5242 case "#": 5243 state = State.ReadingId; 5244 break; 5245 case ":": 5246 state = State.ReadingPseudoClass; 5247 break; 5248 5249 default: 5250 assert(0, token); 5251 } 5252 } 5253 break; 5254 case State.ReadingClass: 5255 current.attributesIncludesSeparatedBySpaces ~= ["class", token]; 5256 state = State.Starting; 5257 break; 5258 case State.ReadingId: 5259 current.attributesEqual ~= ["id", token]; 5260 state = State.Starting; 5261 break; 5262 case State.ReadingPseudoClass: 5263 switch(token) { 5264 case "first-child": 5265 current.firstChild = true; 5266 break; 5267 case "last-child": 5268 current.lastChild = true; 5269 break; 5270 case "only-child": 5271 current.firstChild = true; 5272 current.lastChild = true; 5273 break; 5274 case "empty": 5275 // one with no children 5276 current.emptyElement = true; 5277 break; 5278 case "link": 5279 current.attributesPresent ~= "href"; 5280 break; 5281 case "root": 5282 current.rootElement = true; 5283 break; 5284 // FIXME: add :not() 5285 // My extensions 5286 case "odd-child": 5287 current.oddChild = true; 5288 break; 5289 case "even-child": 5290 current.evenChild = true; 5291 break; 5292 5293 case "visited", "active", "hover", "target", "focus", "checked", "selected": 5294 current.attributesPresent ~= "nothing"; 5295 // FIXME 5296 /* 5297 // defined in the standard, but I don't implement it 5298 case "not": 5299 */ 5300 /+ 5301 // extensions not implemented 5302 //case "text": // takes the text in the element and wraps it in an element, returning it 5303 +/ 5304 goto case; 5305 case "before", "after": 5306 current.attributesPresent ~= "FIXME"; 5307 5308 break; 5309 default: 5310 //if(token.indexOf("lang") == -1) 5311 //assert(0, token); 5312 break; 5313 } 5314 state = State.Starting; 5315 break; 5316 case State.ReadingAttributeSelector: 5317 attributeName = token; 5318 attributeComparison = null; 5319 attributeValue = null; 5320 state = State.ReadingAttributeComparison; 5321 break; 5322 case State.ReadingAttributeComparison: 5323 // FIXME: these things really should be quotable in the proper lexer... 5324 if(token != "]") { 5325 if(token.indexOf("=") == -1) { 5326 // not a comparison; consider it 5327 // part of the attribute 5328 attributeValue ~= token; 5329 } else { 5330 attributeComparison = token; 5331 state = State.ReadingAttributeValue; 5332 } 5333 break; 5334 } 5335 goto case; 5336 case State.ExpectingAttributeCloser: 5337 if(token != "]") { 5338 // not the closer; consider it part of comparison 5339 if(attributeComparison == "") 5340 attributeName ~= token; 5341 else 5342 attributeValue ~= token; 5343 break; 5344 } 5345 5346 // Selector operators 5347 switch(attributeComparison) { 5348 default: assert(0); 5349 case "": 5350 current.attributesPresent ~= attributeName; 5351 break; 5352 case "=": 5353 current.attributesEqual ~= [attributeName, attributeValue]; 5354 break; 5355 case "|=": 5356 current.attributesIncludesSeparatedByDashes ~= [attributeName, attributeValue]; 5357 break; 5358 case "~=": 5359 current.attributesIncludesSeparatedBySpaces ~= [attributeName, attributeValue]; 5360 break; 5361 case "$=": 5362 current.attributesEndsWith ~= [attributeName, attributeValue]; 5363 break; 5364 case "^=": 5365 current.attributesStartsWith ~= [attributeName, attributeValue]; 5366 break; 5367 case "*=": 5368 current.attributesInclude ~= [attributeName, attributeValue]; 5369 break; 5370 case "!=": 5371 current.attributesNotEqual ~= [attributeName, attributeValue]; 5372 break; 5373 } 5374 5375 state = State.Starting; 5376 break; 5377 case State.ReadingAttributeValue: 5378 attributeValue = token; 5379 state = State.ExpectingAttributeCloser; 5380 break; 5381 } 5382 } 5383 5384 commit(); 5385 5386 return s; 5387 } 5388 5389 ///. 5390 Element[] removeDuplicates(Element[] input) { 5391 Element[] ret; 5392 5393 bool[Element] already; 5394 foreach(e; input) { 5395 if(e in already) continue; 5396 already[e] = true; 5397 ret ~= e; 5398 } 5399 5400 return ret; 5401 } 5402 5403 // done with CSS selector handling 5404 5405 5406 // FIXME: use the better parser from html.d 5407 /// This is probably not useful to you unless you're writing a browser or something like that. 5408 /// It represents a *computed* style, like what the browser gives you after applying stylesheets, inline styles, and html attributes. 5409 /// From here, you can start to make a layout engine for the box model and have a css aware browser. 5410 class CssStyle { 5411 ///. 5412 this(string rule, string content) { 5413 rule = rule.strip(); 5414 content = content.strip(); 5415 5416 if(content.length == 0) 5417 return; 5418 5419 originatingRule = rule; 5420 originatingSpecificity = getSpecificityOfRule(rule); // FIXME: if there's commas, this won't actually work! 5421 5422 foreach(part; content.split(";")) { 5423 part = part.strip(); 5424 if(part.length == 0) 5425 continue; 5426 auto idx = part.indexOf(":"); 5427 if(idx == -1) 5428 continue; 5429 //throw new Exception("Bad css rule (no colon): " ~ part); 5430 5431 Property p; 5432 5433 p.name = part[0 .. idx].strip(); 5434 p.value = part[idx + 1 .. $].replace("! important", "!important").replace("!important", "").strip(); // FIXME don't drop important 5435 p.givenExplicitly = true; 5436 p.specificity = originatingSpecificity; 5437 5438 properties ~= p; 5439 } 5440 5441 foreach(property; properties) 5442 expandShortForm(property, originatingSpecificity); 5443 } 5444 5445 ///. 5446 Specificity getSpecificityOfRule(string rule) { 5447 Specificity s; 5448 if(rule.length == 0) { // inline 5449 // s.important = 2; 5450 } else { 5451 // FIXME 5452 } 5453 5454 return s; 5455 } 5456 5457 string originatingRule; ///. 5458 Specificity originatingSpecificity; ///. 5459 5460 ///. 5461 union Specificity { 5462 uint score; ///. 5463 // version(little_endian) 5464 ///. 5465 struct { 5466 ubyte tags; ///. 5467 ubyte classes; ///. 5468 ubyte ids; ///. 5469 ubyte important; /// 0 = none, 1 = stylesheet author, 2 = inline style, 3 = user important 5470 } 5471 } 5472 5473 ///. 5474 struct Property { 5475 bool givenExplicitly; /// this is false if for example the user said "padding" and this is "padding-left" 5476 string name; ///. 5477 string value; ///. 5478 Specificity specificity; ///. 5479 // do we care about the original source rule? 5480 } 5481 5482 ///. 5483 Property[] properties; 5484 5485 ///. 5486 string opDispatch(string nameGiven)(string value = null) if(nameGiven != "popFront") { 5487 string name = unCamelCase(nameGiven); 5488 if(value is null) 5489 return getValue(name); 5490 else 5491 return setValue(name, value, 0x02000000 /* inline specificity */); 5492 } 5493 5494 /// takes dash style name 5495 string getValue(string name) { 5496 foreach(property; properties) 5497 if(property.name == name) 5498 return property.value; 5499 return null; 5500 } 5501 5502 /// takes dash style name 5503 string setValue(string name, string value, Specificity newSpecificity, bool explicit = true) { 5504 value = value.replace("! important", "!important"); 5505 if(value.indexOf("!important") != -1) { 5506 newSpecificity.important = 1; // FIXME 5507 value = value.replace("!important", "").strip(); 5508 } 5509 5510 foreach(ref property; properties) 5511 if(property.name == name) { 5512 if(newSpecificity.score >= property.specificity.score) { 5513 property.givenExplicitly = explicit; 5514 expandShortForm(property, newSpecificity); 5515 return (property.value = value); 5516 } else { 5517 if(name == "display") 5518 {}//writeln("Not setting ", name, " to ", value, " because ", newSpecificity.score, " < ", property.specificity.score); 5519 return value; // do nothing - the specificity is too low 5520 } 5521 } 5522 5523 // it's not here... 5524 5525 Property p; 5526 p.givenExplicitly = true; 5527 p.name = name; 5528 p.value = value; 5529 p.specificity = originatingSpecificity; 5530 5531 properties ~= p; 5532 expandShortForm(p, originatingSpecificity); 5533 5534 return value; 5535 } 5536 5537 private void expandQuadShort(string name, string value, Specificity specificity) { 5538 auto parts = value.split(" "); 5539 switch(parts.length) { 5540 case 1: 5541 setValue(name ~"-left", parts[0], specificity, false); 5542 setValue(name ~"-right", parts[0], specificity, false); 5543 setValue(name ~"-top", parts[0], specificity, false); 5544 setValue(name ~"-bottom", parts[0], specificity, false); 5545 break; 5546 case 2: 5547 setValue(name ~"-left", parts[1], specificity, false); 5548 setValue(name ~"-right", parts[1], specificity, false); 5549 setValue(name ~"-top", parts[0], specificity, false); 5550 setValue(name ~"-bottom", parts[0], specificity, false); 5551 break; 5552 case 3: 5553 setValue(name ~"-top", parts[0], specificity, false); 5554 setValue(name ~"-right", parts[1], specificity, false); 5555 setValue(name ~"-bottom", parts[2], specificity, false); 5556 setValue(name ~"-left", parts[2], specificity, false); 5557 5558 break; 5559 case 4: 5560 setValue(name ~"-top", parts[0], specificity, false); 5561 setValue(name ~"-right", parts[1], specificity, false); 5562 setValue(name ~"-bottom", parts[2], specificity, false); 5563 setValue(name ~"-left", parts[3], specificity, false); 5564 break; 5565 default: 5566 assert(0, value); 5567 } 5568 } 5569 5570 ///. 5571 void expandShortForm(Property p, Specificity specificity) { 5572 switch(p.name) { 5573 case "margin": 5574 case "padding": 5575 expandQuadShort(p.name, p.value, specificity); 5576 break; 5577 case "border": 5578 case "outline": 5579 setValue(p.name ~ "-left", p.value, specificity, false); 5580 setValue(p.name ~ "-right", p.value, specificity, false); 5581 setValue(p.name ~ "-top", p.value, specificity, false); 5582 setValue(p.name ~ "-bottom", p.value, specificity, false); 5583 break; 5584 5585 case "border-top": 5586 case "border-bottom": 5587 case "border-left": 5588 case "border-right": 5589 case "outline-top": 5590 case "outline-bottom": 5591 case "outline-left": 5592 case "outline-right": 5593 5594 default: {} 5595 } 5596 } 5597 5598 ///. 5599 override string toString() { 5600 string ret; 5601 if(originatingRule.length) 5602 ret = originatingRule ~ " {"; 5603 5604 foreach(property; properties) { 5605 if(!property.givenExplicitly) 5606 continue; // skip the inferred shit 5607 5608 if(originatingRule.length) 5609 ret ~= "\n\t"; 5610 else 5611 ret ~= " "; 5612 5613 ret ~= property.name ~ ": " ~ property.value ~ ";"; 5614 } 5615 5616 if(originatingRule.length) 5617 ret ~= "\n}\n"; 5618 5619 return ret; 5620 } 5621 } 5622 5623 string cssUrl(string url) { 5624 return "url(\"" ~ url ~ "\")"; 5625 } 5626 5627 /// This probably isn't useful, unless you're writing a browser or something like that. 5628 /// You might want to look at arsd.html for css macro, nesting, etc., or just use standard css 5629 /// as text. 5630 /// 5631 /// The idea, however, is to represent a kind of CSS object model, complete with specificity, 5632 /// that you can apply to your documents to build the complete computedStyle object. 5633 class StyleSheet { 5634 ///. 5635 CssStyle[] rules; 5636 5637 ///. 5638 this(string source) { 5639 // FIXME: handle @ rules and probably could improve lexer 5640 // add nesting? 5641 int state; 5642 string currentRule; 5643 string currentValue; 5644 5645 string* currentThing = ¤tRule; 5646 foreach(c; source) { 5647 handle: switch(state) { 5648 default: assert(0); 5649 case 0: // starting - we assume we're reading a rule 5650 switch(c) { 5651 case '@': 5652 state = 4; 5653 break; 5654 case '/': 5655 state = 1; 5656 break; 5657 case '{': 5658 currentThing = ¤tValue; 5659 break; 5660 case '}': 5661 if(currentThing is ¤tValue) { 5662 rules ~= new CssStyle(currentRule, currentValue); 5663 5664 currentRule = ""; 5665 currentValue = ""; 5666 5667 currentThing = ¤tRule; 5668 } else { 5669 // idk what is going on here. 5670 // check sveit.com to reproduce 5671 currentRule = ""; 5672 currentValue = ""; 5673 } 5674 break; 5675 default: 5676 (*currentThing) ~= c; 5677 } 5678 break; 5679 case 1: // expecting * 5680 if(c == '*') 5681 state = 2; 5682 else { 5683 state = 0; 5684 (*currentThing) ~= "/" ~ c; 5685 } 5686 break; 5687 case 2: // inside comment 5688 if(c == '*') 5689 state = 3; 5690 break; 5691 case 3: // expecting / to end comment 5692 if(c == '/') 5693 state = 0; 5694 else 5695 state = 2; // it's just a comment so no need to append 5696 break; 5697 case 4: 5698 if(c == '{') 5699 state = 5; 5700 if(c == ';') 5701 state = 0; // just skipping import 5702 break; 5703 case 5: 5704 if(c == '}') 5705 state = 0; // skipping font face probably 5706 } 5707 } 5708 } 5709 5710 /// Run through the document and apply this stylesheet to it. The computedStyle member will be accurate after this call 5711 void apply(Document document) { 5712 foreach(rule; rules) { 5713 if(rule.originatingRule.length == 0) 5714 continue; // this shouldn't happen here in a stylesheet 5715 foreach(element; document.querySelectorAll(rule.originatingRule)) { 5716 // note: this should be a different object than the inline style 5717 // since givenExplicitly is likely destroyed here 5718 auto current = element.computedStyle; 5719 5720 foreach(item; rule.properties) 5721 current.setValue(item.name, item.value, item.specificity); 5722 } 5723 } 5724 } 5725 } 5726 5727 5728 /// This is kinda private; just a little utility container for use by the ElementStream class. 5729 final class Stack(T) { 5730 this() { 5731 internalLength = 0; 5732 arr = initialBuffer[]; 5733 } 5734 5735 ///. 5736 void push(T t) { 5737 if(internalLength >= arr.length) { 5738 auto oldarr = arr; 5739 if(arr.length < 4096) 5740 arr = new T[arr.length * 2]; 5741 else 5742 arr = new T[arr.length + 4096]; 5743 arr[0 .. oldarr.length] = oldarr[]; 5744 } 5745 5746 arr[internalLength] = t; 5747 internalLength++; 5748 } 5749 5750 ///. 5751 T pop() { 5752 assert(internalLength); 5753 internalLength--; 5754 return arr[internalLength]; 5755 } 5756 5757 ///. 5758 T peek() { 5759 assert(internalLength); 5760 return arr[internalLength - 1]; 5761 } 5762 5763 ///. 5764 @property bool empty() { 5765 return internalLength ? false : true; 5766 } 5767 5768 ///. 5769 private T[] arr; 5770 private size_t internalLength; 5771 private T[64] initialBuffer; 5772 // the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep), 5773 // using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push() 5774 // function thanks to this, and push() was actually one of the slowest individual functions in the code! 5775 } 5776 5777 /// This is the lazy range that walks the tree for you. It tries to go in the lexical order of the source: node, then children from first to last, each recursively. 5778 final class ElementStream { 5779 5780 ///. 5781 @property Element front() { 5782 return current.element; 5783 } 5784 5785 /// Use Element.tree instead. 5786 this(Element start) { 5787 current.element = start; 5788 current.childPosition = -1; 5789 isEmpty = false; 5790 stack = new Stack!(Current); 5791 } 5792 5793 /* 5794 Handle it 5795 handle its children 5796 5797 */ 5798 5799 ///. 5800 void popFront() { 5801 more: 5802 if(isEmpty) return; 5803 5804 // FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times) 5805 5806 current.childPosition++; 5807 if(current.childPosition >= current.element.children.length) { 5808 if(stack.empty()) 5809 isEmpty = true; 5810 else { 5811 current = stack.pop(); 5812 goto more; 5813 } 5814 } else { 5815 stack.push(current); 5816 current.element = current.element.children[current.childPosition]; 5817 current.childPosition = -1; 5818 } 5819 } 5820 5821 /// You should call this when you remove an element from the tree. It then doesn't recurse into that node and adjusts the current position, keeping the range stable. 5822 void currentKilled() { 5823 if(stack.empty) // should never happen 5824 isEmpty = true; 5825 else { 5826 current = stack.pop(); 5827 current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right 5828 } 5829 } 5830 5831 ///. 5832 @property bool empty() { 5833 return isEmpty; 5834 } 5835 5836 private: 5837 5838 struct Current { 5839 Element element; 5840 int childPosition; 5841 } 5842 5843 Current current; 5844 5845 Stack!(Current) stack; 5846 5847 bool isEmpty; 5848 } 5849 5850 5851 5852 // unbelievable. 5853 // Don't use any of these in your own code. Instead, try to use phobos or roll your own, as I might kill these at any time. 5854 sizediff_t indexOfBytes(immutable(ubyte)[] haystack, immutable(ubyte)[] needle) { 5855 static import std.algorithm; 5856 auto found = std.algorithm.find(haystack, needle); 5857 if(found.length == 0) 5858 return -1; 5859 return haystack.length - found.length; 5860 } 5861 5862 private T[] insertAfter(T)(T[] arr, int position, T[] what) { 5863 assert(position < arr.length); 5864 T[] ret; 5865 ret.length = arr.length + what.length; 5866 int a = 0; 5867 foreach(i; arr[0..position+1]) 5868 ret[a++] = i; 5869 5870 foreach(i; what) 5871 ret[a++] = i; 5872 5873 foreach(i; arr[position+1..$]) 5874 ret[a++] = i; 5875 5876 return ret; 5877 } 5878 5879 package bool isInArray(T)(T item, T[] arr) { 5880 foreach(i; arr) 5881 if(item == i) 5882 return true; 5883 return false; 5884 } 5885 5886 private string[string] aadup(in string[string] arr) { 5887 string[string] ret; 5888 foreach(k, v; arr) 5889 ret[k] = v; 5890 return ret; 5891 } 5892 5893 // dom event support, if you want to use it 5894 5895 /// used for DOM events 5896 alias void delegate(Element handlerAttachedTo, Event event) EventHandler; 5897 5898 /// This is a DOM event, like in javascript. Note that this library never fires events - it is only here for you to use if you want it. 5899 class Event { 5900 this(string eventName, Element target) { 5901 this.eventName = eventName; 5902 this.srcElement = target; 5903 } 5904 5905 /// Prevents the default event handler (if there is one) from being called 5906 void preventDefault() { 5907 defaultPrevented = true; 5908 } 5909 5910 /// Stops the event propagation immediately. 5911 void stopPropagation() { 5912 propagationStopped = true; 5913 } 5914 5915 bool defaultPrevented; 5916 bool propagationStopped; 5917 string eventName; 5918 5919 Element srcElement; 5920 alias srcElement target; 5921 5922 Element relatedTarget; 5923 5924 int clientX; 5925 int clientY; 5926 5927 int button; 5928 5929 bool isBubbling; 5930 5931 /// this sends it only to the target. If you want propagation, use dispatch() instead. 5932 void send() { 5933 if(srcElement is null) 5934 return; 5935 5936 auto e = srcElement; 5937 5938 if(eventName in e.bubblingEventHandlers) 5939 foreach(handler; e.bubblingEventHandlers[eventName]) 5940 handler(e, this); 5941 5942 if(!defaultPrevented) 5943 if(eventName in e.defaultEventHandlers) 5944 e.defaultEventHandlers[eventName](e, this); 5945 } 5946 5947 /// this dispatches the element using the capture -> target -> bubble process 5948 void dispatch() { 5949 if(srcElement is null) 5950 return; 5951 5952 // first capture, then bubble 5953 5954 Element[] chain; 5955 Element curr = srcElement; 5956 while(curr) { 5957 auto l = curr; 5958 chain ~= l; 5959 curr = curr.parentNode; 5960 5961 } 5962 5963 isBubbling = false; 5964 5965 foreach(e; chain.retro()) { 5966 if(eventName in e.capturingEventHandlers) 5967 foreach(handler; e.capturingEventHandlers[eventName]) 5968 handler(e, this); 5969 5970 // the default on capture should really be to always do nothing 5971 5972 //if(!defaultPrevented) 5973 // if(eventName in e.defaultEventHandlers) 5974 // e.defaultEventHandlers[eventName](e.element, this); 5975 5976 if(propagationStopped) 5977 break; 5978 } 5979 5980 isBubbling = true; 5981 if(!propagationStopped) 5982 foreach(e; chain) { 5983 if(eventName in e.bubblingEventHandlers) 5984 foreach(handler; e.bubblingEventHandlers[eventName]) 5985 handler(e, this); 5986 5987 if(propagationStopped) 5988 break; 5989 } 5990 5991 if(!defaultPrevented) 5992 foreach(e; chain) { 5993 if(eventName in e.defaultEventHandlers) 5994 e.defaultEventHandlers[eventName](e, this); 5995 } 5996 } 5997 } 5998 5999 struct FormFieldOptions { 6000 // usable for any 6001 6002 /// this is a regex pattern used to validate the field 6003 string pattern; 6004 /// must the field be filled in? Even with a regex, it can be submitted blank if this is false. 6005 bool isRequired; 6006 /// this is displayed as an example to the user 6007 string placeholder; 6008 6009 // usable for numeric ones 6010 6011 6012 // convenience methods to quickly get some options 6013 @property static FormFieldOptions none() { 6014 FormFieldOptions f; 6015 return f; 6016 } 6017 6018 static FormFieldOptions required() { 6019 FormFieldOptions f; 6020 f.isRequired = true; 6021 return f; 6022 } 6023 6024 static FormFieldOptions regex(string pattern, bool required = false) { 6025 FormFieldOptions f; 6026 f.pattern = pattern; 6027 f.isRequired = required; 6028 return f; 6029 } 6030 6031 static FormFieldOptions fromElement(Element e) { 6032 FormFieldOptions f; 6033 if(e.hasAttribute("required")) 6034 f.isRequired = true; 6035 if(e.hasAttribute("pattern")) 6036 f.pattern = e.pattern; 6037 if(e.hasAttribute("placeholder")) 6038 f.placeholder = e.placeholder; 6039 return f; 6040 } 6041 6042 Element applyToElement(Element e) { 6043 if(this.isRequired) 6044 e.required = "required"; 6045 if(this.pattern.length) 6046 e.pattern = this.pattern; 6047 if(this.placeholder.length) 6048 e.placeholder = this.placeholder; 6049 return e; 6050 } 6051 } 6052 6053 // this needs to look just like a string, but can expand as needed 6054 version(no_dom_stream) 6055 alias string Utf8Stream; 6056 else 6057 class Utf8Stream { 6058 protected: 6059 // these two should be overridden in subclasses to actually do the stream magic 6060 string getMore() { 6061 if(getMoreHelper !is null) 6062 return getMoreHelper(); 6063 return null; 6064 } 6065 6066 bool hasMore() { 6067 if(hasMoreHelper !is null) 6068 return hasMoreHelper(); 6069 return false; 6070 } 6071 // the rest should be ok 6072 6073 public: 6074 this(string d) { 6075 this.data = d; 6076 } 6077 6078 this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) { 6079 this.getMoreHelper = getMoreHelper; 6080 this.hasMoreHelper = hasMoreHelper; 6081 6082 if(hasMore()) 6083 this.data ~= getMore(); 6084 6085 stdout.flush(); 6086 } 6087 6088 @property final size_t length() { 6089 // the parser checks length primarily directly before accessing the next character 6090 // so this is the place we'll hook to append more if possible and needed. 6091 if(lastIdx + 1 >= data.length && hasMore()) { 6092 data ~= getMore(); 6093 } 6094 return data.length; 6095 } 6096 6097 final char opIndex(size_t idx) { 6098 if(idx > lastIdx) 6099 lastIdx = idx; 6100 return data[idx]; 6101 } 6102 6103 final string opSlice(size_t start, size_t end) { 6104 if(end > lastIdx) 6105 lastIdx = end; 6106 return data[start .. end]; 6107 } 6108 6109 final size_t opDollar() { 6110 return length(); 6111 } 6112 6113 final Utf8Stream opBinary(string op : "~")(string s) { 6114 this.data ~= s; 6115 return this; 6116 } 6117 6118 final Utf8Stream opOpAssign(string op : "~")(string s) { 6119 this.data ~= s; 6120 return this; 6121 } 6122 6123 final Utf8Stream opAssign(string rhs) { 6124 this.data = rhs; 6125 return this; 6126 } 6127 private: 6128 string data; 6129 6130 size_t lastIdx; 6131 6132 bool delegate() hasMoreHelper; 6133 string delegate() getMoreHelper; 6134 6135 6136 /+ 6137 // used to maybe clear some old stuff 6138 // you might have to remove elements parsed with it too since they can hold slices into the 6139 // old stuff, preventing gc 6140 void dropFront(int bytes) { 6141 posAdjustment += bytes; 6142 data = data[bytes .. $]; 6143 } 6144 6145 int posAdjustment; 6146 +/ 6147 } 6148 6149 void fillForm(T)(Form form, T obj, string name) { 6150 import arsd.database; 6151 fillData((k, v) => form.setValue(k, v), obj, name); 6152 } 6153 6154 /* 6155 Copyright: Adam D. Ruppe, 2010 - 2013 6156 License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>. 6157 Authors: Adam D. Ruppe, with contributions by Nick Sabalausky and Trass3r among others 6158 6159 Copyright Adam D. Ruppe 2010-2013. 6160 Distributed under the Boost Software License, Version 1.0. 6161 (See accompanying file LICENSE_1_0.txt or copy at 6162 http://www.boost.org/LICENSE_1_0.txt) 6163 */ 6164