Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

3718 lines
163 KiB

  1. <?php
  2. /*
  3. Copyright 2007 Jeroen van der Meer <http://jero.net/>
  4. Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
  5. Permission is hereby granted, free of charge, to any person obtaining a
  6. copy of this software and associated documentation files (the
  7. "Software"), to deal in the Software without restriction, including
  8. without limitation the rights to use, copy, modify, merge, publish,
  9. distribute, sublicense, and/or sell copies of the Software, and to
  10. permit persons to whom the Software is furnished to do so, subject to
  11. the following conditions:
  12. The above copyright notice and this permission notice shall be included
  13. in all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  15. OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  18. CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  19. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  20. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. */
  22. // Tags for FIX ME!!!: (in order of priority)
  23. // XXX - should be fixed NAO!
  24. // XERROR - with regards to parse errors
  25. // XSCRIPT - with regards to scripting mode
  26. // XENCODING - with regards to encoding (for reparsing tests)
  27. class HTML5_TreeBuilder {
  28. public $stack = array();
  29. public $content_model;
  30. private $mode;
  31. private $original_mode;
  32. private $secondary_mode;
  33. private $dom;
  34. // Whether or not normal insertion of nodes should actually foster
  35. // parent (used in one case in spec)
  36. private $foster_parent = false;
  37. private $a_formatting = array();
  38. private $head_pointer = null;
  39. private $form_pointer = null;
  40. private $flag_frameset_ok = true;
  41. private $flag_force_quirks = false;
  42. private $ignored = false;
  43. private $quirks_mode = null;
  44. // this gets to 2 when we want to ignore the next LF character, and
  45. // is decremented at the beginning of each processed token (this way,
  46. // code can check for (bool)$ignore_lf_token, but it phases out
  47. // appropriately)
  48. private $ignore_lf_token = 0;
  49. private $fragment = false;
  50. private $root;
  51. private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
  52. private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
  53. private $special = array('address','area','article','aside','base','basefont','bgsound',
  54. 'blockquote','body','br','center','col','colgroup','command','dd','details','dialog','dir','div','dl',
  55. 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
  56. 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
  57. 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
  58. 'p','param','plaintext','pre','script','select','spacer','style',
  59. 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
  60. // Tree construction modes
  61. const INITIAL = 0;
  62. const BEFORE_HTML = 1;
  63. const BEFORE_HEAD = 2;
  64. const IN_HEAD = 3;
  65. const IN_HEAD_NOSCRIPT = 4;
  66. const AFTER_HEAD = 5;
  67. const IN_BODY = 6;
  68. const IN_CDATA_RCDATA = 7;
  69. const IN_TABLE = 8;
  70. const IN_CAPTION = 9;
  71. const IN_COLUMN_GROUP = 10;
  72. const IN_TABLE_BODY = 11;
  73. const IN_ROW = 12;
  74. const IN_CELL = 13;
  75. const IN_SELECT = 14;
  76. const IN_SELECT_IN_TABLE= 15;
  77. const IN_FOREIGN_CONTENT= 16;
  78. const AFTER_BODY = 17;
  79. const IN_FRAMESET = 18;
  80. const AFTER_FRAMESET = 19;
  81. const AFTER_AFTER_BODY = 20;
  82. const AFTER_AFTER_FRAMESET = 21;
  83. /**
  84. * Converts a magic number to a readable name. Use for debugging.
  85. */
  private function strConst($number) {
      // Debug helper: $number is the value of one of this class's constants
      // (typically an insertion mode). Returns the matching constant name,
      // or null when no constant carries that value.
      static $lookup;
      if (!$lookup) {
          $lookup = array();
          $r = new ReflectionClass('HTML5_TreeBuilder');
          foreach ($r->getConstants() as $name => $value) {
              // Only ints and strings can serve as keys of the reverse map.
              // NS_HTML is null, and handing it to array_flip() raises a
              // warning, so the map is built by hand and such values skipped.
              if (is_int($value) || is_string($value)) {
                  $lookup[$value] = $name;
              }
          }
      }
      // Unknown values yield null instead of an undefined-index notice.
      return isset($lookup[$number]) ? $lookup[$number] : null;
  }
  94. // The different types of elements.
  95. const SPECIAL = 100;
  96. const SCOPING = 101;
  97. const FORMATTING = 102;
  98. const PHRASING = 103;
  99. // Quirks modes in $quirks_mode
  100. const NO_QUIRKS = 200;
  101. const QUIRKS_MODE = 201;
  102. const LIMITED_QUIRKS_MODE = 202;
  103. // Marker to be placed in $a_formatting
  104. const MARKER = 300;
  105. // Namespaces for foreign content
  106. const NS_HTML = null; // to prevent DOM from requiring NS on everything
  107. const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
  108. const NS_SVG = 'http://www.w3.org/2000/svg';
  109. const NS_XLINK = 'http://www.w3.org/1999/xlink';
  110. const NS_XML = 'http://www.w3.org/XML/1998/namespace';
  111. const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
  public function __construct() {
      // Configure the target document on a local handle, then publish it on
      // the instance once it is fully set up.
      $document = new DOMDocument();
      $document->encoding = 'UTF-8';
      $document->preserveWhiteSpace = true;
      $document->substituteEntities = true;
      $document->strictErrorChecking = false;
      $this->dom = $document;

      // Tree construction always begins in the "initial" insertion mode.
      $this->mode = self::INITIAL;
  }
  120. // Process tag tokens
  121. public function emitToken($token, $mode = null) {
  122. // XXX: ignore parse errors... why are we emitting them, again?
  123. if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
  124. if ($mode === null) $mode = $this->mode;
  125. /*
  126. $backtrace = debug_backtrace();
  127. if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
  128. echo $this->strConst($mode);
  129. if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
  130. echo "\n ";
  131. token_dump($token);
  132. $this->printStack();
  133. $this->printActiveFormattingElements();
  134. if ($this->foster_parent) echo " -> this is a foster parent mode\n";
  135. */
  136. if ($this->ignore_lf_token) $this->ignore_lf_token--;
  137. $this->ignored = false;
  138. // indenting is a little wonky, this can be changed later on
  139. switch ($mode) {
  140. case self::INITIAL:
  141. /* A character token that is one of U+0009 CHARACTER TABULATION,
  142. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
  143. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  144. /* Ignore the token. */
  145. $this->ignored = true;
  146. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  147. if (
  148. $token['name'] !== 'html' || !empty($token['public']) ||
  149. !empty($token['system']) || $token !== 'about:legacy-compat'
  150. ) {
  151. /* If the DOCTYPE token's name is not a case-sensitive match
  152. * for the string "html", or if the token's public identifier
  153. * is not missing, or if the token's system identifier is
  154. * neither missing nor a case-sensitive match for the string
  155. * "about:legacy-compat", then there is a parse error (this
  156. * is the DOCTYPE parse error). */
  157. // DOCTYPE parse error
  158. }
  159. /* Append a DocumentType node to the Document node, with the name
  160. * attribute set to the name given in the DOCTYPE token, or the
  161. * empty string if the name was missing; the publicId attribute
  162. * set to the public identifier given in the DOCTYPE token, or
  163. * the empty string if the public identifier was missing; the
  164. * systemId attribute set to the system identifier given in the
  165. * DOCTYPE token, or the empty string if the system identifier
  166. * was missing; and the other attributes specific to
  167. * DocumentType objects set to null and empty lists as
  168. * appropriate. Associate the DocumentType node with the
  169. * Document object so that it is returned as the value of the
  170. * doctype attribute of the Document object. */
  171. if (!isset($token['public'])) $token['public'] = null;
  172. if (!isset($token['system'])) $token['system'] = null;
  173. // Yes this is hacky. I'm kind of annoyed that I can't appendChild
  174. // a doctype to DOMDocument. Maybe I haven't chanted the right
  175. // syllables.
  176. $impl = new DOMImplementation();
  177. // This call can fail for particularly pathological cases (namely,
  178. // the qualifiedName parameter ($token['name']) could be missing.
  179. if ($token['name']) {
  180. $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
  181. $this->dom->appendChild($doctype);
  182. } else {
  183. // It looks like libxml's not actually *able* to express this case.
  184. // So... don't.
  185. $this->dom->emptyDoctype = true;
  186. }
  187. $public = is_null($token['public']) ? false : strtolower($token['public']);
  188. $system = is_null($token['system']) ? false : strtolower($token['system']);
  189. $publicStartsWithForQuirks = array(
  190. "+//silmaril//dtd html pro v0r11 19970101//",
  191. "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
  192. "-//as//dtd html 3.0 aswedit + extensions//",
  193. "-//ietf//dtd html 2.0 level 1//",
  194. "-//ietf//dtd html 2.0 level 2//",
  195. "-//ietf//dtd html 2.0 strict level 1//",
  196. "-//ietf//dtd html 2.0 strict level 2//",
  197. "-//ietf//dtd html 2.0 strict//",
  198. "-//ietf//dtd html 2.0//",
  199. "-//ietf//dtd html 2.1e//",
  200. "-//ietf//dtd html 3.0//",
  201. "-//ietf//dtd html 3.2 final//",
  202. "-//ietf//dtd html 3.2//",
  203. "-//ietf//dtd html 3//",
  204. "-//ietf//dtd html level 0//",
  205. "-//ietf//dtd html level 1//",
  206. "-//ietf//dtd html level 2//",
  207. "-//ietf//dtd html level 3//",
  208. "-//ietf//dtd html strict level 0//",
  209. "-//ietf//dtd html strict level 1//",
  210. "-//ietf//dtd html strict level 2//",
  211. "-//ietf//dtd html strict level 3//",
  212. "-//ietf//dtd html strict//",
  213. "-//ietf//dtd html//",
  214. "-//metrius//dtd metrius presentational//",
  215. "-//microsoft//dtd internet explorer 2.0 html strict//",
  216. "-//microsoft//dtd internet explorer 2.0 html//",
  217. "-//microsoft//dtd internet explorer 2.0 tables//",
  218. "-//microsoft//dtd internet explorer 3.0 html strict//",
  219. "-//microsoft//dtd internet explorer 3.0 html//",
  220. "-//microsoft//dtd internet explorer 3.0 tables//",
  221. "-//netscape comm. corp.//dtd html//",
  222. "-//netscape comm. corp.//dtd strict html//",
  223. "-//o'reilly and associates//dtd html 2.0//",
  224. "-//o'reilly and associates//dtd html extended 1.0//",
  225. "-//o'reilly and associates//dtd html extended relaxed 1.0//",
  226. "-//spyglass//dtd html 2.0 extended//",
  227. "-//sq//dtd html 2.0 hotmetal + extensions//",
  228. "-//sun microsystems corp.//dtd hotjava html//",
  229. "-//sun microsystems corp.//dtd hotjava strict html//",
  230. "-//w3c//dtd html 3 1995-03-24//",
  231. "-//w3c//dtd html 3.2 draft//",
  232. "-//w3c//dtd html 3.2 final//",
  233. "-//w3c//dtd html 3.2//",
  234. "-//w3c//dtd html 3.2s draft//",
  235. "-//w3c//dtd html 4.0 frameset//",
  236. "-//w3c//dtd html 4.0 transitional//",
  237. "-//w3c//dtd html experimental 19960712//",
  238. "-//w3c//dtd html experimental 970421//",
  239. "-//w3c//dtd w3 html//",
  240. "-//w3o//dtd w3 html 3.0//",
  241. "-//webtechs//dtd mozilla html 2.0//",
  242. "-//webtechs//dtd mozilla html//",
  243. );
  244. $publicSetToForQuirks = array(
  245. "-//w3o//dtd w3 html strict 3.0//",
  246. "-/w3c/dtd html 4.0 transitional/en",
  247. "html",
  248. );
  249. $publicStartsWithAndSystemForQuirks = array(
  250. "-//w3c//dtd html 4.01 frameset//",
  251. "-//w3c//dtd html 4.01 transitional//",
  252. );
  253. $publicStartsWithForLimitedQuirks = array(
  254. "-//w3c//dtd xhtml 1.0 frameset//",
  255. "-//w3c//dtd xhtml 1.0 transitional//",
  256. );
  257. $publicStartsWithAndSystemForLimitedQuirks = array(
  258. "-//w3c//dtd html 4.01 frameset//",
  259. "-//w3c//dtd html 4.01 transitional//",
  260. );
  261. // first, do easy checks
  262. if (
  263. !empty($token['force-quirks']) ||
  264. strtolower($token['name']) !== 'html'
  265. ) {
  266. $this->quirks_mode = self::QUIRKS_MODE;
  267. } else {
  268. do {
  269. if ($system) {
  270. foreach ($publicStartsWithAndSystemForQuirks as $x) {
  271. if (strncmp($public, $x, strlen($x)) === 0) {
  272. $this->quirks_mode = self::QUIRKS_MODE;
  273. break;
  274. }
  275. }
  276. if (!is_null($this->quirks_mode)) break;
  277. foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
  278. if (strncmp($public, $x, strlen($x)) === 0) {
  279. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  280. break;
  281. }
  282. }
  283. if (!is_null($this->quirks_mode)) break;
  284. }
  285. foreach ($publicSetToForQuirks as $x) {
  286. if ($public === $x) {
  287. $this->quirks_mode = self::QUIRKS_MODE;
  288. break;
  289. }
  290. }
  291. if (!is_null($this->quirks_mode)) break;
  292. foreach ($publicStartsWithForLimitedQuirks as $x) {
  293. if (strncmp($public, $x, strlen($x)) === 0) {
  294. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  295. }
  296. }
  297. if (!is_null($this->quirks_mode)) break;
  298. if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
  299. $this->quirks_mode = self::QUIRKS_MODE;
  300. break;
  301. }
  302. foreach ($publicStartsWithForQuirks as $x) {
  303. if (strncmp($public, $x, strlen($x)) === 0) {
  304. $this->quirks_mode = self::QUIRKS_MODE;
  305. break;
  306. }
  307. }
  308. if (is_null($this->quirks_mode)) {
  309. $this->quirks_mode = self::NO_QUIRKS;
  310. }
  311. } while (false);
  312. }
  313. $this->mode = self::BEFORE_HTML;
  314. } else {
  315. // parse error
  316. /* Switch the insertion mode to "before html", then reprocess the
  317. * current token. */
  318. $this->mode = self::BEFORE_HTML;
  319. $this->quirks_mode = self::QUIRKS_MODE;
  320. $this->emitToken($token);
  321. }
  322. break;
  323. case self::BEFORE_HTML:
  324. /* A DOCTYPE token */
  325. if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  326. // Parse error. Ignore the token.
  327. $this->ignored = true;
  328. /* A comment token */
  329. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  330. /* Append a Comment node to the Document object with the data
  331. attribute set to the data given in the comment token. */
  332. $comment = $this->dom->createComment($token['data']);
  333. $this->dom->appendChild($comment);
  334. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  335. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  336. or U+0020 SPACE */
  337. } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  338. /* Ignore the token. */
  339. $this->ignored = true;
  340. /* A start tag whose tag name is "html" */
  341. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
  342. /* Create an element for the token in the HTML namespace. Append it
  343. * to the Document object. Put this element in the stack of open
  344. * elements. */
  345. $html = $this->insertElement($token, false);
  346. $this->dom->appendChild($html);
  347. $this->stack[] = $html;
  348. $this->mode = self::BEFORE_HEAD;
  349. } else {
  350. /* Create an html element. Append it to the Document object. Put
  351. * this element in the stack of open elements. */
  352. $html = $this->dom->createElementNS(self::NS_HTML, 'html');
  353. $this->dom->appendChild($html);
  354. $this->stack[] = $html;
  355. /* Switch the insertion mode to "before head", then reprocess the
  356. * current token. */
  357. $this->mode = self::BEFORE_HEAD;
  358. $this->emitToken($token);
  359. }
  360. break;
  361. case self::BEFORE_HEAD:
  362. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  363. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  364. or U+0020 SPACE */
  365. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  366. /* Ignore the token. */
  367. $this->ignored = true;
  368. /* A comment token */
  369. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  370. /* Append a Comment node to the current node with the data attribute
  371. set to the data given in the comment token. */
  372. $this->insertComment($token['data']);
  373. /* A DOCTYPE token */
  374. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  375. /* Parse error. Ignore the token */
  376. $this->ignored = true;
  377. // parse error
  378. /* A start tag token with the tag name "html" */
  379. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  380. /* Process the token using the rules for the "in body"
  381. * insertion mode. */
  382. $this->processWithRulesFor($token, self::IN_BODY);
  383. /* A start tag token with the tag name "head" */
  384. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
  385. /* Insert an HTML element for the token. */
  386. $element = $this->insertElement($token);
  387. /* Set the head element pointer to this new element node. */
  388. $this->head_pointer = $element;
  389. /* Change the insertion mode to "in head". */
  390. $this->mode = self::IN_HEAD;
  391. /* An end tag whose tag name is one of: "head", "body", "html", "br" */
  392. } elseif(
  393. $token['type'] === HTML5_Tokenizer::ENDTAG && (
  394. $token['name'] === 'head' || $token['name'] === 'body' ||
  395. $token['name'] === 'html' || $token['name'] === 'br'
  396. )) {
  397. /* Act as if a start tag token with the tag name "head" and no
  398. * attributes had been seen, then reprocess the current token. */
  399. $this->emitToken(array(
  400. 'name' => 'head',
  401. 'type' => HTML5_Tokenizer::STARTTAG,
  402. 'attr' => array()
  403. ));
  404. $this->emitToken($token);
  405. /* Any other end tag */
  406. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
  407. /* Parse error. Ignore the token. */
  408. $this->ignored = true;
  409. } else {
  410. /* Act as if a start tag token with the tag name "head" and no
  411. * attributes had been seen, then reprocess the current token.
  412. * Note: This will result in an empty head element being
  413. * generated, with the current token being reprocessed in the
  414. * "after head" insertion mode. */
  415. $this->emitToken(array(
  416. 'name' => 'head',
  417. 'type' => HTML5_Tokenizer::STARTTAG,
  418. 'attr' => array()
  419. ));
  420. $this->emitToken($token);
  421. }
  422. break;
  423. case self::IN_HEAD:
  424. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  425. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  426. or U+0020 SPACE. */
  427. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  428. /* Insert the character into the current node. */
  429. $this->insertText($token['data']);
  430. /* A comment token */
  431. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  432. /* Append a Comment node to the current node with the data attribute
  433. set to the data given in the comment token. */
  434. $this->insertComment($token['data']);
  435. /* A DOCTYPE token */
  436. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  437. /* Parse error. Ignore the token. */
  438. $this->ignored = true;
  439. // parse error
  440. /* A start tag whose tag name is "html" */
  441. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  442. $token['name'] === 'html') {
  443. $this->processWithRulesFor($token, self::IN_BODY);
  444. /* A start tag whose tag name is one of: "base", "command", "link" */
  445. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  446. ($token['name'] === 'base' || $token['name'] === 'command' ||
  447. $token['name'] === 'link')) {
  448. /* Insert an HTML element for the token. Immediately pop the
  449. * current node off the stack of open elements. */
  450. $this->insertElement($token);
  451. array_pop($this->stack);
  452. // YYY: Acknowledge the token's self-closing flag, if it is set.
  453. /* A start tag whose tag name is "meta" */
  454. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
  455. /* Insert an HTML element for the token. Immediately pop the
  456. * current node off the stack of open elements. */
  457. $this->insertElement($token);
  458. array_pop($this->stack);
  459. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  460. // XENCODING: If the element has a charset attribute, and its value is a
  461. // supported encoding, and the confidence is currently tentative,
  462. // then change the encoding to the encoding given by the value of
  463. // the charset attribute.
  464. //
  465. // Otherwise, if the element has a content attribute, and applying
  466. // the algorithm for extracting an encoding from a Content-Type to
  467. // its value returns a supported encoding encoding, and the
  468. // confidence is currently tentative, then change the encoding to
  469. // the encoding encoding.
  470. /* A start tag with the tag name "title" */
  471. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
  472. $this->insertRCDATAElement($token);
  473. /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
  474. * A start tag whose tag name is one of: "noframes", "style" */
  475. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  476. ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
  477. // XSCRIPT: Scripting flag not respected
  478. $this->insertCDATAElement($token);
  479. // XSCRIPT: Scripting flag disable not implemented
  480. /* A start tag with the tag name "script" */
  481. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
  482. /* 1. Create an element for the token in the HTML namespace. */
  483. $node = $this->insertElement($token, false);
  484. /* 2. Mark the element as being "parser-inserted" */
  485. // Uhhh... XSCRIPT
  486. /* 3. If the parser was originally created for the HTML
  487. * fragment parsing algorithm, then mark the script element as
  488. * "already executed". (fragment case) */
  489. // ditto... XSCRIPT
  490. /* 4. Append the new element to the current node and push it onto
  491. * the stack of open elements. */
  492. end($this->stack)->appendChild($node);
  493. $this->stack[] = $node;
  494. // I guess we could squash these together
  495. /* 6. Let the original insertion mode be the current insertion mode. */
  496. $this->original_mode = $this->mode;
  497. /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
  498. $this->mode = self::IN_CDATA_RCDATA;
  499. /* 5. Switch the tokeniser's content model flag to the CDATA state. */
  500. $this->content_model = HTML5_Tokenizer::CDATA;
  501. /* An end tag with the tag name "head" */
  502. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
  503. /* Pop the current node (which will be the head element) off the stack of open elements. */
  504. array_pop($this->stack);
  505. /* Change the insertion mode to "after head". */
  506. $this->mode = self::AFTER_HEAD;
  507. // Slight logic inversion here to minimize duplication
  508. /* A start tag with the tag name "head". */
  509. /* An end tag whose tag name is not one of: "body", "html", "br" */
  510. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  511. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
  512. $token['name'] !== 'body' && $token['name'] !== 'br')) {
  513. // Parse error. Ignore the token.
  514. $this->ignored = true;
  515. /* Anything else */
  516. } else {
  517. /* Act as if an end tag token with the tag name "head" had been
  518. * seen, and reprocess the current token. */
  519. $this->emitToken(array(
  520. 'name' => 'head',
  521. 'type' => HTML5_Tokenizer::ENDTAG
  522. ));
  523. /* Then, reprocess the current token. */
  524. $this->emitToken($token);
  525. }
  526. break;
  527. case self::IN_HEAD_NOSCRIPT:
  528. if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  529. // parse error
  530. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  531. $this->processWithRulesFor($token, self::IN_BODY);
  532. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
  533. /* Pop the current node (which will be a noscript element) from the
  534. * stack of open elements; the new current node will be a head
  535. * element. */
  536. array_pop($this->stack);
  537. $this->mode = self::IN_HEAD;
  538. } elseif (
  539. ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
  540. ($token['type'] === HTML5_Tokenizer::COMMENT) ||
  541. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  542. $token['name'] === 'link' || $token['name'] === 'meta' ||
  543. $token['name'] === 'noframes' || $token['name'] === 'style'))) {
  544. $this->processWithRulesFor($token, self::IN_HEAD);
  545. // inverted logic
  546. } elseif (
  547. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  548. $token['name'] === 'head' || $token['name'] === 'noscript')) ||
  549. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  550. $token['name'] !== 'br')) {
  551. // parse error
  552. } else {
  553. // parse error
  554. $this->emitToken(array(
  555. 'type' => HTML5_Tokenizer::ENDTAG,
  556. 'name' => 'noscript',
  557. ));
  558. $this->emitToken($token);
  559. }
  560. break;
  561. case self::AFTER_HEAD:
  562. /* Handle the token as follows: */
  563. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  564. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  565. or U+0020 SPACE */
  566. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  567. /* Append the character to the current node. */
  568. $this->insertText($token['data']);
  569. /* A comment token */
  570. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  571. /* Append a Comment node to the current node with the data attribute
  572. set to the data given in the comment token. */
  573. $this->insertComment($token['data']);
  574. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  575. // parse error
  576. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  577. $this->processWithRulesFor($token, self::IN_BODY);
  578. /* A start tag token with the tag name "body" */
  579. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
  580. $this->insertElement($token);
  581. /* Set the frameset-ok flag to "not ok". */
  582. $this->flag_frameset_ok = false;
  583. /* Change the insertion mode to "in body". */
  584. $this->mode = self::IN_BODY;
  585. /* A start tag token with the tag name "frameset" */
  586. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
  587. /* Insert a frameset element for the token. */
  588. $this->insertElement($token);
  589. /* Change the insertion mode to "in frameset". */
  590. $this->mode = self::IN_FRAMESET;
  591. /* A start tag token whose tag name is one of: "base", "link", "meta",
  592. "script", "style", "title" */
  593. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  594. array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
  595. // parse error
  596. /* Push the node pointed to by the head element pointer onto the
  597. * stack of open elements. */
  598. $this->stack[] = $this->head_pointer;
  599. $this->processWithRulesFor($token, self::IN_HEAD);
  600. array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
  601. // inversion of specification
  602. } elseif(
  603. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  604. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  605. $token['name'] !== 'body' && $token['name'] !== 'html' &&
  606. $token['name'] !== 'br')) {
  607. // parse error
  608. /* Anything else */
  609. } else {
  610. $this->emitToken(array(
  611. 'name' => 'body',
  612. 'type' => HTML5_Tokenizer::STARTTAG,
  613. 'attr' => array()
  614. ));
  615. $this->flag_frameset_ok = true;
  616. $this->emitToken($token);
  617. }
  618. break;
  619. case self::IN_BODY:
  620. /* Handle the token as follows: */
  621. switch($token['type']) {
  622. /* A character token */
  623. case HTML5_Tokenizer::CHARACTER:
  624. case HTML5_Tokenizer::SPACECHARACTER:
  625. /* Reconstruct the active formatting elements, if any. */
  626. $this->reconstructActiveFormattingElements();
  627. /* Append the token's character to the current node. */
  628. $this->insertText($token['data']);
  629. /* If the token is not one of U+0009 CHARACTER TABULATION,
  630. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
  631. * SPACE, then set the frameset-ok flag to "not ok". */
  632. // i.e., if any of the characters is not whitespace
  633. if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
  634. $this->flag_frameset_ok = false;
  635. }
  636. break;
  637. /* A comment token */
  638. case HTML5_Tokenizer::COMMENT:
  639. /* Append a Comment node to the current node with the data
  640. attribute set to the data given in the comment token. */
  641. $this->insertComment($token['data']);
  642. break;
  643. case HTML5_Tokenizer::DOCTYPE:
  644. // parse error
  645. break;
  646. case HTML5_Tokenizer::STARTTAG:
  647. switch($token['name']) {
  648. case 'html':
  649. // parse error
  650. /* For each attribute on the token, check to see if the
  651. * attribute is already present on the top element of the
  652. * stack of open elements. If it is not, add the attribute
  653. * and its corresponding value to that element. */
  654. foreach($token['attr'] as $attr) {
  655. if(!$this->stack[0]->hasAttribute($attr['name'])) {
  656. $this->stack[0]->setAttribute($attr['name'], $attr['value']);
  657. }
  658. }
  659. break;
  660. case 'base': case 'command': case 'link': case 'meta': case 'noframes':
  661. case 'script': case 'style': case 'title':
  662. /* Process the token as if the insertion mode had been "in
  663. head". */
  664. $this->processWithRulesFor($token, self::IN_HEAD);
  665. break;
  666. /* A start tag token with the tag name "body" */
  667. case 'body':
  668. /* Parse error. If the second element on the stack of open
  669. elements is not a body element, or, if the stack of open
  670. elements has only one node on it, then ignore the token.
  671. (fragment case) */
  672. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  673. $this->ignored = true;
  674. // Ignore
  675. /* Otherwise, for each attribute on the token, check to see
  676. if the attribute is already present on the body element (the
  677. second element) on the stack of open elements. If it is not,
  678. add the attribute and its corresponding value to that
  679. element. */
  680. } else {
  681. foreach($token['attr'] as $attr) {
  682. if(!$this->stack[1]->hasAttribute($attr['name'])) {
  683. $this->stack[1]->setAttribute($attr['name'], $attr['value']);
  684. }
  685. }
  686. }
  687. break;
  688. case 'frameset':
  689. // parse error
  690. /* If the second element on the stack of open elements is
  691. * not a body element, or, if the stack of open elements
  692. * has only one node on it, then ignore the token.
  693. * (fragment case) */
  694. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  695. $this->ignored = true;
  696. // Ignore
  697. } elseif (!$this->flag_frameset_ok) {
  698. $this->ignored = true;
  699. // Ignore
  700. } else {
  701. /* 1. Remove the second element on the stack of open
  702. * elements from its parent node, if it has one. */
  703. if($this->stack[1]->parentNode) {
  704. $this->stack[1]->parentNode->removeChild($this->stack[1]);
  705. }
  706. /* 2. Pop all the nodes from the bottom of the stack of
  707. * open elements, from the current node up to the root
  708. * html element. */
  709. array_splice($this->stack, 1);
  710. $this->insertElement($token);
  711. $this->mode = self::IN_FRAMESET;
  712. }
  713. break;
  714. // in spec, there is a diversion here
  715. case 'address': case 'article': case 'aside': case 'blockquote':
  716. case 'center': case 'datagrid': case 'details': case 'dialog': case 'dir':
  717. case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
  718. case 'header': case 'hgroup': case 'menu': case 'nav':
  719. case 'ol': case 'p': case 'section': case 'ul':
  720. /* If the stack of open elements has a p element in scope,
  721. then act as if an end tag with the tag name p had been
  722. seen. */
  723. if($this->elementInScope('p')) {
  724. $this->emitToken(array(
  725. 'name' => 'p',
  726. 'type' => HTML5_Tokenizer::ENDTAG
  727. ));
  728. }
  729. /* Insert an HTML element for the token. */
  730. $this->insertElement($token);
  731. break;
  732. /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
  733. "h5", "h6" */
  734. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  735. /* If the stack of open elements has a p element in scope,
  736. then act as if an end tag with the tag name p had been seen. */
  737. if($this->elementInScope('p')) {
  738. $this->emitToken(array(
  739. 'name' => 'p',
  740. 'type' => HTML5_Tokenizer::ENDTAG
  741. ));
  742. }
  743. /* If the current node is an element whose tag name is one
  744. * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
  745. * parse error; pop the current node off the stack of open
  746. * elements. */
  747. $peek = array_pop($this->stack);
  748. if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
  749. // parse error
  750. } else {
  751. $this->stack[] = $peek;
  752. }
  753. /* Insert an HTML element for the token. */
  754. $this->insertElement($token);
  755. break;
  756. case 'pre': case 'listing':
  757. /* If the stack of open elements has a p element in scope,
  758. then act as if an end tag with the tag name p had been seen. */
  759. if($this->elementInScope('p')) {
  760. $this->emitToken(array(
  761. 'name' => 'p',
  762. 'type' => HTML5_Tokenizer::ENDTAG
  763. ));
  764. }
  765. $this->insertElement($token);
  766. /* If the next token is a U+000A LINE FEED (LF) character
  767. * token, then ignore that token and move on to the next
  768. * one. (Newlines at the start of pre blocks are ignored as
  769. * an authoring convenience.) */
  770. $this->ignore_lf_token = 2;
  771. $this->flag_frameset_ok = false;
  772. break;
  773. /* A start tag whose tag name is "form" */
  774. case 'form':
  775. /* If the form element pointer is not null, ignore the
  776. token with a parse error. */
  777. if($this->form_pointer !== null) {
  778. $this->ignored = true;
  779. // Ignore.
  780. /* Otherwise: */
  781. } else {
  782. /* If the stack of open elements has a p element in
  783. scope, then act as if an end tag with the tag name p
  784. had been seen. */
  785. if($this->elementInScope('p')) {
  786. $this->emitToken(array(
  787. 'name' => 'p',
  788. 'type' => HTML5_Tokenizer::ENDTAG
  789. ));
  790. }
  791. /* Insert an HTML element for the token, and set the
  792. form element pointer to point to the element created. */
  793. $element = $this->insertElement($token);
  794. $this->form_pointer = $element;
  795. }
  796. break;
  797. // condensed specification
  798. case 'li': case 'dd': case 'dt':
  799. /* 1. Set the frameset-ok flag to "not ok". */
  800. $this->flag_frameset_ok = false;
  801. $stack_length = count($this->stack) - 1;
  802. for($n = $stack_length; 0 <= $n; $n--) {
  803. /* 2. Initialise node to be the current node (the
  804. bottommost node of the stack). */
  805. $stop = false;
  806. $node = $this->stack[$n];
  807. $cat = $this->getElementCategory($node);
  808. // for case 'li':
  809. /* 3. If node is an li element, then act as if an end
  810. * tag with the tag name "li" had been seen, then jump
  811. * to the last step. */
  812. // for case 'dd': case 'dt':
  813. /* If node is a dd or dt element, then act as if an end
  814. * tag with the same tag name as node had been seen, then
  815. * jump to the last step. */
  816. if(($token['name'] === 'li' && $node->tagName === 'li') ||
  817. ($token['name'] !== 'li' && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { // limited conditional
  818. $this->emitToken(array(
  819. 'type' => HTML5_Tokenizer::ENDTAG,
  820. 'name' => $node->tagName,
  821. ));
  822. break;
  823. }
  824. /* 4. If node is not in the formatting category, and is
  825. not in the phrasing category, and is not an address,
  826. div or p element, then stop this algorithm. */
  827. if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
  828. $node->tagName !== 'address' && $node->tagName !== 'div' &&
  829. $node->tagName !== 'p') {
  830. break;
  831. }
  832. /* 5. Otherwise, set node to the previous entry in the
  833. * stack of open elements and return to step 2. */
  834. }
  835. /* 6. This is the last step. */
  836. /* If the stack of open elements has a p element in scope,
  837. then act as if an end tag with the tag name p had been
  838. seen. */
  839. if($this->elementInScope('p')) {
  840. $this->emitToken(array(
  841. 'name' => 'p',
  842. 'type' => HTML5_Tokenizer::ENDTAG
  843. ));
  844. }
  845. /* Finally, insert an HTML element with the same tag
  846. name as the token's. */
  847. $this->insertElement($token);
  848. break;
  849. /* A start tag token whose tag name is "plaintext" */
  850. case 'plaintext':
  851. /* If the stack of open elements has a p element in scope,
  852. then act as if an end tag with the tag name p had been
  853. seen. */
  854. if($this->elementInScope('p')) {
  855. $this->emitToken(array(
  856. 'name' => 'p',
  857. 'type' => HTML5_Tokenizer::ENDTAG
  858. ));
  859. }
  860. /* Insert an HTML element for the token. */
  861. $this->insertElement($token);
  862. $this->content_model = HTML5_Tokenizer::PLAINTEXT;
  863. break;
  864. // more diversions
  865. /* A start tag whose tag name is "a" */
  866. case 'a':
  867. /* If the list of active formatting elements contains
  868. an element whose tag name is "a" between the end of the
  869. list and the last marker on the list (or the start of
  870. the list if there is no marker on the list), then this
  871. is a parse error; act as if an end tag with the tag name
  872. "a" had been seen, then remove that element from the list
  873. of active formatting elements and the stack of open
  874. elements if the end tag didn't already remove it (it
  875. might not have if the element is not in table scope). */
  876. $leng = count($this->a_formatting);
  877. for($n = $leng - 1; $n >= 0; $n--) {
  878. if($this->a_formatting[$n] === self::MARKER) {
  879. break;
  880. } elseif($this->a_formatting[$n]->tagName === 'a') {
  881. $a = $this->a_formatting[$n];
  882. $this->emitToken(array(
  883. 'name' => 'a',
  884. 'type' => HTML5_Tokenizer::ENDTAG
  885. ));
  886. if (in_array($a, $this->a_formatting)) {
  887. $a_i = array_search($a, $this->a_formatting, true);
  888. if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
  889. }
  890. if (in_array($a, $this->stack)) {
  891. $a_i = array_search($a, $this->stack, true);
  892. if ($a_i !== false) array_splice($this->stack, $a_i, 1);
  893. }
  894. break;
  895. }
  896. }
  897. /* Reconstruct the active formatting elements, if any. */
  898. $this->reconstructActiveFormattingElements();
  899. /* Insert an HTML element for the token. */
  900. $el = $this->insertElement($token);
  901. /* Add that element to the list of active formatting
  902. elements. */
  903. $this->a_formatting[] = $el;
  904. break;
  905. case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
  906. case 's': case 'small': case 'strike':
  907. case 'strong': case 'tt': case 'u':
  908. /* Reconstruct the active formatting elements, if any. */
  909. $this->reconstructActiveFormattingElements();
  910. /* Insert an HTML element for the token. */
  911. $el = $this->insertElement($token);
  912. /* Add that element to the list of active formatting
  913. elements. */
  914. $this->a_formatting[] = $el;
  915. break;
  916. case 'nobr':
  917. /* Reconstruct the active formatting elements, if any. */
  918. $this->reconstructActiveFormattingElements();
  919. /* If the stack of open elements has a nobr element in
  920. * scope, then this is a parse error; act as if an end tag
  921. * with the tag name "nobr" had been seen, then once again
  922. * reconstruct the active formatting elements, if any. */
  923. if ($this->elementInScope('nobr')) {
  924. $this->emitToken(array(
  925. 'name' => 'nobr',
  926. 'type' => HTML5_Tokenizer::ENDTAG,
  927. ));
  928. $this->reconstructActiveFormattingElements();
  929. }
  930. /* Insert an HTML element for the token. */
  931. $el = $this->insertElement($token);
  932. /* Add that element to the list of active formatting
  933. elements. */
  934. $this->a_formatting[] = $el;
  935. break;
  936. // another diversion
  937. /* A start tag token whose tag name is "button" */
  938. case 'button':
  939. /* If the stack of open elements has a button element in scope,
  940. then this is a parse error; act as if an end tag with the tag
  941. name "button" had been seen, then reprocess the token. (We don't
  942. do that. Unnecessary.) (I hope you're right! -- ezyang) */
  943. if($this->elementInScope('button')) {
  944. $this->emitToken(array(
  945. 'name' => 'button',
  946. 'type' => HTML5_Tokenizer::ENDTAG
  947. ));
  948. }
  949. /* Reconstruct the active formatting elements, if any. */
  950. $this->reconstructActiveFormattingElements();
  951. /* Insert an HTML element for the token. */
  952. $this->insertElement($token);
  953. /* Insert a marker at the end of the list of active
  954. formatting elements. */
  955. $this->a_formatting[] = self::MARKER;
  956. $this->flag_frameset_ok = false;
  957. break;
  958. case 'applet': case 'marquee': case 'object':
  959. /* Reconstruct the active formatting elements, if any. */
  960. $this->reconstructActiveFormattingElements();
  961. /* Insert an HTML element for the token. */
  962. $this->insertElement($token);
  963. /* Insert a marker at the end of the list of active
  964. formatting elements. */
  965. $this->a_formatting[] = self::MARKER;
  966. $this->flag_frameset_ok = false;
  967. break;
  968. // spec diversion
  969. /* A start tag whose tag name is "table" */
  970. case 'table':
  971. /* If the stack of open elements has a p element in scope,
  972. then act as if an end tag with the tag name p had been seen. */
  973. if($this->quirks_mode !== self::QUIRKS_MODE &&
  974. $this->elementInScope('p')) {
  975. $this->emitToken(array(
  976. 'name' => 'p',
  977. 'type' => HTML5_Tokenizer::ENDTAG
  978. ));
  979. }
  980. /* Insert an HTML element for the token. */
  981. $this->insertElement($token);
  982. $this->flag_frameset_ok = false;
  983. /* Change the insertion mode to "in table". */
  984. $this->mode = self::IN_TABLE;
  985. break;
  986. /* A start tag whose tag name is one of: "area", "basefont",
  987. "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
  988. case 'area': case 'basefont': case 'bgsound': case 'br':
  989. case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
  990. case 'wbr':
  991. /* Reconstruct the active formatting elements, if any. */
  992. $this->reconstructActiveFormattingElements();
  993. /* Insert an HTML element for the token. */
  994. $this->insertElement($token);
  995. /* Immediately pop the current node off the stack of open elements. */
  996. array_pop($this->stack);
  997. // YYY: Acknowledge the token's self-closing flag, if it is set.
  998. $this->flag_frameset_ok = false;
  999. break;
  1000. case 'param': case 'source':
  1001. /* Insert an HTML element for the token. */
  1002. $this->insertElement($token);
  1003. /* Immediately pop the current node off the stack of open elements. */
  1004. array_pop($this->stack);
  1005. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1006. break;
  1007. /* A start tag whose tag name is "hr" */
  1008. case 'hr':
  1009. /* If the stack of open elements has a p element in scope,
  1010. then act as if an end tag with the tag name p had been seen. */
  1011. if($this->elementInScope('p')) {
  1012. $this->emitToken(array(
  1013. 'name' => 'p',
  1014. 'type' => HTML5_Tokenizer::ENDTAG
  1015. ));
  1016. }
  1017. /* Insert an HTML element for the token. */
  1018. $this->insertElement($token);
  1019. /* Immediately pop the current node off the stack of open elements. */
  1020. array_pop($this->stack);
  1021. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1022. $this->flag_frameset_ok = false;
  1023. break;
  1024. /* A start tag whose tag name is "image" */
  1025. case 'image':
  1026. /* Parse error. Change the token's tag name to "img" and
  1027. reprocess it. (Don't ask.) */
  1028. $token['name'] = 'img';
  1029. $this->emitToken($token);
  1030. break;
  1031. /* A start tag whose tag name is "isindex" */
  1032. case 'isindex':
  1033. /* Parse error. */
  1034. /* If the form element pointer is not null,
  1035. then ignore the token. */
  1036. if($this->form_pointer === null) {
  1037. /* Act as if a start tag token with the tag name "form" had
  1038. been seen. */
  1039. /* If the token has an attribute called "action", set
  1040. * the action attribute on the resulting form
  1041. * element to the value of the "action" attribute of
  1042. * the token. */
  1043. $attr = array();
  1044. $action = $this->getAttr($token, 'action');
  1045. if ($action !== false) {
  1046. $attr[] = array('name' => 'action', 'value' => $action);
  1047. }
  1048. $this->emitToken(array(
  1049. 'name' => 'form',
  1050. 'type' => HTML5_Tokenizer::STARTTAG,
  1051. 'attr' => $attr
  1052. ));
  1053. /* Act as if a start tag token with the tag name "hr" had
  1054. been seen. */
  1055. $this->emitToken(array(
  1056. 'name' => 'hr',
  1057. 'type' => HTML5_Tokenizer::STARTTAG,
  1058. 'attr' => array()
  1059. ));
  1060. /* Act as if a start tag token with the tag name "p" had
  1061. been seen. */
  1062. $this->emitToken(array(
  1063. 'name' => 'p',
  1064. 'type' => HTML5_Tokenizer::STARTTAG,
  1065. 'attr' => array()
  1066. ));
  1067. /* Act as if a start tag token with the tag name "label"
  1068. had been seen. */
  1069. $this->emitToken(array(
  1070. 'name' => 'label',
  1071. 'type' => HTML5_Tokenizer::STARTTAG,
  1072. 'attr' => array()
  1073. ));
  1074. /* Act as if a stream of character tokens had been seen. */
  1075. $prompt = $this->getAttr($token, 'prompt');
  1076. if ($prompt === false) {
  1077. $prompt = 'This is a searchable index. '.
  1078. 'Insert your search keywords here: ';
  1079. }
  1080. $this->emitToken(array(
  1081. 'data' => $prompt,
  1082. 'type' => HTML5_Tokenizer::CHARACTER,
  1083. ));
  1084. /* Act as if a start tag token with the tag name "input"
  1085. had been seen, with all the attributes from the "isindex"
  1086. token, except with the "name" attribute set to the value
  1087. "isindex" (ignoring any explicit "name" attribute). */
  1088. $attr = array();
  1089. foreach ($token['attr'] as $keypair) {
  1090. if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
  1091. $keypair['name'] === 'prompt') continue;
  1092. $attr[] = $keypair;
  1093. }
  1094. $attr[] = array('name' => 'name', 'value' => 'isindex');
  1095. $this->emitToken(array(
  1096. 'name' => 'input',
  1097. 'type' => HTML5_Tokenizer::STARTTAG,
  1098. 'attr' => $attr
  1099. ));
  1100. /* Act as if an end tag token with the tag name "label"
  1101. had been seen. */
  1102. $this->emitToken(array(
  1103. 'name' => 'label',
  1104. 'type' => HTML5_Tokenizer::ENDTAG
  1105. ));
  1106. /* Act as if an end tag token with the tag name "p" had
  1107. been seen. */
  1108. $this->emitToken(array(
  1109. 'name' => 'p',
  1110. 'type' => HTML5_Tokenizer::ENDTAG
  1111. ));
  1112. /* Act as if a start tag token with the tag name "hr" had
  1113. been seen. */
  1114. $this->emitToken(array(
  1115. 'name' => 'hr',
  1116. 'type' => HTML5_Tokenizer::STARTTAG
  1117. ));
  1118. /* Act as if an end tag token with the tag name "form" had
  1119. been seen. */
  1120. $this->emitToken(array(
  1121. 'name' => 'form',
  1122. 'type' => HTML5_Tokenizer::ENDTAG
  1123. ));
  1124. } else {
  1125. $this->ignored = true;
  1126. }
  1127. break;
  1128. /* A start tag whose tag name is "textarea" */
  1129. case 'textarea':
  1130. $this->insertElement($token);
  1131. /* If the next token is a U+000A LINE FEED (LF)
  1132. * character token, then ignore that token and move on to
  1133. * the next one. (Newlines at the start of textarea
  1134. * elements are ignored as an authoring convenience.)
  1135. * need flag, see also <pre> */
  1136. $this->ignore_lf_token = 2;
  1137. $this->original_mode = $this->mode;
  1138. $this->flag_frameset_ok = false;
  1139. $this->mode = self::IN_CDATA_RCDATA;
  1140. /* Switch the tokeniser's content model flag to the
  1141. RCDATA state. */
  1142. $this->content_model = HTML5_Tokenizer::RCDATA;
  1143. break;
  1144. /* A start tag token whose tag name is "xmp" */
  1145. case 'xmp':
  1146. /* Reconstruct the active formatting elements, if any. */
  1147. $this->reconstructActiveFormattingElements();
  1148. $this->flag_frameset_ok = false;
  1149. $this->insertCDATAElement($token);
  1150. break;
  1151. case 'iframe':
  1152. $this->flag_frameset_ok = false;
  1153. $this->insertCDATAElement($token);
  1154. break;
  1155. case 'noembed': case 'noscript':
  1156. // XSCRIPT: should check scripting flag
  1157. $this->insertCDATAElement($token);
  1158. break;
  1159. /* A start tag whose tag name is "select" */
  1160. case 'select':
  1161. /* Reconstruct the active formatting elements, if any. */
  1162. $this->reconstructActiveFormattingElements();
  1163. /* Insert an HTML element for the token. */
  1164. $this->insertElement($token);
  1165. $this->flag_frameset_ok = false;
  1166. /* If the insertion mode is one of "in table", "in caption",
  1167. * "in column group", "in table body", "in row", or "in
  1168. * cell", then switch the insertion mode to "in select in
  1169. * table". Otherwise, switch the insertion mode to "in
  1170. * select". */
  1171. if (
  1172. $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
  1173. $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
  1174. $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
  1175. ) {
  1176. $this->mode = self::IN_SELECT_IN_TABLE;
  1177. } else {
  1178. $this->mode = self::IN_SELECT;
  1179. }
  1180. break;
  1181. case 'option': case 'optgroup':
  1182. if ($this->elementInScope('option')) {
  1183. $this->emitToken(array(
  1184. 'name' => 'option',
  1185. 'type' => HTML5_Tokenizer::ENDTAG,
  1186. ));
  1187. }
  1188. $this->reconstructActiveFormattingElements();
  1189. $this->insertElement($token);
  1190. break;
  1191. case 'rp': case 'rt':
  1192. /* If the stack of open elements has a ruby element in scope, then generate
  1193. * implied end tags. If the current node is not then a ruby element, this is
  1194. * a parse error; pop all the nodes from the current node up to the node
  1195. * immediately before the bottommost ruby element on the stack of open elements.
  1196. */
  1197. if ($this->elementInScope('ruby')) {
  1198. $this->generateImpliedEndTags();
  1199. }
  1200. $peek = false;
  1201. do {
  1202. if ($peek) {
  1203. // parse error
  1204. }
  1205. $peek = array_pop($this->stack);
  1206. } while ($peek->tagName !== 'ruby');
  1207. $this->stack[] = $peek; // we popped one too many
  1208. $this->insertElement($token);
  1209. break;
  1210. // spec diversion
  1211. case 'math':
  1212. $this->reconstructActiveFormattingElements();
  1213. $token = $this->adjustMathMLAttributes($token);
  1214. $token = $this->adjustForeignAttributes($token);
  1215. $this->insertForeignElement($token, self::NS_MATHML);
  1216. if (isset($token['self-closing'])) {
  1217. // XERROR: acknowledge the token's self-closing flag
  1218. array_pop($this->stack);
  1219. }
  1220. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1221. $this->secondary_mode = $this->mode;
  1222. $this->mode = self::IN_FOREIGN_CONTENT;
  1223. }
  1224. break;
  1225. case 'svg':
  1226. $this->reconstructActiveFormattingElements();
  1227. $token = $this->adjustSVGAttributes($token);
  1228. $token = $this->adjustForeignAttributes($token);
  1229. $this->insertForeignElement($token, self::NS_SVG);
  1230. if (isset($token['self-closing'])) {
  1231. // XERROR: acknowledge the token's self-closing flag
  1232. array_pop($this->stack);
  1233. }
  1234. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1235. $this->secondary_mode = $this->mode;
  1236. $this->mode = self::IN_FOREIGN_CONTENT;
  1237. }
  1238. break;
  1239. case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
  1240. case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
  1241. // parse error
  1242. break;
  1243. /* A start tag token not covered by the previous entries */
  1244. default:
  1245. /* Reconstruct the active formatting elements, if any. */
  1246. $this->reconstructActiveFormattingElements();
  1247. $this->insertElement($token);
  1248. /* This element will be a phrasing element. */
  1249. break;
  1250. }
  1251. break;
  1252. case HTML5_Tokenizer::ENDTAG:
  1253. switch($token['name']) {
  1254. /* An end tag with the tag name "body" */
  1255. case 'body':
  1256. /* If the second element in the stack of open elements is
  1257. not a body element, this is a parse error. Ignore the token.
  1258. (innerHTML case) */
  1259. if(count($this->stack) < 2 || $this->stack[1]->tagName !== 'body') {
  1260. $this->ignored = true;
  1261. /* Otherwise, if there is a node in the stack of open
  1262. * elements that is not either a dd element, a dt
  1263. * element, an li element, an optgroup element, an
  1264. * option element, a p element, an rp element, an rt
  1265. * element, a tbody element, a td element, a tfoot
  1266. * element, a th element, a thead element, a tr element,
  1267. * the body element, or the html element, then this is a
  1268. * parse error. */
  1269. } else {
  1270. // XERROR: implement this check for parse error
  1271. }
  1272. /* Change the insertion mode to "after body". */
  1273. $this->mode = self::AFTER_BODY;
  1274. break;
  1275. /* An end tag with the tag name "html" */
  1276. case 'html':
  1277. /* Act as if an end tag with tag name "body" had been seen,
  1278. then, if that token wasn't ignored, reprocess the current
  1279. token. */
  1280. $this->emitToken(array(
  1281. 'name' => 'body',
  1282. 'type' => HTML5_Tokenizer::ENDTAG
  1283. ));
  1284. if (!$this->ignored) $this->emitToken($token);
  1285. break;
  1286. case 'address': case 'article': case 'aside': case 'blockquote':
  1287. case 'center': case 'datagrid': case 'details': case 'dir':
  1288. case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
  1289. case 'header': case 'hgroup': case 'listing': case 'menu':
  1290. case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
  1291. /* If the stack of open elements has an element in scope
  1292. with the same tag name as that of the token, then generate
  1293. implied end tags. */
  1294. if($this->elementInScope($token['name'])) {
  1295. $this->generateImpliedEndTags();
  1296. /* Now, if the current node is not an element with
  1297. the same tag name as that of the token, then this
  1298. is a parse error. */
  1299. // XERROR: implement parse error logic
  1300. /* If the stack of open elements has an element in
  1301. scope with the same tag name as that of the token,
  1302. then pop elements from this stack until an element
  1303. with that tag name has been popped from the stack. */
  1304. do {
  1305. $node = array_pop($this->stack);
  1306. } while ($node->tagName !== $token['name']);
  1307. } else {
  1308. // parse error
  1309. }
  1310. break;
  1311. /* An end tag whose tag name is "form" */
  1312. case 'form':
  1313. /* Let node be the element that the form element pointer is set to. */
  1314. $node = $this->form_pointer;
  1315. /* Set the form element pointer to null. */
  1316. $this->form_pointer = null;
  1317. /* If node is null or the stack of open elements does not
  1318. * have node in scope, then this is a parse error; ignore the token. */
  1319. if ($node === null || !in_array($node, $this->stack)) {
  1320. // parse error
  1321. $this->ignored = true;
  1322. } else {
  1323. /* 1. Generate implied end tags. */
  1324. $this->generateImpliedEndTags();
  1325. /* 2. If the current node is not node, then this is a parse error. */
  1326. if (end($this->stack) !== $node) {
  1327. // parse error
  1328. }
  1329. /* 3. Remove node from the stack of open elements. */
  1330. array_splice($this->stack, array_search($node, $this->stack, true), 1);
  1331. }
  1332. break;
  1333. /* An end tag whose tag name is "p" */
  1334. case 'p':
  1335. /* If the stack of open elements has a p element in scope,
  1336. then generate implied end tags, except for p elements. */
  1337. if($this->elementInScope('p')) {
  1338. /* Generate implied end tags, except for elements with
  1339. * the same tag name as the token. */
  1340. $this->generateImpliedEndTags(array('p'));
  1341. /* If the current node is not a p element, then this is
  1342. a parse error. */
  1343. // XERROR: implement
  1344. /* Pop elements from the stack of open elements until
  1345. * an element with the same tag name as the token has
  1346. * been popped from the stack. */
  1347. do {
  1348. $node = array_pop($this->stack);
  1349. } while ($node->tagName !== 'p');
  1350. } else {
  1351. // parse error
  1352. $this->emitToken(array(
  1353. 'name' => 'p',
  1354. 'type' => HTML5_Tokenizer::STARTTAG,
  1355. ));
  1356. $this->emitToken($token);
  1357. }
  1358. break;
  1359. /* An end tag whose tag name is "dd", "dt", or "li" */
  1360. case 'dd': case 'dt': case 'li':
  1361. if($this->elementInScope($token['name'])) {
  1362. $this->generateImpliedEndTags(array($token['name']));
  1363. /* If the current node is not an element with the same
  1364. tag name as the token, then this is a parse error. */
  1365. // XERROR: implement parse error
  1366. /* Pop elements from the stack of open elements until
  1367. * an element with the same tag name as the token has
  1368. * been popped from the stack. */
  1369. do {
  1370. $node = array_pop($this->stack);
  1371. } while ($node->tagName !== $token['name']);
  1372. } else {
  1373. // parse error
  1374. }
  1375. break;
  1376. /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
  1377. "h5", "h6" */
  1378. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  1379. $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
  1380. /* If the stack of open elements has in scope an element whose
  1381. tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
  1382. generate implied end tags. */
  1383. if($this->elementInScope($elements)) {
  1384. $this->generateImpliedEndTags();
  1385. /* Now, if the current node is not an element with the same
  1386. tag name as that of the token, then this is a parse error. */
  1387. // XERROR: implement parse error
  1388. /* If the stack of open elements has in scope an element
  1389. whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
  1390. "h6", then pop elements from the stack until an element
  1391. with one of those tag names has been popped from the stack. */
  1392. do {
  1393. $node = array_pop($this->stack);
  1394. } while (!in_array($node->tagName, $elements));
  1395. } else {
  1396. // parse error
  1397. }
  1398. break;
  1399. /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
  1400. "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
  1401. case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
  1402. case 'i': case 'nobr': case 's': case 'small': case 'strike':
  1403. case 'strong': case 'tt': case 'u':
  1404. // XERROR: generally speaking this needs parse error logic
  1405. /* 1. Let the formatting element be the last element in
  1406. the list of active formatting elements that:
  1407. * is between the end of the list and the last scope
  1408. marker in the list, if any, or the start of the list
  1409. otherwise, and
  1410. * has the same tag name as the token.
  1411. */
  1412. while(true) {
  1413. for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
  1414. if($this->a_formatting[$a] === self::MARKER) {
  1415. break;
  1416. } elseif($this->a_formatting[$a]->tagName === $token['name']) {
  1417. $formatting_element = $this->a_formatting[$a];
  1418. $in_stack = in_array($formatting_element, $this->stack, true);
  1419. $fe_af_pos = $a;
  1420. break;
  1421. }
  1422. }
  1423. /* If there is no such node, or, if that node is
  1424. also in the stack of open elements but the element
  1425. is not in scope, then this is a parse error. Abort
  1426. these steps. The token is ignored. */
  1427. if(!isset($formatting_element) || ($in_stack &&
  1428. !$this->elementInScope($token['name']))) {
  1429. $this->ignored = true;
  1430. break;
  1431. /* Otherwise, if there is such a node, but that node
  1432. is not in the stack of open elements, then this is a
  1433. parse error; remove the element from the list, and
  1434. abort these steps. */
  1435. } elseif(isset($formatting_element) && !$in_stack) {
  1436. unset($this->a_formatting[$fe_af_pos]);
  1437. $this->a_formatting = array_merge($this->a_formatting);
  1438. break;
  1439. }
  1440. /* Otherwise, there is a formatting element and that
  1441. * element is in the stack and is in scope. If the
  1442. * element is not the current node, this is a parse
  1443. * error. In any case, proceed with the algorithm as
  1444. * written in the following steps. */
  1445. // XERROR: implement me
  1446. /* 2. Let the furthest block be the topmost node in the
  1447. stack of open elements that is lower in the stack
  1448. than the formatting element, and is not an element in
  1449. the phrasing or formatting categories. There might
  1450. not be one. */
  1451. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1452. $length = count($this->stack);
  1453. for($s = $fe_s_pos + 1; $s < $length; $s++) {
  1454. $category = $this->getElementCategory($this->stack[$s]);
  1455. if($category !== self::PHRASING && $category !== self::FORMATTING) {
  1456. $furthest_block = $this->stack[$s];
  1457. break;
  1458. }
  1459. }
  1460. /* 3. If there is no furthest block, then the UA must
  1461. skip the subsequent steps and instead just pop all
  1462. the nodes from the bottom of the stack of open
  1463. elements, from the current node up to the formatting
  1464. element, and remove the formatting element from the
  1465. list of active formatting elements. */
  1466. if(!isset($furthest_block)) {
  1467. for($n = $length - 1; $n >= $fe_s_pos; $n--) {
  1468. array_pop($this->stack);
  1469. }
  1470. unset($this->a_formatting[$fe_af_pos]);
  1471. $this->a_formatting = array_merge($this->a_formatting);
  1472. break;
  1473. }
  1474. /* 4. Let the common ancestor be the element
  1475. immediately above the formatting element in the stack
  1476. of open elements. */
  1477. $common_ancestor = $this->stack[$fe_s_pos - 1];
  1478. /* 5. Let a bookmark note the position of the
  1479. formatting element in the list of active formatting
  1480. elements relative to the elements on either side
  1481. of it in the list. */
  1482. $bookmark = $fe_af_pos;
  1483. /* 6. Let node and last node be the furthest block.
  1484. Follow these steps: */
  1485. $node = $furthest_block;
  1486. $last_node = $furthest_block;
  1487. while(true) {
  1488. for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
  1489. /* 6.1 Let node be the element immediately
  1490. prior to node in the stack of open elements. */
  1491. $node = $this->stack[$n];
  1492. /* 6.2 If node is not in the list of active
  1493. formatting elements, then remove node from
  1494. the stack of open elements and then go back
  1495. to step 1. */
  1496. if(!in_array($node, $this->a_formatting, true)) {
  1497. array_splice($this->stack, $n, 1);
  1498. } else {
  1499. break;
  1500. }
  1501. }
  1502. /* 6.3 Otherwise, if node is the formatting
  1503. element, then go to the next step in the overall
  1504. algorithm. */
  1505. if($node === $formatting_element) {
  1506. break;
  1507. /* 6.4 Otherwise, if last node is the furthest
  1508. block, then move the aforementioned bookmark to
  1509. be immediately after the node in the list of
  1510. active formatting elements. */
  1511. } elseif($last_node === $furthest_block) {
  1512. $bookmark = array_search($node, $this->a_formatting, true) + 1;
  1513. }
  1514. /* 6.5 Create an element for the token for which
  1515. * the element node was created, replace the entry
  1516. * for node in the list of active formatting
  1517. * elements with an entry for the new element,
  1518. * replace the entry for node in the stack of open
  1519. * elements with an entry for the new element, and
  1520. * let node be the new element. */
  1521. // we don't know what the token is anymore
  1522. $clone = $node->cloneNode();
  1523. $a_pos = array_search($node, $this->a_formatting, true);
  1524. $s_pos = array_search($node, $this->stack, true);
  1525. $this->a_formatting[$a_pos] = $clone;
  1526. $this->stack[$s_pos] = $clone;
  1527. $node = $clone;
  1528. /* 6.6 Insert last node into node, first removing
  1529. it from its previous parent node if any. */
  1530. if($last_node->parentNode !== null) {
  1531. $last_node->parentNode->removeChild($last_node);
  1532. }
  1533. $node->appendChild($last_node);
  1534. /* 6.7 Let last node be node. */
  1535. $last_node = $node;
  1536. /* 6.8 Return to step 1 of this inner set of steps. */
  1537. }
  1538. /* 7. If the common ancestor node is a table, tbody,
  1539. * tfoot, thead, or tr element, then, foster parent
  1540. * whatever last node ended up being in the previous
  1541. * step, first removing it from its previous parent
  1542. * node if any. */
  1543. if ($last_node->parentNode) { // common step
  1544. $last_node->parentNode->removeChild($last_node);
  1545. }
  1546. if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
  1547. $this->fosterParent($last_node);
  1548. /* Otherwise, append whatever last node ended up being
  1549. * in the previous step to the common ancestor node,
  1550. * first removing it from its previous parent node if
  1551. * any. */
  1552. } else {
  1553. $common_ancestor->appendChild($last_node);
  1554. }
  1555. /* 8. Create an element for the token for which the
  1556. * formatting element was created. */
  1557. $clone = $formatting_element->cloneNode();
  1558. /* 9. Take all of the child nodes of the furthest
  1559. block and append them to the element created in the
  1560. last step. */
  1561. while($furthest_block->hasChildNodes()) {
  1562. $child = $furthest_block->firstChild;
  1563. $furthest_block->removeChild($child);
  1564. $clone->appendChild($child);
  1565. }
  1566. /* 10. Append that clone to the furthest block. */
  1567. $furthest_block->appendChild($clone);
  1568. /* 11. Remove the formatting element from the list
  1569. of active formatting elements, and insert the new element
  1570. into the list of active formatting elements at the
  1571. position of the aforementioned bookmark. */
  1572. $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
  1573. array_splice($this->a_formatting, $fe_af_pos, 1);
  1574. $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
  1575. $af_part2 = array_slice($this->a_formatting, $bookmark);
  1576. $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
  1577. /* 12. Remove the formatting element from the stack
  1578. of open elements, and insert the new element into the stack
  1579. of open elements immediately below the position of the
  1580. furthest block in that stack. */
  1581. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1582. array_splice($this->stack, $fe_s_pos, 1);
  1583. $fb_s_pos = array_search($furthest_block, $this->stack, true);
  1584. $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
  1585. $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
  1586. $this->stack = array_merge($s_part1, array($clone), $s_part2);
  1587. /* 13. Jump back to step 1 in this series of steps. */
  1588. unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
  1589. }
  1590. break;
  1591. case 'applet': case 'button': case 'marquee': case 'object':
  1592. /* If the stack of open elements has an element in scope whose
  1593. tag name matches the tag name of the token, then generate implied
  1594. tags. */
  1595. if($this->elementInScope($token['name'])) {
  1596. $this->generateImpliedEndTags();
  1597. /* Now, if the current node is not an element with the same
  1598. tag name as the token, then this is a parse error. */
  1599. // XERROR: implement logic
  1600. /* Pop elements from the stack of open elements until
  1601. * an element with the same tag name as the token has
  1602. * been popped from the stack. */
  1603. do {
  1604. $node = array_pop($this->stack);
  1605. } while ($node->tagName !== $token['name']);
  1606. /* Clear the list of active formatting elements up to the
  1607. * last marker. */
  1608. $keys = array_keys($this->a_formatting, self::MARKER, true);
  1609. $marker = end($keys);
  1610. for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
  1611. array_pop($this->a_formatting);
  1612. }
  1613. } else {
  1614. // parse error
  1615. }
  1616. break;
  1617. case 'br':
  1618. // Parse error
  1619. $this->emitToken(array(
  1620. 'name' => 'br',
  1621. 'type' => HTML5_Tokenizer::STARTTAG,
  1622. ));
  1623. break;
  1624. /* An end tag token not covered by the previous entries */
  1625. default:
  1626. for($n = count($this->stack) - 1; $n >= 0; $n--) {
  1627. /* Initialise node to be the current node (the bottommost
  1628. node of the stack). */
  1629. $node = $this->stack[$n];
  1630. /* If node has the same tag name as the end tag token,
  1631. then: */
  1632. if($token['name'] === $node->tagName) {
  1633. /* Generate implied end tags. */
  1634. $this->generateImpliedEndTags();
  1635. /* If the tag name of the end tag token does not
  1636. match the tag name of the current node, this is a
  1637. parse error. */
  1638. // XERROR: implement this
  1639. /* Pop all the nodes from the current node up to
  1640. node, including node, then stop these steps. */
  1641. // XSKETCHY
  1642. do {
  1643. $pop = array_pop($this->stack);
  1644. } while ($pop !== $node);
  1645. break;
  1646. } else {
  1647. $category = $this->getElementCategory($node);
  1648. if($category !== self::FORMATTING && $category !== self::PHRASING) {
  1649. /* Otherwise, if node is in neither the formatting
  1650. category nor the phrasing category, then this is a
  1651. parse error. Stop this algorithm. The end tag token
  1652. is ignored. */
  1653. $this->ignored = true;
  1654. break;
  1655. // parse error
  1656. }
  1657. }
  1658. /* Set node to the previous entry in the stack of open elements. Loop. */
  1659. }
  1660. break;
  1661. }
  1662. break;
  1663. }
  1664. break;
  case self::IN_CDATA_RCDATA:
      /* Text tokens are appended to the current node. An end-of-file
       * token or any end tag pops the current node and restores the
       * insertion mode that was saved in $this->original_mode. */
      $is_text_token = in_array(
          $token['type'],
          array(HTML5_Tokenizer::CHARACTER, HTML5_Tokenizer::SPACECHARACTER),
          true
      );
      $is_eof_token = ($token['type'] === HTML5_Tokenizer::EOF);

      if ($is_text_token) {
          $this->insertText($token['data']);

      } elseif ($is_eof_token || $token['type'] === HTML5_Tokenizer::ENDTAG) {
          /* End tags (including "script") all do the same thing here:
           * script execution is deliberately not implemented, so there
           * is nothing extra to do for a script element. */
          array_pop($this->stack);
          $this->mode = $this->original_mode;

          if ($is_eof_token) {
              // parse error
              /* If the current node is a script element, the spec wants it
               * marked as "already executed"; probably not necessary here.
               * The EOF token is reprocessed in the restored mode. */
              $this->emitToken($token);
          }
      }
      break;
  case self::IN_TABLE:
      /* Rules for the "in table" insertion mode. The branches below are
       * tested in the same order as the specification lists them. */
      $clear = array('html', 'table');
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);

      /* A character token that is one of U+0009 CHARACTER TABULATION,
       * U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
       * (FF), or U+0020 SPACE. If the current table is tainted, the
       * token is handled by the "anything else" entry at the bottom.
       * Note: hsivonen has a test that fails due to the taint check
       * because he wants to convince Hixie not to do taint. */
      if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER
          && !$this->currentTableIsTainted()
      ) {
          /* Append the character to the current node. */
          $this->insertText($token['data']);

      /* A comment token */
      } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
          /* Append a Comment node to the current node with the data
           * attribute set to the data given in the comment token. */
          $this->insertComment($token['data']);

      /* A DOCTYPE token */
      } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
          // parse error

      /* A start tag whose tag name is "caption" */
      } elseif ($is_start_tag && $token['name'] === 'caption') {
          /* Clear the stack back to a table context. */
          $this->clearStackToTableContext($clear);

          /* Insert a marker at the end of the list of active
           * formatting elements. */
          $this->a_formatting[] = self::MARKER;

          /* Insert an HTML element for the token, then switch the
           * insertion mode to "in caption". */
          $this->insertElement($token);
          $this->mode = self::IN_CAPTION;

      /* A start tag whose tag name is "colgroup" */
      } elseif ($is_start_tag && $token['name'] === 'colgroup') {
          /* Clear the stack back to a table context. */
          $this->clearStackToTableContext($clear);

          /* Insert an HTML element for the token, then switch the
           * insertion mode to "in column group". */
          $this->insertElement($token);
          $this->mode = self::IN_COLUMN_GROUP;

      /* A start tag whose tag name is "col" */
      } elseif ($is_start_tag && $token['name'] === 'col') {
          /* Act as if a "colgroup" start tag had been seen, then
           * reprocess the current token. */
          $this->emitToken(array(
              'name' => 'colgroup',
              'type' => HTML5_Tokenizer::STARTTAG,
              'attr' => array()
          ));
          $this->emitToken($token);

      /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
      } elseif ($is_start_tag
          && in_array($token['name'], array('tbody', 'tfoot', 'thead'))
      ) {
          /* Clear the stack back to a table context. */
          $this->clearStackToTableContext($clear);

          /* Insert an HTML element for the token, then switch the
           * insertion mode to "in table body". */
          $this->insertElement($token);
          $this->mode = self::IN_TABLE_BODY;

      /* A start tag whose tag name is one of: "td", "th", "tr" */
      } elseif ($is_start_tag
          && in_array($token['name'], array('td', 'th', 'tr'))
      ) {
          /* Act as if a start tag token with the tag name "tbody" had
           * been seen, then reprocess the current token. */
          $this->emitToken(array(
              'name' => 'tbody',
              'type' => HTML5_Tokenizer::STARTTAG,
              'attr' => array()
          ));
          $this->emitToken($token);

      /* A start tag whose tag name is "table" */
      } elseif ($is_start_tag && $token['name'] === 'table') {
          /* Parse error. Act as if an end tag token with the tag name
           * "table" had been seen, then, if that token wasn't ignored,
           * reprocess the current token. */
          $this->emitToken(array(
              'name' => 'table',
              'type' => HTML5_Tokenizer::ENDTAG
          ));
          if (!$this->ignored) {
              $this->emitToken($token);
          }

      /* An end tag whose tag name is "table" */
      } elseif ($is_end_tag && $token['name'] === 'table') {
          if ($this->elementInScope($token['name'], true)) {
              /* Pop elements from the stack until a table element has
               * been popped. */
              for (;;) {
                  $node = array_pop($this->stack);
                  if ($node->tagName === 'table') {
                      break;
                  }
              }

              /* Reset the insertion mode appropriately. */
              $this->resetInsertionMode();
          } else {
              /* The stack of open elements does not have an element in
               * table scope with the same tag name as the token: this
               * is a parse error. Ignore the token. (fragment case) */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
      } elseif ($is_end_tag && in_array($token['name'], array(
          'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
          'tfoot', 'th', 'thead', 'tr'
      ))) {
          // Parse error. Ignore the token.

      /* A start tag whose tag name is one of: "style", "script" */
      } elseif ($is_start_tag
          && ($token['name'] === 'style' || $token['name'] === 'script')
      ) {
          $this->processWithRulesFor($token, self::IN_HEAD);

      /* A start tag whose tag name is "input" with type="hidden"
       * (ASCII case-insensitive). An input without a type attribute,
       * or with any other type, is handled by "anything else" below. */
      } elseif ($is_start_tag && $token['name'] === 'input' &&
          // assignment is intentional
          ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden'
      ) {
          // parse error
          /* Insert the element and immediately pop it off the stack
           * of open elements again. */
          $this->insertElement($token);
          array_pop($this->stack);

      /* An end-of-file token */
      } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
          /* If the current node is not the root html element, then this
           * is a parse error. */
          if (end($this->stack)->tagName !== 'html') {
              // Note: It can only be the current node in the fragment case.
              // parse error
          }
          /* Stop parsing. */

      /* Anything else */
      } else {
          /* Parse error. Process the token as if the insertion mode was
           * "in body", with foster parenting switched on for the
           * duration; the previous flag value is restored afterwards. */
          $old = $this->foster_parent;
          $this->foster_parent = true;
          $this->processWithRulesFor($token, self::IN_BODY);
          $this->foster_parent = $old;
      }
      break;
  case self::IN_CAPTION:
      /* Rules for the "in caption" insertion mode. */

      /* An end tag whose tag name is "caption" */
      if ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
          /* If the stack of open elements does not have an element in table
           * scope with the same tag name as the token, this is a parse error.
           * Ignore the token. (fragment case) */
          if (!$this->elementInScope($token['name'], true)) {
              $this->ignored = true;

          /* Otherwise: */
          } else {
              /* Generate implied end tags. */
              $this->generateImpliedEndTags();

              /* Now, if the current node is not a caption element, then this
               * is a parse error. */
              // XERROR: implement

              /* Pop elements from this stack until a caption element has
               * been popped from the stack. */
              do {
                  $node = array_pop($this->stack);
              } while ($node->tagName !== 'caption');

              /* Clear the list of active formatting elements up to the last
               * marker. */
              $this->clearTheActiveFormattingElementsUpToTheLastMarker();

              /* Switch the insertion mode to "in table". */
              $this->mode = self::IN_TABLE;
          }

      /* A start tag whose tag name is one of: "caption", "col", "colgroup",
       * "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
       * name is "table" */
      } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
          array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
          'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
          $token['name'] === 'table')) {
          /* Parse error. Act as if an end tag with the tag name "caption"
           * had been seen, then, if that token wasn't ignored, reprocess the
           * current token. */
          $this->emitToken(array(
              'name' => 'caption',
              'type' => HTML5_Tokenizer::ENDTAG
          ));

          if (!$this->ignored) {
              $this->emitToken($token);
          }

      /* An end tag whose tag name is one of: "body", "col", "colgroup",
       * "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
      } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
          // 'td' was missing from this list although the rule above names
          // it; without it a stray </td> fell through to the "in body"
          // rules instead of being ignored here.
          array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
          'thead', 'tr'))) {
          // Parse error. Ignore the token.
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in body". */
          $this->processWithRulesFor($token, self::IN_BODY);
      }
      break;
  case self::IN_COLUMN_GROUP:
      /* Rules for the "in column group" insertion mode. */

      /* A character token that is one of U+0009 CHARACTER TABULATION,
       * U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
       * or U+0020 SPACE */
      if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
          /* Append the character to the current node. */
          $this->insertText($token['data']);

      /* A comment token */
      } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
          /* Append a Comment node to the current node with the data
           * attribute set to the data given in the comment token.
           * (This previously called insertToken(); the other insertion
           * modes insert comments through insertComment().) */
          $this->insertComment($token['data']);

      /* A DOCTYPE token */
      } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
          // parse error

      /* A start tag whose tag name is "html" */
      } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
          $this->processWithRulesFor($token, self::IN_BODY);

      /* A start tag whose tag name is "col" */
      } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
          /* Insert a col element for the token. Immediately pop the current
           * node off the stack of open elements. */
          $this->insertElement($token);
          array_pop($this->stack);
          // XERROR: Acknowledge the token's self-closing flag, if it is set.

      /* An end tag whose tag name is "colgroup" */
      } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
          $token['name'] === 'colgroup') {
          /* If the current node is the root html element, then this is a
           * parse error, ignore the token. (fragment case) */
          if (end($this->stack)->tagName === 'html') {
              $this->ignored = true;

          /* Otherwise, pop the current node (which will be a colgroup
           * element) from the stack of open elements. Switch the insertion
           * mode to "in table". */
          } else {
              array_pop($this->stack);
              $this->mode = self::IN_TABLE;
          }

      /* An end tag whose tag name is "col" */
      } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* An end-of-file token */
      /* If the current node is the root html element */
      } elseif ($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
          /* Stop parsing */

      /* Anything else (including EOF when the current node is not the
       * root html element) */
      } else {
          /* Act as if an end tag with the tag name "colgroup" had been seen,
           * and then, if that token wasn't ignored, reprocess the current
           * token. */
          $this->emitToken(array(
              'name' => 'colgroup',
              'type' => HTML5_Tokenizer::ENDTAG
          ));

          if (!$this->ignored) {
              $this->emitToken($token);
          }
      }
      break;
  case self::IN_TABLE_BODY:
      /* Rules for the "in table body" insertion mode. */
      $clear = array('tbody', 'tfoot', 'thead', 'html');
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);

      /* A start tag whose tag name is "tr" */
      if ($is_start_tag && $token['name'] === 'tr') {
          /* Clear the stack back to a table body context. */
          $this->clearStackToTableContext($clear);

          /* Insert a tr element for the token, then switch the insertion
           * mode to "in row". */
          $this->insertElement($token);
          $this->mode = self::IN_ROW;

      /* A start tag whose tag name is one of: "th", "td" */
      } elseif ($is_start_tag
          && ($token['name'] === 'th' || $token['name'] === 'td')
      ) {
          /* Parse error. Act as if a start tag with the tag name "tr" had
           * been seen, then reprocess the current token. */
          $this->emitToken(array(
              'name' => 'tr',
              'type' => HTML5_Tokenizer::STARTTAG,
              'attr' => array()
          ));
          $this->emitToken($token);

      /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
      } elseif ($is_end_tag
          && in_array($token['name'], array('tbody', 'tfoot', 'thead'))
      ) {
          if ($this->elementInScope($token['name'], true)) {
              /* Clear the stack back to a table body context. */
              $this->clearStackToTableContext($clear);

              /* Pop the current node from the stack of open elements.
               * Switch the insertion mode to "in table". */
              array_pop($this->stack);
              $this->mode = self::IN_TABLE;
          } else {
              /* No element with the token's tag name is in table scope:
               * parse error. Ignore the token. */
              $this->ignored = true;
          }

      /* A start tag whose tag name is one of: "caption", "col", "colgroup",
       * "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
      } elseif (
          ($is_start_tag && in_array($token['name'], array(
              'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'
          )))
          || ($is_end_tag && $token['name'] === 'table')
      ) {
          if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
              /* Clear the stack back to a table body context. */
              $this->clearStackToTableContext($clear);

              /* Act as if an end tag with the same tag name as the current
               * node ("tbody", "tfoot", or "thead") had been seen, then
               * reprocess the current token. */
              $this->emitToken(array(
                  'name' => end($this->stack)->tagName,
                  'type' => HTML5_Tokenizer::ENDTAG
              ));
              $this->emitToken($token);
          } else {
              /* The stack of open elements does not have a tbody, thead,
               * or tfoot element in table scope: parse error. Ignore the
               * token. (fragment case) */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html", "td", "th", "tr" */
      } elseif ($is_end_tag && in_array($token['name'], array(
          'body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'
      ))) {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in table". */
          $this->processWithRulesFor($token, self::IN_TABLE);
      }
      break;
  case self::IN_ROW:
      /* Rules for the "in row" insertion mode. */
      $clear = array('tr', 'html');
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);

      /* A start tag whose tag name is one of: "th", "td" */
      if ($is_start_tag
          && ($token['name'] === 'th' || $token['name'] === 'td')
      ) {
          /* Clear the stack back to a table row context. */
          $this->clearStackToTableContext($clear);

          /* Insert an HTML element for the token, then switch the insertion
           * mode to "in cell". */
          $this->insertElement($token);
          $this->mode = self::IN_CELL;

          /* Insert a marker at the end of the list of active formatting
           * elements. */
          $this->a_formatting[] = self::MARKER;

      /* An end tag whose tag name is "tr" */
      } elseif ($is_end_tag && $token['name'] === 'tr') {
          if ($this->elementInScope($token['name'], true)) {
              /* Clear the stack back to a table row context. */
              $this->clearStackToTableContext($clear);

              /* Pop the current node (which will be a tr element) from the
               * stack of open elements. Switch the insertion mode to "in
               * table body". */
              array_pop($this->stack);
              $this->mode = self::IN_TABLE_BODY;
          } else {
              /* No element with the token's tag name is in table scope:
               * parse error. Ignore the token. (fragment case) */
              $this->ignored = true;
          }

      /* A start tag whose tag name is one of: "caption", "col", "colgroup",
       * "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is
       * "table" */
      } elseif (
          ($is_start_tag && in_array($token['name'], array(
              'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'
          )))
          || ($is_end_tag && $token['name'] === 'table')
      ) {
          /* Act as if an end tag with the tag name "tr" had been seen, then,
           * if that token wasn't ignored, reprocess the current token. */
          $this->emitToken(array(
              'name' => 'tr',
              'type' => HTML5_Tokenizer::ENDTAG
          ));
          if (!$this->ignored) {
              $this->emitToken($token);
          }

      /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
      } elseif ($is_end_tag
          && in_array($token['name'], array('tbody', 'tfoot', 'thead'))
      ) {
          if ($this->elementInScope($token['name'], true)) {
              /* Act as if an end tag with the tag name "tr" had been seen,
               * then reprocess the current token. */
              $this->emitToken(array(
                  'name' => 'tr',
                  'type' => HTML5_Tokenizer::ENDTAG
              ));
              $this->emitToken($token);
          } else {
              /* No element with the token's tag name is in table scope:
               * parse error. Ignore the token. */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html", "td", "th" */
      } elseif ($is_end_tag && in_array($token['name'], array(
          'body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'
      ))) {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in table". */
          $this->processWithRulesFor($token, self::IN_TABLE);
      }
      break;
  case self::IN_CELL:
      /* Rules for the "in cell" insertion mode. */
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);

      /* An end tag whose tag name is one of: "td", "th" */
      if ($is_end_tag
          && ($token['name'] === 'td' || $token['name'] === 'th')
      ) {
          if ($this->elementInScope($token['name'], true)) {
              /* Generate implied end tags, except for elements with the same
               * tag name as the token. */
              $this->generateImpliedEndTags(array($token['name']));

              /* Now, if the current node is not an element with the same tag
               * name as the token, then this is a parse error. */
              // XERROR: Implement parse error code

              /* Pop elements from this stack until an element with the same
               * tag name as the token has been popped from the stack. */
              for (;;) {
                  $node = array_pop($this->stack);
                  if ($node->tagName === $token['name']) {
                      break;
                  }
              }

              /* Clear the list of active formatting elements up to the last
               * marker. */
              $this->clearTheActiveFormattingElementsUpToTheLastMarker();

              /* Switch the insertion mode to "in row". (The current node
               * will be a tr element at this point.) */
              $this->mode = self::IN_ROW;
          } else {
              /* The stack of open elements does not have an element in table
               * scope with the same tag name as that of the token: this is
               * a parse error and the token must be ignored. */
              $this->ignored = true;
          }

      /* A start tag whose tag name is one of: "caption", "col", "colgroup",
       * "tbody", "td", "tfoot", "th", "thead", "tr"
       * -- or --
       * an end tag whose tag name is one of: "table", "tbody", "tfoot",
       * "thead", "tr".
       * Both entries are handled identically, so they share one branch. */
      } elseif (
          ($is_start_tag && in_array($token['name'], array(
              'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
              'thead', 'tr'
          )))
          || ($is_end_tag && in_array($token['name'], array(
              'table', 'tbody', 'tfoot', 'thead', 'tr'
          )))
      ) {
          if ($this->elementInScope(array('td', 'th'), true)) {
              /* Close the cell and reprocess the current token. */
              $this->closeCell();
              $this->emitToken($token);
          } else {
              /* The stack of open elements does not have a td or th element
               * in table scope: parse error; ignore the token.
               * (fragment / innerHTML case) */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html" */
      } elseif ($is_end_tag && in_array($token['name'], array(
          'body', 'caption', 'col', 'colgroup', 'html'
      ))) {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in body". */
          $this->processWithRulesFor($token, self::IN_BODY);
      }
      break;
  2157. case self::IN_SELECT:
  2158. /* Handle the token as follows: */
  2159. /* A character token */
  2160. if(
  2161. $token['type'] === HTML5_Tokenizer::CHARACTER ||
  2162. $token['type'] === HTML5_Tokenizer::SPACECHARACTER
  2163. ) {
  2164. /* Append the token's character to the current node. */
  2165. $this->insertText($token['data']);
  2166. /* A comment token */
  2167. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2168. /* Append a Comment node to the current node with the data
  2169. attribute set to the data given in the comment token. */
  2170. $this->insertComment($token['data']);
  2171. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2172. // parse error
  2173. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2174. $this->processWithRulesFor($token, self::INBODY);
  2175. /* A start tag token whose tag name is "option" */
  2176. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2177. $token['name'] === 'option') {
  2178. /* If the current node is an option element, act as if an end tag
  2179. with the tag name "option" had been seen. */
  2180. if(end($this->stack)->tagName === 'option') {
  2181. $this->emitToken(array(
  2182. 'name' => 'option',
  2183. 'type' => HTML5_Tokenizer::ENDTAG
  2184. ));
  2185. }
  2186. /* Insert an HTML element for the token. */
  2187. $this->insertElement($token);
  2188. /* A start tag token whose tag name is "optgroup" */
  2189. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2190. $token['name'] === 'optgroup') {
  2191. /* If the current node is an option element, act as if an end tag
  2192. with the tag name "option" had been seen. */
  2193. if(end($this->stack)->tagName === 'option') {
  2194. $this->emitToken(array(
  2195. 'name' => 'option',
  2196. 'type' => HTML5_Tokenizer::ENDTAG
  2197. ));
  2198. }
  2199. /* If the current node is an optgroup element, act as if an end tag
  2200. with the tag name "optgroup" had been seen. */
  2201. if(end($this->stack)->tagName === 'optgroup') {
  2202. $this->emitToken(array(
  2203. 'name' => 'optgroup',
  2204. 'type' => HTML5_Tokenizer::ENDTAG
  2205. ));
  2206. }
  2207. /* Insert an HTML element for the token. */
  2208. $this->insertElement($token);
  2209. /* An end tag token whose tag name is "optgroup" */
  2210. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2211. $token['name'] === 'optgroup') {
  2212. /* First, if the current node is an option element, and the node
  2213. immediately before it in the stack of open elements is an optgroup
  2214. element, then act as if an end tag with the tag name "option" had
  2215. been seen. */
  2216. $elements_in_stack = count($this->stack);
  2217. if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
  2218. $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
  2219. $this->emitToken(array(
  2220. 'name' => 'option',
  2221. 'type' => HTML5_Tokenizer::ENDTAG
  2222. ));
  2223. }
  2224. /* If the current node is an optgroup element, then pop that node
  2225. from the stack of open elements. Otherwise, this is a parse error,
  2226. ignore the token. */
  2227. if(end($this->stack)->tagName === 'optgroup') {
  2228. array_pop($this->stack);
  2229. } else {
  2230. // parse error
  2231. $this->ignored = true;
  2232. }
  2233. /* An end tag token whose tag name is "option" */
  2234. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2235. $token['name'] === 'option') {
  2236. /* If the current node is an option element, then pop that node
  2237. from the stack of open elements. Otherwise, this is a parse error,
  2238. ignore the token. */
  2239. if(end($this->stack)->tagName === 'option') {
  2240. array_pop($this->stack);
  2241. } else {
  2242. // parse error
  2243. $this->ignored = true;
  2244. }
  2245. /* An end tag whose tag name is "select" */
  2246. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2247. $token['name'] === 'select') {
  2248. /* If the stack of open elements does not have an element in table
  2249. scope with the same tag name as the token, this is a parse error.
  2250. Ignore the token. (fragment case) */
  2251. if(!$this->elementInScope($token['name'], true)) {
  2252. $this->ignored = true;
  2253. // parse error
  2254. /* Otherwise: */
  2255. } else {
  2256. /* Pop elements from the stack of open elements until a select
  2257. element has been popped from the stack. */
  2258. do {
  2259. $node = array_pop($this->stack);
  2260. } while ($node->tagName !== 'select');
  2261. /* Reset the insertion mode appropriately. */
  2262. $this->resetInsertionMode();
  2263. }
  2264. /* A start tag whose tag name is "select" */
  2265. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
  2266. /* Parse error. Act as if the token had been an end tag with the
  2267. tag name "select" instead. */
  2268. $this->emitToken(array(
  2269. 'name' => 'select',
  2270. 'type' => HTML5_Tokenizer::ENDTAG
  2271. ));
  2272. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2273. ($token['name'] === 'input' || $token['name'] === 'textarea')) {
  2274. // parse error
  2275. $this->emitToken(array(
  2276. 'name' => 'select',
  2277. 'type' => HTML5_Tokenizer::ENDTAG
  2278. ));
  2279. $this->emitToken($token);
  2280. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
  2281. $this->processWithRulesFor($token, self::IN_HEAD);
  2282. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2283. // XERROR: If the current node is not the root html element, then this is a parse error.
  2284. /* Stop parsing */
  2285. /* Anything else */
  2286. } else {
  2287. /* Parse error. Ignore the token. */
  2288. $this->ignored = true;
  2289. }
  2290. break;
  2291. case self::IN_SELECT_IN_TABLE:
  2292. if($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2293. in_array($token['name'], array('caption', 'table', 'tbody',
  2294. 'tfoot', 'thead', 'tr', 'td', 'th'))) {
  2295. // parse error
  2296. $this->emitToken(array(
  2297. 'name' => 'select',
  2298. 'type' => HTML5_Tokenizer::ENDTAG,
  2299. ));
  2300. $this->emitToken($token);
  2301. /* An end tag whose tag name is one of: "caption", "table", "tbody",
  2302. "tfoot", "thead", "tr", "td", "th" */
  2303. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2304. in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
  2305. /* Parse error. */
  2306. // parse error
  2307. /* If the stack of open elements has an element in table scope with
  2308. the same tag name as that of the token, then act as if an end tag
  2309. with the tag name "select" had been seen, and reprocess the token.
  2310. Otherwise, ignore the token. */
  2311. if($this->elementInScope($token['name'], true)) {
  2312. $this->emitToken(array(
  2313. 'name' => 'select',
  2314. 'type' => HTML5_Tokenizer::ENDTAG
  2315. ));
  2316. $this->emitToken($token);
  2317. } else {
  2318. $this->ignored = true;
  2319. }
  2320. } else {
  2321. $this->processWithRulesFor($token, self::IN_SELECT);
  2322. }
  2323. break;
  2324. case self::IN_FOREIGN_CONTENT:
  2325. if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
  2326. $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2327. $this->insertText($token['data']);
  2328. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2329. $this->insertComment($token['data']);
  2330. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2331. // XERROR: parse error
  2332. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2333. $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
  2334. end($this->stack)->namespaceURI === self::NS_SVG) {
  2335. array_pop($this->stack);
  2336. // a bunch of script running mumbo jumbo
  2337. } elseif (
  2338. ($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2339. ((
  2340. $token['name'] !== 'mglyph' &&
  2341. $token['name'] !== 'malignmark' &&
  2342. end($this->stack)->namespaceURI === self::NS_MATHML &&
  2343. in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
  2344. ) ||
  2345. (
  2346. $token['name'] === 'svg' &&
  2347. end($this->stack)->namespaceURI === self::NS_MATHML &&
  2348. end($this->stack)->tagName === 'annotation-xml'
  2349. ) ||
  2350. (
  2351. end($this->stack)->namespaceURI === self::NS_SVG &&
  2352. in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
  2353. ) ||
  2354. (
  2355. // XSKETCHY
  2356. end($this->stack)->namespaceURI === self::NS_HTML
  2357. ))
  2358. ) || $token['type'] === HTML5_Tokenizer::ENDTAG
  2359. ) {
  2360. $this->processWithRulesFor($token, $this->secondary_mode);
  2361. /* If, after doing so, the insertion mode is still "in foreign
  2362. * content", but there is no element in scope that has a namespace
  2363. * other than the HTML namespace, switch the insertion mode to the
  2364. * secondary insertion mode. */
  2365. if ($this->mode === self::IN_FOREIGN_CONTENT) {
  2366. $found = false;
  2367. // this basically duplicates elementInScope()
  2368. for ($i = count($this->stack) - 1; $i >= 0; $i--) {
  2369. $node = $this->stack[$i];
  2370. if ($node->namespaceURI !== self::NS_HTML) {
  2371. $found = true;
  2372. break;
  2373. } elseif (in_array($node->tagName, array('table', 'html',
  2374. 'applet', 'caption', 'td', 'th', 'button', 'marquee',
  2375. 'object')) || ($node->tagName === 'foreignObject' &&
  2376. $node->namespaceURI === self::NS_SVG)) {
  2377. break;
  2378. }
  2379. }
  2380. if (!$found) {
  2381. $this->mode = $this->secondary_mode;
  2382. }
  2383. }
  2384. } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
  2385. $token['type'] === HTML5_Tokenizer::STARTTAG &&
  2386. (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
  2387. "center", "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2",
  2388. "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
  2389. "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
  2390. "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
  2391. "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
  2392. $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
  2393. // XERROR: parse error
  2394. do {
  2395. $node = array_pop($this->stack);
  2396. } while ($node->namespaceURI !== self::NS_HTML);
  2397. $this->stack[] = $node;
  2398. $this->mode = $this->secondary_mode;
  2399. $this->emitToken($token);
  2400. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
  2401. static $svg_lookup = array(
  2402. 'altglyph' => 'altGlyph',
  2403. 'altglyphdef' => 'altGlyphDef',
  2404. 'altglyphitem' => 'altGlyphItem',
  2405. 'animatecolor' => 'animateColor',
  2406. 'animatemotion' => 'animateMotion',
  2407. 'animatetransform' => 'animateTransform',
  2408. 'clippath' => 'clipPath',
  2409. 'feblend' => 'feBlend',
  2410. 'fecolormatrix' => 'feColorMatrix',
  2411. 'fecomponenttransfer' => 'feComponentTransfer',
  2412. 'fecomposite' => 'feComposite',
  2413. 'feconvolvematrix' => 'feConvolveMatrix',
  2414. 'fediffuselighting' => 'feDiffuseLighting',
  2415. 'fedisplacementmap' => 'feDisplacementMap',
  2416. 'fedistantlight' => 'feDistantLight',
  2417. 'feflood' => 'feFlood',
  2418. 'fefunca' => 'feFuncA',
  2419. 'fefuncb' => 'feFuncB',
  2420. 'fefuncg' => 'feFuncG',
  2421. 'fefuncr' => 'feFuncR',
  2422. 'fegaussianblur' => 'feGaussianBlur',
  2423. 'feimage' => 'feImage',
  2424. 'femerge' => 'feMerge',
  2425. 'femergenode' => 'feMergeNode',
  2426. 'femorphology' => 'feMorphology',
  2427. 'feoffset' => 'feOffset',
  2428. 'fepointlight' => 'fePointLight',
  2429. 'fespecularlighting' => 'feSpecularLighting',
  2430. 'fespotlight' => 'feSpotLight',
  2431. 'fetile' => 'feTile',
  2432. 'feturbulence' => 'feTurbulence',
  2433. 'foreignobject' => 'foreignObject',
  2434. 'glyphref' => 'glyphRef',
  2435. 'lineargradient' => 'linearGradient',
  2436. 'radialgradient' => 'radialGradient',
  2437. 'textpath' => 'textPath',
  2438. );
  2439. $current = end($this->stack);
  2440. if ($current->namespaceURI === self::NS_MATHML) {
  2441. $token = $this->adjustMathMLAttributes($token);
  2442. }
  2443. if ($current->namespaceURI === self::NS_SVG &&
  2444. isset($svg_lookup[$token['name']])) {
  2445. $token['name'] = $svg_lookup[$token['name']];
  2446. }
  2447. if ($current->namespaceURI === self::NS_SVG) {
  2448. $token = $this->adjustSVGAttributes($token);
  2449. }
  2450. $token = $this->adjustForeignAttributes($token);
  2451. $this->insertForeignElement($token, $current->namespaceURI);
  2452. if (isset($token['self-closing'])) {
  2453. array_pop($this->stack);
  2454. // XERROR: acknowledge self-closing flag
  2455. }
  2456. }
  2457. break;
  2458. case self::AFTER_BODY:
  2459. /* Handle the token as follows: */
  2460. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2461. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2462. or U+0020 SPACE */
  2463. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2464. /* Process the token as it would be processed if the insertion mode
  2465. was "in body". */
  2466. $this->processWithRulesFor($token, self::IN_BODY);
  2467. /* A comment token */
  2468. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2469. /* Append a Comment node to the first element in the stack of open
  2470. elements (the html element), with the data attribute set to the
  2471. data given in the comment token. */
  2472. $comment = $this->dom->createComment($token['data']);
  2473. $this->stack[0]->appendChild($comment);
  2474. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2475. // parse error
  2476. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2477. $this->processWithRulesFor($token, self::IN_BODY);
  2478. /* An end tag with the tag name "html" */
  2479. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
  2480. /* If the parser was originally created as part of the HTML
  2481. * fragment parsing algorithm, this is a parse error; ignore
  2482. * the token. (fragment case) */
  2483. $this->ignored = true;
  2484. // XERROR: implement this
  2485. $this->mode = self::AFTER_AFTER_BODY;
  2486. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2487. /* Stop parsing */
  2488. /* Anything else */
  2489. } else {
  2490. /* Parse error. Set the insertion mode to "in body" and reprocess
  2491. the token. */
  2492. $this->mode = self::IN_BODY;
  2493. $this->emitToken($token);
  2494. }
  2495. break;
  2496. case self::IN_FRAMESET:
  2497. /* Handle the token as follows: */
  2498. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2499. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2500. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
  2501. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2502. /* Append the character to the current node. */
  2503. $this->insertText($token['data']);
  2504. /* A comment token */
  2505. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2506. /* Append a Comment node to the current node with the data
  2507. attribute set to the data given in the comment token. */
  2508. $this->insertComment($token['data']);
  2509. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2510. // parse error
  2511. /* A start tag with the tag name "frameset" */
  2512. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2513. $token['name'] === 'frameset') {
  2514. $this->insertElement($token);
  2515. /* An end tag with the tag name "frameset" */
  2516. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2517. $token['name'] === 'frameset') {
  2518. /* If the current node is the root html element, then this is a
  2519. parse error; ignore the token. (fragment case) */
  2520. if(end($this->stack)->tagName === 'html') {
  2521. $this->ignored = true;
  2522. // Parse error
  2523. } else {
  2524. /* Otherwise, pop the current node from the stack of open
  2525. elements. */
  2526. array_pop($this->stack);
  2527. /* If the parser was not originally created as part of the HTML
  2528. * fragment parsing algorithm (fragment case), and the current
  2529. * node is no longer a frameset element, then switch the
  2530. * insertion mode to "after frameset". */
  2531. $this->mode = self::AFTER_FRAMESET;
  2532. }
  2533. /* A start tag with the tag name "frame" */
  2534. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2535. $token['name'] === 'frame') {
  2536. /* Insert an HTML element for the token. */
  2537. $this->insertElement($token);
  2538. /* Immediately pop the current node off the stack of open elements. */
  2539. array_pop($this->stack);
  2540. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  2541. /* A start tag with the tag name "noframes" */
  2542. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2543. $token['name'] === 'noframes') {
  2544. /* Process the token using the rules for the "in head" insertion mode. */
  2545. $this->processwithRulesFor($token, self::IN_HEAD);
  2546. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2547. // XERROR: If the current node is not the root html element, then this is a parse error.
  2548. /* Stop parsing */
  2549. /* Anything else */
  2550. } else {
  2551. /* Parse error. Ignore the token. */
  2552. $this->ignored = true;
  2553. }
  2554. break;
  2555. case self::AFTER_FRAMESET:
  2556. /* Handle the token as follows: */
  2557. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2558. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2559. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
  2560. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2561. /* Append the character to the current node. */
  2562. $this->insertText($token['data']);
  2563. /* A comment token */
  2564. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2565. /* Append a Comment node to the current node with the data
  2566. attribute set to the data given in the comment token. */
  2567. $this->insertComment($token['data']);
  2568. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2569. // parse error
  2570. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2571. $this->processWithRulesFor($token, self::IN_BODY);
  2572. /* An end tag with the tag name "html" */
  2573. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2574. $token['name'] === 'html') {
  2575. $this->mode = self::AFTER_AFTER_FRAMESET;
  2576. /* A start tag with the tag name "noframes" */
  2577. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2578. $token['name'] === 'noframes') {
  2579. $this->processWithRulesFor($token, self::IN_HEAD);
  2580. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2581. /* Stop parsing */
  2582. /* Anything else */
  2583. } else {
  2584. /* Parse error. Ignore the token. */
  2585. $this->ignored = true;
  2586. }
  2587. break;
  2588. case self::AFTER_AFTER_BODY:
  2589. /* A comment token */
  2590. if($token['type'] === HTML5_Tokenizer::COMMENT) {
  2591. /* Append a Comment node to the Document object with the data
  2592. attribute set to the data given in the comment token. */
  2593. $comment = $this->dom->createComment($token['data']);
  2594. $this->dom->appendChild($comment);
  2595. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
  2596. $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
  2597. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
  2598. $this->processWithRulesFor($token, self::IN_BODY);
  2599. /* An end-of-file token */
  2600. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2601. /* OMG DONE!! */
  2602. } else {
  2603. // parse error
  2604. $this->mode = self::IN_BODY;
  2605. $this->emitToken($token);
  2606. }
  2607. break;
  2608. case self::AFTER_AFTER_FRAMESET:
  2609. /* A comment token */
  2610. if($token['type'] === HTML5_Tokenizer::COMMENT) {
  2611. /* Append a Comment node to the Document object with the data
  2612. attribute set to the data given in the comment token. */
  2613. $comment = $this->dom->createComment($token['data']);
  2614. $this->dom->appendChild($comment);
  2615. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
  2616. $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
  2617. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
  2618. $this->processWithRulesFor($token, self::IN_BODY);
  2619. /* An end-of-file token */
  2620. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2621. /* OMG DONE!! */
  2622. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
  2623. $this->processWithRulesFor($token, self::IN_HEAD);
  2624. } else {
  2625. // parse error
  2626. }
  2627. break;
  2628. }
  2629. // end funky indenting
  2630. }
  /**
   * Create an element in the HTML namespace for a start tag token.
   *
   * Attributes are copied from $token['attr']. When the same attribute name
   * occurs more than once, the first occurrence wins. Attributes whose name
   * the DOM refuses (empty after clean-up, or containing characters that are
   * not allowed in an attribute name) are dropped instead of aborting the
   * whole parse.
   *
   * @param array $token  Start tag token; 'name' is read, and 'attr' when
   *                      present (a list of arrays with 'name' and 'value').
   * @param bool  $append When true, the element is appended at the current
   *                      insertion point and pushed onto the stack of open
   *                      elements; when false, it is only created.
   * @return mixed The element returned by $this->dom->createElementNS().
   */
  private function insertElement($token, $append = true) {
      $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);

      if (!empty($token['attr'])) {
          foreach ($token['attr'] as $attr) {
              // mike@macgirvin.com 2011-10-21, stray double quotes cause everything to abort
              $name = str_replace('"', '', $attr['name']);

              // Nothing usable is left as a name; setAttribute() rejects
              // an empty name, so skip the attribute.
              if ($name === '') {
                  continue;
              }

              // First occurrence of a duplicated attribute wins.
              if ($el->hasAttribute($name)) {
                  continue;
              }

              try {
                  $el->setAttribute($name, $attr['value']);
              } catch (DOMException $e) {
                  // The name contains a character the DOM does not accept
                  // (same failure mode as the stray quote above). Drop this
                  // one attribute and keep building the tree.
              }
          }
      }

      if ($append) {
          $this->appendToRealParent($el);
          $this->stack[] = $el;
      }

      return $el;
  }
  /**
   * Append character data at the current insertion point.
   *
   * Nothing is inserted for an empty string. While $this->ignore_lf_token is
   * set, a single leading line feed is removed first; if that leaves nothing,
   * no text node is created.
   *
   * @param string $data Character data taken from a character token.
   * @return void
   */
  private function insertText($data) {
      if ($data === '') {
          return;
      }

      if ($this->ignore_lf_token && $data[0] === "\n") {
          $data = substr($data, 1);
          // substr() yields false before PHP 8.0 and '' from PHP 8.0 on when
          // nothing remains; both mean there is no text left to insert.
          if ($data === false || $data === '') {
              return;
          }
      }

      $text = $this->dom->createTextNode($data);
      $this->appendToRealParent($text);
  }
  /**
   * Build a comment node from the given data and append it at the current
   * insertion point (via appendToRealParent()).
   *
   * @param string $data Text of the comment, as carried by the comment token.
   * @return void
   */
  private function insertComment($data) {
      $this->appendToRealParent($this->dom->createComment($data));
  }
  /**
   * Append a node to the current node, or hand it to fosterParent() when
   * foster parenting applies.
   *
   * Foster parenting applies only when $this->foster_parent is set AND the
   * current node (last entry of the stack of open elements) is a table,
   * tbody, tfoot, thead or tr element.
   *
   * @param mixed $node Node to insert.
   * @return void
   */
  private function appendToRealParent($node) {
      $use_foster_parent = false;

      // The current node is only inspected when foster parenting is enabled.
      if ($this->foster_parent) {
          $use_foster_parent = in_array(
              end($this->stack)->tagName,
              array('table', 'tbody', 'tfoot', 'thead', 'tr')
          );
      }

      if ($use_foster_parent) {
          $this->fosterParent($node);
          return;
      }

      end($this->stack)->appendChild($node);
  }
  /**
   * Tell whether the stack of open elements has an element "in scope"
   * (or "in table scope").
   *
   * The stack is walked from the current node towards the root. The walk
   * ends with a match as soon as a node with the wanted tag name is met, and
   * ends without a match at a scope boundary: table or html always, and for
   * plain scope additionally applet, caption, td, th, button, marquee,
   * object and SVG foreignObject.
   *
   * @param string|array $el    Tag name to look for, or a list of tag names
   *                            (true when any one of them is in scope).
   * @param bool         $table True to test "in table scope" rather than
   *                            plain "in scope".
   * @return bool True on a match; false on a scope boundary or when the
   *              whole stack was walked without finding either.
   */
  private function elementInScope($el, $table = false) {
      if (is_array($el)) {
          foreach ($el as $element) {
              if ($this->elementInScope($element, $table)) {
                  return true;
              }
          }
          return false;
      }

      /* 1. Initialise node to be the current node (the bottommost node of
      the stack), then walk towards the first entry. */
      for ($n = count($this->stack) - 1; $n >= 0; $n--) {
          $node = $this->stack[$n];

          /* 2. If node is the target node, terminate in a match state. */
          if ($node->tagName === $el) {
              return true;
          }

          // these are the common boundaries for "in scope" and "in table scope"
          if ($node->tagName === 'table' || $node->tagName === 'html') {
              return false;
          }

          // these are only boundaries for "in scope"
          if (!$table && (
              in_array($node->tagName, array('applet', 'caption', 'td',
                  'th', 'button', 'marquee', 'object'), true) ||
              ($node->tagName === 'foreignObject' &&
                  $node->namespaceURI === self::NS_SVG)
          )) {
              return false;
          }

          /* Otherwise, continue with the previous entry in the stack of open
          elements. */
      }

      // The stack held neither the target nor a boundary element (e.g. it
      // is empty): report "not in scope" explicitly instead of falling off
      // the end of the method.
      return false;
  }
  /**
   * Reconstruct the active formatting elements.
   *
   * Every entry at the tail of the list of active formatting elements that
   * is neither a marker nor currently on the stack of open elements is
   * shallow-cloned; the clone is inserted at the current insertion point,
   * pushed onto the stack of open elements, and replaces the original entry
   * in the list.
   *
   * The rewind loop can stop on a marker or on an element that is already
   * open. That entry must NOT be cloned; cloning starts with the entry after
   * it. $step_seven therefore starts out true and is only switched off when
   * the rewind reaches the very first entry of the list.
   *
   * @return false|null False when there was nothing to reconstruct; no
   *                    value otherwise.
   */
  private function reconstructActiveFormattingElements() {
      /* 1. If there are no entries in the list of active formatting elements,
      then there is nothing to reconstruct; stop this algorithm. */
      $formatting_elements = count($this->a_formatting);

      if ($formatting_elements === 0) {
          return false;
      }

      /* 3. Let entry be the last (most recently added) element in the list
      of active formatting elements. */
      $entry = end($this->a_formatting);

      /* 2. If the last (most recently added) entry in the list of active
      formatting elements is a marker, or if it is an element that is in the
      stack of open elements, then there is nothing to reconstruct; stop this
      algorithm. */
      if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
          return false;
      }

      $a = $formatting_elements - 1;
      // Whether step 7 (advance to the next entry) has to run before the
      // next clone is made.
      $step_seven = true;

      while (true) {
          /* 4. If there are no entries before entry in the list of active
          formatting elements, then jump to step 8. */
          if ($a === 0) {
              $step_seven = false;
              break;
          }

          /* 5. Let entry be the entry one earlier than entry in the list of
          active formatting elements. */
          $a--;
          $entry = $this->a_formatting[$a];

          /* 6. If entry is neither a marker nor an element that is also in
          the stack of open elements, go to step 4. Otherwise continue with
          step 7, which moves past this entry again. */
          if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
              break;
          }
      }

      while (true) {
          /* 7. Let entry be the element one later than entry in the list of
          active formatting elements. */
          if ($step_seven) {
              $a++;
              $entry = $this->a_formatting[$a];
          }

          /* 8. Perform a shallow clone of the element entry to obtain clone. */
          $clone = $entry->cloneNode();

          /* 9. Append clone to the current node and push it onto the stack
          of open elements so that it is the new current node. */
          $this->appendToRealParent($clone);
          $this->stack[] = $clone;

          /* 10. Replace the entry for entry in the list with an entry for
          clone. */
          $this->a_formatting[$a] = $clone;

          /* 11. If the entry for clone in the list of active formatting
          elements is not the last entry in the list, return to step 7. */
          if (end($this->a_formatting) === $clone) {
              break;
          }
          $step_seven = true;
      }
  }
  /**
   * Clear the list of active formatting elements up to the last marker.
   *
   * Entries are removed from the end of the list until a marker has been
   * removed. If the list holds no marker at all, it is simply emptied; the
   * loop ends when the list runs out instead of spinning forever.
   *
   * @return void
   */
  private function clearTheActiveFormattingElementsUpToTheLastMarker() {
      while (!empty($this->a_formatting)) {
          /* 1. Let entry be the last (most recently added) entry in the list
          of active formatting elements.
          2. Remove entry from the list of active formatting elements. */
          $entry = array_pop($this->a_formatting);

          /* 3. If entry was a marker, then stop the algorithm at this point.
          The list has been cleared up to the last marker. */
          if ($entry === self::MARKER) {
              break;
          }
      }
  }
  /**
   * Generate implied end tags.
   *
   * While the current node is a dd, dt, li, p, td, th or tr element (minus
   * any tag names listed in $exclude), it is popped from the stack of open
   * elements.
   *
   * @param array $exclude Tag names that must not be closed implicitly.
   * @return void
   */
  private function generateImpliedEndTags($exclude = array()) {
      $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

      // Stop as soon as the stack is exhausted so that end() is never
      // dereferenced on an empty stack.
      while (!empty($this->stack) &&
          in_array(end($this->stack)->tagName, $elements, true)) {
          array_pop($this->stack);
      }
  }
  /**
   * Classify a node by looking its tag name up in the special, scoping and
   * formatting lists, in that order of precedence.
   *
   * A value that is not an object has no tag name to look up. It is reported
   * through PHP's error handling (rather than by dumping a backtrace into
   * the output stream) and classified as phrasing.
   *
   * @param mixed $node Node to classify; its tagName property is read.
   * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING or
   *               self::PHRASING.
   */
  private function getElementCategory($node) {
      if (!is_object($node)) {
          trigger_error(
              'HTML5_TreeBuilder::getElementCategory() expects a node object, got ' . gettype($node),
              E_USER_NOTICE
          );
          return self::PHRASING;
      }

      $name = $node->tagName;

      if (in_array($name, $this->special)) {
          return self::SPECIAL;
      }
      if (in_array($name, $this->scoping)) {
          return self::SCOPING;
      }
      if (in_array($name, $this->formatting)) {
          return self::FORMATTING;
      }

      // Everything that is in none of the lists is phrasing content.
      return self::PHRASING;
  }
  /**
   * Clear the stack of open elements back to a table context.
   *
   * Elements are popped from the stack of open elements until the current
   * node's tag name is one of $elements. If no such element is on the stack,
   * the loop ends once the stack is empty instead of running forever.
   *
   * @param array $elements Tag names at which popping stops (the matching
   *                        element itself stays on the stack).
   * @return void
   */
  private function clearStackToTableContext($elements) {
      while (!empty($this->stack) &&
          !in_array(end($this->stack)->tagName, $elements)) {
          array_pop($this->stack);
      }
  }
  2822. private function resetInsertionMode($context = null) {
  2823. /* 1. Let last be false. */
  2824. $last = false;
  2825. $leng = count($this->stack);
  2826. for($n = $leng - 1; $n >= 0; $n--) {
  2827. /* 2. Let node be the last node in the stack of open elements. */
  2828. $node = $this->stack[$n];
  2829. /* 3. If node is the first node in the stack of open elements, then
  2830. * set last to true and set node to the context element. (fragment
  2831. * case) */
  2832. if($this->stack[0]->isSameNode($node)) {
  2833. $last = true;
  2834. $node = $context;
  2835. }
  2836. /* 4. If node is a select element, then switch the insertion mode to
  2837. "in select" and abort these steps. (fragment case) */
  2838. if($node->tagName === 'select') {
  2839. $this->mode = self::IN_SELECT;
  2840. break;
  2841. /* 5. If node is a td or th element, then switch the insertion mode
  2842. to "in cell" and abort these steps. */
  2843. } elseif($node->tagName === 'td' || $node->nodeName === 'th') {
  2844. $this->mode = self::IN_CELL;
  2845. break;
  2846. /* 6. If node is a tr element, then switch the insertion mode to
  2847. "in row" and abort these steps. */
  2848. } elseif($node->tagName === 'tr') {
  2849. $this->mode = self::IN_ROW;
  2850. break;
  2851. /* 7. If node is a tbody, thead, or tfoot element, then switch the
  2852. insertion mode to "in table body" and abort these steps. */
  2853. } elseif(in_array($node->tagName, array('tbody', 'thead', 'tfoot'))) {
  2854. $this->mode = self::IN_TABLE_BODY;
  2855. break;
  2856. /* 8. If node is a caption element, then switch the insertion mode
  2857. to "in caption" and abort these steps. */
  2858. } elseif($node->tagName === 'caption') {
  2859. $this->mode = self::IN_CAPTION;
  2860. break;
  2861. /* 9. If node is a colgroup element, then switch the insertion mode
  2862. to "in column group" and abort these steps. (innerHTML case) */
  2863. } elseif($node->tagName === 'colgroup') {
  2864. $this->mode = self::IN_COLUMN_GROUP;
  2865. break;
  2866. /* 10. If node is a table element, then switch the insertion mode
  2867. to "in table" and abort these steps. */
  2868. } elseif($node->tagName === 'table') {
  2869. $this->mode = self::IN_TABLE;
  2870. break;
  2871. /* 11. If node is an element from the MathML namespace or the SVG
  2872. * namespace, then switch the insertion mode to "in foreign
  2873. * content", let the secondary insertion mode be "in body", and
  2874. * abort these steps. */
  2875. } elseif($node->namespaceURI === self::NS_SVG ||
  2876. $node->namespaceURI === self::NS_MATHML) {
  2877. $this->mode = self::IN_FOREIGN_CONTENT;
  2878. $this->secondary_mode = self::IN_BODY;
  2879. break;
  2880. /* 12. If node is a head element, then switch the insertion mode
  2881. to "in body" ("in body"! not "in head"!) and abort these steps.
  2882. (fragment case) */
  2883. } elseif($node->tagName === 'head') {
  2884. $this->mode = self::IN_BODY;
  2885. break;
  2886. /* 13. If node is a body element, then switch the insertion mode to
  2887. "in body" and abort these steps. */
  2888. } elseif($node->tagName === 'body') {
  2889. $this->mode = self::IN_BODY;
  2890. break;
  2891. /* 14. If node is a frameset element, then switch the insertion
  2892. mode to "in frameset" and abort these steps. (fragment case) */
  2893. } elseif($node->tagName === 'frameset') {
  2894. $this->mode = self::IN_FRAMESET;
  2895. break;
  2896. /* 15. If node is an html element, then: if the head element
  2897. pointer is null, switch the insertion mode to "before head",
  2898. otherwise, switch the insertion mode to "after head". In either
  2899. case, abort these steps. (fragment case) */
  2900. } elseif($node->tagName === 'html') {
  2901. $this->mode = ($this->head_pointer === null)
  2902. ? self::BEFORE_HEAD
  2903. : self::AFTER_HEAD;
  2904. break;
  2905. /* 16. If last is true, then set the insertion mode to "in body"
  2906. and abort these steps. (fragment case) */
  2907. } elseif($last) {
  2908. $this->mode = self::IN_BODY;
  2909. break;
  2910. }
  2911. }
  2912. }
  /**
   * Closes the currently open table cell, if any.
   *
   * "td" is checked before "th". For the first of the two that
   * elementInScope() reports as being in table scope, an end tag token with
   * that tag name is passed to emitToken(). Nothing happens when neither is
   * in scope.
   */
  private function closeCell() {
      /* If the stack of open elements has a td or th element in table scope,
         then act as if an end tag token with that tag name had been seen. */
      foreach (array('td', 'th') as $tagName) {
          if (!$this->elementInScope($tagName, true)) {
              continue;
          }

          $endTag = array(
              'name' => $tagName,
              'type' => HTML5_Tokenizer::ENDTAG
          );
          $this->emitToken($endTag);
          return;
      }
  }
  /**
   * Processes a token "using the rules for" another insertion mode.
   *
   * Per the specification wording: the rules described under the given
   * mode's section are used, but the insertion mode is left unchanged
   * unless those rules themselves switch it to a new value.
   *
   * @param array $token Token to process.
   * @param mixed $mode  Insertion mode whose rules should be applied;
   *                     forwarded unchanged to emitToken().
   * @return mixed Whatever emitToken() returns.
   */
  private function processWithRulesFor($token, $mode) {
      $result = $this->emitToken($token, $mode);

      return $result;
  }
  /**
   * Inserts an element for the token and switches to CDATA parsing.
   *
   * The insertion mode in effect before the switch is saved in
   * $original_mode, the insertion mode becomes IN_CDATA_RCDATA, and the
   * content model is set to HTML5_Tokenizer::CDATA.
   *
   * @param array $token Start tag token handed to insertElement().
   */
  private function insertCDATAElement($token) {
      $this->insertElement($token);

      // Capture the mode first so it can be stored as the original mode.
      $previousMode = $this->mode;

      $this->content_model = HTML5_Tokenizer::CDATA;
      $this->original_mode = $previousMode;
      $this->mode = self::IN_CDATA_RCDATA;
  }
  /**
   * Inserts an element for the token and switches to RCDATA parsing.
   *
   * The insertion mode in effect before the switch is saved in
   * $original_mode, the insertion mode becomes IN_CDATA_RCDATA, and the
   * content model is set to HTML5_Tokenizer::RCDATA.
   *
   * @param array $token Start tag token handed to insertElement().
   */
  private function insertRCDATAElement($token) {
      $this->insertElement($token);

      // Capture the mode first so it can be stored as the original mode.
      $previousMode = $this->mode;

      $this->content_model = HTML5_Tokenizer::RCDATA;
      $this->original_mode = $previousMode;
      $this->mode = self::IN_CDATA_RCDATA;
  }
  /**
   * Looks up an attribute value on a token.
   *
   * Every entry of $token['attr'] is scanned, so when several attributes
   * share the requested name the value of the last one is returned.
   *
   * @param array  $token Token; attributes, if present, are a list of
   *                      arrays with 'name' and 'value' keys under 'attr'.
   * @param string $key   Attribute name to look for (compared with ===).
   * @return mixed The attribute value, or false when the token has no
   *               'attr' entry or no attribute with that name.
   */
  private function getAttr($token, $key) {
      $value = false;

      if (isset($token['attr'])) {
          foreach ($token['attr'] as $attribute) {
              if ($attribute['name'] !== $key) {
                  continue;
              }
              // No early exit: a later duplicate overrides an earlier one.
              $value = $attribute['value'];
          }
      }

      return $value;
  }
  /**
   * Returns the "current table".
   *
   * The current table is the last table element in the stack of open
   * elements, if there is one. If there is no table element in the stack
   * of open elements (fragment case), then the current table is the
   * first element in the stack of open elements (the html element).
   *
   * @return mixed The matching entry of $this->stack, or $this->stack[0].
   */
  private function getCurrentTable() {
      // Walk from the top of the stack (most recently opened) downwards.
      $index = count($this->stack);
      while (--$index >= 0) {
          $candidate = $this->stack[$index];
          if ($candidate->tagName === 'table') {
              return $candidate;
          }
      }

      return $this->stack[0];
  }
  /**
   * Determines the foster parent element.
   *
   * The foster parent element is the parent element of the last table
   * element in the stack of open elements, if there is a table element and
   * it has such a parent element. If there is no table element in the
   * stack of open elements (innerHTML case), then the foster parent element
   * is the first element in the stack of open elements (the html element).
   * Otherwise, if there is a table element in the stack of open elements,
   * but the last table element in the stack of open elements has no
   * parent, or its parent node is not an element, then the foster parent
   * element is the element before the last table element in the stack of
   * open elements.
   *
   * @return mixed The node to use as foster parent, taken either from the
   *               table's parentNode or from $this->stack.
   */
  private function getFosterParent() {
      // Locate the last (topmost) table element on the stack.
      $tableIndex = null;
      for ($n = count($this->stack) - 1; $n >= 0; $n--) {
          if ($this->stack[$n]->tagName === 'table') {
              $tableIndex = $n;
              break;
          }
      }

      // No table at all: fall back to the first stack entry.
      if ($tableIndex === null) {
          return $this->stack[0];
      }

      // The parent only qualifies when it exists AND is an element. The
      // node type has to be tested here; testing only for non-null would
      // hand back a non-element parent (such as the document) and make the
      // "parent node is not an element" rule unreachable.
      $parent = $this->stack[$tableIndex]->parentNode;
      if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
          return $parent;
      }

      // Table without a usable parent: use the element below it on the stack.
      return $this->stack[$tableIndex - 1];
  }
  /**
   * Foster parents a node.
   *
   * The node is inserted into the element returned by getFosterParent()
   * and the element returned by getCurrentTable() is flagged by setting
   * its "tainted" property to true.
   *
   * @param mixed $node Node to insert; passed to insertBefore() or
   *                    appendChild() of the foster parent.
   */
  public function fosterParent($node) {
      $foster_parent = $this->getFosterParent();
      $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
      /* When a node node is to be foster parented, the node node must be
       * inserted into the foster parent element, and the current table must
       * be marked as tainted. (Once the current table has been tainted,
       * whitespace characters are inserted into the foster parent element
       * instead of the current node.) */
      $table->tainted = true;
      /* If the foster parent element is the parent element of the last table
       * element in the stack of open elements, then node must be inserted
       * immediately before the last table element in the stack of open
       * elements in the foster parent element; otherwise, node must be
       * appended to the foster parent element. */
      // A table may have no parent at all (getFosterParent() caters for that
      // case), so the parent must be null-checked before calling a method on it.
      $table_parent = $table->parentNode;
      $insert_before_table = $table->tagName === 'table'
          && $table_parent !== null
          && $table_parent->isSameNode($foster_parent);

      if ($insert_before_table) {
          $foster_parent->insertBefore($node, $table);
      } else {
          $foster_parent->appendChild($node);
      }
  }
  /**
   * Debugging aid: echoes the tag names of the stack of open elements,
   * first entry to last, as a single line.
   */
  private function printStack() {
      $tagNames = array();
      foreach ($this->stack as $openElement) {
          array_push($tagNames, $openElement->tagName);
      }

      $joined = implode(', ', $tagNames);
      echo " -> stack [" . $joined . "]\n";
  }
  /**
   * Debugging aid: echoes the list of active formatting elements as a
   * single line. Entries identical to self::MARKER are shown as "MARKER",
   * all others by tag name. Prints nothing when the list is empty.
   */
  private function printActiveFormattingElements() {
      if ($this->a_formatting) {
          $labels = array();
          foreach ($this->a_formatting as $entry) {
              $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
          }

          $joined = implode(', ', $labels);
          echo " -> active formatting [" . $joined . "]\n";
      }
  }
  /**
   * Tells whether the current table has been marked as tainted.
   *
   * @return bool True when the element returned by getCurrentTable() has a
   *              "tainted" property holding a truthy value, false otherwise.
   */
  public function currentTableIsTainted() {
      $table = $this->getCurrentTable();

      return isset($table->tainted) && (bool) $table->tainted;
  }
  /**
   * Sets up the tree constructor for building a fragment.
   *
   * The fragment flag is always set. When a context tag name is given, a
   * context element is created from it, the tokenizer content model is
   * chosen from the context's tag name, a fresh html root is appended to
   * the document and becomes the only entry of the stack of open elements,
   * the insertion mode is reset, and the form pointer is taken from the
   * context's ancestor chain.
   *
   * @param string|null $context Tag name of the context element; nothing
   *                             beyond setting the fragment flag is done
   *                             when this is empty.
   */
  public function setupContext($context = null) {
      $this->fragment = true;
      if (!$context) {
          return;
      }

      $context = $this->dom->createElementNS(self::NS_HTML, $context);

      /* 4.1. Set the HTML parser's tokenization stage's content model
       * flag according to the context element, as follows: */
      $contextTag = $context->tagName;
      if (in_array($contextTag, array('title', 'textarea'), true)) {
          $this->content_model = HTML5_Tokenizer::RCDATA;
      } elseif (in_array($contextTag, array('style', 'script', 'xmp', 'iframe', 'noembed', 'noframes'), true)) {
          $this->content_model = HTML5_Tokenizer::CDATA;
      } elseif ($contextTag === 'noscript') {
          // XSCRIPT: assuming scripting is enabled
          $this->content_model = HTML5_Tokenizer::CDATA;
      } elseif ($contextTag === 'plaintext') {
          $this->content_model = HTML5_Tokenizer::PLAINTEXT;
      }

      /* 4.2. Let root be a new html element with no attributes. */
      $this->root = $this->dom->createElementNS(self::NS_HTML, 'html');

      /* 4.3 Append the element root to the Document node created above. */
      $this->dom->appendChild($this->root);

      /* 4.4 Set up the parser's stack of open elements so that it
       * contains just the single element root. */
      $this->stack = array($this->root);

      /* 4.5 Reset the parser's insertion mode appropriately. */
      $this->resetInsertionMode($context);

      /* 4.6 Set the parser's form element pointer to the nearest node
       * to the context element that is a form element (going straight up
       * the ancestor chain, and including the element itself, if it is a
       * form element), or, if there is no such form element, to null. */
      $ancestor = $context;
      do {
          if ($ancestor->tagName === 'form') {
              $this->form_pointer = $ancestor;
              break;
          }
          $ancestor = $ancestor->parentNode;
      } while ($ancestor);
  }
  /**
   * Adjusts MathML attribute names on a token.
   *
   * An attribute named "definitionurl" is renamed to "definitionURL";
   * everything else is left untouched.
   *
   * @param array $token Token; attributes, if present, are a list of
   *                     arrays with a 'name' key under 'attr'.
   * @return array The token with adjusted attribute names. A token with no
   *               (or empty) 'attr' entry is returned unchanged.
   */
  public function adjustMathMLAttributes($token) {
      // Guard the same way getAttr()/insertForeignElement() do: iterating a
      // missing 'attr' entry by reference would add a null 'attr' key.
      if (empty($token['attr'])) {
          return $token;
      }

      // Index-based writes avoid leaving a dangling foreach reference.
      foreach ($token['attr'] as $index => $kp) {
          if ($kp['name'] === 'definitionurl') {
              $token['attr'][$index]['name'] = 'definitionURL';
          }
      }

      return $token;
  }
  /**
   * Adjusts SVG attribute names on a token.
   *
   * Attribute names found as keys of the lookup table below (all
   * lowercase) are replaced by the mixed-case spelling the table maps them
   * to; other attributes are left untouched.
   *
   * @param array $token Token; attributes, if present, are a list of
   *                     arrays with a 'name' key under 'attr'.
   * @return array The token with adjusted attribute names. A token with no
   *               (or empty) 'attr' entry is returned unchanged.
   */
  public function adjustSVGAttributes($token) {
      static $lookup = array(
          'attributename' => 'attributeName',
          'attributetype' => 'attributeType',
          'basefrequency' => 'baseFrequency',
          'baseprofile' => 'baseProfile',
          'calcmode' => 'calcMode',
          'clippathunits' => 'clipPathUnits',
          'contentscripttype' => 'contentScriptType',
          'contentstyletype' => 'contentStyleType',
          'diffuseconstant' => 'diffuseConstant',
          'edgemode' => 'edgeMode',
          'externalresourcesrequired' => 'externalResourcesRequired',
          'filterres' => 'filterRes',
          'filterunits' => 'filterUnits',
          'glyphref' => 'glyphRef',
          'gradienttransform' => 'gradientTransform',
          'gradientunits' => 'gradientUnits',
          'kernelmatrix' => 'kernelMatrix',
          'kernelunitlength' => 'kernelUnitLength',
          'keypoints' => 'keyPoints',
          'keysplines' => 'keySplines',
          'keytimes' => 'keyTimes',
          'lengthadjust' => 'lengthAdjust',
          'limitingconeangle' => 'limitingConeAngle',
          'markerheight' => 'markerHeight',
          'markerunits' => 'markerUnits',
          'markerwidth' => 'markerWidth',
          'maskcontentunits' => 'maskContentUnits',
          'maskunits' => 'maskUnits',
          'numoctaves' => 'numOctaves',
          'pathlength' => 'pathLength',
          'patterncontentunits' => 'patternContentUnits',
          'patterntransform' => 'patternTransform',
          'patternunits' => 'patternUnits',
          'pointsatx' => 'pointsAtX',
          'pointsaty' => 'pointsAtY',
          'pointsatz' => 'pointsAtZ',
          'preservealpha' => 'preserveAlpha',
          'preserveaspectratio' => 'preserveAspectRatio',
          'primitiveunits' => 'primitiveUnits',
          'refx' => 'refX',
          'refy' => 'refY',
          'repeatcount' => 'repeatCount',
          'repeatdur' => 'repeatDur',
          'requiredextensions' => 'requiredExtensions',
          'requiredfeatures' => 'requiredFeatures',
          'specularconstant' => 'specularConstant',
          'specularexponent' => 'specularExponent',
          'spreadmethod' => 'spreadMethod',
          'startoffset' => 'startOffset',
          'stddeviation' => 'stdDeviation',
          'stitchtiles' => 'stitchTiles',
          'surfacescale' => 'surfaceScale',
          'systemlanguage' => 'systemLanguage',
          'tablevalues' => 'tableValues',
          'targetx' => 'targetX',
          'targety' => 'targetY',
          'textlength' => 'textLength',
          'viewbox' => 'viewBox',
          'viewtarget' => 'viewTarget',
          'xchannelselector' => 'xChannelSelector',
          'ychannelselector' => 'yChannelSelector',
          'zoomandpan' => 'zoomAndPan',
      );

      // Guard the same way getAttr()/insertForeignElement() do: iterating a
      // missing 'attr' entry by reference would add a null 'attr' key.
      if (empty($token['attr'])) {
          return $token;
      }

      // Index-based writes avoid leaving a dangling foreach reference.
      foreach ($token['attr'] as $index => $kp) {
          if (isset($lookup[$kp['name']])) {
              $token['attr'][$index]['name'] = $lookup[$kp['name']];
          }
      }

      return $token;
  }
  /**
   * Adjusts foreign (namespaced) attribute names on a token.
   *
   * Attribute names found as keys of the lookup table below are replaced
   * by a three-element array of prefix, local name and namespace;
   * insertForeignElement() reads the local name (index 1) and namespace
   * (index 2) from such arrays. Other attributes keep their string name.
   *
   * @param array $token Token; attributes, if present, are a list of
   *                     arrays with a 'name' key under 'attr'.
   * @return array The token with adjusted attribute names. A token with no
   *               (or empty) 'attr' entry is returned unchanged.
   */
  public function adjustForeignAttributes($token) {
      static $lookup = array(
          'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
          'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
          'xlink:href' => array('xlink', 'href', self::NS_XLINK),
          'xlink:role' => array('xlink', 'role', self::NS_XLINK),
          'xlink:show' => array('xlink', 'show', self::NS_XLINK),
          'xlink:title' => array('xlink', 'title', self::NS_XLINK),
          'xlink:type' => array('xlink', 'type', self::NS_XLINK),
          'xml:base' => array('xml', 'base', self::NS_XML),
          'xml:lang' => array('xml', 'lang', self::NS_XML),
          'xml:space' => array('xml', 'space', self::NS_XML),
          'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
          'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
      );

      // Guard the same way getAttr()/insertForeignElement() do: iterating a
      // missing 'attr' entry by reference would add a null 'attr' key.
      if (empty($token['attr'])) {
          return $token;
      }

      // Index-based writes avoid leaving a dangling foreach reference.
      foreach ($token['attr'] as $index => $kp) {
          if (isset($lookup[$kp['name']])) {
              $token['attr'][$index]['name'] = $lookup[$kp['name']];
          }
      }

      return $token;
  }
  /**
   * Creates an element in the given namespace from a token and inserts it.
   *
   * The token's attributes are copied onto the new element (an attribute
   * that is already present is not overwritten), the element is handed to
   * appendToRealParent() and then pushed onto the stack of open elements.
   *
   * @param array  $token        Token with a 'name' and, optionally, a list
   *                             of attributes under 'attr'. An attribute
   *                             name may be a string or an array whose
   *                             index 1 is the local name and index 2 the
   *                             namespace.
   * @param string $namespaceURI Namespace the element is created in.
   */
  public function insertForeignElement($token, $namespaceURI) {
      $element = $this->dom->createElementNS($namespaceURI, $token['name']);

      $attributes = empty($token['attr']) ? array() : $token['attr'];
      foreach ($attributes as $pair) {
          // Plain string names are treated as belonging to NS_HTML; array
          // names carry their own namespace and local name.
          $localName = $pair['name'];
          $attrNamespace = self::NS_HTML;
          if (is_array($localName)) {
              $attrNamespace = $localName[2];
              $localName = $localName[1];
          }

          if ($element->hasAttributeNS($attrNamespace, $localName)) {
              continue;
          }

          // XSKETCHY: work around godawful libxml bug
          if ($attrNamespace === self::NS_XLINK) {
              $element->setAttribute('xlink:'.$localName, $pair['value']);
              continue;
          }
          // Another godawful libxml bug
          if ($attrNamespace === self::NS_HTML) {
              $element->setAttribute($localName, $pair['value']);
              continue;
          }
          $element->setAttributeNS($attrNamespace, $localName, $pair['value']);
      }

      $this->appendToRealParent($element);
      array_push($this->stack, $element);
      // XERROR: see below
      /* If the newly created element has an xmlns attribute in the XMLNS
       * namespace whose value is not exactly the same as the element's
       * namespace, that is a parse error. Similarly, if the newly created
       * element has an xmlns:xlink attribute in the XMLNS namespace whose
       * value is not the XLink Namespace, that is a parse error. */
  }
  /**
   * Normalizes the document and returns the parse result.
   *
   * @return mixed The document itself when not parsing a fragment;
   *               otherwise the child nodes of the fragment root, or of
   *               the document when no root has been set.
   */
  public function save() {
      $this->dom->normalize();

      if (!$this->fragment) {
          return $this->dom;
      }

      $container = $this->root ? $this->root : $this->dom;

      return $container->childNodes;
  }
  3233. }