Friendica Communications Platform (please note that this is a clone of the repository at github, issues are handled there) https://friendi.ca
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3723 lines
163 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. <?php
  2. /*
  3. Copyright 2007 Jeroen van der Meer <http://jero.net/>
  4. Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
  5. Permission is hereby granted, free of charge, to any person obtaining a
  6. copy of this software and associated documentation files (the
  7. "Software"), to deal in the Software without restriction, including
  8. without limitation the rights to use, copy, modify, merge, publish,
  9. distribute, sublicense, and/or sell copies of the Software, and to
  10. permit persons to whom the Software is furnished to do so, subject to
  11. the following conditions:
  12. The above copyright notice and this permission notice shall be included
  13. in all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  15. OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  18. CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  19. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  20. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. */
  22. // Tags for FIX ME!!!: (in order of priority)
  23. // XXX - should be fixed NAO!
  24. // XERROR - with regards to parse errors
  25. // XSCRIPT - with regards to scripting mode
  26. // XENCODING - with regards to encoding (for reparsing tests)
  27. class HTML5_TreeBuilder {
  28. public $stack = array();
  29. public $content_model;
  30. private $mode;
  31. private $original_mode;
  32. private $secondary_mode;
  33. private $dom;
  34. // Whether or not normal insertion of nodes should actually foster
  35. // parent (used in one case in spec)
  36. private $foster_parent = false;
  37. private $a_formatting = array();
  38. private $head_pointer = null;
  39. private $form_pointer = null;
  40. private $flag_frameset_ok = true;
  41. private $flag_force_quirks = false;
  42. private $ignored = false;
  43. private $quirks_mode = null;
  44. // this gets set to 2 when we want to ignore the next lf character, and
  45. // is decremented at the beginning of each processed token (this way,
  46. // code can check for (bool)$ignore_lf_token, but it phases out
  47. // appropriately)
  48. private $ignore_lf_token = 0;
  49. private $fragment = false;
  50. private $root;
  51. private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
  52. private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
  53. private $special = array('address','area','article','aside','base','basefont','bgsound',
  54. 'blockquote','body','br','center','col','colgroup','command','dd','details','dialog','dir','div','dl',
  55. 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
  56. 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
  57. 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
  58. 'p','param','plaintext','pre','script','select','spacer','style',
  59. 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
  60. // Tree construction modes
  61. const INITIAL = 0;
  62. const BEFORE_HTML = 1;
  63. const BEFORE_HEAD = 2;
  64. const IN_HEAD = 3;
  65. const IN_HEAD_NOSCRIPT = 4;
  66. const AFTER_HEAD = 5;
  67. const IN_BODY = 6;
  68. const IN_CDATA_RCDATA = 7;
  69. const IN_TABLE = 8;
  70. const IN_CAPTION = 9;
  71. const IN_COLUMN_GROUP = 10;
  72. const IN_TABLE_BODY = 11;
  73. const IN_ROW = 12;
  74. const IN_CELL = 13;
  75. const IN_SELECT = 14;
  76. const IN_SELECT_IN_TABLE= 15;
  77. const IN_FOREIGN_CONTENT= 16;
  78. const AFTER_BODY = 17;
  79. const IN_FRAMESET = 18;
  80. const AFTER_FRAMESET = 19;
  81. const AFTER_AFTER_BODY = 20;
  82. const AFTER_AFTER_FRAMESET = 21;
  /**
   * Converts a magic number to a readable name. Use for debugging.
   *
   * The lookup table is built once (kept in a static local) from the
   * constants of HTML5_TreeBuilder. Only integer and string constants are
   * indexed: NS_HTML is null, which is not a valid array key, and feeding
   * it to array_flip() raises a warning.
   *
   * @param int|string $number Value of one of the class constants.
   * @return string|null Name of the constant holding that value, or null
   *                     when no indexed constant matches.
   */
  private function strConst($number) {
      static $lookup;
      if (!$lookup) {
          $r = new ReflectionClass('HTML5_TreeBuilder');
          $lookup = array();
          foreach ($r->getConstants() as $name => $value) {
              // Skip values that cannot be used as array keys (e.g. null).
              if (is_int($value) || is_string($value)) {
                  $lookup[$value] = $name;
              }
          }
      }
      // Guard the read so an unknown value yields null without a notice.
      return isset($lookup[$number]) ? $lookup[$number] : null;
  }
  94. // The different types of elements.
  95. const SPECIAL = 100;
  96. const SCOPING = 101;
  97. const FORMATTING = 102;
  98. const PHRASING = 103;
  99. // Quirks modes in $quirks_mode
  100. const NO_QUIRKS = 200;
  101. const QUIRKS_MODE = 201;
  102. const LIMITED_QUIRKS_MODE = 202;
  103. // Marker to be placed in $a_formatting
  104. const MARKER = 300;
  105. // Namespaces for foreign content
  106. const NS_HTML = null; // to prevent DOM from requiring NS on everything
  107. const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
  108. const NS_SVG = 'http://www.w3.org/2000/svg';
  109. const NS_XLINK = 'http://www.w3.org/1999/xlink';
  110. const NS_XML = 'http://www.w3.org/XML/1998/namespace';
  111. const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
  /**
   * Creates the DOMDocument the tree is built into and puts the builder
   * in the INITIAL insertion mode.
   *
   * The document is set to UTF-8 encoding, keeps whitespace, substitutes
   * entities, and has strict error checking switched off.
   */
  public function __construct() {
      $document = new DOMDocument();
      $document->encoding            = 'UTF-8';
      $document->preserveWhiteSpace  = true;
      $document->substituteEntities  = true;
      $document->strictErrorChecking = false;

      $this->dom  = $document;
      $this->mode = self::INITIAL;
  }
  120. // Process tag tokens
  121. public function emitToken($token, $mode = null) {
  122. // XXX: ignore parse errors... why are we emitting them, again?
  123. if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
  124. if ($mode === null) $mode = $this->mode;
  125. /*
  126. $backtrace = debug_backtrace();
  127. if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
  128. echo $this->strConst($mode);
  129. if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
  130. echo "\n ";
  131. token_dump($token);
  132. $this->printStack();
  133. $this->printActiveFormattingElements();
  134. if ($this->foster_parent) echo " -> this is a foster parent mode\n";
  135. */
  136. if ($this->ignore_lf_token) $this->ignore_lf_token--;
  137. $this->ignored = false;
  138. $token['name'] = str_replace(':', '-', $token['name']);
  139. // indenting is a little wonky, this can be changed later on
  140. switch ($mode) {
  141. case self::INITIAL:
  142. /* A character token that is one of U+0009 CHARACTER TABULATION,
  143. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
  144. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  145. /* Ignore the token. */
  146. $this->ignored = true;
  147. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  148. if (
  149. $token['name'] !== 'html' || !empty($token['public']) ||
  150. !empty($token['system']) || $token !== 'about:legacy-compat'
  151. ) {
  152. /* If the DOCTYPE token's name is not a case-sensitive match
  153. * for the string "html", or if the token's public identifier
  154. * is not missing, or if the token's system identifier is
  155. * neither missing nor a case-sensitive match for the string
  156. * "about:legacy-compat", then there is a parse error (this
  157. * is the DOCTYPE parse error). */
  158. // DOCTYPE parse error
  159. }
  160. /* Append a DocumentType node to the Document node, with the name
  161. * attribute set to the name given in the DOCTYPE token, or the
  162. * empty string if the name was missing; the publicId attribute
  163. * set to the public identifier given in the DOCTYPE token, or
  164. * the empty string if the public identifier was missing; the
  165. * systemId attribute set to the system identifier given in the
  166. * DOCTYPE token, or the empty string if the system identifier
  167. * was missing; and the other attributes specific to
  168. * DocumentType objects set to null and empty lists as
  169. * appropriate. Associate the DocumentType node with the
  170. * Document object so that it is returned as the value of the
  171. * doctype attribute of the Document object. */
  172. if (!isset($token['public'])) $token['public'] = null;
  173. if (!isset($token['system'])) $token['system'] = null;
  174. // Yes this is hacky. I'm kind of annoyed that I can't appendChild
  175. // a doctype to DOMDocument. Maybe I haven't chanted the right
  176. // syllables.
  177. $impl = new DOMImplementation();
  178. // This call can fail for particularly pathological cases (namely,
  179. // the qualifiedName parameter ($token['name']) could be missing.
  180. if ($token['name']) {
  181. $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
  182. $this->dom->appendChild($doctype);
  183. } else {
  184. // It looks like libxml's not actually *able* to express this case.
  185. // So... don't.
  186. $this->dom->emptyDoctype = true;
  187. }
  188. $public = is_null($token['public']) ? false : strtolower($token['public']);
  189. $system = is_null($token['system']) ? false : strtolower($token['system']);
  190. $publicStartsWithForQuirks = array(
  191. "+//silmaril//dtd html pro v0r11 19970101//",
  192. "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
  193. "-//as//dtd html 3.0 aswedit + extensions//",
  194. "-//ietf//dtd html 2.0 level 1//",
  195. "-//ietf//dtd html 2.0 level 2//",
  196. "-//ietf//dtd html 2.0 strict level 1//",
  197. "-//ietf//dtd html 2.0 strict level 2//",
  198. "-//ietf//dtd html 2.0 strict//",
  199. "-//ietf//dtd html 2.0//",
  200. "-//ietf//dtd html 2.1e//",
  201. "-//ietf//dtd html 3.0//",
  202. "-//ietf//dtd html 3.2 final//",
  203. "-//ietf//dtd html 3.2//",
  204. "-//ietf//dtd html 3//",
  205. "-//ietf//dtd html level 0//",
  206. "-//ietf//dtd html level 1//",
  207. "-//ietf//dtd html level 2//",
  208. "-//ietf//dtd html level 3//",
  209. "-//ietf//dtd html strict level 0//",
  210. "-//ietf//dtd html strict level 1//",
  211. "-//ietf//dtd html strict level 2//",
  212. "-//ietf//dtd html strict level 3//",
  213. "-//ietf//dtd html strict//",
  214. "-//ietf//dtd html//",
  215. "-//metrius//dtd metrius presentational//",
  216. "-//microsoft//dtd internet explorer 2.0 html strict//",
  217. "-//microsoft//dtd internet explorer 2.0 html//",
  218. "-//microsoft//dtd internet explorer 2.0 tables//",
  219. "-//microsoft//dtd internet explorer 3.0 html strict//",
  220. "-//microsoft//dtd internet explorer 3.0 html//",
  221. "-//microsoft//dtd internet explorer 3.0 tables//",
  222. "-//netscape comm. corp.//dtd html//",
  223. "-//netscape comm. corp.//dtd strict html//",
  224. "-//o'reilly and associates//dtd html 2.0//",
  225. "-//o'reilly and associates//dtd html extended 1.0//",
  226. "-//o'reilly and associates//dtd html extended relaxed 1.0//",
  227. "-//spyglass//dtd html 2.0 extended//",
  228. "-//sq//dtd html 2.0 hotmetal + extensions//",
  229. "-//sun microsystems corp.//dtd hotjava html//",
  230. "-//sun microsystems corp.//dtd hotjava strict html//",
  231. "-//w3c//dtd html 3 1995-03-24//",
  232. "-//w3c//dtd html 3.2 draft//",
  233. "-//w3c//dtd html 3.2 final//",
  234. "-//w3c//dtd html 3.2//",
  235. "-//w3c//dtd html 3.2s draft//",
  236. "-//w3c//dtd html 4.0 frameset//",
  237. "-//w3c//dtd html 4.0 transitional//",
  238. "-//w3c//dtd html experimental 19960712//",
  239. "-//w3c//dtd html experimental 970421//",
  240. "-//w3c//dtd w3 html//",
  241. "-//w3o//dtd w3 html 3.0//",
  242. "-//webtechs//dtd mozilla html 2.0//",
  243. "-//webtechs//dtd mozilla html//",
  244. );
  245. $publicSetToForQuirks = array(
  246. "-//w3o//dtd w3 html strict 3.0//",
  247. "-/w3c/dtd html 4.0 transitional/en",
  248. "html",
  249. );
  250. $publicStartsWithAndSystemForQuirks = array(
  251. "-//w3c//dtd html 4.01 frameset//",
  252. "-//w3c//dtd html 4.01 transitional//",
  253. );
  254. $publicStartsWithForLimitedQuirks = array(
  255. "-//w3c//dtd xhtml 1.0 frameset//",
  256. "-//w3c//dtd xhtml 1.0 transitional//",
  257. );
  258. $publicStartsWithAndSystemForLimitedQuirks = array(
  259. "-//w3c//dtd html 4.01 frameset//",
  260. "-//w3c//dtd html 4.01 transitional//",
  261. );
  262. // first, do easy checks
  263. if (
  264. !empty($token['force-quirks']) ||
  265. strtolower($token['name']) !== 'html'
  266. ) {
  267. $this->quirks_mode = self::QUIRKS_MODE;
  268. } else {
  269. do {
  270. if ($system) {
  271. foreach ($publicStartsWithAndSystemForQuirks as $x) {
  272. if (strncmp($public, $x, strlen($x)) === 0) {
  273. $this->quirks_mode = self::QUIRKS_MODE;
  274. break;
  275. }
  276. }
  277. if (!is_null($this->quirks_mode)) break;
  278. foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
  279. if (strncmp($public, $x, strlen($x)) === 0) {
  280. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  281. break;
  282. }
  283. }
  284. if (!is_null($this->quirks_mode)) break;
  285. }
  286. foreach ($publicSetToForQuirks as $x) {
  287. if ($public === $x) {
  288. $this->quirks_mode = self::QUIRKS_MODE;
  289. break;
  290. }
  291. }
  292. if (!is_null($this->quirks_mode)) break;
  293. foreach ($publicStartsWithForLimitedQuirks as $x) {
  294. if (strncmp($public, $x, strlen($x)) === 0) {
  295. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  296. }
  297. }
  298. if (!is_null($this->quirks_mode)) break;
  299. if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
  300. $this->quirks_mode = self::QUIRKS_MODE;
  301. break;
  302. }
  303. foreach ($publicStartsWithForQuirks as $x) {
  304. if (strncmp($public, $x, strlen($x)) === 0) {
  305. $this->quirks_mode = self::QUIRKS_MODE;
  306. break;
  307. }
  308. }
  309. if (is_null($this->quirks_mode)) {
  310. $this->quirks_mode = self::NO_QUIRKS;
  311. }
  312. } while (false);
  313. }
  314. $this->mode = self::BEFORE_HTML;
  315. } else {
  316. // parse error
  317. /* Switch the insertion mode to "before html", then reprocess the
  318. * current token. */
  319. $this->mode = self::BEFORE_HTML;
  320. $this->quirks_mode = self::QUIRKS_MODE;
  321. $this->emitToken($token);
  322. }
  323. break;
  324. case self::BEFORE_HTML:
  325. /* A DOCTYPE token */
  326. if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  327. // Parse error. Ignore the token.
  328. $this->ignored = true;
  329. /* A comment token */
  330. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  331. /* Append a Comment node to the Document object with the data
  332. attribute set to the data given in the comment token. */
  333. $comment = $this->dom->createComment($token['data']);
  334. $this->dom->appendChild($comment);
  335. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  336. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  337. or U+0020 SPACE */
  338. } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  339. /* Ignore the token. */
  340. $this->ignored = true;
  341. /* A start tag whose tag name is "html" */
  342. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
  343. /* Create an element for the token in the HTML namespace. Append it
  344. * to the Document object. Put this element in the stack of open
  345. * elements. */
  346. $html = $this->insertElement($token, false);
  347. $this->dom->appendChild($html);
  348. $this->stack[] = $html;
  349. $this->mode = self::BEFORE_HEAD;
  350. } else {
  351. /* Create an html element. Append it to the Document object. Put
  352. * this element in the stack of open elements. */
  353. $html = $this->dom->createElementNS(self::NS_HTML, 'html');
  354. $this->dom->appendChild($html);
  355. $this->stack[] = $html;
  356. /* Switch the insertion mode to "before head", then reprocess the
  357. * current token. */
  358. $this->mode = self::BEFORE_HEAD;
  359. $this->emitToken($token);
  360. }
  361. break;
  362. case self::BEFORE_HEAD:
  363. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  364. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  365. or U+0020 SPACE */
  366. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  367. /* Ignore the token. */
  368. $this->ignored = true;
  369. /* A comment token */
  370. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  371. /* Append a Comment node to the current node with the data attribute
  372. set to the data given in the comment token. */
  373. $this->insertComment($token['data']);
  374. /* A DOCTYPE token */
  375. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  376. /* Parse error. Ignore the token */
  377. $this->ignored = true;
  378. // parse error
  379. /* A start tag token with the tag name "html" */
  380. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  381. /* Process the token using the rules for the "in body"
  382. * insertion mode. */
  383. $this->processWithRulesFor($token, self::IN_BODY);
  384. /* A start tag token with the tag name "head" */
  385. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
  386. /* Insert an HTML element for the token. */
  387. $element = $this->insertElement($token);
  388. /* Set the head element pointer to this new element node. */
  389. $this->head_pointer = $element;
  390. /* Change the insertion mode to "in head". */
  391. $this->mode = self::IN_HEAD;
  392. /* An end tag whose tag name is one of: "head", "body", "html", "br" */
  393. } elseif(
  394. $token['type'] === HTML5_Tokenizer::ENDTAG && (
  395. $token['name'] === 'head' || $token['name'] === 'body' ||
  396. $token['name'] === 'html' || $token['name'] === 'br'
  397. )) {
  398. /* Act as if a start tag token with the tag name "head" and no
  399. * attributes had been seen, then reprocess the current token. */
  400. $this->emitToken(array(
  401. 'name' => 'head',
  402. 'type' => HTML5_Tokenizer::STARTTAG,
  403. 'attr' => array()
  404. ));
  405. $this->emitToken($token);
  406. /* Any other end tag */
  407. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
  408. /* Parse error. Ignore the token. */
  409. $this->ignored = true;
  410. } else {
  411. /* Act as if a start tag token with the tag name "head" and no
  412. * attributes had been seen, then reprocess the current token.
  413. * Note: This will result in an empty head element being
  414. * generated, with the current token being reprocessed in the
  415. * "after head" insertion mode. */
  416. $this->emitToken(array(
  417. 'name' => 'head',
  418. 'type' => HTML5_Tokenizer::STARTTAG,
  419. 'attr' => array()
  420. ));
  421. $this->emitToken($token);
  422. }
  423. break;
  424. case self::IN_HEAD:
  425. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  426. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  427. or U+0020 SPACE. */
  428. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  429. /* Insert the character into the current node. */
  430. $this->insertText($token['data']);
  431. /* A comment token */
  432. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  433. /* Append a Comment node to the current node with the data attribute
  434. set to the data given in the comment token. */
  435. $this->insertComment($token['data']);
  436. /* A DOCTYPE token */
  437. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  438. /* Parse error. Ignore the token. */
  439. $this->ignored = true;
  440. // parse error
  441. /* A start tag whose tag name is "html" */
  442. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  443. $token['name'] === 'html') {
  444. $this->processWithRulesFor($token, self::IN_BODY);
  445. /* A start tag whose tag name is one of: "base", "command", "link" */
  446. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  447. ($token['name'] === 'base' || $token['name'] === 'command' ||
  448. $token['name'] === 'link')) {
  449. /* Insert an HTML element for the token. Immediately pop the
  450. * current node off the stack of open elements. */
  451. $this->insertElement($token);
  452. array_pop($this->stack);
  453. // YYY: Acknowledge the token's self-closing flag, if it is set.
  454. /* A start tag whose tag name is "meta" */
  455. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
  456. /* Insert an HTML element for the token. Immediately pop the
  457. * current node off the stack of open elements. */
  458. $this->insertElement($token);
  459. array_pop($this->stack);
  460. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  461. // XENCODING: If the element has a charset attribute, and its value is a
  462. // supported encoding, and the confidence is currently tentative,
  463. // then change the encoding to the encoding given by the value of
  464. // the charset attribute.
  465. //
  466. // Otherwise, if the element has a content attribute, and applying
  467. // the algorithm for extracting an encoding from a Content-Type to
  468. // its value returns a supported encoding encoding, and the
  469. // confidence is currently tentative, then change the encoding to
  470. // the encoding encoding.
  471. /* A start tag with the tag name "title" */
  472. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
  473. $this->insertRCDATAElement($token);
  474. /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
  475. * A start tag whose tag name is one of: "noframes", "style" */
  476. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  477. ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
  478. // XSCRIPT: Scripting flag not respected
  479. $this->insertCDATAElement($token);
  480. // XSCRIPT: Scripting flag disable not implemented
  481. /* A start tag with the tag name "script" */
  482. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
  483. /* 1. Create an element for the token in the HTML namespace. */
  484. $node = $this->insertElement($token, false);
  485. /* 2. Mark the element as being "parser-inserted" */
  486. // Uhhh... XSCRIPT
  487. /* 3. If the parser was originally created for the HTML
  488. * fragment parsing algorithm, then mark the script element as
  489. * "already executed". (fragment case) */
  490. // ditto... XSCRIPT
  491. /* 4. Append the new element to the current node and push it onto
  492. * the stack of open elements. */
  493. end($this->stack)->appendChild($node);
  494. $this->stack[] = $node;
  495. // I guess we could squash these together
  496. /* 6. Let the original insertion mode be the current insertion mode. */
  497. $this->original_mode = $this->mode;
  498. /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
  499. $this->mode = self::IN_CDATA_RCDATA;
  500. /* 5. Switch the tokeniser's content model flag to the CDATA state. */
  501. $this->content_model = HTML5_Tokenizer::CDATA;
  502. /* An end tag with the tag name "head" */
  503. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
  504. /* Pop the current node (which will be the head element) off the stack of open elements. */
  505. array_pop($this->stack);
  506. /* Change the insertion mode to "after head". */
  507. $this->mode = self::AFTER_HEAD;
  508. // Slight logic inversion here to minimize duplication
  509. /* A start tag with the tag name "head". */
  510. /* An end tag whose tag name is not one of: "body", "html", "br" */
  511. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  512. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
  513. $token['name'] !== 'body' && $token['name'] !== 'br')) {
  514. // Parse error. Ignore the token.
  515. $this->ignored = true;
  516. /* Anything else */
  517. } else {
  518. /* Act as if an end tag token with the tag name "head" had been
  519. * seen, and reprocess the current token. */
  520. $this->emitToken(array(
  521. 'name' => 'head',
  522. 'type' => HTML5_Tokenizer::ENDTAG
  523. ));
  524. /* Then, reprocess the current token. */
  525. $this->emitToken($token);
  526. }
  527. break;
  528. case self::IN_HEAD_NOSCRIPT:
  529. if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  530. // parse error
  531. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  532. $this->processWithRulesFor($token, self::IN_BODY);
  533. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
  534. /* Pop the current node (which will be a noscript element) from the
  535. * stack of open elements; the new current node will be a head
  536. * element. */
  537. array_pop($this->stack);
  538. $this->mode = self::IN_HEAD;
  539. } elseif (
  540. ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
  541. ($token['type'] === HTML5_Tokenizer::COMMENT) ||
  542. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  543. $token['name'] === 'link' || $token['name'] === 'meta' ||
  544. $token['name'] === 'noframes' || $token['name'] === 'style'))) {
  545. $this->processWithRulesFor($token, self::IN_HEAD);
  546. // inverted logic
  547. } elseif (
  548. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  549. $token['name'] === 'head' || $token['name'] === 'noscript')) ||
  550. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  551. $token['name'] !== 'br')) {
  552. // parse error
  553. } else {
  554. // parse error
  555. $this->emitToken(array(
  556. 'type' => HTML5_Tokenizer::ENDTAG,
  557. 'name' => 'noscript',
  558. ));
  559. $this->emitToken($token);
  560. }
  561. break;
  562. case self::AFTER_HEAD:
  563. /* Handle the token as follows: */
  564. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  565. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  566. or U+0020 SPACE */
  567. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  568. /* Append the character to the current node. */
  569. $this->insertText($token['data']);
  570. /* A comment token */
  571. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  572. /* Append a Comment node to the current node with the data attribute
  573. set to the data given in the comment token. */
  574. $this->insertComment($token['data']);
  575. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  576. // parse error
  577. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  578. $this->processWithRulesFor($token, self::IN_BODY);
  579. /* A start tag token with the tag name "body" */
  580. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
  581. $this->insertElement($token);
  582. /* Set the frameset-ok flag to "not ok". */
  583. $this->flag_frameset_ok = false;
  584. /* Change the insertion mode to "in body". */
  585. $this->mode = self::IN_BODY;
  586. /* A start tag token with the tag name "frameset" */
  587. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
  588. /* Insert a frameset element for the token. */
  589. $this->insertElement($token);
  590. /* Change the insertion mode to "in frameset". */
  591. $this->mode = self::IN_FRAMESET;
  592. /* A start tag token whose tag name is one of: "base", "link", "meta",
  593. "script", "style", "title" */
  594. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  595. array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
  596. // parse error
  597. /* Push the node pointed to by the head element pointer onto the
  598. * stack of open elements. */
  599. $this->stack[] = $this->head_pointer;
  600. $this->processWithRulesFor($token, self::IN_HEAD);
  601. array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
  602. // inversion of specification
  603. } elseif(
  604. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  605. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  606. $token['name'] !== 'body' && $token['name'] !== 'html' &&
  607. $token['name'] !== 'br')) {
  608. // parse error
  609. /* Anything else */
  610. } else {
  611. $this->emitToken(array(
  612. 'name' => 'body',
  613. 'type' => HTML5_Tokenizer::STARTTAG,
  614. 'attr' => array()
  615. ));
  616. $this->flag_frameset_ok = true;
  617. $this->emitToken($token);
  618. }
  619. break;
  620. case self::IN_BODY:
  621. /* Handle the token as follows: */
  622. switch($token['type']) {
  623. /* A character token */
  624. case HTML5_Tokenizer::CHARACTER:
  625. case HTML5_Tokenizer::SPACECHARACTER:
  626. /* Reconstruct the active formatting elements, if any. */
  627. $this->reconstructActiveFormattingElements();
  628. /* Append the token's character to the current node. */
  629. $this->insertText($token['data']);
  630. /* If the token is not one of U+0009 CHARACTER TABULATION,
  631. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
  632. * SPACE, then set the frameset-ok flag to "not ok". */
  633. // i.e., if any of the characters is not whitespace
  634. if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
  635. $this->flag_frameset_ok = false;
  636. }
  637. break;
  638. /* A comment token */
  639. case HTML5_Tokenizer::COMMENT:
  640. /* Append a Comment node to the current node with the data
  641. attribute set to the data given in the comment token. */
  642. $this->insertComment($token['data']);
  643. break;
  644. case HTML5_Tokenizer::DOCTYPE:
  645. // parse error
  646. break;
  647. case HTML5_Tokenizer::STARTTAG:
  648. switch($token['name']) {
  649. case 'html':
  650. // parse error
  651. /* For each attribute on the token, check to see if the
  652. * attribute is already present on the top element of the
  653. * stack of open elements. If it is not, add the attribute
  654. * and its corresponding value to that element. */
  655. foreach($token['attr'] as $attr) {
  656. if(!$this->stack[0]->hasAttribute($attr['name'])) {
  657. $this->stack[0]->setAttribute($attr['name'], $attr['value']);
  658. }
  659. }
  660. break;
  661. case 'base': case 'command': case 'link': case 'meta': case 'noframes':
  662. case 'script': case 'style': case 'title':
  663. /* Process the token as if the insertion mode had been "in
  664. head". */
  665. $this->processWithRulesFor($token, self::IN_HEAD);
  666. break;
  667. /* A start tag token with the tag name "body" */
  668. case 'body':
  669. /* Parse error. If the second element on the stack of open
  670. elements is not a body element, or, if the stack of open
  671. elements has only one node on it, then ignore the token.
  672. (fragment case) */
  673. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  674. $this->ignored = true;
  675. // Ignore
  676. /* Otherwise, for each attribute on the token, check to see
  677. if the attribute is already present on the body element (the
  678. second element) on the stack of open elements. If it is not,
  679. add the attribute and its corresponding value to that
  680. element. */
  681. } else {
  682. foreach($token['attr'] as $attr) {
  683. if(!$this->stack[1]->hasAttribute($attr['name'])) {
  684. $this->stack[1]->setAttribute($attr['name'], $attr['value']);
  685. }
  686. }
  687. }
  688. break;
  689. case 'frameset':
  690. // parse error
  691. /* If the second element on the stack of open elements is
  692. * not a body element, or, if the stack of open elements
  693. * has only one node on it, then ignore the token.
  694. * (fragment case) */
  695. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  696. $this->ignored = true;
  697. // Ignore
  698. } elseif (!$this->flag_frameset_ok) {
  699. $this->ignored = true;
  700. // Ignore
  701. } else {
  702. /* 1. Remove the second element on the stack of open
  703. * elements from its parent node, if it has one. */
  704. if($this->stack[1]->parentNode) {
  705. $this->stack[1]->parentNode->removeChild($this->stack[1]);
  706. }
  707. /* 2. Pop all the nodes from the bottom of the stack of
  708. * open elements, from the current node up to the root
  709. * html element. */
  710. array_splice($this->stack, 1);
  711. $this->insertElement($token);
  712. $this->mode = self::IN_FRAMESET;
  713. }
  714. break;
  715. // in spec, there is a diversion here
  716. case 'address': case 'article': case 'aside': case 'blockquote':
  717. case 'center': case 'datagrid': case 'details': case 'dialog': case 'dir':
  718. case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
  719. case 'header': case 'hgroup': case 'menu': case 'nav':
  720. case 'ol': case 'p': case 'section': case 'ul':
  721. /* If the stack of open elements has a p element in scope,
  722. then act as if an end tag with the tag name p had been
  723. seen. */
  724. if($this->elementInScope('p')) {
  725. $this->emitToken(array(
  726. 'name' => 'p',
  727. 'type' => HTML5_Tokenizer::ENDTAG
  728. ));
  729. }
  730. /* Insert an HTML element for the token. */
  731. $this->insertElement($token);
  732. break;
  733. /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
  734. "h5", "h6" */
  735. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  736. /* If the stack of open elements has a p element in scope,
  737. then act as if an end tag with the tag name p had been seen. */
  738. if($this->elementInScope('p')) {
  739. $this->emitToken(array(
  740. 'name' => 'p',
  741. 'type' => HTML5_Tokenizer::ENDTAG
  742. ));
  743. }
  744. /* If the current node is an element whose tag name is one
  745. * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
  746. * parse error; pop the current node off the stack of open
  747. * elements. */
  748. $peek = array_pop($this->stack);
  749. if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
  750. // parse error
  751. } else {
  752. $this->stack[] = $peek;
  753. }
  754. /* Insert an HTML element for the token. */
  755. $this->insertElement($token);
  756. break;
  757. case 'pre': case 'listing':
  758. /* If the stack of open elements has a p element in scope,
  759. then act as if an end tag with the tag name p had been seen. */
  760. if($this->elementInScope('p')) {
  761. $this->emitToken(array(
  762. 'name' => 'p',
  763. 'type' => HTML5_Tokenizer::ENDTAG
  764. ));
  765. }
  766. $this->insertElement($token);
  767. /* If the next token is a U+000A LINE FEED (LF) character
  768. * token, then ignore that token and move on to the next
  769. * one. (Newlines at the start of pre blocks are ignored as
  770. * an authoring convenience.) */
  771. $this->ignore_lf_token = 2;
  772. $this->flag_frameset_ok = false;
  773. break;
  774. /* A start tag whose tag name is "form" */
  775. case 'form':
  776. /* If the form element pointer is not null, ignore the
  777. token with a parse error. */
  778. if($this->form_pointer !== null) {
  779. $this->ignored = true;
  780. // Ignore.
  781. /* Otherwise: */
  782. } else {
  783. /* If the stack of open elements has a p element in
  784. scope, then act as if an end tag with the tag name p
  785. had been seen. */
  786. if($this->elementInScope('p')) {
  787. $this->emitToken(array(
  788. 'name' => 'p',
  789. 'type' => HTML5_Tokenizer::ENDTAG
  790. ));
  791. }
  792. /* Insert an HTML element for the token, and set the
  793. form element pointer to point to the element created. */
  794. $element = $this->insertElement($token);
  795. $this->form_pointer = $element;
  796. }
  797. break;
  798. // condensed specification
  799. case 'li': case 'dd': case 'dt':
  800. /* 1. Set the frameset-ok flag to "not ok". */
  801. $this->flag_frameset_ok = false;
  802. $stack_length = count($this->stack) - 1;
  803. for($n = $stack_length; 0 <= $n; $n--) {
  804. /* 2. Initialise node to be the current node (the
  805. bottommost node of the stack). */
  806. $stop = false;
  807. $node = $this->stack[$n];
  808. $cat = $this->getElementCategory($node);
  809. // for case 'li':
  810. /* 3. If node is an li element, then act as if an end
  811. * tag with the tag name "li" had been seen, then jump
  812. * to the last step. */
  813. // for case 'dd': case 'dt':
  814. /* If node is a dd or dt element, then act as if an end
  815. * tag with the same tag name as node had been seen, then
  816. * jump to the last step. */
  817. if(($token['name'] === 'li' && $node->tagName === 'li') ||
  818. ($token['name'] !== 'li' && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { // limited conditional
  819. $this->emitToken(array(
  820. 'type' => HTML5_Tokenizer::ENDTAG,
  821. 'name' => $node->tagName,
  822. ));
  823. break;
  824. }
  825. /* 4. If node is not in the formatting category, and is
  826. not in the phrasing category, and is not an address,
  827. div or p element, then stop this algorithm. */
  828. if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
  829. $node->tagName !== 'address' && $node->tagName !== 'div' &&
  830. $node->tagName !== 'p') {
  831. break;
  832. }
  833. /* 5. Otherwise, set node to the previous entry in the
  834. * stack of open elements and return to step 2. */
  835. }
  836. /* 6. This is the last step. */
  837. /* If the stack of open elements has a p element in scope,
  838. then act as if an end tag with the tag name p had been
  839. seen. */
  840. if($this->elementInScope('p')) {
  841. $this->emitToken(array(
  842. 'name' => 'p',
  843. 'type' => HTML5_Tokenizer::ENDTAG
  844. ));
  845. }
  846. /* Finally, insert an HTML element with the same tag
  847. name as the token's. */
  848. $this->insertElement($token);
  849. break;
  850. /* A start tag token whose tag name is "plaintext" */
  851. case 'plaintext':
  852. /* If the stack of open elements has a p element in scope,
  853. then act as if an end tag with the tag name p had been
  854. seen. */
  855. if($this->elementInScope('p')) {
  856. $this->emitToken(array(
  857. 'name' => 'p',
  858. 'type' => HTML5_Tokenizer::ENDTAG
  859. ));
  860. }
  861. /* Insert an HTML element for the token. */
  862. $this->insertElement($token);
  863. $this->content_model = HTML5_Tokenizer::PLAINTEXT;
  864. break;
  865. // more diversions
  866. /* A start tag whose tag name is "a" */
  867. case 'a':
  868. /* If the list of active formatting elements contains
  869. an element whose tag name is "a" between the end of the
  870. list and the last marker on the list (or the start of
  871. the list if there is no marker on the list), then this
  872. is a parse error; act as if an end tag with the tag name
  873. "a" had been seen, then remove that element from the list
  874. of active formatting elements and the stack of open
  875. elements if the end tag didn't already remove it (it
  876. might not have if the element is not in table scope). */
  877. $leng = count($this->a_formatting);
  878. for($n = $leng - 1; $n >= 0; $n--) {
  879. if($this->a_formatting[$n] === self::MARKER) {
  880. break;
  881. } elseif($this->a_formatting[$n]->tagName === 'a') {
  882. $a = $this->a_formatting[$n];
  883. $this->emitToken(array(
  884. 'name' => 'a',
  885. 'type' => HTML5_Tokenizer::ENDTAG
  886. ));
  887. if (in_array($a, $this->a_formatting)) {
  888. $a_i = array_search($a, $this->a_formatting, true);
  889. if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
  890. }
  891. if (in_array($a, $this->stack)) {
  892. $a_i = array_search($a, $this->stack, true);
  893. if ($a_i !== false) array_splice($this->stack, $a_i, 1);
  894. }
  895. break;
  896. }
  897. }
  898. /* Reconstruct the active formatting elements, if any. */
  899. $this->reconstructActiveFormattingElements();
  900. /* Insert an HTML element for the token. */
  901. $el = $this->insertElement($token);
  902. /* Add that element to the list of active formatting
  903. elements. */
  904. $this->a_formatting[] = $el;
  905. break;
  906. case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
  907. case 's': case 'small': case 'strike':
  908. case 'strong': case 'tt': case 'u':
  909. /* Reconstruct the active formatting elements, if any. */
  910. $this->reconstructActiveFormattingElements();
  911. /* Insert an HTML element for the token. */
  912. $el = $this->insertElement($token);
  913. /* Add that element to the list of active formatting
  914. elements. */
  915. $this->a_formatting[] = $el;
  916. break;
  917. case 'nobr':
  918. /* Reconstruct the active formatting elements, if any. */
  919. $this->reconstructActiveFormattingElements();
  920. /* If the stack of open elements has a nobr element in
  921. * scope, then this is a parse error; act as if an end tag
  922. * with the tag name "nobr" had been seen, then once again
  923. * reconstruct the active formatting elements, if any. */
  924. if ($this->elementInScope('nobr')) {
  925. $this->emitToken(array(
  926. 'name' => 'nobr',
  927. 'type' => HTML5_Tokenizer::ENDTAG,
  928. ));
  929. $this->reconstructActiveFormattingElements();
  930. }
  931. /* Insert an HTML element for the token. */
  932. $el = $this->insertElement($token);
  933. /* Add that element to the list of active formatting
  934. elements. */
  935. $this->a_formatting[] = $el;
  936. break;
  937. // another diversion
  938. /* A start tag token whose tag name is "button" */
  939. case 'button':
  940. /* If the stack of open elements has a button element in scope,
  941. then this is a parse error; act as if an end tag with the tag
  942. name "button" had been seen, then reprocess the token. (We don't
  943. do that. Unnecessary.) (I hope you're right! -- ezyang) */
  944. if($this->elementInScope('button')) {
  945. $this->emitToken(array(
  946. 'name' => 'button',
  947. 'type' => HTML5_Tokenizer::ENDTAG
  948. ));
  949. }
  950. /* Reconstruct the active formatting elements, if any. */
  951. $this->reconstructActiveFormattingElements();
  952. /* Insert an HTML element for the token. */
  953. $this->insertElement($token);
  954. /* Insert a marker at the end of the list of active
  955. formatting elements. */
  956. $this->a_formatting[] = self::MARKER;
  957. $this->flag_frameset_ok = false;
  958. break;
  959. case 'applet': case 'marquee': case 'object':
  960. /* Reconstruct the active formatting elements, if any. */
  961. $this->reconstructActiveFormattingElements();
  962. /* Insert an HTML element for the token. */
  963. $this->insertElement($token);
  964. /* Insert a marker at the end of the list of active
  965. formatting elements. */
  966. $this->a_formatting[] = self::MARKER;
  967. $this->flag_frameset_ok = false;
  968. break;
  969. // spec diversion
  970. /* A start tag whose tag name is "table" */
  971. case 'table':
  972. /* If the stack of open elements has a p element in scope,
  973. then act as if an end tag with the tag name p had been seen. */
  974. if($this->quirks_mode !== self::QUIRKS_MODE &&
  975. $this->elementInScope('p')) {
  976. $this->emitToken(array(
  977. 'name' => 'p',
  978. 'type' => HTML5_Tokenizer::ENDTAG
  979. ));
  980. }
  981. /* Insert an HTML element for the token. */
  982. $this->insertElement($token);
  983. $this->flag_frameset_ok = false;
  984. /* Change the insertion mode to "in table". */
  985. $this->mode = self::IN_TABLE;
  986. break;
  987. /* A start tag whose tag name is one of: "area", "basefont",
  988. "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
  989. case 'area': case 'basefont': case 'bgsound': case 'br':
  990. case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
  991. case 'wbr':
  992. /* Reconstruct the active formatting elements, if any. */
  993. $this->reconstructActiveFormattingElements();
  994. /* Insert an HTML element for the token. */
  995. $this->insertElement($token);
  996. /* Immediately pop the current node off the stack of open elements. */
  997. array_pop($this->stack);
  998. // YYY: Acknowledge the token's self-closing flag, if it is set.
  999. $this->flag_frameset_ok = false;
  1000. break;
  1001. case 'param': case 'source':
  1002. /* Insert an HTML element for the token. */
  1003. $this->insertElement($token);
  1004. /* Immediately pop the current node off the stack of open elements. */
  1005. array_pop($this->stack);
  1006. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1007. break;
  1008. /* A start tag whose tag name is "hr" */
  1009. case 'hr':
  1010. /* If the stack of open elements has a p element in scope,
  1011. then act as if an end tag with the tag name p had been seen. */
  1012. if($this->elementInScope('p')) {
  1013. $this->emitToken(array(
  1014. 'name' => 'p',
  1015. 'type' => HTML5_Tokenizer::ENDTAG
  1016. ));
  1017. }
  1018. /* Insert an HTML element for the token. */
  1019. $this->insertElement($token);
  1020. /* Immediately pop the current node off the stack of open elements. */
  1021. array_pop($this->stack);
  1022. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1023. $this->flag_frameset_ok = false;
  1024. break;
  1025. /* A start tag whose tag name is "image" */
  1026. case 'image':
  1027. /* Parse error. Change the token's tag name to "img" and
  1028. reprocess it. (Don't ask.) */
  1029. $token['name'] = 'img';
  1030. $this->emitToken($token);
  1031. break;
  1032. /* A start tag whose tag name is "isindex" */
  1033. case 'isindex':
  1034. /* Parse error. */
  1035. /* If the form element pointer is not null,
  1036. then ignore the token. */
  1037. if($this->form_pointer === null) {
  1038. /* Act as if a start tag token with the tag name "form" had
  1039. been seen. */
  1040. /* If the token has an attribute called "action", set
  1041. * the action attribute on the resulting form
  1042. * element to the value of the "action" attribute of
  1043. * the token. */
  1044. $attr = array();
  1045. $action = $this->getAttr($token, 'action');
  1046. if ($action !== false) {
  1047. $attr[] = array('name' => 'action', 'value' => $action);
  1048. }
  1049. $this->emitToken(array(
  1050. 'name' => 'form',
  1051. 'type' => HTML5_Tokenizer::STARTTAG,
  1052. 'attr' => $attr
  1053. ));
  1054. /* Act as if a start tag token with the tag name "hr" had
  1055. been seen. */
  1056. $this->emitToken(array(
  1057. 'name' => 'hr',
  1058. 'type' => HTML5_Tokenizer::STARTTAG,
  1059. 'attr' => array()
  1060. ));
  1061. /* Act as if a start tag token with the tag name "p" had
  1062. been seen. */
  1063. $this->emitToken(array(
  1064. 'name' => 'p',
  1065. 'type' => HTML5_Tokenizer::STARTTAG,
  1066. 'attr' => array()
  1067. ));
  1068. /* Act as if a start tag token with the tag name "label"
  1069. had been seen. */
  1070. $this->emitToken(array(
  1071. 'name' => 'label',
  1072. 'type' => HTML5_Tokenizer::STARTTAG,
  1073. 'attr' => array()
  1074. ));
  1075. /* Act as if a stream of character tokens had been seen. */
  1076. $prompt = $this->getAttr($token, 'prompt');
  1077. if ($prompt === false) {
  1078. $prompt = 'This is a searchable index. '.
  1079. 'Insert your search keywords here: ';
  1080. }
  1081. $this->emitToken(array(
  1082. 'data' => $prompt,
  1083. 'type' => HTML5_Tokenizer::CHARACTER,
  1084. ));
  1085. /* Act as if a start tag token with the tag name "input"
  1086. had been seen, with all the attributes from the "isindex"
  1087. token, except with the "name" attribute set to the value
  1088. "isindex" (ignoring any explicit "name" attribute). */
  1089. $attr = array();
  1090. foreach ($token['attr'] as $keypair) {
  1091. if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
  1092. $keypair['name'] === 'prompt') continue;
  1093. $attr[] = $keypair;
  1094. }
  1095. $attr[] = array('name' => 'name', 'value' => 'isindex');
  1096. $this->emitToken(array(
  1097. 'name' => 'input',
  1098. 'type' => HTML5_Tokenizer::STARTTAG,
  1099. 'attr' => $attr
  1100. ));
  1101. /* Act as if an end tag token with the tag name "label"
  1102. had been seen. */
  1103. $this->emitToken(array(
  1104. 'name' => 'label',
  1105. 'type' => HTML5_Tokenizer::ENDTAG
  1106. ));
  1107. /* Act as if an end tag token with the tag name "p" had
  1108. been seen. */
  1109. $this->emitToken(array(
  1110. 'name' => 'p',
  1111. 'type' => HTML5_Tokenizer::ENDTAG
  1112. ));
  1113. /* Act as if a start tag token with the tag name "hr" had
  1114. been seen. */
  1115. $this->emitToken(array(
  1116. 'name' => 'hr',
  1117. 'type' => HTML5_Tokenizer::STARTTAG
  1118. ));
  1119. /* Act as if an end tag token with the tag name "form" had
  1120. been seen. */
  1121. $this->emitToken(array(
  1122. 'name' => 'form',
  1123. 'type' => HTML5_Tokenizer::ENDTAG
  1124. ));
  1125. } else {
  1126. $this->ignored = true;
  1127. }
  1128. break;
  1129. /* A start tag whose tag name is "textarea" */
  1130. case 'textarea':
  1131. $this->insertElement($token);
  1132. /* If the next token is a U+000A LINE FEED (LF)
  1133. * character token, then ignore that token and move on to
  1134. * the next one. (Newlines at the start of textarea
  1135. * elements are ignored as an authoring convenience.)
  1136. * need flag, see also <pre> */
  1137. $this->ignore_lf_token = 2;
  1138. $this->original_mode = $this->mode;
  1139. $this->flag_frameset_ok = false;
  1140. $this->mode = self::IN_CDATA_RCDATA;
  1141. /* Switch the tokeniser's content model flag to the
  1142. RCDATA state. */
  1143. $this->content_model = HTML5_Tokenizer::RCDATA;
  1144. break;
  1145. /* A start tag token whose tag name is "xmp" */
  1146. case 'xmp':
  1147. /* Reconstruct the active formatting elements, if any. */
  1148. $this->reconstructActiveFormattingElements();
  1149. $this->flag_frameset_ok = false;
  1150. $this->insertCDATAElement($token);
  1151. break;
  1152. case 'iframe':
  1153. $this->flag_frameset_ok = false;
  1154. $this->insertCDATAElement($token);
  1155. break;
  1156. case 'noembed': case 'noscript':
  1157. // XSCRIPT: should check scripting flag
  1158. $this->insertCDATAElement($token);
  1159. break;
  1160. /* A start tag whose tag name is "select" */
  1161. case 'select':
  1162. /* Reconstruct the active formatting elements, if any. */
  1163. $this->reconstructActiveFormattingElements();
  1164. /* Insert an HTML element for the token. */
  1165. $this->insertElement($token);
  1166. $this->flag_frameset_ok = false;
  1167. /* If the insertion mode is one of "in table", "in caption",
  1168. * "in column group", "in table body", "in row", or "in
  1169. * cell", then switch the insertion mode to "in select in
  1170. * table". Otherwise, switch the insertion mode to "in
  1171. * select". */
  1172. if (
  1173. $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
  1174. $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
  1175. $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
  1176. ) {
  1177. $this->mode = self::IN_SELECT_IN_TABLE;
  1178. } else {
  1179. $this->mode = self::IN_SELECT;
  1180. }
  1181. break;
  1182. case 'option': case 'optgroup':
  1183. if ($this->elementInScope('option')) {
  1184. $this->emitToken(array(
  1185. 'name' => 'option',
  1186. 'type' => HTML5_Tokenizer::ENDTAG,
  1187. ));
  1188. }
  1189. $this->reconstructActiveFormattingElements();
  1190. $this->insertElement($token);
  1191. break;
  1192. case 'rp': case 'rt':
  1193. /* If the stack of open elements has a ruby element in scope, then generate
  1194. * implied end tags. If the current node is not then a ruby element, this is
  1195. * a parse error; pop all the nodes from the current node up to the node
  1196. * immediately before the bottommost ruby element on the stack of open elements.
  1197. */
  1198. if ($this->elementInScope('ruby')) {
  1199. $this->generateImpliedEndTags();
  1200. }
  1201. $peek = false;
  1202. do {
  1203. if ($peek) {
  1204. // parse error
  1205. }
  1206. $peek = array_pop($this->stack);
  1207. } while ($peek->tagName !== 'ruby');
  1208. $this->stack[] = $peek; // we popped one too many
  1209. $this->insertElement($token);
  1210. break;
  1211. // spec diversion
  1212. case 'math':
  1213. $this->reconstructActiveFormattingElements();
  1214. $token = $this->adjustMathMLAttributes($token);
  1215. $token = $this->adjustForeignAttributes($token);
  1216. $this->insertForeignElement($token, self::NS_MATHML);
  1217. if (isset($token['self-closing'])) {
  1218. // XERROR: acknowledge the token's self-closing flag
  1219. array_pop($this->stack);
  1220. }
  1221. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1222. $this->secondary_mode = $this->mode;
  1223. $this->mode = self::IN_FOREIGN_CONTENT;
  1224. }
  1225. break;
  1226. case 'svg':
  1227. $this->reconstructActiveFormattingElements();
  1228. $token = $this->adjustSVGAttributes($token);
  1229. $token = $this->adjustForeignAttributes($token);
  1230. $this->insertForeignElement($token, self::NS_SVG);
  1231. if (isset($token['self-closing'])) {
  1232. // XERROR: acknowledge the token's self-closing flag
  1233. array_pop($this->stack);
  1234. }
  1235. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1236. $this->secondary_mode = $this->mode;
  1237. $this->mode = self::IN_FOREIGN_CONTENT;
  1238. }
  1239. break;
  1240. case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
  1241. case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
  1242. // parse error
  1243. break;
  1244. /* A start tag token not covered by the previous entries */
  1245. default:
  1246. /* Reconstruct the active formatting elements, if any. */
  1247. $this->reconstructActiveFormattingElements();
  1248. $this->insertElement($token);
  1249. /* This element will be a phrasing element. */
  1250. break;
  1251. }
  1252. break;
  1253. case HTML5_Tokenizer::ENDTAG:
  1254. switch($token['name']) {
  1255. /* An end tag with the tag name "body" */
  1256. case 'body':
  1257. /* If the second element in the stack of open elements is
  1258. not a body element, this is a parse error. Ignore the token.
  1259. (innerHTML case) */
  1260. if(count($this->stack) < 2 || $this->stack[1]->tagName !== 'body') {
  1261. $this->ignored = true;
  1262. /* Otherwise, if there is a node in the stack of open
  1263. * elements that is not either a dd element, a dt
  1264. * element, an li element, an optgroup element, an
  1265. * option element, a p element, an rp element, an rt
  1266. * element, a tbody element, a td element, a tfoot
  1267. * element, a th element, a thead element, a tr element,
  1268. * the body element, or the html element, then this is a
  1269. * parse error. */
  1270. } else {
  1271. // XERROR: implement this check for parse error
  1272. }
  1273. /* Change the insertion mode to "after body". */
  1274. $this->mode = self::AFTER_BODY;
  1275. break;
  1276. /* An end tag with the tag name "html" */
  1277. case 'html':
  1278. /* Act as if an end tag with tag name "body" had been seen,
  1279. then, if that token wasn't ignored, reprocess the current
  1280. token. */
  1281. $this->emitToken(array(
  1282. 'name' => 'body',
  1283. 'type' => HTML5_Tokenizer::ENDTAG
  1284. ));
  1285. if (!$this->ignored) $this->emitToken($token);
  1286. break;
  1287. case 'address': case 'article': case 'aside': case 'blockquote':
  1288. case 'center': case 'datagrid': case 'details': case 'dir':
  1289. case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
  1290. case 'header': case 'hgroup': case 'listing': case 'menu':
  1291. case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
  1292. /* If the stack of open elements has an element in scope
  1293. with the same tag name as that of the token, then generate
  1294. implied end tags. */
  1295. if($this->elementInScope($token['name'])) {
  1296. $this->generateImpliedEndTags();
  1297. /* Now, if the current node is not an element with
  1298. the same tag name as that of the token, then this
  1299. is a parse error. */
  1300. // XERROR: implement parse error logic
  1301. /* If the stack of open elements has an element in
  1302. scope with the same tag name as that of the token,
  1303. then pop elements from this stack until an element
  1304. with that tag name has been popped from the stack. */
  1305. do {
  1306. $node = array_pop($this->stack);
  1307. } while ($node->tagName !== $token['name']);
  1308. } else {
  1309. // parse error
  1310. }
  1311. break;
  1312. /* An end tag whose tag name is "form" */
  1313. case 'form':
  1314. /* Let node be the element that the form element pointer is set to. */
  1315. $node = $this->form_pointer;
  1316. /* Set the form element pointer to null. */
  1317. $this->form_pointer = null;
  1318. /* If node is null or the stack of open elements does not
  1319. * have node in scope, then this is a parse error; ignore the token. */
  1320. if ($node === null || !in_array($node, $this->stack)) {
  1321. // parse error
  1322. $this->ignored = true;
  1323. } else {
  1324. /* 1. Generate implied end tags. */
  1325. $this->generateImpliedEndTags();
  1326. /* 2. If the current node is not node, then this is a parse error. */
  1327. if (end($this->stack) !== $node) {
  1328. // parse error
  1329. }
  1330. /* 3. Remove node from the stack of open elements. */
  1331. array_splice($this->stack, array_search($node, $this->stack, true), 1);
  1332. }
  1333. break;
  1334. /* An end tag whose tag name is "p" */
  1335. case 'p':
  1336. /* If the stack of open elements has a p element in scope,
  1337. then generate implied end tags, except for p elements. */
  1338. if($this->elementInScope('p')) {
  1339. /* Generate implied end tags, except for elements with
  1340. * the same tag name as the token. */
  1341. $this->generateImpliedEndTags(array('p'));
  1342. /* If the current node is not a p element, then this is
  1343. a parse error. */
  1344. // XERROR: implement
  1345. /* Pop elements from the stack of open elements until
  1346. * an element with the same tag name as the token has
  1347. * been popped from the stack. */
  1348. do {
  1349. $node = array_pop($this->stack);
  1350. } while ($node->tagName !== 'p');
  1351. } else {
  1352. // parse error
  1353. $this->emitToken(array(
  1354. 'name' => 'p',
  1355. 'type' => HTML5_Tokenizer::STARTTAG,
  1356. ));
  1357. $this->emitToken($token);
  1358. }
  1359. break;
  1360. /* An end tag whose tag name is "dd", "dt", or "li" */
  1361. case 'dd': case 'dt': case 'li':
  1362. if($this->elementInScope($token['name'])) {
  1363. $this->generateImpliedEndTags(array($token['name']));
  1364. /* If the current node is not an element with the same
  1365. tag name as the token, then this is a parse error. */
  1366. // XERROR: implement parse error
  1367. /* Pop elements from the stack of open elements until
  1368. * an element with the same tag name as the token has
  1369. * been popped from the stack. */
  1370. do {
  1371. $node = array_pop($this->stack);
  1372. } while ($node->tagName !== $token['name']);
  1373. } else {
  1374. // parse error
  1375. }
  1376. break;
  1377. /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
  1378. "h5", "h6" */
  1379. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  1380. $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
  1381. /* If the stack of open elements has in scope an element whose
  1382. tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
  1383. generate implied end tags. */
  1384. if($this->elementInScope($elements)) {
  1385. $this->generateImpliedEndTags();
  1386. /* Now, if the current node is not an element with the same
  1387. tag name as that of the token, then this is a parse error. */
  1388. // XERROR: implement parse error
  1389. /* If the stack of open elements has in scope an element
  1390. whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
  1391. "h6", then pop elements from the stack until an element
  1392. with one of those tag names has been popped from the stack. */
  1393. do {
  1394. $node = array_pop($this->stack);
  1395. } while (!in_array($node->tagName, $elements));
  1396. } else {
  1397. // parse error
  1398. }
  1399. break;
  1400. /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
  1401. "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
  1402. case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
  1403. case 'i': case 'nobr': case 's': case 'small': case 'strike':
  1404. case 'strong': case 'tt': case 'u':
  1405. // XERROR: generally speaking this needs parse error logic
  1406. /* 1. Let the formatting element be the last element in
  1407. the list of active formatting elements that:
  1408. * is between the end of the list and the last scope
  1409. marker in the list, if any, or the start of the list
  1410. otherwise, and
  1411. * has the same tag name as the token.
  1412. */
  1413. while(true) {
  1414. for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
  1415. if($this->a_formatting[$a] === self::MARKER) {
  1416. break;
  1417. } elseif($this->a_formatting[$a]->tagName === $token['name']) {
  1418. $formatting_element = $this->a_formatting[$a];
  1419. $in_stack = in_array($formatting_element, $this->stack, true);
  1420. $fe_af_pos = $a;
  1421. break;
  1422. }
  1423. }
  1424. /* If there is no such node, or, if that node is
  1425. also in the stack of open elements but the element
  1426. is not in scope, then this is a parse error. Abort
  1427. these steps. The token is ignored. */
  1428. if(!isset($formatting_element) || ($in_stack &&
  1429. !$this->elementInScope($token['name']))) {
  1430. $this->ignored = true;
  1431. break;
  1432. /* Otherwise, if there is such a node, but that node
  1433. is not in the stack of open elements, then this is a
  1434. parse error; remove the element from the list, and
  1435. abort these steps. */
  1436. } elseif(isset($formatting_element) && !$in_stack) {
  1437. unset($this->a_formatting[$fe_af_pos]);
  1438. $this->a_formatting = array_merge($this->a_formatting);
  1439. break;
  1440. }
  1441. /* Otherwise, there is a formatting element and that
  1442. * element is in the stack and is in scope. If the
  1443. * element is not the current node, this is a parse
  1444. * error. In any case, proceed with the algorithm as
  1445. * written in the following steps. */
  1446. // XERROR: implement me
  1447. /* 2. Let the furthest block be the topmost node in the
  1448. stack of open elements that is lower in the stack
  1449. than the formatting element, and is not an element in
  1450. the phrasing or formatting categories. There might
  1451. not be one. */
  1452. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1453. $length = count($this->stack);
  1454. for($s = $fe_s_pos + 1; $s < $length; $s++) {
  1455. $category = $this->getElementCategory($this->stack[$s]);
  1456. if($category !== self::PHRASING && $category !== self::FORMATTING) {
  1457. $furthest_block = $this->stack[$s];
  1458. break;
  1459. }
  1460. }
  1461. /* 3. If there is no furthest block, then the UA must
  1462. skip the subsequent steps and instead just pop all
  1463. the nodes from the bottom of the stack of open
  1464. elements, from the current node up to the formatting
  1465. element, and remove the formatting element from the
  1466. list of active formatting elements. */
  1467. if(!isset($furthest_block)) {
  1468. for($n = $length - 1; $n >= $fe_s_pos; $n--) {
  1469. array_pop($this->stack);
  1470. }
  1471. unset($this->a_formatting[$fe_af_pos]);
  1472. $this->a_formatting = array_merge($this->a_formatting);
  1473. break;
  1474. }
  1475. /* 4. Let the common ancestor be the element
  1476. immediately above the formatting element in the stack
  1477. of open elements. */
  1478. $common_ancestor = $this->stack[$fe_s_pos - 1];
  1479. /* 5. Let a bookmark note the position of the
  1480. formatting element in the list of active formatting
  1481. elements relative to the elements on either side
  1482. of it in the list. */
  1483. $bookmark = $fe_af_pos;
  1484. /* 6. Let node and last node be the furthest block.
  1485. Follow these steps: */
  1486. $node = $furthest_block;
  1487. $last_node = $furthest_block;
  1488. while(true) {
  1489. for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
  1490. /* 6.1 Let node be the element immediately
  1491. prior to node in the stack of open elements. */
  1492. $node = $this->stack[$n];
  1493. /* 6.2 If node is not in the list of active
  1494. formatting elements, then remove node from
  1495. the stack of open elements and then go back
  1496. to step 1. */
  1497. if(!in_array($node, $this->a_formatting, true)) {
  1498. array_splice($this->stack, $n, 1);
  1499. } else {
  1500. break;
  1501. }
  1502. }
  1503. /* 6.3 Otherwise, if node is the formatting
  1504. element, then go to the next step in the overall
  1505. algorithm. */
  1506. if($node === $formatting_element) {
  1507. break;
  1508. /* 6.4 Otherwise, if last node is the furthest
  1509. block, then move the aforementioned bookmark to
  1510. be immediately after the node in the list of
  1511. active formatting elements. */
  1512. } elseif($last_node === $furthest_block) {
  1513. $bookmark = array_search($node, $this->a_formatting, true) + 1;
  1514. }
  1515. /* 6.5 Create an element for the token for which
  1516. * the element node was created, replace the entry
  1517. * for node in the list of active formatting
  1518. * elements with an entry for the new element,
  1519. * replace the entry for node in the stack of open
  1520. * elements with an entry for the new element, and
  1521. * let node be the new element. */
  1522. // we don't know what the token is anymore
  1523. $clone = $node->cloneNode();
  1524. $a_pos = array_search($node, $this->a_formatting, true);
  1525. $s_pos = array_search($node, $this->stack, true);
  1526. $this->a_formatting[$a_pos] = $clone;
  1527. $this->stack[$s_pos] = $clone;
  1528. $node = $clone;
  1529. /* 6.6 Insert last node into node, first removing
  1530. it from its previous parent node if any. */
  1531. if($last_node->parentNode !== null) {
  1532. $last_node->parentNode->removeChild($last_node);
  1533. }
  1534. $node->appendChild($last_node);
  1535. /* 6.7 Let last node be node. */
  1536. $last_node = $node;
  1537. /* 6.8 Return to step 1 of this inner set of steps. */
  1538. }
  1539. /* 7. If the common ancestor node is a table, tbody,
  1540. * tfoot, thead, or tr element, then, foster parent
  1541. * whatever last node ended up being in the previous
  1542. * step, first removing it from its previous parent
  1543. * node if any. */
  1544. if ($last_node->parentNode) { // common step
  1545. $last_node->parentNode->removeChild($last_node);
  1546. }
  1547. if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
  1548. $this->fosterParent($last_node);
  1549. /* Otherwise, append whatever last node ended up being
  1550. * in the previous step to the common ancestor node,
  1551. * first removing it from its previous parent node if
  1552. * any. */
  1553. } else {
  1554. $common_ancestor->appendChild($last_node);
  1555. }
  1556. /* 8. Create an element for the token for which the
  1557. * formatting element was created. */
  1558. $clone = $formatting_element->cloneNode();
  1559. /* 9. Take all of the child nodes of the furthest
  1560. block and append them to the element created in the
  1561. last step. */
  1562. while($furthest_block->hasChildNodes()) {
  1563. $child = $furthest_block->firstChild;
  1564. $furthest_block->removeChild($child);
  1565. $clone->appendChild($child);
  1566. }
  1567. /* 10. Append that clone to the furthest block. */
  1568. $furthest_block->appendChild($clone);
  1569. /* 11. Remove the formatting element from the list
  1570. of active formatting elements, and insert the new element
  1571. into the list of active formatting elements at the
  1572. position of the aforementioned bookmark. */
  1573. $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
  1574. array_splice($this->a_formatting, $fe_af_pos, 1);
  1575. $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
  1576. $af_part2 = array_slice($this->a_formatting, $bookmark);
  1577. $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
  1578. /* 12. Remove the formatting element from the stack
  1579. of open elements, and insert the new element into the stack
  1580. of open elements immediately below the position of the
  1581. furthest block in that stack. */
  1582. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1583. array_splice($this->stack, $fe_s_pos, 1);
  1584. $fb_s_pos = array_search($furthest_block, $this->stack, true);
  1585. $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
  1586. $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
  1587. $this->stack = array_merge($s_part1, array($clone), $s_part2);
  1588. /* 13. Jump back to step 1 in this series of steps. */
  1589. unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
  1590. }
  1591. break;
  case 'applet': case 'button': case 'marquee': case 'object':
      /* End tag for applet, button, marquee or object. When no element
       * with the token's tag name is in scope the token is a parse error
       * and nothing is done. */
      if ($this->elementInScope($token['name'])) {
          /* Generate implied end tags. */
          $this->generateImpliedEndTags();

          /* Now, if the current node is not an element with the same
           * tag name as the token, then this is a parse error. */
          // XERROR: implement logic

          /* Pop elements from the stack of open elements until an
           * element with the same tag name as the token has been
           * popped from the stack. */
          do {
              $node = array_pop($this->stack);
          } while ($node->tagName !== $token['name']);

          /* Clear the list of active formatting elements up to the
           * last marker: drop entries from the end of the list until a
           * marker has been dropped. The marker itself has to be
           * removed as well. The earlier index-based loop left it in
           * the list and, when the list held no marker at all, compared
           * the index against the boolean false returned by end().
           * Stopping on an empty list keeps this loop finite when no
           * marker is present. */
          while (count($this->a_formatting) > 0) {
              $entry = array_pop($this->a_formatting);
              if ($entry === self::MARKER) {
                  break;
              }
          }
      } else {
          // parse error
      }
  break;
  case 'br':
      // A "br" end tag is a parse error; a "br" start tag token is
      // built and emitted in its place.
      $br_start_tag = array(
          'name' => 'br',
          'type' => HTML5_Tokenizer::STARTTAG,
      );
      $this->emitToken($br_start_tag);
  break;
  /* Any end tag that none of the earlier cases handled. */
  default:
      /* Walk the stack of open elements from the current node (the
       * bottommost entry) towards the root. */
      $depth = count($this->stack);
      while (--$depth >= 0) {
          $node = $this->stack[$depth];

          if ($node->tagName === $token['name']) {
              /* node matches the end tag: generate implied end tags. */
              $this->generateImpliedEndTags();

              /* If the tag name of the end tag token does not match
               * the tag name of the current node, this is a parse
               * error. */
              // XERROR: implement this

              /* Pop every entry from the current node up to and
               * including node, then stop. */
              // XSKETCHY
              do {
                  $removed = array_pop($this->stack);
              } while ($removed !== $node);
              break;
          }

          /* node does not match. If it is in neither the formatting
           * nor the phrasing category, this is a parse error: the end
           * tag token is ignored and the walk stops. */
          $category = $this->getElementCategory($node);
          if (!($category === self::FORMATTING || $category === self::PHRASING)) {
              $this->ignored = true;
              break;
          }

          /* Otherwise continue with the previous entry in the stack. */
      }
  break;
  1662. }
  1663. break;
  1664. }
  1665. break;
  case self::IN_CDATA_RCDATA:
      /* CDATA/RCDATA insertion mode: character data is inserted as
       * text; an end tag or end-of-file pops the current node and
       * restores the insertion mode saved in $this->original_mode. */
      $token_type = $token['type'];
      $is_text = in_array(
          $token_type,
          array(HTML5_Tokenizer::CHARACTER, HTML5_Tokenizer::SPACECHARACTER),
          true
      );

      if ($is_text) {
          $this->insertText($token['data']);
      } elseif ($token_type === HTML5_Tokenizer::EOF
          || $token_type === HTML5_Tokenizer::ENDTAG) {
          /* "script" end tags get no special treatment: all of the
           * script execution steps are ignored, so every end tag is
           * handled the same way. */
          array_pop($this->stack);
          $this->mode = $this->original_mode;

          if ($token_type === HTML5_Tokenizer::EOF) {
              // parse error
              /* Marking a script element as "already executed" is
               * probably not necessary here. The end-of-file token is
               * reprocessed after the mode has been restored. */
              $this->emitToken($token);
          }
      }
  break;
  case self::IN_TABLE:
      /* "In table" insertion mode. Anything not handled by a specific
       * branch is processed with the "in body" rules while
       * $this->foster_parent is temporarily set to true. */
      $table_context = array('html', 'table');
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);
      $tag_name = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

      /* A character token that is one of U+0009 CHARACTER TABULATION,
       * U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
       * (FF), or U+0020 SPACE. If the current table is tainted the token
       * is handled by the "anything else" branch instead. */
      // Note: hsivonen has a test that fails due to the taint check
      // because he wants to convince Hixie not to do taint
      if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER
          && !$this->currentTableIsTainted()) {
          /* Append the character to the current node. */
          $this->insertText($token['data']);

      /* A comment token: append a Comment node carrying the token's
       * data to the current node. */
      } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
          $this->insertComment($token['data']);

      /* A DOCTYPE token */
      } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
          // parse error

      /* A start tag whose tag name is "caption" */
      } elseif ($is_start_tag && $tag_name === 'caption') {
          /* Clear the stack back to a table context. */
          $this->clearStackToTableContext($table_context);
          /* Insert a marker at the end of the list of active formatting
           * elements. */
          $this->a_formatting[] = self::MARKER;
          /* Insert an HTML element for the token, then switch the
           * insertion mode to "in caption". */
          $this->insertElement($token);
          $this->mode = self::IN_CAPTION;

      /* A start tag whose tag name is "colgroup" */
      } elseif ($is_start_tag && $tag_name === 'colgroup') {
          /* Clear the stack back to a table context. */
          $this->clearStackToTableContext($table_context);
          /* Insert an HTML element for the token, then switch the
           * insertion mode to "in column group". */
          $this->insertElement($token);
          $this->mode = self::IN_COLUMN_GROUP;

      /* A start tag whose tag name is "col": emit a "colgroup" start
       * tag first, then reprocess the current token. */
      } elseif ($is_start_tag && $tag_name === 'col') {
          $implied_colgroup = array(
              'name' => 'colgroup',
              'type' => HTML5_Tokenizer::STARTTAG,
              'attr' => array()
          );
          $this->emitToken($implied_colgroup);
          $this->emitToken($token);

      /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
      } elseif ($is_start_tag
          && in_array($tag_name, array('tbody', 'tfoot', 'thead'))) {
          /* Clear the stack back to a table context. */
          $this->clearStackToTableContext($table_context);
          /* Insert an HTML element for the token, then switch the
           * insertion mode to "in table body". */
          $this->insertElement($token);
          $this->mode = self::IN_TABLE_BODY;

      /* A start tag whose tag name is one of: "td", "th", "tr": act as
       * if a "tbody" start tag had been seen, then reprocess the
       * current token. */
      } elseif ($is_start_tag
          && in_array($tag_name, array('td', 'th', 'tr'))) {
          $implied_tbody = array(
              'name' => 'tbody',
              'type' => HTML5_Tokenizer::STARTTAG,
              'attr' => array()
          );
          $this->emitToken($implied_tbody);
          $this->emitToken($token);

      /* A start tag whose tag name is "table": parse error. Act as if a
       * "table" end tag had been seen, then, if that token wasn't
       * ignored, reprocess the current token. */
      } elseif ($is_start_tag && $tag_name === 'table') {
          $implied_table_end = array(
              'name' => 'table',
              'type' => HTML5_Tokenizer::ENDTAG
          );
          $this->emitToken($implied_table_end);
          if (!$this->ignored) {
              $this->emitToken($token);
          }

      /* An end tag whose tag name is "table" */
      } elseif ($is_end_tag && $tag_name === 'table') {
          if ($this->elementInScope($tag_name, true)) {
              /* Pop elements until a table element has been popped,
               * then reset the insertion mode appropriately. */
              do {
                  $popped = array_pop($this->stack);
              } while ($popped->tagName !== 'table');
              $this->resetInsertionMode();
          } else {
              /* No such element in table scope: parse error, ignore
               * the token. (fragment case) */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
      } elseif ($is_end_tag && in_array($tag_name, array(
          'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
          'tfoot', 'th', 'thead', 'tr'))) {
          // Parse error. Ignore the token.

      /* A start tag whose tag name is "style" or "script": use the
       * "in head" rules. */
      } elseif ($is_start_tag
          && ($tag_name === 'style' || $tag_name === 'script')) {
          $this->processWithRulesFor($token, self::IN_HEAD);

      /* A start tag whose tag name is "input" with a "type" attribute
       * that is an ASCII case-insensitive match for "hidden". Any other
       * input start tag is left to the "anything else" branch. */
      } elseif ($is_start_tag && $tag_name === 'input'
          // assignment is intentional
          && ($input_type = $this->getAttr($token, 'type'))
          && strtolower($input_type) === 'hidden') {
          // parse error
          $this->insertElement($token);
          array_pop($this->stack);

      /* An end-of-file token */
      } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
          /* If the current node is not the root html element, then
           * this is a parse error. */
          if (end($this->stack)->tagName !== 'html') {
              // Note: It can only be the current node in the fragment case.
              // parse error
          }
          /* Stop parsing. */

      /* Anything else */
      } else {
          /* Parse error. Process the token as if the insertion mode was
           * "in body", with foster parenting enabled for the duration of
           * that processing; the previous flag value is put back after. */
          $saved_foster_parent = $this->foster_parent;
          $this->foster_parent = true;
          $this->processWithRulesFor($token, self::IN_BODY);
          $this->foster_parent = $saved_foster_parent;
      }
  break;
  case self::IN_CAPTION:
      /* "In caption" insertion mode. */

      /* An end tag whose tag name is "caption" */
      if ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
          /* If the stack of open elements does not have an element in
           * table scope with the same tag name as the token, this is a
           * parse error. Ignore the token. (fragment case) */
          if (!$this->elementInScope($token['name'], true)) {
              $this->ignored = true;
          /* Otherwise: */
          } else {
              /* Generate implied end tags. */
              $this->generateImpliedEndTags();

              /* Now, if the current node is not a caption element, then
               * this is a parse error. */
              // XERROR: implement

              /* Pop elements from this stack until a caption element
               * has been popped from the stack. */
              do {
                  $node = array_pop($this->stack);
              } while ($node->tagName !== 'caption');

              /* Clear the list of active formatting elements up to the
               * last marker. */
              $this->clearTheActiveFormattingElementsUpToTheLastMarker();

              /* Switch the insertion mode to "in table". */
              $this->mode = self::IN_TABLE;
          }

      /* A start tag whose tag name is one of: "caption", "col",
       * "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", or an
       * end tag whose tag name is "table" */
      } elseif (($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
          array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
          'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
          $token['name'] === 'table')) {
          /* Parse error. Act as if an end tag with the tag name
           * "caption" had been seen, then, if that token wasn't
           * ignored, reprocess the current token. */
          $this->emitToken(array(
              'name' => 'caption',
              'type' => HTML5_Tokenizer::ENDTAG
          ));
          if (!$this->ignored) {
              $this->emitToken($token);
          }

      /* An end tag whose tag name is one of: "body", "col", "colgroup",
       * "html", "tbody", "td", "tfoot", "th", "thead", "tr".
       * "td" was missing from this list, which sent a stray </td> to
       * the "in body" rules instead of ignoring it here. */
      } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
          array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot',
          'th', 'thead', 'tr'))) {
          // Parse error. Ignore the token.
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in body". */
          $this->processWithRulesFor($token, self::IN_BODY);
      }
  break;
  case self::IN_COLUMN_GROUP:
      /* "In column group" insertion mode. */

      /* A character token that is one of U+0009 CHARACTER TABULATION,
       * U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
       * (FF), or U+0020 SPACE */
      if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
          /* Append the character to the current node. */
          $this->insertText($token['data']);

      /* A comment token */
      } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
          /* Append a Comment node to the current node with the data
           * attribute set to the data given in the comment token.
           * This goes through insertComment(), the same call the
           * "in table" and "in select" modes use for comment tokens;
           * the previous call named insertToken() here. */
          $this->insertComment($token['data']);

      /* A DOCTYPE token */
      } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
          // parse error

      /* A start tag whose tag name is "html": use the "in body" rules. */
      } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
          $this->processWithRulesFor($token, self::IN_BODY);

      /* A start tag whose tag name is "col" */
      } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
          /* Insert a col element for the token. Immediately pop the
           * current node off the stack of open elements. */
          $this->insertElement($token);
          array_pop($this->stack);
          // XERROR: Acknowledge the token's self-closing flag, if it is set.

      /* An end tag whose tag name is "colgroup" */
      } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
          $token['name'] === 'colgroup') {
          /* If the current node is the root html element, then this is
           * a parse error, ignore the token. (fragment case) */
          if (end($this->stack)->tagName === 'html') {
              $this->ignored = true;
          /* Otherwise, pop the current node (which will be a colgroup
           * element) from the stack of open elements. Switch the
           * insertion mode to "in table". */
          } else {
              array_pop($this->stack);
              $this->mode = self::IN_TABLE;
          }

      /* An end tag whose tag name is "col" */
      } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* An end-of-file token while the current node is the root html
       * element */
      } elseif ($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
          /* Stop parsing */

      /* Anything else */
      } else {
          /* Act as if an end tag with the tag name "colgroup" had been
           * seen, and then, if that token wasn't ignored, reprocess the
           * current token. */
          $this->emitToken(array(
              'name' => 'colgroup',
              'type' => HTML5_Tokenizer::ENDTAG
          ));
          if (!$this->ignored) {
              $this->emitToken($token);
          }
      }
  break;
  case self::IN_TABLE_BODY:
      /* "In table body" insertion mode. Tokens without a dedicated
       * branch are processed with the "in table" rules. */
      $body_context = array('tbody', 'tfoot', 'thead', 'html');
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);
      $tag_name = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

      /* A start tag whose tag name is "tr" */
      if ($is_start_tag && $tag_name === 'tr') {
          /* Clear the stack back to a table body context. */
          $this->clearStackToTableContext($body_context);
          /* Insert a tr element for the token, then switch the
           * insertion mode to "in row". */
          $this->insertElement($token);
          $this->mode = self::IN_ROW;

      /* A start tag whose tag name is one of: "th", "td". Parse error:
       * act as if a "tr" start tag had been seen, then reprocess the
       * current token. */
      } elseif ($is_start_tag && ($tag_name === 'th' || $tag_name === 'td')) {
          $implied_row = array(
              'name' => 'tr',
              'type' => HTML5_Tokenizer::STARTTAG,
              'attr' => array()
          );
          $this->emitToken($implied_row);
          $this->emitToken($token);

      /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
      } elseif ($is_end_tag
          && in_array($tag_name, array('tbody', 'tfoot', 'thead'))) {
          if ($this->elementInScope($tag_name, true)) {
              /* Clear the stack back to a table body context, pop the
               * current node and switch the insertion mode to
               * "in table". */
              $this->clearStackToTableContext($body_context);
              array_pop($this->stack);
              $this->mode = self::IN_TABLE;
          } else {
              /* No element with that tag name in table scope: parse
               * error, ignore the token. */
              $this->ignored = true;
          }

      /* A start tag whose tag name is one of: "caption", "col",
       * "colgroup", "tbody", "tfoot", "thead", or an end tag whose tag
       * name is "table" */
      } elseif (
          ($is_start_tag && in_array($tag_name, array(
              'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead')))
          || ($is_end_tag && $tag_name === 'table')
      ) {
          if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
              /* Clear the stack back to a table body context. */
              $this->clearStackToTableContext($body_context);
              /* Act as if an end tag with the same tag name as the
               * current node ("tbody", "tfoot", or "thead") had been
               * seen, then reprocess the current token. */
              $implied_section_end = array(
                  'name' => end($this->stack)->tagName,
                  'type' => HTML5_Tokenizer::ENDTAG
              );
              $this->emitToken($implied_section_end);
              $this->emitToken($token);
          } else {
              /* No tbody, thead, or tfoot element in table scope:
               * parse error, ignore the token. (fragment case) */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html", "td", "th", "tr" */
      } elseif ($is_end_tag && in_array($tag_name, array(
          'body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in table". */
          $this->processWithRulesFor($token, self::IN_TABLE);
      }
  break;
  case self::IN_ROW:
      /* "In row" insertion mode. Tokens without a dedicated branch are
       * processed with the "in table" rules. */
      $row_context = array('tr', 'html');
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);
      $tag_name = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

      /* A start tag whose tag name is one of: "th", "td" */
      if ($is_start_tag && ($tag_name === 'th' || $tag_name === 'td')) {
          /* Clear the stack back to a table row context. */
          $this->clearStackToTableContext($row_context);
          /* Insert an HTML element for the token, then switch the
           * insertion mode to "in cell". */
          $this->insertElement($token);
          $this->mode = self::IN_CELL;
          /* Insert a marker at the end of the list of active
           * formatting elements. */
          $this->a_formatting[] = self::MARKER;

      /* An end tag whose tag name is "tr" */
      } elseif ($is_end_tag && $tag_name === 'tr') {
          if ($this->elementInScope($tag_name, true)) {
              /* Clear the stack back to a table row context, pop the
               * current node (which will be a tr element) and switch
               * the insertion mode to "in table body". */
              $this->clearStackToTableContext($row_context);
              array_pop($this->stack);
              $this->mode = self::IN_TABLE_BODY;
          } else {
              /* No tr element in table scope: parse error, ignore the
               * token. (fragment case) */
              $this->ignored = true;
          }

      /* A start tag whose tag name is one of: "caption", "col",
       * "colgroup", "tbody", "tfoot", "thead", "tr" or an end tag whose
       * tag name is "table" */
      } elseif (
          ($is_start_tag && in_array($tag_name, array(
              'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr')))
          || ($is_end_tag && $tag_name === 'table')
      ) {
          /* Act as if an end tag with the tag name "tr" had been seen,
           * then, if that token wasn't ignored, reprocess the current
           * token. */
          $implied_row_end = array(
              'name' => 'tr',
              'type' => HTML5_Tokenizer::ENDTAG
          );
          $this->emitToken($implied_row_end);
          if (!$this->ignored) {
              $this->emitToken($token);
          }

      /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
      } elseif ($is_end_tag
          && in_array($tag_name, array('tbody', 'tfoot', 'thead'))) {
          if ($this->elementInScope($tag_name, true)) {
              /* Act as if an end tag with the tag name "tr" had been
               * seen, then reprocess the current token. */
              $implied_row_end = array(
                  'name' => 'tr',
                  'type' => HTML5_Tokenizer::ENDTAG
              );
              $this->emitToken($implied_row_end);
              $this->emitToken($token);
          } else {
              /* No element with that tag name in table scope: parse
               * error, ignore the token. */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html", "td", "th" */
      } elseif ($is_end_tag && in_array($tag_name, array(
          'body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in table". */
          $this->processWithRulesFor($token, self::IN_TABLE);
      }
  break;
  case self::IN_CELL:
      /* "In cell" insertion mode. Tokens without a dedicated branch are
       * processed with the "in body" rules. */
      $is_start_tag = ($token['type'] === HTML5_Tokenizer::STARTTAG);
      $is_end_tag = ($token['type'] === HTML5_Tokenizer::ENDTAG);
      $tag_name = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

      /* An end tag whose tag name is one of: "td", "th" */
      if ($is_end_tag && ($tag_name === 'td' || $tag_name === 'th')) {
          if ($this->elementInScope($tag_name, true)) {
              /* Generate implied end tags, except for elements with
               * the same tag name as the token. */
              $this->generateImpliedEndTags(array($tag_name));

              /* Now, if the current node is not an element with the
               * same tag name as the token, then this is a parse
               * error. */
              // XERROR: Implement parse error code

              /* Pop elements from this stack until an element with the
               * same tag name as the token has been popped. */
              do {
                  $popped = array_pop($this->stack);
              } while ($popped->tagName !== $tag_name);

              /* Clear the list of active formatting elements up to the
               * last marker. */
              $this->clearTheActiveFormattingElementsUpToTheLastMarker();

              /* Switch the insertion mode to "in row". (The current
               * node will be a tr element at this point.) */
              $this->mode = self::IN_ROW;
          } else {
              /* No element with that tag name in table scope: parse
               * error, the token must be ignored. */
              $this->ignored = true;
          }

      /* A start tag whose tag name is one of: "caption", "col",
       * "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr", or an
       * end tag whose tag name is one of: "table", "tbody", "tfoot",
       * "thead", "tr". Both groups are handled by the same steps. */
      } elseif (
          ($is_start_tag && in_array($tag_name, array(
              'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
              'thead', 'tr')))
          || ($is_end_tag && in_array($tag_name, array(
              'table', 'tbody', 'tfoot', 'thead', 'tr')))
      ) {
          if ($this->elementInScope(array('td', 'th'), true)) {
              /* Close the cell and reprocess the current token. */
              $this->closeCell();
              $this->emitToken($token);
          } else {
              /* No td or th element in table scope: parse error,
               * ignore the token. (fragment / innerHTML case) */
              $this->ignored = true;
          }

      /* An end tag whose tag name is one of: "body", "caption", "col",
       * "colgroup", "html" */
      } elseif ($is_end_tag && in_array($tag_name, array(
          'body', 'caption', 'col', 'colgroup', 'html'))) {
          /* Parse error. Ignore the token. */
          $this->ignored = true;

      /* Anything else */
      } else {
          /* Process the token as if the insertion mode was "in body". */
          $this->processWithRulesFor($token, self::IN_BODY);
      }
  break;
  2158. case self::IN_SELECT:
  2159. /* Handle the token as follows: */
  2160. /* A character token */
  2161. if(
  2162. $token['type'] === HTML5_Tokenizer::CHARACTER ||
  2163. $token['type'] === HTML5_Tokenizer::SPACECHARACTER
  2164. ) {
  2165. /* Append the token's character to the current node. */
  2166. $this->insertText($token['data']);
  2167. /* A comment token */
  2168. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2169. /* Append a Comment node to the current node with the data
  2170. attribute set to the data given in the comment token. */
  2171. $this->insertComment($token['data']);
  2172. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2173. // parse error
  2174. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { /* A start tag whose tag name is "html" */
  2175. $this->processWithRulesFor($token, self::IN_BODY); /* Process the token using the rules for the "in body" insertion mode. */
  2176. /* A start tag token whose tag name is "option" */
  2177. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2178. $token['name'] === 'option') {
  2179. /* If the current node is an option element, act as if an end tag
  2180. with the tag name "option" had been seen. */
  2181. if(end($this->stack)->tagName === 'option') {
  2182. $this->emitToken(array(
  2183. 'name' => 'option',
  2184. 'type' => HTML5_Tokenizer::ENDTAG
  2185. ));
  2186. }
  2187. /* Insert an HTML element for the token. */
  2188. $this->insertElement($token);
  2189. /* A start tag token whose tag name is "optgroup" */
  2190. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2191. $token['name'] === 'optgroup') {
  2192. /* If the current node is an option element, act as if an end tag
  2193. with the tag name "option" had been seen. */
  2194. if(end($this->stack)->tagName === 'option') {
  2195. $this->emitToken(array(
  2196. 'name' => 'option',
  2197. 'type' => HTML5_Tokenizer::ENDTAG
  2198. ));
  2199. }
  2200. /* If the current node is an optgroup element, act as if an end tag
  2201. with the tag name "optgroup" had been seen. */
  2202. if(end($this->stack)->tagName === 'optgroup') {
  2203. $this->emitToken(array(
  2204. 'name' => 'optgroup',
  2205. 'type' => HTML5_Tokenizer::ENDTAG
  2206. ));
  2207. }
  2208. /* Insert an HTML element for the token. */
  2209. $this->insertElement($token);
  2210. /* An end tag token whose tag name is "optgroup" */
  2211. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2212. $token['name'] === 'optgroup') {
  2213. /* First, if the current node is an option element, and the node
  2214. immediately before it in the stack of open elements is an optgroup
  2215. element, then act as if an end tag with the tag name "option" had
  2216. been seen. */
  2217. $elements_in_stack = count($this->stack);
  2218. if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
  2219. $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
  2220. $this->emitToken(array(
  2221. 'name' => 'option',
  2222. 'type' => HTML5_Tokenizer::ENDTAG
  2223. ));
  2224. }
  2225. /* If the current node is an optgroup element, then pop that node
  2226. from the stack of open elements. Otherwise, this is a parse error,
  2227. ignore the token. */
  2228. if(end($this->stack)->tagName === 'optgroup') {
  2229. array_pop($this->stack);
  2230. } else {
  2231. // parse error
  2232. $this->ignored = true;
  2233. }
  2234. /* An end tag token whose tag name is "option" */
  2235. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2236. $token['name'] === 'option') {
  2237. /* If the current node is an option element, then pop that node
  2238. from the stack of open elements. Otherwise, this is a parse error,
  2239. ignore the token. */
  2240. if(end($this->stack)->tagName === 'option') {
  2241. array_pop($this->stack);
  2242. } else {
  2243. // parse error
  2244. $this->ignored = true;
  2245. }
  2246. /* An end tag whose tag name is "select" */
  2247. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2248. $token['name'] === 'select') {
  2249. /* If the stack of open elements does not have an element in table
  2250. scope with the same tag name as the token, this is a parse error.
  2251. Ignore the token. (fragment case) */
  2252. if(!$this->elementInScope($token['name'], true)) {
  2253. $this->ignored = true;
  2254. // parse error
  2255. /* Otherwise: */
  2256. } else {
  2257. /* Pop elements from the stack of open elements until a select
  2258. element has been popped from the stack. */
  2259. do {
  2260. $node = array_pop($this->stack);
  2261. } while ($node->tagName !== 'select');
  2262. /* Reset the insertion mode appropriately. */
  2263. $this->resetInsertionMode();
  2264. }
  2265. /* A start tag whose tag name is "select" */
  2266. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
  2267. /* Parse error. Act as if the token had been an end tag with the
  2268. tag name "select" instead. */
  2269. $this->emitToken(array(
  2270. 'name' => 'select',