From 44592611e1582fd97ae1988343418a0dae1ae2a0 Mon Sep 17 00:00:00 2001 From: fabrixxm Date: Sun, 7 Feb 2016 14:27:13 +0100 Subject: [PATCH 01/55] new api for notifications /api/friendica/notification returns first 50 notifications for current user /api/friendica¬ification/ set note as seen and return item object if possible new class NotificationsManager to query for notifications and set seen state --- include/NotificationsManager.php | 113 +++++++++++++++++++++++++++++++ include/api.php | 79 +++++++++++++++++++-- mod/notify.php | 82 ++++++++++------------ mod/ping.php | 4 +- 4 files changed, 223 insertions(+), 55 deletions(-) create mode 100644 include/NotificationsManager.php diff --git a/include/NotificationsManager.php b/include/NotificationsManager.php new file mode 100644 index 0000000000..8b0ca9e13d --- /dev/null +++ b/include/NotificationsManager.php @@ -0,0 +1,113 @@ +a = get_app(); + } + + private function _set_extra($notes) { + $rets = array(); + foreach($notes as $n) { + $local_time = datetime_convert('UTC',date_default_timezone_get(),$n['date']); + $n['timestamp'] = strtotime($local_time); + $n['date_rel'] = relative_date($n['date']); + $rets[] = $n; + } + return $rets; + } + + + /** + * @brief get all notifications for local_user() + * + * @param array $filter optional Array "column name"=>value: filter query by columns values + * @param string $order optional Space separated list of column to sort by. prepend name with "+" to sort ASC, "-" to sort DESC. Default to "-date" + * @param string $limit optional Query limits + * + * @return array of results or false on errors + */ + public function getAll($filter = array(), $order="-date", $limit="") { + $filter_str = array(); + $filter_sql = ""; + foreach($filter as $column => $value) { + $filter_str[] = sprintf("`%s` = '%s'", $column, dbesc($value)); + } + if (count($filter_str)>0) { + $filter_sql = "AND ".implode(" AND ", $filter_str); + } + + $aOrder = explode(" ", $order); + $asOrder = array(); + foreach($aOrder as $o) { + $dir = "asc"; + if ($o[0]==="-") { + $dir = "desc"; + $o = substr($o,1); + } + if ($o[0]==="+") { + $dir = "asc"; + $o = substr($o,1); + } + $asOrder[] = "$o $dir"; + } + $order_sql = implode(", ", $asOrder); + + if ($limit!="") $limit = " LIMIT ".$limit; + + $r = q("SELECT * from notify where uid = %d $filter_sql order by $order_sql $limit", + intval(local_user()) + ); + if ($r!==false && count($r)>0) return $this->_set_extra($r); + return false; + } + + /** + * @brief get one note for local_user() by $id value + * + * @param int $id + * @return array note values or null if not found + */ + public function getByID($id) { + $r = q("select * from notify where id = %d and uid = %d limit 1", + intval($id), + intval(local_user()) + ); + if($r!==false && count($r)>0) { + return $this->_set_extra($r)[0]; + } + return null; + } + + /** + * @brief set seen state of $note of local_user() + * + * @param array $note + * @param bool $seen optional true or false, default true + * @return bool true on success, false on errors + */ + public function setSeen($note, $seen = true) { + return q("update notify set seen = %d where ( link = '%s' or ( parent != 0 and parent = %d and otype = '%s' )) and uid = %d", + intval($seen), + dbesc($note['link']), + intval($note['parent']), + dbesc($note['otype']), + intval(local_user()) + ); + } + + /** + * @brief set seen state of all notifications of local_user() + * + * @param bool $seen optional true or false. default true + * @return bool true on success, false on error + */ + public function setAllSeen($seen = true) { + return q("update notify set seen = %d where uid = %d", + intval($seen), + intval(local_user()) + ); + } +} diff --git a/include/api.php b/include/api.php index 4d206da28e..5acc816575 100644 --- a/include/api.php +++ b/include/api.php @@ -23,6 +23,7 @@ require_once('include/message.php'); require_once('include/group.php'); require_once('include/like.php'); + require_once('include/NotificationsManager.php'); define('API_METHOD_ANY','*'); @@ -250,7 +251,7 @@ */ function api_call(&$a){ GLOBAL $API, $called_api; - + $type="json"; if (strpos($a->query_string, ".xml")>0) $type="xml"; if (strpos($a->query_string, ".json")>0) $type="json"; @@ -680,6 +681,29 @@ } + /** + * @brief transform $data array in xml without a template + * + * @param array $data + * @return string xml string + */ + function api_array_to_xml($data, $ename="") { + $attrs=""; + $childs=""; + foreach($data as $k=>$v) { + $k=trim($k,'$'); + if (!is_array($v)) { + $attrs .= sprintf('%s="%s" ', $k, $v); + } else { + if (is_numeric($k)) $k=trim($ename,'s'); + $childs.=api_array_to_xml($v, $k); + } + } + $res = $childs; + if ($ename!="") $res = "<$ename $attrs>$res"; + return $res; + } + /** * load api $templatename for $type and replace $data array */ @@ -692,13 +716,17 @@ case "rss": case "xml": $data = array_xmlify($data); - $tpl = get_markup_template("api_".$templatename."_".$type.".tpl"); - if(! $tpl) { - header ("Content-Type: text/xml"); - echo ''."\n".'not implemented'; - killme(); + if ($templatename==="") { + $ret = api_array_to_xml($data); + } else { + $tpl = get_markup_template("api_".$templatename."_".$type.".tpl"); + if(! $tpl) { + header ("Content-Type: text/xml"); + echo ''."\n".'not implemented'; + killme(); + } + $ret = replace_macros($tpl, $data); } - $ret = replace_macros($tpl, $data); break; case "json": $ret = $data; @@ -3386,6 +3414,43 @@ api_register_func('api/friendica/activity/unattendno', 'api_friendica_activity', true, API_METHOD_POST); api_register_func('api/friendica/activity/unattendmaybe', 'api_friendica_activity', true, API_METHOD_POST); + /** + * returns notifications + * if called with note id set note seen and returns associated item (if possible) + */ + function api_friendica_notification(&$a, $type) { + if (api_user()===false) throw new ForbiddenException(); + + $nm = new NotificationsManager(); + + if ($a->argc==3) { + $notes = $nm->getAll(array(), "+seen -date", 50); + return api_apply_template("", $type, array('$notes' => $notes)); + } + if ($a->argc==4) { + $note = $nm->getByID(intval($a->argv[3])); + if (is_null($note)) throw new BadRequestException("Invalid argument"); + $nm->setSeen($note); + if ($note['otype']=='item') { + // would be really better with a ItemsManager and $im->getByID() :-P + $r = q("SELECT * FROM item WHERE id=%d AND uid=%d", + intval($note['iid']), + intval(local_user()) + ); + if ($r===false) throw new NotFoundException(); + $user_info = api_get_user($a); + $ret = api_format_items($r,$user_info); + $data = array('$statuses' => $ret); + return api_apply_template("timeline", $type, $data); + } else { + return api_apply_template('test', $type, array('ok' => $ok)); + } + + } + throw new BadRequestException("Invalid argument count"); + } + api_register_func('api/friendica/notification', 'api_friendica_notification', true, API_METHOD_GET); + /* To.Do: [pagename] => api/1.1/statuses/lookup.json diff --git a/mod/notify.php b/mod/notify.php index 7acac1084a..7c367708bb 100644 --- a/mod/notify.php +++ b/mod/notify.php @@ -1,43 +1,34 @@ argc > 2 && $a->argv[1] === 'view' && intval($a->argv[2])) { - $r = q("select * from notify where id = %d and uid = %d limit 1", - intval($a->argv[2]), - intval(local_user()) - ); - if(count($r)) { - q("update notify set seen = 1 where ( link = '%s' or ( parent != 0 and parent = %d and otype = '%s' )) and uid = %d", - dbesc($r[0]['link']), - intval($r[0]['parent']), - dbesc($r[0]['otype']), - intval(local_user()) - ); - + $note = $nm->getByID($a->argv[2]); + if ($note) { + $nm->setSeen($note); + // The friendica client has problems with the GUID. this is some workaround if ($a->is_friendica_app()) { require_once("include/items.php"); - $urldata = parse_url($r[0]['link']); + $urldata = parse_url($note['link']); $guid = basename($urldata["path"]); $itemdata = get_item_id($guid, local_user()); if ($itemdata["id"] != 0) - $r[0]['link'] = $a->get_baseurl().'/display/'.$itemdata["nick"].'/'.$itemdata["id"]; + $note['link'] = $a->get_baseurl().'/display/'.$itemdata["nick"].'/'.$itemdata["id"]; } - goaway($r[0]['link']); + goaway($note['link']); } goaway($a->get_baseurl(true)); } if($a->argc > 2 && $a->argv[1] === 'mark' && $a->argv[2] === 'all' ) { - $r = q("update notify set seen = 1 where uid = %d", - intval(local_user()) - ); + $r = $nm->setAllSeen(); $j = json_encode(array('result' => ($r) ? 'success' : 'fail')); echo $j; killme(); @@ -47,38 +38,37 @@ function notify_init(&$a) { if(! function_exists('notify_content')) { function notify_content(&$a) { - if(! local_user()) - return login(); + if(! local_user()) return login(); - $notif_tpl = get_markup_template('notifications.tpl'); + $nm = new NotificationsManager(); + + $notif_tpl = get_markup_template('notifications.tpl'); - $not_tpl = get_markup_template('notify.tpl'); - require_once('include/bbcode.php'); + $not_tpl = get_markup_template('notify.tpl'); + require_once('include/bbcode.php'); - $r = q("SELECT * from notify where uid = %d and seen = 0 order by date desc", - intval(local_user()) - ); - - if (count($r) > 0) { - foreach ($r as $it) { - $notif_content .= replace_macros($not_tpl,array( - '$item_link' => $a->get_baseurl(true).'/notify/view/'. $it['id'], - '$item_image' => $it['photo'], - '$item_text' => strip_tags(bbcode($it['msg'])), - '$item_when' => relative_date($it['date']) - )); - } - } else { - $notif_content .= t('No more system notifications.'); + $r = $nm->getAll(array('seen'=>0)); + if ($r!==false && count($r) > 0) { + foreach ($r as $it) { + $notif_content .= replace_macros($not_tpl,array( + '$item_link' => $a->get_baseurl(true).'/notify/view/'. $it['id'], + '$item_image' => $it['photo'], + '$item_text' => strip_tags(bbcode($it['msg'])), + '$item_when' => relative_date($it['date']) + )); } + } else { + $notif_content .= t('No more system notifications.'); + } - $o .= replace_macros($notif_tpl, array( - '$notif_header' => t('System Notifications'), - '$tabs' => '', // $tabs, - '$notif_content' => $notif_content, - )); + $o .= replace_macros($notif_tpl, array( + '$notif_header' => t('System Notifications'), + '$tabs' => false, // $tabs, + '$notif_content' => $notif_content, + )); return $o; } } + diff --git a/mod/ping.php b/mod/ping.php index 0d1659a763..adff0912f4 100644 --- a/mod/ping.php +++ b/mod/ping.php @@ -206,8 +206,8 @@ function ping_init(&$a) { $local_time = datetime_convert('UTC',date_default_timezone_get(),$n['date']); call_hooks('ping_xmlize', $n); - $notsxml = '%s'."\n"; - return sprintf ( $notsxml, + $notsxml = '%s'."\n"; + return sprintf ( $notsxml, intval($n['id']), xmlify($n['href']), xmlify($n['name']), xmlify($n['url']), xmlify($n['photo']), xmlify(relative_date($n['date'])), xmlify($n['seen']), xmlify(strtotime($local_time)), xmlify($n['message']) From 102c06a41f3fb6308a75a748a347ebc84b7362dd Mon Sep 17 00:00:00 2001 From: fabrixxm Date: Sun, 7 Feb 2016 14:29:07 +0100 Subject: [PATCH 02/55] fix error in template in notifications page --- view/templates/notifications.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/view/templates/notifications.tpl b/view/templates/notifications.tpl index 0b7e77a07b..54f9de0c7a 100644 --- a/view/templates/notifications.tpl +++ b/view/templates/notifications.tpl @@ -2,7 +2,7 @@

{{$notif_header}}

-{{include file="common_tabs.tpl"}} +{{if $tabs }}{{include file="common_tabs.tpl"}}{{/if}}
{{$notif_content}} From 756b90a4e073c5f1e7cfb0314262b90f408b8f83 Mon Sep 17 00:00:00 2001 From: Fabrixxm Date: Mon, 8 Feb 2016 09:47:59 +0100 Subject: [PATCH 03/55] add docs, rewrite part of the notification api list notifications and set note as seen functionalities are now splitted in two functions, with correct http method requirement. Fixed returned value from `notification/seen` --- include/NotificationsManager.php | 30 +++++++++++-- include/api.php | 74 +++++++++++++++++++++----------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/include/NotificationsManager.php b/include/NotificationsManager.php index 8b0ca9e13d..b7e31d4f65 100644 --- a/include/NotificationsManager.php +++ b/include/NotificationsManager.php @@ -1,6 +1,13 @@ a = get_app(); } + /** + * @brief set some extra note properties + * + * @param array $notes array of note arrays from db + * @return array Copy of input array with added properties + * + * Set some extra properties to note array from db: + * - timestamp as int in default TZ + * - date_rel : relative date string + * - msg_html: message as html string + * - msg_plain: message as plain text string + */ private function _set_extra($notes) { $rets = array(); foreach($notes as $n) { $local_time = datetime_convert('UTC',date_default_timezone_get(),$n['date']); $n['timestamp'] = strtotime($local_time); $n['date_rel'] = relative_date($n['date']); + $n['msg_html'] = bbcode($n['msg'], false, false, false, false); + $n['msg_plain'] = explode("\n",trim(html2plain($n['msg_html'], 0)))[0]; + $rets[] = $n; } return $rets; @@ -57,7 +79,7 @@ class NotificationsManager { if ($limit!="") $limit = " LIMIT ".$limit; - $r = q("SELECT * from notify where uid = %d $filter_sql order by $order_sql $limit", + $r = q("SELECT * FROM notify WHERE uid = %d $filter_sql ORDER BY $order_sql $limit", intval(local_user()) ); if ($r!==false && count($r)>0) return $this->_set_extra($r); @@ -71,7 +93,7 @@ class NotificationsManager { * @return array note values or null if not found */ public function getByID($id) { - $r = q("select * from notify where id = %d and uid = %d limit 1", + $r = q("SELECT * FROM notify WHERE id = %d AND uid = %d LIMIT 1", intval($id), intval(local_user()) ); @@ -89,7 +111,7 @@ class NotificationsManager { * @return bool true on success, false on errors */ public function setSeen($note, $seen = true) { - return q("update notify set seen = %d where ( link = '%s' or ( parent != 0 and parent = %d and otype = '%s' )) and uid = %d", + return q("UPDATE notify SET seen = %d WHERE ( link = '%s' OR ( parent != 0 AND parent = %d AND otype = '%s' )) AND uid = %d", intval($seen), dbesc($note['link']), intval($note['parent']), @@ -105,7 +127,7 @@ class NotificationsManager { * @return bool true on success, false on error */ public function setAllSeen($seen = true) { - return q("update notify set seen = %d where uid = %d", + return q("UPDATE notify SET seen = %d WHERE uid = %d", intval($seen), intval(local_user()) ); diff --git a/include/api.php b/include/api.php index 5acc816575..16481201e3 100644 --- a/include/api.php +++ b/include/api.php @@ -690,6 +690,11 @@ function api_array_to_xml($data, $ename="") { $attrs=""; $childs=""; + if (count($data)==1 && !is_array($data[0])) { + $ename = array_keys($data)[0]; + $v = $data[$ename]; + return "<$ename>$v"; + } foreach($data as $k=>$v) { $k=trim($k,'$'); if (!is_array($v)) { @@ -3415,41 +3420,62 @@ api_register_func('api/friendica/activity/unattendmaybe', 'api_friendica_activity', true, API_METHOD_POST); /** - * returns notifications - * if called with note id set note seen and returns associated item (if possible) - */ + * @brief Returns notifications + * + * @param App $a + * @param string $type Known types are 'atom', 'rss', 'xml' and 'json' + * @return string + */ function api_friendica_notification(&$a, $type) { if (api_user()===false) throw new ForbiddenException(); - + if ($a->argc!==3) throw new BadRequestException("Invalid argument count"); $nm = new NotificationsManager(); - if ($a->argc==3) { - $notes = $nm->getAll(array(), "+seen -date", 50); - return api_apply_template("", $type, array('$notes' => $notes)); - } - if ($a->argc==4) { - $note = $nm->getByID(intval($a->argv[3])); - if (is_null($note)) throw new BadRequestException("Invalid argument"); - $nm->setSeen($note); - if ($note['otype']=='item') { - // would be really better with a ItemsManager and $im->getByID() :-P - $r = q("SELECT * FROM item WHERE id=%d AND uid=%d", - intval($note['iid']), - intval(local_user()) - ); - if ($r===false) throw new NotFoundException(); + $notes = $nm->getAll(array(), "+seen -date", 50); + return api_apply_template("", $type, array('$notes' => $notes)); + } + + /** + * @brief Set notification as seen and returns associated item (if possible) + * + * POST request with 'id' param as notification id + * + * @param App $a + * @param string $type Known types are 'atom', 'rss', 'xml' and 'json' + * @return string + */ + function api_friendica_notification_seen(&$a, $type){ + if (api_user()===false) throw new ForbiddenException(); + if ($a->argc!==4) throw new BadRequestException("Invalid argument count"); + + $id = (x($_REQUEST, 'id') ? intval($_REQUEST['id']) : 0); + + $nm = new NotificationsManager(); + $note = $nm->getByID($id); + if (is_null($note)) throw new BadRequestException("Invalid argument"); + + $nm->setSeen($note); + if ($note['otype']=='item') { + // would be really better with an ItemsManager and $im->getByID() :-P + $r = q("SELECT * FROM item WHERE id=%d AND uid=%d", + intval($note['iid']), + intval(local_user()) + ); + if ($r!==false) { + // we found the item, return it to the user $user_info = api_get_user($a); $ret = api_format_items($r,$user_info); $data = array('$statuses' => $ret); return api_apply_template("timeline", $type, $data); - } else { - return api_apply_template('test', $type, array('ok' => $ok)); } - - } - throw new BadRequestException("Invalid argument count"); + // the item can't be found, but we set the note as seen, so we count this as a success + } + return api_apply_template('', $type, array('status' => "success")); } + + api_register_func('api/friendica/notification/seen', 'api_friendica_notification_seen', true, API_METHOD_POST); api_register_func('api/friendica/notification', 'api_friendica_notification', true, API_METHOD_GET); + /* To.Do: From 870d2f844d83741c7fc1878dda72807dab92d992 Mon Sep 17 00:00:00 2001 From: Fabrixxm Date: Mon, 8 Feb 2016 10:22:12 +0100 Subject: [PATCH 04/55] Update API docs Add `friendica/notification` and `friendica/notification/seen` endpoints Fix some list rendering issues Move all `friendica/*` endpoints under "Implemented API calls (not compatible with other APIs)" section Add info about permitted HTTP methods and required auth --- doc/api.md | 450 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 263 insertions(+), 187 deletions(-) diff --git a/doc/api.md b/doc/api.md index ced078f556..f050ae4304 100644 --- a/doc/api.md +++ b/doc/api.md @@ -7,6 +7,21 @@ Please refer to the linked documentation for further information. ## Implemented API calls ### General +#### HTTP Method + +API endpoints can restrict the method used to request them. +Using an invalid method results in HTTP error 405 "Method Not Allowed". + +In this document, the required method is listed after the endpoint name. "*" means every method can be used. + +#### Auth + +Friendica supports basic http auth and OAuth 1 to authenticate the user to the api. + +OAuth settings can be added by the user in web UI under /settings/oauth/ + +In this document, endpoints which requires auth are marked with "AUTH" after endpoint name + #### Unsupported parameters * cursor: Not implemented in GNU Social * trim_user: Not implemented in GNU Social @@ -37,36 +52,37 @@ Error body is json: ``` - { - "error": "Specific error message", - "request": "API path requested", - "code": "HTTP error code" - } +{ +"error": "Specific error message", +"request": "API path requested", +"code": "HTTP error code" +} ``` xml: ``` - - Specific error message - API path requested - HTTP error code - + +Specific error message +API path requested +HTTP error code + ``` --- -### account/rate_limit_status +### account/rate_limit_status (*; AUTH) --- -### account/verify_credentials +### account/verify_credentials (*; AUTH) #### Parameters + * skip_status: Don't show the "status" field. (Default: false) * include_entities: "true" shows entities for pictures and links (Default: false) --- -### conversation/show +### conversation/show (*; AUTH) Unofficial Twitter command. It shows all direct answers (excluding the original post) to a given id. -#### Parameters +#### Parameter * id: id of the post * count: Items per page (default: 20) * page: page number @@ -80,7 +96,7 @@ Unofficial Twitter command. It shows all direct answers (excluding the original * contributor_details --- -### direct_messages +### direct_messages (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -93,7 +109,7 @@ Unofficial Twitter command. It shows all direct answers (excluding the original * skip_status --- -### direct_messages/all +### direct_messages/all (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -102,7 +118,7 @@ Unofficial Twitter command. It shows all direct answers (excluding the original * getText: Defines the format of the status field. Can be "html" or "plain" --- -### direct_messages/conversation +### direct_messages/conversation (*; AUTH) Shows all direct messages of a conversation #### Parameters * count: Items per page (default: 20) @@ -113,7 +129,7 @@ Shows all direct messages of a conversation * uri: URI of the conversation --- -### direct_messages/new +### direct_messages/new (POST,PUT; AUTH) #### Parameters * user_id: id of the user * screen_name: screen name (for technical reasons, this value is not unique!) @@ -122,7 +138,7 @@ Shows all direct messages of a conversation * title: Title of the direct message --- -### direct_messages/sent +### direct_messages/sent (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -132,7 +148,7 @@ Shows all direct messages of a conversation * include_entities: "true" shows entities for pictures and links (Default: false) --- -### favorites +### favorites (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -144,22 +160,23 @@ Shows all direct messages of a conversation * user_id * screen_name -Favorites aren't displayed to other users, so "user_id" and "screen_name". So setting this value will result in an empty array. +Favorites aren't displayed to other users, so "user_id" and "screen_name" are unsupported. +Set this values will result in an empty array. --- -### favorites/create +### favorites/create (POST,PUT; AUTH) #### Parameters * id * include_entities: "true" shows entities for pictures and links (Default: false) --- -### favorites/destroy +### favorites/destroy (POST,DELETE; AUTH) #### Parameters * id * include_entities: "true" shows entities for pictures and links (Default: false) --- -### followers/ids +### followers/ids (*; AUTH) #### Parameters * stringify_ids: Should the id numbers be sent as text (true) or number (false)? (default: false) @@ -171,139 +188,7 @@ Favorites aren't displayed to other users, so "user_id" and "screen_name". So se Friendica doesn't allow showing followers of other users. --- -### friendica/activity/ -#### parameters -* id: item id - -Add or remove an activity from an item. -'verb' can be one of: -- like -- dislike -- attendyes -- attendno -- attendmaybe - -To remove an activity, prepend the verb with "un", eg. "unlike" or "undislike" -Attend verbs disable eachother: that means that if "attendyes" was added to an item, adding "attendno" remove previous "attendyes". -Attend verbs should be used only with event-related items (there is no check at the moment) - -#### Return values - -On success: -json -```"ok"``` - -xml -```true``` - -On error: -HTTP 400 BadRequest - ---- -### friendica/photo -#### Parameters -* photo_id: Resource id of a photo. -* scale: (optional) scale value of the photo - -Returns data of a picture with the given resource. -If 'scale' isn't provided, returned data include full url to each scale of the photo. -If 'scale' is set, returned data include image data base64 encoded. - -possibile scale value are: -0: original or max size by server settings -1: image with or height at <= 640 -2: image with or height at <= 320 -3: thumbnail 160x160 - -4: Profile image at 175x175 -5: Profile image at 80x80 -6: Profile image at 48x48 - -An image used as profile image has only scale 4-6, other images only 0-3 - -#### Return values - -json -``` - { - "id": "photo id" - "created": "date(YYYY-MM-GG HH:MM:SS)", - "edited": "date(YYYY-MM-GG HH:MM:SS)", - "title": "photo title", - "desc": "photo description", - "album": "album name", - "filename": "original file name", - "type": "mime type", - "height": "number", - "width": "number", - "profile": "1 if is profile photo", - "link": { - "": "url to image" - ... - }, - // if 'scale' is set - "datasize": "size in byte", - "data": "base64 encoded image data" - } -``` - -xml -``` - - photo id - date(YYYY-MM-GG HH:MM:SS) - date(YYYY-MM-GG HH:MM:SS) - photo title - photo description - album name - original file name - mime type - number - number - 1 if is profile photo - - - ... - - -``` - ---- -### friendica/photos/list - -Returns a list of all photo resources of the logged in user. - -#### Return values - -json -``` - [ - { - id: "resource_id", - album: "album name", - filename: "original file name", - type: "image mime type", - thumb: "url to thumb sized image" - }, - ... - ] -``` - -xml -``` - - - "url to thumb sized image" - - ... - -``` - ---- -### friends/ids +### friends/ids (*; AUTH) #### Parameters * stringify_ids: Should the id numbers be sent as text (true) or number (false)? (default: false) @@ -315,15 +200,15 @@ xml Friendica doesn't allow showing friends of other users. --- -### help/test +### help/test (*) --- -### media/upload +### media/upload (POST,PUT; AUTH) #### Parameters * media: image data --- -### oauth/request_token +### oauth/request_token (*) #### Parameters * oauth_callback @@ -331,7 +216,7 @@ Friendica doesn't allow showing friends of other users. * x_auth_access_type --- -### oauth/access_token +### oauth/access_token (*) #### Parameters * oauth_verifier @@ -341,7 +226,7 @@ Friendica doesn't allow showing friends of other users. * x_auth_mode --- -### statuses/destroy +### statuses/destroy (POST,DELETE; AUTH) #### Parameters * id: message number * include_entities: "true" shows entities for pictures and links (Default: false) @@ -350,15 +235,21 @@ Friendica doesn't allow showing friends of other users. * trim_user --- -### statuses/followers +### statuses/followers (*; AUTH) + +#### Parameters + * include_entities: "true" shows entities for pictures and links (Default: false) --- -### statuses/friends +### statuses/friends (*; AUTH) + +#### Parameters + * include_entities: "true" shows entities for pictures and links (Default: false) --- -### statuses/friends_timeline +### statuses/friends_timeline (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -374,7 +265,7 @@ Friendica doesn't allow showing friends of other users. * contributor_details --- -### statuses/home_timeline +### statuses/home_timeline (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -390,7 +281,7 @@ Friendica doesn't allow showing friends of other users. * contributor_details --- -### statuses/mentions +### statuses/mentions (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -404,7 +295,7 @@ Friendica doesn't allow showing friends of other users. * contributor_details --- -### statuses/public_timeline +### statuses/public_timeline (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -418,7 +309,7 @@ Friendica doesn't allow showing friends of other users. * trim_user --- -### statuses/replies +### statuses/replies (*; AUTH) #### Parameters * count: Items per page (default: 20) * page: page number @@ -432,7 +323,7 @@ Friendica doesn't allow showing friends of other users. * contributor_details --- -### statuses/retweet +### statuses/retweet (POST,PUT; AUTH) #### Parameters * id: message number * include_entities: "true" shows entities for pictures and links (Default: false) @@ -441,7 +332,7 @@ Friendica doesn't allow showing friends of other users. * trim_user --- -### statuses/show +### statuses/show (*; AUTH) #### Parameters * id: message number * conversation: if set to "1" show all messages of the conversation with the given id @@ -476,7 +367,7 @@ Friendica doesn't allow showing friends of other users. * display_coordinates --- -### statuses/user_timeline +### statuses/user_timeline (*; AUTH) #### Parameters * user_id: id of the user * screen_name: screen name (for technical reasons, this value is not unique!) @@ -489,15 +380,16 @@ Friendica doesn't allow showing friends of other users. * include_entities: "true" shows entities for pictures and links (Default: false) #### Unsupported parameters + * include_rts * trim_user * contributor_details --- -### statusnet/config +### statusnet/config (*) --- -### statusnet/version +### statusnet/version (*) #### Unsupported parameters * user_id @@ -507,7 +399,7 @@ Friendica doesn't allow showing friends of other users. Friendica doesn't allow showing followers of other users. --- -### users/search +### users/search (*) #### Parameters * q: name of the user @@ -517,7 +409,7 @@ Friendica doesn't allow showing followers of other users. * include_entities --- -### users/show +### users/show (*) #### Parameters * user_id: id of the user * screen_name: screen name (for technical reasons, this value is not unique!) @@ -533,8 +425,39 @@ Friendica doesn't allow showing friends of other users. ## Implemented API calls (not compatible with other APIs) + --- -### friendica/group_show +### friendica/activity/ +#### parameters +* id: item id + +Add or remove an activity from an item. +'verb' can be one of: + +- like +- dislike +- attendyes +- attendno +- attendmaybe + +To remove an activity, prepend the verb with "un", eg. "unlike" or "undislike" +Attend verbs disable eachother: that means that if "attendyes" was added to an item, adding "attendno" remove previous "attendyes". +Attend verbs should be used only with event-related items (there is no check at the moment) + +#### Return values + +On success: +json +```"ok"``` + +xml +```true``` + +On error: +HTTP 400 BadRequest + +--- +### friendica/group_show (*; AUTH) Return all or a specified group of the user with the containing contacts as array. #### Parameters @@ -542,22 +465,23 @@ Return all or a specified group of the user with the containing contacts as arra #### Return values Array of: + * name: name of the group * gid: id of the group * user: array of group members (return from api_get_user() function for each member) --- -### friendica/group_delete +### friendica/group_delete (POST,DELETE; AUTH) delete the specified group of contacts; API call need to include the correct gid AND name of the group to be deleted. ---- -### Parameters +#### Parameters * gid: id of the group to be deleted * name: name of the group to be deleted #### Return values Array of: + * success: true if successfully deleted * gid: gid of the deleted group * name: name of the deleted group @@ -566,19 +490,22 @@ Array of: --- -### friendica/group_create +### friendica/group_create (POST,PUT; AUTH) Create the group with the posted array of contacts as members. + #### Parameters * name: name of the group to be created #### POST data -JSON data as Array like the result of „users/group_show“: +JSON data as Array like the result of "users/group_show": + * gid * name * array of users #### Return values Array of: + * success: true if successfully created or reactivated * gid: gid of the created group * name: name of the created group @@ -587,26 +514,175 @@ Array of: --- -### friendica/group_update +### friendica/group_update (POST) Update the group with the posted array of contacts as members (post all members of the group to the call; function will remove members not posted). + #### Parameters * gid: id of the group to be changed * name: name of the group to be changed #### POST data JSON data as array like the result of „users/group_show“: + * gid * name * array of users #### Return values Array of: + * success: true if successfully updated * gid: gid of the changed group * name: name of the changed group * status: „missing user“ | „ok“ * wrong users: array of users, which were not available in the contact table + + +--- +### friendica/notifications (GET) +Return last 50 notification for current user, ordered by date with unseen item on top + +#### Parameters +none + +#### Return values +Array of: + +* id: id of the note +* type: type of notification as int (see NOTIFY_* constants in boot.php) +* name: full name of the contact subject of the note +* url: contact's profile url +* photo: contact's profile photo +* date: datetime string of the note +* timestamp: timestamp of the node +* date_rel: relative date of the note (eg. "1 hour ago") +* msg: note message in bbcode +* msg_html: note message in html +* msg_plain: note message in plain text +* link: link to note +* seen: seen state: 0 or 1 + + +--- +### friendica/notifications/seen (POST) +Set note as seen, returns item object if possible + +#### Parameters +id: id of the note to set seen + +#### Return values +If the note is linked to an item, the item is returned, just like one of the "statuses/*_timeline" api. + +If the note is not linked to an item, a success status is returned: + +* "success" (json) | "<status>success</status>" (xml) + + +--- +### friendica/photo (*; AUTH) +#### Parameters +* photo_id: Resource id of a photo. +* scale: (optional) scale value of the photo + +Returns data of a picture with the given resource. +If 'scale' isn't provided, returned data include full url to each scale of the photo. +If 'scale' is set, returned data include image data base64 encoded. + +possibile scale value are: + +* 0: original or max size by server settings +* 1: image with or height at <= 640 +* 2: image with or height at <= 320 +* 3: thumbnail 160x160 +* 4: Profile image at 175x175 +* 5: Profile image at 80x80 +* 6: Profile image at 48x48 + +An image used as profile image has only scale 4-6, other images only 0-3 + +#### Return values + +json +``` +{ +"id": "photo id" +"created": "date(YYYY-MM-GG HH:MM:SS)", +"edited": "date(YYYY-MM-GG HH:MM:SS)", +"title": "photo title", +"desc": "photo description", +"album": "album name", +"filename": "original file name", +"type": "mime type", +"height": "number", +"width": "number", +"profile": "1 if is profile photo", +"link": { +"": "url to image" +... +}, +// if 'scale' is set +"datasize": "size in byte", +"data": "base64 encoded image data" +} +``` + +xml +``` + +photo id +date(YYYY-MM-GG HH:MM:SS) +date(YYYY-MM-GG HH:MM:SS) +photo title +photo description +album name +original file name +mime type +number +number +1 if is profile photo + + +... + + +``` + +--- +### friendica/photos/list (*; AUTH) + +Returns a list of all photo resources of the logged in user. + +#### Return values + +json +``` +[ +{ +id: "resource_id", +album: "album name", +filename: "original file name", +type: "image mime type", +thumb: "url to thumb sized image" +}, +... +] +``` + +xml +``` + + +"url to thumb sized image" + +... + +``` + + --- ## Not Implemented API calls The following API calls are implemented in GNU Social but not in Friendica: (incomplete) @@ -702,13 +778,13 @@ The following API calls from the Twitter API aren't implemented neither in Frien ### BASH / cURL Betamax has documentated some example API usage from a [bash script](https://en.wikipedia.org/wiki/Bash_(Unix_shell) employing [curl](https://en.wikipedia.org/wiki/CURL) (see [his posting](https://betamax65.de/display/betamax65/43539)). - /usr/bin/curl -u USER:PASS https://YOUR.FRIENDICA.TLD/api/statuses/update.xml -d source="some source id" -d status="the status you want to post" +/usr/bin/curl -u USER:PASS https://YOUR.FRIENDICA.TLD/api/statuses/update.xml -d source="some source id" -d status="the status you want to post" ### Python The [RSStoFriedika](https://github.com/pafcu/RSStoFriendika) code can be used as an example of how to use the API with python. The lines for posting are located at [line 21](https://github.com/pafcu/RSStoFriendika/blob/master/RSStoFriendika.py#L21) and following. - def tweet(server, message, group_allow=None): - url = server + '/api/statuses/update' - urllib2.urlopen(url, urllib.urlencode({'status': message,'group_allow[]':group_allow}, doseq=True)) +def tweet(server, message, group_allow=None): +url = server + '/api/statuses/update' +urllib2.urlopen(url, urllib.urlencode({'status': message,'group_allow[]':group_allow}, doseq=True)) There is also a [module for python 3](https://bitbucket.org/tobiasd/python-friendica) for using the API. From a283691149fa9224be8b7ba8edc4dcdef88c3612 Mon Sep 17 00:00:00 2001 From: Fabrixxm Date: Mon, 8 Feb 2016 10:34:27 +0100 Subject: [PATCH 05/55] api docs: fix indentation --- doc/api.md | 132 ++++++++++++++++++++++++++--------------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/doc/api.md b/doc/api.md index f050ae4304..c020f403ff 100644 --- a/doc/api.md +++ b/doc/api.md @@ -52,20 +52,20 @@ Error body is json: ``` -{ -"error": "Specific error message", -"request": "API path requested", -"code": "HTTP error code" -} + { + "error": "Specific error message", + "request": "API path requested", + "code": "HTTP error code" + } ``` xml: ``` - -Specific error message -API path requested -HTTP error code - + + Specific error message + API path requested + HTTP error code + ``` --- @@ -605,47 +605,47 @@ An image used as profile image has only scale 4-6, other images only 0-3 json ``` -{ -"id": "photo id" -"created": "date(YYYY-MM-GG HH:MM:SS)", -"edited": "date(YYYY-MM-GG HH:MM:SS)", -"title": "photo title", -"desc": "photo description", -"album": "album name", -"filename": "original file name", -"type": "mime type", -"height": "number", -"width": "number", -"profile": "1 if is profile photo", -"link": { -"": "url to image" -... -}, -// if 'scale' is set -"datasize": "size in byte", -"data": "base64 encoded image data" -} + { + "id": "photo id" + "created": "date(YYYY-MM-GG HH:MM:SS)", + "edited": "date(YYYY-MM-GG HH:MM:SS)", + "title": "photo title", + "desc": "photo description", + "album": "album name", + "filename": "original file name", + "type": "mime type", + "height": "number", + "width": "number", + "profile": "1 if is profile photo", + "link": { + "": "url to image" + ... + }, + // if 'scale' is set + "datasize": "size in byte", + "data": "base64 encoded image data" + } ``` xml ``` - -photo id -date(YYYY-MM-GG HH:MM:SS) -date(YYYY-MM-GG HH:MM:SS) -photo title -photo description -album name -original file name -mime type -number -number -1 if is profile photo - - -... - - + + photo id + date(YYYY-MM-GG HH:MM:SS) + date(YYYY-MM-GG HH:MM:SS) + photo title + photo description + album name + original file name + mime type + number + number + 1 if is profile photo + + + ... + + ``` --- @@ -657,29 +657,29 @@ Returns a list of all photo resources of the logged in user. json ``` -[ -{ -id: "resource_id", -album: "album name", -filename: "original file name", -type: "image mime type", -thumb: "url to thumb sized image" -}, -... -] + [ + { + id: "resource_id", + album: "album name", + filename: "original file name", + type: "image mime type", + thumb: "url to thumb sized image" + }, + ... + ] ``` xml ``` - - -"url to thumb sized image" - -... - + + + "url to thumb sized image" + + ... + ``` From 9ff0fc92dd1525450ea99347895fbbe5e367d56b Mon Sep 17 00:00:00 2001 From: Fabrixxm Date: Mon, 8 Feb 2016 13:42:06 +0100 Subject: [PATCH 06/55] NotificationsManager: add backtick to queries --- include/NotificationsManager.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/NotificationsManager.php b/include/NotificationsManager.php index b7e31d4f65..c99d00742c 100644 --- a/include/NotificationsManager.php +++ b/include/NotificationsManager.php @@ -79,7 +79,7 @@ class NotificationsManager { if ($limit!="") $limit = " LIMIT ".$limit; - $r = q("SELECT * FROM notify WHERE uid = %d $filter_sql ORDER BY $order_sql $limit", + $r = q("SELECT * FROM `notify` WHERE `uid` = %d $filter_sql ORDER BY $order_sql $limit", intval(local_user()) ); if ($r!==false && count($r)>0) return $this->_set_extra($r); @@ -93,7 +93,7 @@ class NotificationsManager { * @return array note values or null if not found */ public function getByID($id) { - $r = q("SELECT * FROM notify WHERE id = %d AND uid = %d LIMIT 1", + $r = q("SELECT * FROM `notify` WHERE `id` = %d AND `uid` = %d LIMIT 1", intval($id), intval(local_user()) ); @@ -111,7 +111,7 @@ class NotificationsManager { * @return bool true on success, false on errors */ public function setSeen($note, $seen = true) { - return q("UPDATE notify SET seen = %d WHERE ( link = '%s' OR ( parent != 0 AND parent = %d AND otype = '%s' )) AND uid = %d", + return q("UPDATE `notify` SET `seen` = %d WHERE ( `link` = '%s' OR ( `parent` != 0 AND `parent` = %d AND `otype` = '%s' )) AND `uid` = %d", intval($seen), dbesc($note['link']), intval($note['parent']), @@ -127,7 +127,7 @@ class NotificationsManager { * @return bool true on success, false on error */ public function setAllSeen($seen = true) { - return q("UPDATE notify SET seen = %d WHERE uid = %d", + return q("UPDATE `notify` SET `seen` = %d WHERE `uid` = %d", intval($seen), intval(local_user()) ); From 2a016e7685e76b29641dce09172058f716f4ee4b Mon Sep 17 00:00:00 2001 From: Fabrixxm Date: Mon, 8 Feb 2016 14:35:41 +0100 Subject: [PATCH 07/55] add missing query backticks --- include/api.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/api.php b/include/api.php index 16481201e3..d205451e5e 100644 --- a/include/api.php +++ b/include/api.php @@ -3457,7 +3457,7 @@ $nm->setSeen($note); if ($note['otype']=='item') { // would be really better with an ItemsManager and $im->getByID() :-P - $r = q("SELECT * FROM item WHERE id=%d AND uid=%d", + $r = q("SELECT * FROM `item` WHERE `id`=%d AND `uid`=%d", intval($note['iid']), intval(local_user()) ); From 7b2fadcf4330b47ffedddf6892b94fd0dd4ae86f Mon Sep 17 00:00:00 2001 From: Michael Vogel Date: Tue, 9 Feb 2016 10:21:10 +0100 Subject: [PATCH 08/55] Get rid of the "guid" table. We don't need it anymore. --- doc/database.md | 1 - doc/database/db_guid.md | 12 ------------ include/dbstructure.php | 15 --------------- include/items.php | 13 ------------- mod/item.php | 3 --- 5 files changed, 44 deletions(-) delete mode 100644 doc/database/db_guid.md diff --git a/doc/database.md b/doc/database.md index a0b28f4e84..e37df05e09 100644 --- a/doc/database.md +++ b/doc/database.md @@ -27,7 +27,6 @@ Database Tables | [group](help/database/db_group) | privacy groups, group info | | [group_member](help/database/db_group_member) | privacy groups, member info | | [gserver](help/database/db_gserver) | | -| [guid](help/database/db_guid) | | | [hook](help/database/db_hook) | plugin hook registry | | [intro](help/database/db_intro) | | | [item](help/database/db_item) | all posts | diff --git a/doc/database/db_guid.md b/doc/database/db_guid.md deleted file mode 100644 index f607597a21..0000000000 --- a/doc/database/db_guid.md +++ /dev/null @@ -1,12 +0,0 @@ -Table guid -========== - -| Field | Description | Type | Null | Key | Default | Extra | -|---------|------------------|------------------|------|-----|---------|----------------| -| id | sequential ID | int(10) unsigned | NO | PRI | NULL | auto_increment | -| guid | | varchar(255) | NO | MUL | | | -| plink | | varchar(255) | NO | MUL | | | -| uri | | varchar(255) | NO | MUL | | | -| network | | varchar(32) | NO | | | | - -Return to [database documentation](help/database) diff --git a/include/dbstructure.php b/include/dbstructure.php index 96d18cd789..ddf036f2c1 100644 --- a/include/dbstructure.php +++ b/include/dbstructure.php @@ -748,21 +748,6 @@ function db_definition() { "nurl" => array("nurl"), ) ); - $database["guid"] = array( - "fields" => array( - "id" => array("type" => "int(10) unsigned", "not null" => "1", "extra" => "auto_increment", "primary" => "1"), - "guid" => array("type" => "varchar(255)", "not null" => "1", "default" => ""), - "plink" => array("type" => "varchar(255)", "not null" => "1", "default" => ""), - "uri" => array("type" => "varchar(255)", "not null" => "1", "default" => ""), - "network" => array("type" => "varchar(32)", "not null" => "1", "default" => ""), - ), - "indexes" => array( - "PRIMARY" => array("id"), - "guid" => array("guid"), - "plink" => array("plink"), - "uri" => array("uri"), - ) - ); $database["hook"] = array( "fields" => array( "id" => array("type" => "int(11)", "not null" => "1", "extra" => "auto_increment", "primary" => "1"), diff --git a/include/items.php b/include/items.php index 90c39a9889..1af6fe1b52 100644 --- a/include/items.php +++ b/include/items.php @@ -291,16 +291,6 @@ function add_page_info_to_body($body, $texturl = false, $no_photos = false) { return $body; } -function add_guid($item) { - $r = q("SELECT `guid` FROM `guid` WHERE `guid` = '%s' LIMIT 1", dbesc($item["guid"])); - if ($r) - return; - - q("INSERT INTO `guid` (`guid`,`plink`,`uri`,`network`) VALUES ('%s','%s','%s','%s')", - dbesc($item["guid"]), dbesc($item["plink"]), - dbesc($item["uri"]), dbesc($item["network"])); -} - /** * Adds a "lang" specification in a "postopts" element of given $arr, * if possible and not already present. @@ -778,9 +768,6 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa return 0; } elseif(count($r)) { - // Store the guid and other relevant data - add_guid($arr); - $current_post = $r[0]['id']; logger('item_store: created item ' . $current_post); diff --git a/mod/item.php b/mod/item.php index 8c5a479646..7e575a17e4 100644 --- a/mod/item.php +++ b/mod/item.php @@ -844,9 +844,6 @@ function item_post(&$a) { // NOTREACHED } - // Store the guid and other relevant data - add_guid($datarray); - $post_id = $r[0]['id']; logger('mod_item: saved item ' . $post_id); From c28109ca947b5c5867ec052058dd2115f82aa0ff Mon Sep 17 00:00:00 2001 From: Fabrixxm Date: Tue, 9 Feb 2016 11:06:17 +0100 Subject: [PATCH 09/55] Update HTMLPurifier to v4.7.0 --- .../HTMLPurifier/AttrDef/CSS/AlphaValue.php | 21 - .../AttrDef/CSS/DenyElementDecorator.php | 28 - .../HTMLPurifier/AttrDef/CSS/FontFamily.php | 72 - library/HTMLPurifier/AttrDef/CSS/Length.php | 47 - .../HTMLPurifier/AttrDef/CSS/ListStyle.php | 78 - .../HTMLPurifier/AttrDef/CSS/Percentage.php | 40 - library/HTMLPurifier/AttrDef/HTML/Bool.php | 28 - library/HTMLPurifier/AttrDef/HTML/Color.php | 32 - .../HTMLPurifier/AttrDef/HTML/FrameTarget.php | 21 - library/HTMLPurifier/AttrDef/HTML/ID.php | 70 - library/HTMLPurifier/AttrDef/HTML/Length.php | 41 - .../HTMLPurifier/AttrDef/HTML/MultiLength.php | 41 - library/HTMLPurifier/AttrDef/HTML/Pixels.php | 48 - library/HTMLPurifier/AttrDef/Text.php | 15 - library/HTMLPurifier/AttrDef/URI/Host.php | 62 - library/HTMLPurifier/AttrDef/URI/IPv6.php | 99 - .../HTMLPurifier/AttrTransform/BoolToCSS.php | 36 - .../HTMLPurifier/AttrTransform/EnumToCSS.php | 58 - library/HTMLPurifier/AttrTransform/Length.php | 27 - library/HTMLPurifier/AttrTransform/Name.php | 21 - .../HTMLPurifier/AttrTransform/NameSync.php | 27 - .../HTMLPurifier/AttrTransform/SafeObject.php | 16 - .../HTMLPurifier/AttrTransform/Textarea.php | 18 - library/HTMLPurifier/Bootstrap.php | 98 - library/HTMLPurifier/CSSDefinition.php | 292 - library/HTMLPurifier/ChildDef/Optional.php | 26 - library/HTMLPurifier/ChildDef/Required.php | 117 - .../ChildDef/StrictBlockquote.php | 88 - library/HTMLPurifier/ChildDef/Table.php | 142 - library/HTMLPurifier/Config.php | 580 -- .../ConfigSchema/ValidatorAtom.php | 66 - library/HTMLPurifier/ConfigSchema/schema.ser | Bin 13244 -> 0 bytes .../schema/HTML.AllowedElements.txt | 18 - library/HTMLPurifier/Context.php | 82 - library/HTMLPurifier/Definition.php | 39 - .../DefinitionCache/Decorator.php | 62 - .../DefinitionCache/Decorator/Cleanup.php | 43 - .../DefinitionCache/Decorator/Memory.php | 46 - .../DefinitionCache/Decorator/Template.php.in | 47 - library/HTMLPurifier/DefinitionCache/Null.php | 39 - .../DefinitionCache/Serializer.php | 172 - .../HTMLPurifier/EntityLookup/entities.ser | 1 - .../Filter/ExtractStyleBlocks.php | 135 - library/HTMLPurifier/Filter/YouTube.php | 39 - library/HTMLPurifier/HTMLModule/Forms.php | 118 - .../HTMLPurifier/HTMLModule/Tidy/Strict.php | 21 - library/HTMLPurifier/Injector/RemoveEmpty.php | 51 - library/HTMLPurifier/Language/messages/en.php | 63 - library/HTMLPurifier/Lexer/PEARSax3.php | 139 - library/HTMLPurifier/Lexer/PH5P.php | 3906 -------------- library/HTMLPurifier/Strategy/FixNesting.php | 328 -- library/HTMLPurifier/Token.php | 57 - library/HTMLPurifier/Token/Comment.php | 22 - library/HTMLPurifier/TokenFactory.php | 94 - library/HTMLPurifier/URI.php | 173 - library/HTMLPurifier/URIFilter.php | 45 - .../URIFilter/DisableExternal.php | 23 - .../URIFilter/DisableExternalResources.php | 12 - .../HTMLPurifier/URIFilter/HostBlacklist.php | 21 - library/HTMLPurifier/URIFilter/Munge.php | 58 - library/HTMLPurifier/URIScheme.php | 42 - library/HTMLPurifier/URIScheme/news.php | 22 - library/HTMLPurifier/URIScheme/nntp.php | 20 - library/HTMLPurifier/VarParser.php | 154 - library/ezyang/htmlpurifier/CREDITS | 9 + library/ezyang/htmlpurifier/INSTALL | 374 ++ library/ezyang/htmlpurifier/INSTALL.fr.utf8 | 60 + library/ezyang/htmlpurifier/LICENSE | 504 ++ library/ezyang/htmlpurifier/NEWS | 1094 ++++ library/ezyang/htmlpurifier/README | 24 + library/ezyang/htmlpurifier/TODO | 150 + library/ezyang/htmlpurifier/VERSION | 1 + library/ezyang/htmlpurifier/WHATSNEW | 4 + library/ezyang/htmlpurifier/WYSIWYG | 20 + library/ezyang/htmlpurifier/composer.json | 22 + .../extras/ConfigDoc/HTMLXSLTProcessor.php | 91 + .../ezyang/htmlpurifier/extras/FSTools.php | 164 + .../htmlpurifier/extras/FSTools/File.php | 141 + .../extras/HTMLPurifierExtras.auto.php | 11 + .../extras/HTMLPurifierExtras.autoload.php | 26 + .../extras/HTMLPurifierExtras.php | 31 + library/ezyang/htmlpurifier/extras/README | 32 + .../library}/HTMLPurifier.auto.php | 0 .../library}/HTMLPurifier.autoload.php | 8 +- .../library/HTMLPurifier.composer.php | 4 + .../library}/HTMLPurifier.func.php | 8 +- .../library}/HTMLPurifier.includes.php | 21 +- .../library}/HTMLPurifier.kses.php | 4 +- .../library}/HTMLPurifier.path.php | 0 .../htmlpurifier/library}/HTMLPurifier.php | 153 +- .../library}/HTMLPurifier.safe-includes.php | 19 + .../library/HTMLPurifier/Arborize.php | 71 + .../library}/HTMLPurifier/AttrCollections.php | 53 +- .../library}/HTMLPurifier/AttrDef.php | 53 +- .../library}/HTMLPurifier/AttrDef/CSS.php | 39 +- .../HTMLPurifier/AttrDef/CSS/AlphaValue.php | 34 + .../HTMLPurifier/AttrDef/CSS/Background.php | 60 +- .../AttrDef/CSS/BackgroundPosition.php | 54 +- .../HTMLPurifier/AttrDef/CSS/Border.php | 21 +- .../HTMLPurifier/AttrDef/CSS/Color.php | 67 +- .../HTMLPurifier/AttrDef/CSS/Composite.php | 22 +- .../AttrDef/CSS/DenyElementDecorator.php | 44 + .../HTMLPurifier/AttrDef/CSS/Filter.php | 49 +- .../HTMLPurifier/AttrDef/CSS/Font.php | 89 +- .../HTMLPurifier/AttrDef/CSS/FontFamily.php | 219 + .../HTMLPurifier/AttrDef/CSS/Ident.php | 32 + .../AttrDef/CSS/ImportantDecorator.php | 28 +- .../HTMLPurifier/AttrDef/CSS/Length.php | 77 + .../HTMLPurifier/AttrDef/CSS/ListStyle.php | 112 + .../HTMLPurifier/AttrDef/CSS/Multiple.php | 33 +- .../HTMLPurifier/AttrDef/CSS/Number.php | 45 +- .../HTMLPurifier/AttrDef/CSS/Percentage.php | 54 + .../AttrDef/CSS/TextDecoration.php | 20 +- .../library}/HTMLPurifier/AttrDef/CSS/URI.php | 38 +- .../library/HTMLPurifier/AttrDef/Clone.php | 44 + .../library}/HTMLPurifier/AttrDef/Enum.php | 28 +- .../HTMLPurifier/AttrDef/HTML/Bool.php | 48 + .../HTMLPurifier/AttrDef/HTML/Class.php | 22 +- .../HTMLPurifier/AttrDef/HTML/Color.php | 51 + .../HTMLPurifier/AttrDef/HTML/FrameTarget.php | 38 + .../library/HTMLPurifier/AttrDef/HTML/ID.php | 105 + .../HTMLPurifier/AttrDef/HTML/Length.php | 56 + .../HTMLPurifier/AttrDef/HTML/LinkTypes.php | 43 +- .../HTMLPurifier/AttrDef/HTML/MultiLength.php | 60 + .../HTMLPurifier/AttrDef/HTML/Nmtokens.php | 40 +- .../HTMLPurifier/AttrDef/HTML/Pixels.php | 76 + .../library}/HTMLPurifier/AttrDef/Integer.php | 56 +- .../library}/HTMLPurifier/AttrDef/Lang.php | 39 +- .../library}/HTMLPurifier/AttrDef/Switch.php | 27 +- .../library/HTMLPurifier/AttrDef/Text.php | 21 + .../library}/HTMLPurifier/AttrDef/URI.php | 72 +- .../HTMLPurifier/AttrDef/URI/Email.php | 5 +- .../AttrDef/URI/Email/SimpleCheck.php | 14 +- .../library/HTMLPurifier/AttrDef/URI/Host.php | 128 + .../HTMLPurifier/AttrDef/URI/IPv4.php | 28 +- .../library/HTMLPurifier/AttrDef/URI/IPv6.php | 89 + .../library}/HTMLPurifier/AttrTransform.php | 28 +- .../HTMLPurifier/AttrTransform/Background.php | 21 +- .../HTMLPurifier/AttrTransform/BdoDir.php | 14 +- .../HTMLPurifier/AttrTransform/BgColor.php | 21 +- .../HTMLPurifier/AttrTransform/BoolToCSS.php | 47 + .../HTMLPurifier/AttrTransform/Border.php | 18 +- .../HTMLPurifier/AttrTransform/EnumToCSS.php | 68 + .../AttrTransform/ImgRequired.php | 19 +- .../HTMLPurifier/AttrTransform/ImgSpace.php | 39 +- .../HTMLPurifier/AttrTransform/Input.php | 34 +- .../HTMLPurifier/AttrTransform/Lang.php | 15 +- .../HTMLPurifier/AttrTransform/Length.php | 45 + .../HTMLPurifier/AttrTransform/Name.php | 33 + .../HTMLPurifier/AttrTransform/NameSync.php | 41 + .../HTMLPurifier/AttrTransform/Nofollow.php | 52 + .../HTMLPurifier/AttrTransform/SafeEmbed.php | 12 +- .../HTMLPurifier/AttrTransform/SafeObject.php | 28 + .../HTMLPurifier/AttrTransform/SafeParam.php | 29 +- .../AttrTransform/ScriptRequired.php | 9 +- .../AttrTransform/TargetBlank.php | 45 + .../HTMLPurifier/AttrTransform/Textarea.php | 27 + .../library}/HTMLPurifier/AttrTypes.php | 45 +- .../library}/HTMLPurifier/AttrValidator.php | 66 +- .../library/HTMLPurifier/Bootstrap.php | 124 + .../library/HTMLPurifier/CSSDefinition.php | 474 ++ .../library}/HTMLPurifier/ChildDef.php | 26 +- .../HTMLPurifier/ChildDef/Chameleon.php | 33 +- .../library}/HTMLPurifier/ChildDef/Custom.php | 60 +- .../library}/HTMLPurifier/ChildDef/Empty.php | 22 +- .../library/HTMLPurifier/ChildDef/List.php | 86 + .../HTMLPurifier/ChildDef/Optional.php | 45 + .../HTMLPurifier/ChildDef/Required.php | 118 + .../ChildDef/StrictBlockquote.php | 110 + .../library/HTMLPurifier/ChildDef/Table.php | 224 + .../library/HTMLPurifier/Config.php | 920 ++++ .../library}/HTMLPurifier/ConfigSchema.php | 78 +- .../ConfigSchema/Builder/ConfigSchema.php | 8 +- .../HTMLPurifier/ConfigSchema/Builder/Xml.php | 94 +- .../HTMLPurifier/ConfigSchema/Exception.php | 0 .../HTMLPurifier/ConfigSchema/Interchange.php | 11 +- .../ConfigSchema/Interchange/Directive.php | 28 +- .../ConfigSchema/Interchange/Id.php | 33 +- .../ConfigSchema/InterchangeBuilder.php | 90 +- .../HTMLPurifier/ConfigSchema/Validator.php | 90 +- .../ConfigSchema/ValidatorAtom.php | 130 + .../HTMLPurifier/ConfigSchema/schema.ser | Bin 0 -> 15305 bytes .../schema/Attr.AllowedClasses.txt | 0 .../schema/Attr.AllowedFrameTargets.txt | 0 .../ConfigSchema/schema/Attr.AllowedRel.txt | 0 .../ConfigSchema/schema/Attr.AllowedRev.txt | 0 .../schema/Attr.ClassUseCDATA.txt | 0 .../schema/Attr.DefaultImageAlt.txt | 0 .../schema/Attr.DefaultInvalidImage.txt | 0 .../schema/Attr.DefaultInvalidImageAlt.txt | 0 .../schema/Attr.DefaultTextDir.txt | 0 .../ConfigSchema/schema/Attr.EnableID.txt | 0 .../schema/Attr.ForbiddenClasses.txt | 0 .../ConfigSchema/schema/Attr.IDBlacklist.txt | 0 .../schema/Attr.IDBlacklistRegexp.txt | 0 .../ConfigSchema/schema/Attr.IDPrefix.txt | 0 .../schema/Attr.IDPrefixLocal.txt | 0 .../schema/AutoFormat.AutoParagraph.txt | 0 .../ConfigSchema/schema/AutoFormat.Custom.txt | 0 .../schema/AutoFormat.DisplayLinkURI.txt | 0 .../schema/AutoFormat.Linkify.txt | 0 .../AutoFormat.PurifierLinkify.DocURL.txt | 0 .../schema/AutoFormat.PurifierLinkify.txt | 0 .../AutoFormat.RemoveEmpty.Predicate.txt | 14 + ...rmat.RemoveEmpty.RemoveNbsp.Exceptions.txt | 0 .../AutoFormat.RemoveEmpty.RemoveNbsp.txt | 0 .../schema/AutoFormat.RemoveEmpty.txt | 0 ...utoFormat.RemoveSpansWithoutAttributes.txt | 0 .../schema/CSS.AllowImportant.txt | 0 .../ConfigSchema/schema/CSS.AllowTricky.txt | 0 .../ConfigSchema/schema/CSS.AllowedFonts.txt | 12 + .../schema/CSS.AllowedProperties.txt | 0 .../ConfigSchema/schema/CSS.DefinitionRev.txt | 0 .../schema/CSS.ForbiddenProperties.txt | 13 + .../ConfigSchema/schema/CSS.MaxImgLength.txt | 0 .../ConfigSchema/schema/CSS.Proprietary.txt | 0 .../ConfigSchema/schema/CSS.Trusted.txt | 9 + .../schema/Cache.DefinitionImpl.txt | 0 .../schema/Cache.SerializerPath.txt | 0 .../schema/Cache.SerializerPermissions.txt | 11 + .../schema/Core.AggressivelyFixLt.txt | 0 .../schema/Core.AllowHostnameUnderscore.txt | 16 + .../schema/Core.CollectErrors.txt | 0 .../schema/Core.ColorKeywords.txt | 3 +- .../schema/Core.ConvertDocumentToFragment.txt | 0 .../Core.DirectLexLineNumberSyncInterval.txt | 0 .../schema/Core.DisableExcludes.txt | 14 + .../ConfigSchema/schema/Core.EnableIDNA.txt | 9 + .../ConfigSchema/schema/Core.Encoding.txt | 0 .../schema/Core.EscapeInvalidChildren.txt | 6 +- .../schema/Core.EscapeInvalidTags.txt | 0 .../schema/Core.EscapeNonASCIICharacters.txt | 0 .../schema/Core.HiddenElements.txt | 0 .../ConfigSchema/schema/Core.Language.txt | 0 .../ConfigSchema/schema/Core.LexerImpl.txt | 0 .../schema/Core.MaintainLineNumbers.txt | 0 .../schema/Core.NormalizeNewlines.txt | 11 + .../schema/Core.RemoveInvalidImg.txt | 0 .../Core.RemoveProcessingInstructions.txt | 11 + .../schema/Core.RemoveScriptContents.txt | 0 .../ConfigSchema/schema/Filter.Custom.txt | 0 .../Filter.ExtractStyleBlocks.Escaping.txt | 0 .../Filter.ExtractStyleBlocks.Scope.txt | 0 .../Filter.ExtractStyleBlocks.TidyImpl.txt | 0 .../schema/Filter.ExtractStyleBlocks.txt | 0 .../ConfigSchema/schema/Filter.YouTube.txt | 5 + .../ConfigSchema/schema/HTML.Allowed.txt | 11 +- .../schema/HTML.AllowedAttributes.txt | 0 .../schema/HTML.AllowedComments.txt | 10 + .../schema/HTML.AllowedCommentsRegexp.txt | 15 + .../schema/HTML.AllowedElements.txt | 23 + .../schema/HTML.AllowedModules.txt | 0 .../schema/HTML.Attr.Name.UseCDATA.txt | 0 .../ConfigSchema/schema/HTML.BlockWrapper.txt | 0 .../ConfigSchema/schema/HTML.CoreModules.txt | 0 .../schema/HTML.CustomDoctype.txt | 2 +- .../ConfigSchema/schema/HTML.DefinitionID.txt | 0 .../schema/HTML.DefinitionRev.txt | 0 .../ConfigSchema/schema/HTML.Doctype.txt | 0 .../schema/HTML.FlashAllowFullScreen.txt | 11 + .../schema/HTML.ForbiddenAttributes.txt | 0 .../schema/HTML.ForbiddenElements.txt | 0 .../ConfigSchema/schema/HTML.MaxImgLength.txt | 0 .../ConfigSchema/schema/HTML.Nofollow.txt | 7 + .../ConfigSchema/schema/HTML.Parent.txt | 0 .../ConfigSchema/schema/HTML.Proprietary.txt | 0 .../ConfigSchema/schema/HTML.SafeEmbed.txt | 0 .../ConfigSchema/schema/HTML.SafeIframe.txt | 13 + .../ConfigSchema/schema/HTML.SafeObject.txt | 0 .../schema/HTML.SafeScripting.txt | 10 + .../ConfigSchema/schema/HTML.Strict.txt | 0 .../ConfigSchema/schema/HTML.TargetBlank.txt | 8 + .../ConfigSchema/schema/HTML.TidyAdd.txt | 0 .../ConfigSchema/schema/HTML.TidyLevel.txt | 0 .../ConfigSchema/schema/HTML.TidyRemove.txt | 0 .../ConfigSchema/schema/HTML.Trusted.txt | 1 + .../ConfigSchema/schema/HTML.XHTML.txt | 0 .../schema/Output.CommentScriptContents.txt | 0 .../schema/Output.FixInnerHTML.txt | 15 + .../schema/Output.FlashCompat.txt | 0 .../ConfigSchema/schema/Output.Newline.txt | 0 .../ConfigSchema/schema/Output.SortAttr.txt | 0 .../ConfigSchema/schema/Output.TidyFormat.txt | 0 .../ConfigSchema/schema/Test.ForceNoIconv.txt | 0 .../schema/URI.AllowedSchemes.txt | 4 +- .../ConfigSchema/schema/URI.Base.txt | 0 .../ConfigSchema/schema/URI.DefaultScheme.txt | 0 .../ConfigSchema/schema/URI.DefinitionID.txt | 0 .../ConfigSchema/schema/URI.DefinitionRev.txt | 0 .../ConfigSchema/schema/URI.Disable.txt | 0 .../schema/URI.DisableExternal.txt | 0 .../schema/URI.DisableExternalResources.txt | 0 .../schema/URI.DisableResources.txt | 7 +- .../ConfigSchema/schema/URI.Host.txt | 0 .../ConfigSchema/schema/URI.HostBlacklist.txt | 0 .../ConfigSchema/schema/URI.MakeAbsolute.txt | 0 .../ConfigSchema/schema/URI.Munge.txt | 0 .../schema/URI.MungeResources.txt | 0 .../schema/URI.MungeSecretKey.txt | 2 +- .../schema/URI.OverrideAllowedSchemes.txt | 0 .../schema/URI.SafeIframeRegexp.txt | 22 + .../HTMLPurifier/ConfigSchema/schema/info.ini | 0 .../library}/HTMLPurifier/ContentSets.php | 55 +- .../library/HTMLPurifier/Context.php | 95 + .../library/HTMLPurifier/Definition.php | 55 + .../library}/HTMLPurifier/DefinitionCache.php | 57 +- .../DefinitionCache/Decorator.php | 112 + .../DefinitionCache/Decorator/Cleanup.php | 78 + .../DefinitionCache/Decorator/Memory.php | 85 + .../DefinitionCache/Decorator/Template.php.in | 82 + .../HTMLPurifier/DefinitionCache/Null.php | 76 + .../DefinitionCache/Serializer.php | 291 + .../DefinitionCache/Serializer/README | 0 .../HTMLPurifier/DefinitionCacheFactory.php | 47 +- .../library}/HTMLPurifier/Doctype.php | 17 +- .../library}/HTMLPurifier/DoctypeRegistry.php | 87 +- .../library}/HTMLPurifier/ElementDef.php | 85 +- .../library}/HTMLPurifier/Encoder.php | 303 +- .../library}/HTMLPurifier/EntityLookup.php | 16 +- .../HTMLPurifier/EntityLookup/entities.ser | 1 + .../library}/HTMLPurifier/EntityParser.php | 53 +- .../library}/HTMLPurifier/ErrorCollector.php | 87 +- .../library}/HTMLPurifier/ErrorStruct.php | 20 +- .../library}/HTMLPurifier/Exception.php | 0 .../library}/HTMLPurifier/Filter.php | 18 +- .../Filter/ExtractStyleBlocks.php | 338 ++ .../library/HTMLPurifier/Filter/YouTube.php | 65 + .../library}/HTMLPurifier/Generator.php | 164 +- .../library}/HTMLPurifier/HTMLDefinition.php | 211 +- .../library}/HTMLPurifier/HTMLModule.php | 122 +- .../library}/HTMLPurifier/HTMLModule/Bdo.php | 21 +- .../HTMLModule/CommonAttributes.php | 7 +- .../library}/HTMLPurifier/HTMLModule/Edit.php | 25 +- .../library/HTMLPurifier/HTMLModule/Forms.php | 190 + .../HTMLPurifier/HTMLModule/Hypertext.php | 15 +- .../HTMLPurifier/HTMLModule/Iframe.php | 51 + .../HTMLPurifier/HTMLModule/Image.php | 17 +- .../HTMLPurifier/HTMLModule/Legacy.php | 89 +- .../library}/HTMLPurifier/HTMLModule/List.php | 28 +- .../library}/HTMLPurifier/HTMLModule/Name.php | 13 +- .../HTMLPurifier/HTMLModule/Nofollow.php | 25 + .../HTMLModule/NonXMLCommonAttributes.php | 6 + .../HTMLPurifier/HTMLModule/Object.php | 31 +- .../HTMLPurifier/HTMLModule/Presentation.php | 26 +- .../HTMLPurifier/HTMLModule/Proprietary.php | 19 +- .../library}/HTMLPurifier/HTMLModule/Ruby.php | 17 +- .../HTMLPurifier/HTMLModule/SafeEmbed.php | 20 +- .../HTMLPurifier/HTMLModule/SafeObject.php | 33 +- .../HTMLPurifier/HTMLModule/SafeScripting.php | 40 + .../HTMLPurifier/HTMLModule/Scripting.php | 31 +- .../HTMLModule/StyleAttribute.php | 15 +- .../HTMLPurifier/HTMLModule/Tables.php | 29 +- .../HTMLPurifier/HTMLModule/Target.php | 11 +- .../HTMLPurifier/HTMLModule/TargetBlank.php | 24 + .../library}/HTMLPurifier/HTMLModule/Text.php | 56 +- .../library}/HTMLPurifier/HTMLModule/Tidy.php | 85 +- .../HTMLPurifier/HTMLModule/Tidy/Name.php | 15 +- .../HTMLModule/Tidy/Proprietary.php | 14 +- .../HTMLPurifier/HTMLModule/Tidy/Strict.php | 43 + .../HTMLModule/Tidy/Transitional.php | 7 + .../HTMLPurifier/HTMLModule/Tidy/XHTML.php | 15 +- .../HTMLModule/Tidy/XHTMLAndHTML4.php | 146 +- .../HTMLModule/XMLCommonAttributes.php | 6 + .../HTMLPurifier/HTMLModuleManager.php | 166 +- .../library}/HTMLPurifier/IDAccumulator.php | 24 +- .../library}/HTMLPurifier/Injector.php | 194 +- .../HTMLPurifier/Injector/AutoParagraph.php | 99 +- .../HTMLPurifier/Injector/DisplayLinkURI.php | 22 +- .../HTMLPurifier/Injector/Linkify.php | 27 +- .../HTMLPurifier/Injector/PurifierLinkify.php | 46 +- .../HTMLPurifier/Injector/RemoveEmpty.php | 106 + .../Injector/RemoveSpansWithoutAttributes.php | 36 +- .../HTMLPurifier/Injector/SafeObject.php | 53 +- .../library}/HTMLPurifier/Language.php | 97 +- .../Language/classes/en-x-test.php | 3 - .../Language/messages/en-x-test.php | 0 .../Language/messages/en-x-testmini.php | 0 .../HTMLPurifier/Language/messages/en.php | 55 + .../library}/HTMLPurifier/LanguageFactory.php | 81 +- .../library}/HTMLPurifier/Length.php | 91 +- .../library}/HTMLPurifier/Lexer.php | 189 +- .../library}/HTMLPurifier/Lexer/DOMLex.php | 162 +- .../library}/HTMLPurifier/Lexer/DirectLex.php | 217 +- .../library/HTMLPurifier/Lexer/PH5P.php | 4787 +++++++++++++++++ .../library/HTMLPurifier/Node.php | 49 + .../library/HTMLPurifier/Node/Comment.php | 36 + .../library/HTMLPurifier/Node/Element.php | 59 + .../library/HTMLPurifier/Node/Text.php | 54 + .../library}/HTMLPurifier/PercentEncoder.php | 37 +- .../library}/HTMLPurifier/Printer.php | 134 +- .../HTMLPurifier/Printer/CSSDefinition.php | 12 +- .../HTMLPurifier/Printer/ConfigForm.css | 0 .../HTMLPurifier/Printer/ConfigForm.js | 0 .../HTMLPurifier/Printer/ConfigForm.php | 255 +- .../HTMLPurifier/Printer/HTMLDefinition.php | 208 +- .../library}/HTMLPurifier/PropertyList.php | 68 +- .../HTMLPurifier/PropertyListIterator.php | 22 +- .../library/HTMLPurifier/Queue.php | 56 + .../library}/HTMLPurifier/Strategy.php | 8 +- .../HTMLPurifier/Strategy/Composite.php | 13 +- .../library}/HTMLPurifier/Strategy/Core.php | 5 +- .../HTMLPurifier/Strategy/FixNesting.php | 181 + .../HTMLPurifier/Strategy/MakeWellFormed.php | 383 +- .../Strategy/RemoveForeignElements.php | 94 +- .../Strategy/ValidateAttributes.php | 22 +- .../library}/HTMLPurifier/StringHash.php | 16 +- .../HTMLPurifier/StringHashParser.php | 52 +- .../library}/HTMLPurifier/TagTransform.php | 15 +- .../HTMLPurifier/TagTransform/Font.php | 42 +- .../HTMLPurifier/TagTransform/Simple.php | 21 +- .../library/HTMLPurifier/Token.php | 100 + .../library/HTMLPurifier/Token/Comment.php | 38 + .../library}/HTMLPurifier/Token/Empty.php | 6 +- .../library}/HTMLPurifier/Token/End.php | 9 +- .../library}/HTMLPurifier/Token/Start.php | 1 - .../library}/HTMLPurifier/Token/Tag.php | 22 +- .../library}/HTMLPurifier/Token/Text.php | 34 +- .../library/HTMLPurifier/TokenFactory.php | 118 + .../htmlpurifier/library/HTMLPurifier/URI.php | 314 ++ .../library}/HTMLPurifier/URIDefinition.php | 39 +- .../library/HTMLPurifier/URIFilter.php | 74 + .../URIFilter/DisableExternal.php | 54 + .../URIFilter/DisableExternalResources.php | 25 + .../URIFilter/DisableResources.php | 22 + .../HTMLPurifier/URIFilter/HostBlacklist.php | 46 + .../HTMLPurifier/URIFilter/MakeAbsolute.php | 74 +- .../library/HTMLPurifier/URIFilter/Munge.php | 115 + .../HTMLPurifier/URIFilter/SafeIframe.php | 68 + .../library}/HTMLPurifier/URIParser.php | 9 +- .../library/HTMLPurifier/URIScheme.php | 102 + .../library}/HTMLPurifier/URIScheme/data.php | 58 +- .../library/HTMLPurifier/URIScheme/file.php | 44 + .../library}/HTMLPurifier/URIScheme/ftp.php | 29 +- .../library}/HTMLPurifier/URIScheme/http.php | 26 +- .../library}/HTMLPurifier/URIScheme/https.php | 12 +- .../HTMLPurifier/URIScheme/mailto.php | 23 +- .../library/HTMLPurifier/URIScheme/news.php | 35 + .../library/HTMLPurifier/URIScheme/nntp.php | 32 + .../HTMLPurifier/URISchemeRegistry.php | 41 +- .../library}/HTMLPurifier/UnitConverter.php | 117 +- .../library/HTMLPurifier/VarParser.php | 198 + .../HTMLPurifier/VarParser/Flexible.php | 88 +- .../HTMLPurifier/VarParser/Native.php | 18 +- .../HTMLPurifier/VarParserException.php | 0 .../library/HTMLPurifier/Zipper.php | 157 + library/ezyang/htmlpurifier/package.php | 61 + library/ezyang/htmlpurifier/phpdoc.ini | 102 + library/ezyang/htmlpurifier/plugins/modx.txt | 112 + .../htmlpurifier/plugins/phorum/.gitignore | 2 + .../htmlpurifier/plugins/phorum/Changelog | 27 + .../htmlpurifier/plugins/phorum/INSTALL | 84 + .../ezyang/htmlpurifier/plugins/phorum/README | 45 + .../plugins/phorum/config.default.php | 57 + .../plugins/phorum/htmlpurifier.php | 316 ++ .../htmlpurifier/plugins/phorum/info.txt | 18 + .../plugins/phorum/init-config.php | 30 + .../plugins/phorum/migrate.bbcode.php | 31 + .../htmlpurifier/plugins/phorum/settings.php | 64 + .../plugins/phorum/settings/form.php | 95 + .../phorum/settings/migrate-sigs-form.php | 22 + .../plugins/phorum/settings/migrate-sigs.php | 79 + .../plugins/phorum/settings/save.php | 29 + .../ezyang/htmlpurifier/release1-update.php | 110 + library/ezyang/htmlpurifier/release2-tag.php | 22 + .../htmlpurifier/test-settings.sample.php | 76 + 465 files changed, 22433 insertions(+), 10865 deletions(-) delete mode 100644 library/HTMLPurifier/AttrDef/CSS/AlphaValue.php delete mode 100644 library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php delete mode 100644 library/HTMLPurifier/AttrDef/CSS/FontFamily.php delete mode 100644 library/HTMLPurifier/AttrDef/CSS/Length.php delete mode 100644 library/HTMLPurifier/AttrDef/CSS/ListStyle.php delete mode 100644 library/HTMLPurifier/AttrDef/CSS/Percentage.php delete mode 100644 library/HTMLPurifier/AttrDef/HTML/Bool.php delete mode 100644 library/HTMLPurifier/AttrDef/HTML/Color.php delete mode 100644 library/HTMLPurifier/AttrDef/HTML/FrameTarget.php delete mode 100644 library/HTMLPurifier/AttrDef/HTML/ID.php delete mode 100644 library/HTMLPurifier/AttrDef/HTML/Length.php delete mode 100644 library/HTMLPurifier/AttrDef/HTML/MultiLength.php delete mode 100644 library/HTMLPurifier/AttrDef/HTML/Pixels.php delete mode 100644 library/HTMLPurifier/AttrDef/Text.php delete mode 100644 library/HTMLPurifier/AttrDef/URI/Host.php delete mode 100644 library/HTMLPurifier/AttrDef/URI/IPv6.php delete mode 100644 library/HTMLPurifier/AttrTransform/BoolToCSS.php delete mode 100644 library/HTMLPurifier/AttrTransform/EnumToCSS.php delete mode 100644 library/HTMLPurifier/AttrTransform/Length.php delete mode 100644 library/HTMLPurifier/AttrTransform/Name.php delete mode 100644 library/HTMLPurifier/AttrTransform/NameSync.php delete mode 100644 library/HTMLPurifier/AttrTransform/SafeObject.php delete mode 100644 library/HTMLPurifier/AttrTransform/Textarea.php delete mode 100644 library/HTMLPurifier/Bootstrap.php delete mode 100644 library/HTMLPurifier/CSSDefinition.php delete mode 100644 library/HTMLPurifier/ChildDef/Optional.php delete mode 100644 library/HTMLPurifier/ChildDef/Required.php delete mode 100644 library/HTMLPurifier/ChildDef/StrictBlockquote.php delete mode 100644 library/HTMLPurifier/ChildDef/Table.php delete mode 100644 library/HTMLPurifier/Config.php delete mode 100644 library/HTMLPurifier/ConfigSchema/ValidatorAtom.php delete mode 100644 library/HTMLPurifier/ConfigSchema/schema.ser delete mode 100644 library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt delete mode 100644 library/HTMLPurifier/Context.php delete mode 100644 library/HTMLPurifier/Definition.php delete mode 100644 library/HTMLPurifier/DefinitionCache/Decorator.php delete mode 100644 library/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php delete mode 100644 library/HTMLPurifier/DefinitionCache/Decorator/Memory.php delete mode 100644 library/HTMLPurifier/DefinitionCache/Decorator/Template.php.in delete mode 100644 library/HTMLPurifier/DefinitionCache/Null.php delete mode 100644 library/HTMLPurifier/DefinitionCache/Serializer.php delete mode 100644 library/HTMLPurifier/EntityLookup/entities.ser delete mode 100644 library/HTMLPurifier/Filter/ExtractStyleBlocks.php delete mode 100644 library/HTMLPurifier/Filter/YouTube.php delete mode 100644 library/HTMLPurifier/HTMLModule/Forms.php delete mode 100644 library/HTMLPurifier/HTMLModule/Tidy/Strict.php delete mode 100644 library/HTMLPurifier/Injector/RemoveEmpty.php delete mode 100644 library/HTMLPurifier/Language/messages/en.php delete mode 100644 library/HTMLPurifier/Lexer/PEARSax3.php delete mode 100644 library/HTMLPurifier/Lexer/PH5P.php delete mode 100644 library/HTMLPurifier/Strategy/FixNesting.php delete mode 100644 library/HTMLPurifier/Token.php delete mode 100644 library/HTMLPurifier/Token/Comment.php delete mode 100644 library/HTMLPurifier/TokenFactory.php delete mode 100644 library/HTMLPurifier/URI.php delete mode 100644 library/HTMLPurifier/URIFilter.php delete mode 100644 library/HTMLPurifier/URIFilter/DisableExternal.php delete mode 100644 library/HTMLPurifier/URIFilter/DisableExternalResources.php delete mode 100644 library/HTMLPurifier/URIFilter/HostBlacklist.php delete mode 100644 library/HTMLPurifier/URIFilter/Munge.php delete mode 100644 library/HTMLPurifier/URIScheme.php delete mode 100644 library/HTMLPurifier/URIScheme/news.php delete mode 100644 library/HTMLPurifier/URIScheme/nntp.php delete mode 100644 library/HTMLPurifier/VarParser.php create mode 100644 library/ezyang/htmlpurifier/CREDITS create mode 100644 library/ezyang/htmlpurifier/INSTALL create mode 100644 library/ezyang/htmlpurifier/INSTALL.fr.utf8 create mode 100644 library/ezyang/htmlpurifier/LICENSE create mode 100644 library/ezyang/htmlpurifier/NEWS create mode 100644 library/ezyang/htmlpurifier/README create mode 100644 library/ezyang/htmlpurifier/TODO create mode 100644 library/ezyang/htmlpurifier/VERSION create mode 100644 library/ezyang/htmlpurifier/WHATSNEW create mode 100644 library/ezyang/htmlpurifier/WYSIWYG create mode 100644 library/ezyang/htmlpurifier/composer.json create mode 100644 library/ezyang/htmlpurifier/extras/ConfigDoc/HTMLXSLTProcessor.php create mode 100644 library/ezyang/htmlpurifier/extras/FSTools.php create mode 100644 library/ezyang/htmlpurifier/extras/FSTools/File.php create mode 100644 library/ezyang/htmlpurifier/extras/HTMLPurifierExtras.auto.php create mode 100644 library/ezyang/htmlpurifier/extras/HTMLPurifierExtras.autoload.php create mode 100644 library/ezyang/htmlpurifier/extras/HTMLPurifierExtras.php create mode 100644 library/ezyang/htmlpurifier/extras/README rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.auto.php (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.autoload.php (70%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier.composer.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.func.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.includes.php (91%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.kses.php (96%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.path.php (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.php (66%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier.safe-includes.php (91%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Arborize.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrCollections.php (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef.php (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS.php (77%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Background.php (63%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php (73%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Border.php (71%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Color.php (54%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Composite.php (61%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Filter.php (62%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Font.php (67%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/FontFamily.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php (62%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Length.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/ListStyle.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Multiple.php (65%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/Number.php (55%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Percentage.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/TextDecoration.php (69%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/CSS/URI.php (58%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/Enum.php (70%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Bool.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/HTML/Class.php (66%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Length.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/HTML/LinkTypes.php (56%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/MultiLength.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/HTML/Nmtokens.php (55%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Pixels.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/Integer.php (55%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/Lang.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/Switch.php (62%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/Text.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/URI.php (53%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/URI/Email.php (73%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php (65%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrDef/URI/IPv4.php (51%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrDef/URI/IPv6.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform.php (63%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/Background.php (55%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/BdoDir.php (55%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/BgColor.php (55%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/BoolToCSS.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/Border.php (55%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/EnumToCSS.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/ImgRequired.php (77%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/ImgSpace.php (60%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/Input.php (61%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/Lang.php (68%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/Length.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/Name.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/NameSync.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/Nofollow.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/SafeEmbed.php (56%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/SafeObject.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/SafeParam.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTransform/ScriptRequired.php (59%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/TargetBlank.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/AttrTransform/Textarea.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrTypes.php (64%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/AttrValidator.php (74%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Bootstrap.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/CSSDefinition.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ChildDef.php (52%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ChildDef/Chameleon.php (57%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ChildDef/Custom.php (71%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ChildDef/Empty.php (58%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/List.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Config.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema.php (69%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php (86%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/Builder/Xml.php (52%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/Exception.php (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/Interchange.php (76%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/Interchange/Directive.php (65%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/Interchange/Id.php (54%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/InterchangeBuilder.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/Validator.php (73%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema.ser rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.Predicate.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.AllowHostnameUnderscore.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt (84%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.DisableExcludes.txt create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt (62%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.Language.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt (65%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt (54%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt (72%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.SafeScripting.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt (91%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.Base.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt (64%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.Host.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt (93%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ConfigSchema/schema/info.ini (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ContentSets.php (76%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Context.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Definition.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/DefinitionCache.php (66%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Memory.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Decorator/Template.php.in create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Null.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/DefinitionCache/Serializer.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/DefinitionCache/Serializer/README (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/DefinitionCacheFactory.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Doctype.php (77%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/DoctypeRegistry.php (51%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ElementDef.php (69%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Encoder.php (63%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/EntityLookup.php (75%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/EntityLookup/entities.ser rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/EntityParser.php (75%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ErrorCollector.php (82%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/ErrorStruct.php (81%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Exception.php (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Filter.php (71%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Generator.php (56%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLDefinition.php (70%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule.php (71%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Bdo.php (66%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/CommonAttributes.php (89%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Edit.php (71%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/Forms.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Hypertext.php (78%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/Iframe.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Image.php (80%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Legacy.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/List.php (57%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Name.php (65%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/Nofollow.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php (79%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Object.php (71%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Presentation.php (52%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Proprietary.php (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Ruby.php (77%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/SafeEmbed.php (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/SafeObject.php (67%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/SafeScripting.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Scripting.php (76%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/StyleAttribute.php (78%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Tables.php (75%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Target.php (77%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/TargetBlank.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Text.php (58%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Tidy.php (78%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Tidy/Name.php (80%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Tidy/Proprietary.php (85%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/HTMLModule/Tidy/Strict.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Tidy/Transitional.php (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Tidy/XHTML.php (66%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php (50%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModule/XMLCommonAttributes.php (79%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/HTMLModuleManager.php (75%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/IDAccumulator.php (66%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Injector.php (55%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Injector/AutoParagraph.php (88%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Injector/DisplayLinkURI.php (64%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Injector/Linkify.php (72%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Injector/PurifierLinkify.php (58%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Injector/RemoveEmpty.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Injector/SafeObject.php (78%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Language.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Language/classes/en-x-test.php (97%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Language/messages/en-x-test.php (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Language/messages/en-x-testmini.php (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Language/messages/en.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/LanguageFactory.php (75%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Length.php (56%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Lexer.php (63%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Lexer/DOMLex.php (58%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Lexer/DirectLex.php (76%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Node.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Comment.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Element.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Text.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/PercentEncoder.php (78%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Printer.php (54%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Printer/CSSDefinition.php (85%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Printer/ConfigForm.css (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Printer/ConfigForm.js (100%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Printer/ConfigForm.php (60%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Printer/HTMLDefinition.php (53%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/PropertyList.php (50%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/PropertyListIterator.php (60%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Queue.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Strategy.php (66%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Strategy/Composite.php (61%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Strategy/Core.php (92%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/FixNesting.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Strategy/MakeWellFormed.php (52%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Strategy/RemoveForeignElements.php (64%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Strategy/ValidateAttributes.php (65%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/StringHash.php (75%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/StringHashParser.php (73%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/TagTransform.php (62%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/TagTransform/Font.php (71%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/TagTransform/Simple.php (61%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Token.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Token/Comment.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Token/Empty.php (54%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Token/End.php (58%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Token/Start.php (99%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Token/Tag.php (72%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/Token/Text.php (54%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/TokenFactory.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URI.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIDefinition.php (70%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIFilter.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternal.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIFilter/DisableExternalResources.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIFilter/DisableResources.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIFilter/HostBlacklist.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIFilter/MakeAbsolute.php (71%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIFilter/Munge.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIFilter/SafeIframe.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIParser.php (94%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIScheme/data.php (74%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/file.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIScheme/ftp.php (74%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIScheme/http.php (50%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIScheme/https.php (65%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URIScheme/mailto.php (63%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/news.php create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/URIScheme/nntp.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/URISchemeRegistry.php (58%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/UnitConverter.php (75%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/VarParser.php rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/VarParser/Flexible.php (56%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/VarParser/Native.php (67%) rename library/{ => ezyang/htmlpurifier/library}/HTMLPurifier/VarParserException.php (100%) create mode 100644 library/ezyang/htmlpurifier/library/HTMLPurifier/Zipper.php create mode 100644 library/ezyang/htmlpurifier/package.php create mode 100644 library/ezyang/htmlpurifier/phpdoc.ini create mode 100644 library/ezyang/htmlpurifier/plugins/modx.txt create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/.gitignore create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/Changelog create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/INSTALL create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/README create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/config.default.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/htmlpurifier.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/info.txt create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/init-config.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/migrate.bbcode.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/settings.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/settings/form.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/settings/migrate-sigs-form.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/settings/migrate-sigs.php create mode 100644 library/ezyang/htmlpurifier/plugins/phorum/settings/save.php create mode 100644 library/ezyang/htmlpurifier/release1-update.php create mode 100644 library/ezyang/htmlpurifier/release2-tag.php create mode 100644 library/ezyang/htmlpurifier/test-settings.sample.php diff --git a/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php b/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php deleted file mode 100644 index 292c040d4b..0000000000 --- a/library/HTMLPurifier/AttrDef/CSS/AlphaValue.php +++ /dev/null @@ -1,21 +0,0 @@ - 1.0) $result = '1'; - return $result; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php b/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php deleted file mode 100644 index 6599c5b2dd..0000000000 --- a/library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php +++ /dev/null @@ -1,28 +0,0 @@ -def = $def; - $this->element = $element; - } - /** - * Checks if CurrentToken is set and equal to $this->element - */ - public function validate($string, $config, $context) { - $token = $context->get('CurrentToken', true); - if ($token && $token->name == $this->element) return false; - return $this->def->validate($string, $config, $context); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php deleted file mode 100644 index 42c2054c2a..0000000000 --- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php +++ /dev/null @@ -1,72 +0,0 @@ - true, - 'sans-serif' => true, - 'monospace' => true, - 'fantasy' => true, - 'cursive' => true - ); - - // assume that no font names contain commas in them - $fonts = explode(',', $string); - $final = ''; - foreach($fonts as $font) { - $font = trim($font); - if ($font === '') continue; - // match a generic name - if (isset($generic_names[$font])) { - $final .= $font . ', '; - continue; - } - // match a quoted name - if ($font[0] === '"' || $font[0] === "'") { - $length = strlen($font); - if ($length <= 2) continue; - $quote = $font[0]; - if ($font[$length - 1] !== $quote) continue; - $font = substr($font, 1, $length - 2); - } - - $font = $this->expandCSSEscape($font); - - // $font is a pure representation of the font name - - if (ctype_alnum($font) && $font !== '') { - // very simple font, allow it in unharmed - $final .= $font . ', '; - continue; - } - - // bugger out on whitespace. form feed (0C) really - // shouldn't show up regardless - $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); - - // These ugly transforms don't pose a security - // risk (as \\ and \" might). We could try to be clever and - // use single-quote wrapping when there is a double quote - // present, but I have choosen not to implement that. - // (warning: this code relies on the selection of quotation - // mark below) - $font = str_replace('\\', '\\5C ', $font); - $font = str_replace('"', '\\22 ', $font); - - // complicated font, requires quoting - $final .= "\"$font\", "; // note that this will later get turned into " - } - $final = rtrim($final, ', '); - if ($final === '') return false; - return $final; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Length.php b/library/HTMLPurifier/AttrDef/CSS/Length.php deleted file mode 100644 index a07ec58135..0000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Length.php +++ /dev/null @@ -1,47 +0,0 @@ -min = $min !== null ? HTMLPurifier_Length::make($min) : null; - $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null; - } - - public function validate($string, $config, $context) { - $string = $this->parseCDATA($string); - - // Optimizations - if ($string === '') return false; - if ($string === '0') return '0'; - if (strlen($string) === 1) return false; - - $length = HTMLPurifier_Length::make($string); - if (!$length->isValid()) return false; - - if ($this->min) { - $c = $length->compareTo($this->min); - if ($c === false) return false; - if ($c < 0) return false; - } - if ($this->max) { - $c = $length->compareTo($this->max); - if ($c === false) return false; - if ($c > 0) return false; - } - - return $length->toString(); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/ListStyle.php b/library/HTMLPurifier/AttrDef/CSS/ListStyle.php deleted file mode 100644 index 4406868c08..0000000000 --- a/library/HTMLPurifier/AttrDef/CSS/ListStyle.php +++ /dev/null @@ -1,78 +0,0 @@ -getCSSDefinition(); - $this->info['list-style-type'] = $def->info['list-style-type']; - $this->info['list-style-position'] = $def->info['list-style-position']; - $this->info['list-style-image'] = $def->info['list-style-image']; - } - - public function validate($string, $config, $context) { - - // regular pre-processing - $string = $this->parseCDATA($string); - if ($string === '') return false; - - // assumes URI doesn't have spaces in it - $bits = explode(' ', strtolower($string)); // bits to process - - $caught = array(); - $caught['type'] = false; - $caught['position'] = false; - $caught['image'] = false; - - $i = 0; // number of catches - $none = false; - - foreach ($bits as $bit) { - if ($i >= 3) return; // optimization bit - if ($bit === '') continue; - foreach ($caught as $key => $status) { - if ($status !== false) continue; - $r = $this->info['list-style-' . $key]->validate($bit, $config, $context); - if ($r === false) continue; - if ($r === 'none') { - if ($none) continue; - else $none = true; - if ($key == 'image') continue; - } - $caught[$key] = $r; - $i++; - break; - } - } - - if (!$i) return false; - - $ret = array(); - - // construct type - if ($caught['type']) $ret[] = $caught['type']; - - // construct image - if ($caught['image']) $ret[] = $caught['image']; - - // construct position - if ($caught['position']) $ret[] = $caught['position']; - - if (empty($ret)) return false; - return implode(' ', $ret); - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/Percentage.php b/library/HTMLPurifier/AttrDef/CSS/Percentage.php deleted file mode 100644 index c34b8fc3c3..0000000000 --- a/library/HTMLPurifier/AttrDef/CSS/Percentage.php +++ /dev/null @@ -1,40 +0,0 @@ -number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); - } - - public function validate($string, $config, $context) { - - $string = $this->parseCDATA($string); - - if ($string === '') return false; - $length = strlen($string); - if ($length === 1) return false; - if ($string[$length - 1] !== '%') return false; - - $number = substr($string, 0, $length - 1); - $number = $this->number_def->validate($number, $config, $context); - - if ($number === false) return false; - return "$number%"; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Bool.php b/library/HTMLPurifier/AttrDef/HTML/Bool.php deleted file mode 100644 index e06987eb8d..0000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Bool.php +++ /dev/null @@ -1,28 +0,0 @@ -name = $name;} - - public function validate($string, $config, $context) { - if (empty($string)) return false; - return $this->name; - } - - /** - * @param $string Name of attribute - */ - public function make($string) { - return new HTMLPurifier_AttrDef_HTML_Bool($string); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Color.php b/library/HTMLPurifier/AttrDef/HTML/Color.php deleted file mode 100644 index d01e20454e..0000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Color.php +++ /dev/null @@ -1,32 +0,0 @@ -get('Core.ColorKeywords'); - - $string = trim($string); - - if (empty($string)) return false; - if (isset($colors[$string])) return $colors[$string]; - if ($string[0] === '#') $hex = substr($string, 1); - else $hex = $string; - - $length = strlen($hex); - if ($length !== 3 && $length !== 6) return false; - if (!ctype_xdigit($hex)) return false; - if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; - - return "#$hex"; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php deleted file mode 100644 index ae6ea7c01d..0000000000 --- a/library/HTMLPurifier/AttrDef/HTML/FrameTarget.php +++ /dev/null @@ -1,21 +0,0 @@ -valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets'); - return parent::validate($string, $config, $context); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php deleted file mode 100644 index 81d03762de..0000000000 --- a/library/HTMLPurifier/AttrDef/HTML/ID.php +++ /dev/null @@ -1,70 +0,0 @@ -get('Attr.EnableID')) return false; - - $id = trim($id); // trim it first - - if ($id === '') return false; - - $prefix = $config->get('Attr.IDPrefix'); - if ($prefix !== '') { - $prefix .= $config->get('Attr.IDPrefixLocal'); - // prevent re-appending the prefix - if (strpos($id, $prefix) !== 0) $id = $prefix . $id; - } elseif ($config->get('Attr.IDPrefixLocal') !== '') { - trigger_error('%Attr.IDPrefixLocal cannot be used unless '. - '%Attr.IDPrefix is set', E_USER_WARNING); - } - - //if (!$this->ref) { - $id_accumulator =& $context->get('IDAccumulator'); - if (isset($id_accumulator->ids[$id])) return false; - //} - - // we purposely avoid using regex, hopefully this is faster - - if (ctype_alpha($id)) { - $result = true; - } else { - if (!ctype_alpha(@$id[0])) return false; - $trim = trim( // primitive style of regexps, I suppose - $id, - 'A..Za..z0..9:-._' - ); - $result = ($trim === ''); - } - - $regexp = $config->get('Attr.IDBlacklistRegexp'); - if ($regexp && preg_match($regexp, $id)) { - return false; - } - - if (/*!$this->ref && */$result) $id_accumulator->add($id); - - // if no change was made to the ID, return the result - // else, return the new id if stripping whitespace made it - // valid, or return false. - return $result ? $id : false; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/Length.php b/library/HTMLPurifier/AttrDef/HTML/Length.php deleted file mode 100644 index a242f9c238..0000000000 --- a/library/HTMLPurifier/AttrDef/HTML/Length.php +++ /dev/null @@ -1,41 +0,0 @@ - 100) return '100%'; - - return ((string) $points) . '%'; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php b/library/HTMLPurifier/AttrDef/HTML/MultiLength.php deleted file mode 100644 index c72fc76e4d..0000000000 --- a/library/HTMLPurifier/AttrDef/HTML/MultiLength.php +++ /dev/null @@ -1,41 +0,0 @@ -max = $max; - } - - public function validate($string, $config, $context) { - - $string = trim($string); - if ($string === '0') return $string; - if ($string === '') return false; - $length = strlen($string); - if (substr($string, $length - 2) == 'px') { - $string = substr($string, 0, $length - 2); - } - if (!is_numeric($string)) return false; - $int = (int) $string; - - if ($int < 0) return '0'; - - // upper-bound value, extremely high values can - // crash operating systems, see - // WARNING, above link WILL crash you if you're using Windows - - if ($this->max !== null && $int > $this->max) return (string) $this->max; - - return (string) $int; - - } - - public function make($string) { - if ($string === '') $max = null; - else $max = (int) $string; - $class = get_class($this); - return new $class($max); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/Text.php b/library/HTMLPurifier/AttrDef/Text.php deleted file mode 100644 index c6216cc531..0000000000 --- a/library/HTMLPurifier/AttrDef/Text.php +++ /dev/null @@ -1,15 +0,0 @@ -parseCDATA($string); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/URI/Host.php b/library/HTMLPurifier/AttrDef/URI/Host.php deleted file mode 100644 index 2156c10c66..0000000000 --- a/library/HTMLPurifier/AttrDef/URI/Host.php +++ /dev/null @@ -1,62 +0,0 @@ -ipv4 = new HTMLPurifier_AttrDef_URI_IPv4(); - $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6(); - } - - public function validate($string, $config, $context) { - $length = strlen($string); - if ($string === '') return ''; - if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') { - //IPv6 - $ip = substr($string, 1, $length - 2); - $valid = $this->ipv6->validate($ip, $config, $context); - if ($valid === false) return false; - return '['. $valid . ']'; - } - - // need to do checks on unusual encodings too - $ipv4 = $this->ipv4->validate($string, $config, $context); - if ($ipv4 !== false) return $ipv4; - - // A regular domain name. - - // This breaks I18N domain names, but we don't have proper IRI support, - // so force users to insert Punycode. If there's complaining we'll - // try to fix things into an international friendly form. - - // The productions describing this are: - $a = '[a-z]'; // alpha - $an = '[a-z0-9]'; // alphanum - $and = '[a-z0-9-]'; // alphanum | "-" - // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum - $domainlabel = "$an($and*$an)?"; - // toplabel = alpha | alpha *( alphanum | "-" ) alphanum - $toplabel = "$a($and*$an)?"; - // hostname = *( domainlabel "." ) toplabel [ "." ] - $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string); - if (!$match) return false; - - return $string; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/URI/IPv6.php b/library/HTMLPurifier/AttrDef/URI/IPv6.php deleted file mode 100644 index 9454e9be50..0000000000 --- a/library/HTMLPurifier/AttrDef/URI/IPv6.php +++ /dev/null @@ -1,99 +0,0 @@ -ip4) $this->_loadRegex(); - - $original = $aIP; - - $hex = '[0-9a-fA-F]'; - $blk = '(?:' . $hex . '{1,4})'; - $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 - - // prefix check - if (strpos($aIP, '/') !== false) - { - if (preg_match('#' . $pre . '$#s', $aIP, $find)) - { - $aIP = substr($aIP, 0, 0-strlen($find[0])); - unset($find); - } - else - { - return false; - } - } - - // IPv4-compatiblity check - if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) - { - $aIP = substr($aIP, 0, 0-strlen($find[0])); - $ip = explode('.', $find[0]); - $ip = array_map('dechex', $ip); - $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; - unset($find, $ip); - } - - // compression check - $aIP = explode('::', $aIP); - $c = count($aIP); - if ($c > 2) - { - return false; - } - elseif ($c == 2) - { - list($first, $second) = $aIP; - $first = explode(':', $first); - $second = explode(':', $second); - - if (count($first) + count($second) > 8) - { - return false; - } - - while(count($first) < 8) - { - array_push($first, '0'); - } - - array_splice($first, 8 - count($second), 8, $second); - $aIP = $first; - unset($first,$second); - } - else - { - $aIP = explode(':', $aIP[0]); - } - $c = count($aIP); - - if ($c != 8) - { - return false; - } - - // All the pieces should be 16-bit hex strings. Are they? - foreach ($aIP as $piece) - { - if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) - { - return false; - } - } - - return $original; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrTransform/BoolToCSS.php b/library/HTMLPurifier/AttrTransform/BoolToCSS.php deleted file mode 100644 index 51159b6715..0000000000 --- a/library/HTMLPurifier/AttrTransform/BoolToCSS.php +++ /dev/null @@ -1,36 +0,0 @@ -attr = $attr; - $this->css = $css; - } - - public function transform($attr, $config, $context) { - if (!isset($attr[$this->attr])) return $attr; - unset($attr[$this->attr]); - $this->prependCSS($attr, $this->css); - return $attr; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrTransform/EnumToCSS.php b/library/HTMLPurifier/AttrTransform/EnumToCSS.php deleted file mode 100644 index 2a5b4514ab..0000000000 --- a/library/HTMLPurifier/AttrTransform/EnumToCSS.php +++ /dev/null @@ -1,58 +0,0 @@ -attr = $attr; - $this->enumToCSS = $enum_to_css; - $this->caseSensitive = (bool) $case_sensitive; - } - - public function transform($attr, $config, $context) { - - if (!isset($attr[$this->attr])) return $attr; - - $value = trim($attr[$this->attr]); - unset($attr[$this->attr]); - - if (!$this->caseSensitive) $value = strtolower($value); - - if (!isset($this->enumToCSS[$value])) { - return $attr; - } - - $this->prependCSS($attr, $this->enumToCSS[$value]); - - return $attr; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrTransform/Length.php b/library/HTMLPurifier/AttrTransform/Length.php deleted file mode 100644 index ea2f30473d..0000000000 --- a/library/HTMLPurifier/AttrTransform/Length.php +++ /dev/null @@ -1,27 +0,0 @@ -name = $name; - $this->cssName = $css_name ? $css_name : $name; - } - - public function transform($attr, $config, $context) { - if (!isset($attr[$this->name])) return $attr; - $length = $this->confiscateAttr($attr, $this->name); - if(ctype_digit($length)) $length .= 'px'; - $this->prependCSS($attr, $this->cssName . ":$length;"); - return $attr; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrTransform/Name.php b/library/HTMLPurifier/AttrTransform/Name.php deleted file mode 100644 index 15315bc735..0000000000 --- a/library/HTMLPurifier/AttrTransform/Name.php +++ /dev/null @@ -1,21 +0,0 @@ -get('HTML.Attr.Name.UseCDATA')) return $attr; - if (!isset($attr['name'])) return $attr; - $id = $this->confiscateAttr($attr, 'name'); - if ( isset($attr['id'])) return $attr; - $attr['id'] = $id; - return $attr; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrTransform/NameSync.php b/library/HTMLPurifier/AttrTransform/NameSync.php deleted file mode 100644 index a95638c140..0000000000 --- a/library/HTMLPurifier/AttrTransform/NameSync.php +++ /dev/null @@ -1,27 +0,0 @@ -idDef = new HTMLPurifier_AttrDef_HTML_ID(); - } - - public function transform($attr, $config, $context) { - if (!isset($attr['name'])) return $attr; - $name = $attr['name']; - if (isset($attr['id']) && $attr['id'] === $name) return $attr; - $result = $this->idDef->validate($name, $config, $context); - if ($result === false) unset($attr['name']); - else $attr['name'] = $result; - return $attr; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrTransform/SafeObject.php b/library/HTMLPurifier/AttrTransform/SafeObject.php deleted file mode 100644 index 1ed74898ba..0000000000 --- a/library/HTMLPurifier/AttrTransform/SafeObject.php +++ /dev/null @@ -1,16 +0,0 @@ - - */ -class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform -{ - - public function transform($attr, $config, $context) { - // Calculated from Firefox - if (!isset($attr['cols'])) $attr['cols'] = '22'; - if (!isset($attr['rows'])) $attr['rows'] = '3'; - return $attr; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Bootstrap.php b/library/HTMLPurifier/Bootstrap.php deleted file mode 100644 index 559f61a232..0000000000 --- a/library/HTMLPurifier/Bootstrap.php +++ /dev/null @@ -1,98 +0,0 @@ - -if (!defined('PHP_EOL')) { - switch (strtoupper(substr(PHP_OS, 0, 3))) { - case 'WIN': - define('PHP_EOL', "\r\n"); - break; - case 'DAR': - define('PHP_EOL', "\r"); - break; - default: - define('PHP_EOL', "\n"); - } -} - -/** - * Bootstrap class that contains meta-functionality for HTML Purifier such as - * the autoload function. - * - * @note - * This class may be used without any other files from HTML Purifier. - */ -class HTMLPurifier_Bootstrap -{ - - /** - * Autoload function for HTML Purifier - * @param $class Class to load - */ - public static function autoload($class) { - $file = HTMLPurifier_Bootstrap::getPath($class); - if (!$file) return false; - require HTMLPURIFIER_PREFIX . '/' . $file; - return true; - } - - /** - * Returns the path for a specific class. - */ - public static function getPath($class) { - if (strncmp('HTMLPurifier', $class, 12) !== 0) return false; - // Custom implementations - if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) { - $code = str_replace('_', '-', substr($class, 22)); - $file = 'HTMLPurifier/Language/classes/' . $code . '.php'; - } else { - $file = str_replace('_', '/', $class) . '.php'; - } - if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false; - return $file; - } - - /** - * "Pre-registers" our autoloader on the SPL stack. - */ - public static function registerAutoload() { - $autoload = array('HTMLPurifier_Bootstrap', 'autoload'); - if ( ($funcs = spl_autoload_functions()) === false ) { - spl_autoload_register($autoload); - } elseif (function_exists('spl_autoload_unregister')) { - $compat = version_compare(PHP_VERSION, '5.1.2', '<=') && - version_compare(PHP_VERSION, '5.1.0', '>='); - foreach ($funcs as $func) { - if (is_array($func)) { - // :TRICKY: There are some compatibility issues and some - // places where we need to error out - $reflector = new ReflectionMethod($func[0], $func[1]); - if (!$reflector->isStatic()) { - throw new Exception(' - HTML Purifier autoloader registrar is not compatible - with non-static object methods due to PHP Bug #44144; - Please do not use HTMLPurifier.autoload.php (or any - file that includes this file); instead, place the code: - spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\')) - after your own autoloaders. - '); - } - // Suprisingly, spl_autoload_register supports the - // Class::staticMethod callback format, although call_user_func doesn't - if ($compat) $func = implode('::', $func); - } - spl_autoload_unregister($func); - } - spl_autoload_register($autoload); - foreach ($funcs as $func) spl_autoload_register($func); - } - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/CSSDefinition.php b/library/HTMLPurifier/CSSDefinition.php deleted file mode 100644 index 6a2e6f56d9..0000000000 --- a/library/HTMLPurifier/CSSDefinition.php +++ /dev/null @@ -1,292 +0,0 @@ -info['text-align'] = new HTMLPurifier_AttrDef_Enum( - array('left', 'right', 'center', 'justify'), false); - - $border_style = - $this->info['border-bottom-style'] = - $this->info['border-right-style'] = - $this->info['border-left-style'] = - $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum( - array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double', - 'groove', 'ridge', 'inset', 'outset'), false); - - $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style); - - $this->info['clear'] = new HTMLPurifier_AttrDef_Enum( - array('none', 'left', 'right', 'both'), false); - $this->info['float'] = new HTMLPurifier_AttrDef_Enum( - array('none', 'left', 'right'), false); - $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum( - array('normal', 'italic', 'oblique'), false); - $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum( - array('normal', 'small-caps'), false); - - $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite( - array( - new HTMLPurifier_AttrDef_Enum(array('none')), - new HTMLPurifier_AttrDef_CSS_URI() - ) - ); - - $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum( - array('inside', 'outside'), false); - $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum( - array('disc', 'circle', 'square', 'decimal', 'lower-roman', - 'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false); - $this->info['list-style-image'] = $uri_or_none; - - $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config); - - $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum( - array('capitalize', 'uppercase', 'lowercase', 'none'), false); - $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color(); - - $this->info['background-image'] = $uri_or_none; - $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum( - array('repeat', 'repeat-x', 'repeat-y', 'no-repeat') - ); - $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( - array('scroll', 'fixed') - ); - $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); - - $border_color = - $this->info['border-top-color'] = - $this->info['border-bottom-color'] = - $this->info['border-left-color'] = - $this->info['border-right-color'] = - $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_Enum(array('transparent')), - new HTMLPurifier_AttrDef_CSS_Color() - )); - - $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config); - - $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color); - - $border_width = - $this->info['border-top-width'] = - $this->info['border-bottom-width'] = - $this->info['border-left-width'] = - $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')), - new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative - )); - - $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width); - - $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_Enum(array('normal')), - new HTMLPurifier_AttrDef_CSS_Length() - )); - - $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_Enum(array('normal')), - new HTMLPurifier_AttrDef_CSS_Length() - )); - - $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small', - 'small', 'medium', 'large', 'x-large', 'xx-large', - 'larger', 'smaller')), - new HTMLPurifier_AttrDef_CSS_Percentage(), - new HTMLPurifier_AttrDef_CSS_Length() - )); - - $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_Enum(array('normal')), - new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives - new HTMLPurifier_AttrDef_CSS_Length('0'), - new HTMLPurifier_AttrDef_CSS_Percentage(true) - )); - - $margin = - $this->info['margin-top'] = - $this->info['margin-bottom'] = - $this->info['margin-left'] = - $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_CSS_Length(), - new HTMLPurifier_AttrDef_CSS_Percentage(), - new HTMLPurifier_AttrDef_Enum(array('auto')) - )); - - $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin); - - // non-negative - $padding = - $this->info['padding-top'] = - $this->info['padding-bottom'] = - $this->info['padding-left'] = - $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_CSS_Length('0'), - new HTMLPurifier_AttrDef_CSS_Percentage(true) - )); - - $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding); - - $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_CSS_Length(), - new HTMLPurifier_AttrDef_CSS_Percentage() - )); - - $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_CSS_Length('0'), - new HTMLPurifier_AttrDef_CSS_Percentage(true), - new HTMLPurifier_AttrDef_Enum(array('auto')) - )); - $max = $config->get('CSS.MaxImgLength'); - - $this->info['width'] = - $this->info['height'] = - $max === null ? - $trusted_wh : - new HTMLPurifier_AttrDef_Switch('img', - // For img tags: - new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_CSS_Length('0', $max), - new HTMLPurifier_AttrDef_Enum(array('auto')) - )), - // For everyone else: - $trusted_wh - ); - - $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); - - $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily(); - - // this could use specialized code - $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum( - array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300', - '400', '500', '600', '700', '800', '900'), false); - - // MUST be called after other font properties, as it references - // a CSSDefinition object - $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config); - - // same here - $this->info['border'] = - $this->info['border-bottom'] = - $this->info['border-top'] = - $this->info['border-left'] = - $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config); - - $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array( - 'collapse', 'separate')); - - $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array( - 'top', 'bottom')); - - $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array( - 'auto', 'fixed')); - - $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array( - new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super', - 'top', 'text-top', 'middle', 'bottom', 'text-bottom')), - new HTMLPurifier_AttrDef_CSS_Length(), - new HTMLPurifier_AttrDef_CSS_Percentage() - )); - - $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2); - - // partial support - $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap')); - - if ($config->get('CSS.Proprietary')) { - $this->doSetupProprietary($config); - } - - if ($config->get('CSS.AllowTricky')) { - $this->doSetupTricky($config); - } - - $allow_important = $config->get('CSS.AllowImportant'); - // wrap all attr-defs with decorator that handles !important - foreach ($this->info as $k => $v) { - $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important); - } - - $this->setupConfigStuff($config); - } - - protected function doSetupProprietary($config) { - // Internet Explorer only scrollbar colors - $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); - $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color(); - $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); - $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color(); - $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color(); - $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); - - // technically not proprietary, but CSS3, and no one supports it - $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); - $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); - $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); - - // only opacity, for now - $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter(); - - } - - protected function doSetupTricky($config) { - $this->info['display'] = new HTMLPurifier_AttrDef_Enum(array( - 'inline', 'block', 'list-item', 'run-in', 'compact', - 'marker', 'table', 'inline-table', 'table-row-group', - 'table-header-group', 'table-footer-group', 'table-row', - 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none' - )); - $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array( - 'visible', 'hidden', 'collapse' - )); - $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll')); - } - - - /** - * Performs extra config-based processing. Based off of - * HTMLPurifier_HTMLDefinition. - * @todo Refactor duplicate elements into common class (probably using - * composition, not inheritance). - */ - protected function setupConfigStuff($config) { - - // setup allowed elements - $support = "(for information on implementing this, see the ". - "support forums) "; - $allowed_attributes = $config->get('CSS.AllowedProperties'); - if ($allowed_attributes !== null) { - foreach ($this->info as $name => $d) { - if(!isset($allowed_attributes[$name])) unset($this->info[$name]); - unset($allowed_attributes[$name]); - } - // emit errors - foreach ($allowed_attributes as $name => $d) { - // :TODO: Is this htmlspecialchars() call really necessary? - $name = htmlspecialchars($name); - trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING); - } - } - - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Optional.php b/library/HTMLPurifier/ChildDef/Optional.php deleted file mode 100644 index 32bcb9898e..0000000000 --- a/library/HTMLPurifier/ChildDef/Optional.php +++ /dev/null @@ -1,26 +0,0 @@ -whitespace) return $tokens_of_children; - else return array(); - } - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Required.php b/library/HTMLPurifier/ChildDef/Required.php deleted file mode 100644 index 4889f249b8..0000000000 --- a/library/HTMLPurifier/ChildDef/Required.php +++ /dev/null @@ -1,117 +0,0 @@ - $x) { - $elements[$i] = true; - if (empty($i)) unset($elements[$i]); // remove blank - } - } - $this->elements = $elements; - } - public $allow_empty = false; - public $type = 'required'; - public function validateChildren($tokens_of_children, $config, $context) { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($tokens_of_children)) return false; - - // the new set of children - $result = array(); - - // current depth into the nest - $nesting = 0; - - // whether or not we're deleting a node - $is_deleting = false; - - // whether or not parsed character data is allowed - // this controls whether or not we silently drop a tag - // or generate escaped HTML from it - $pcdata_allowed = isset($this->elements['#PCDATA']); - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - // some configuration - $escape_invalid_children = $config->get('Core.EscapeInvalidChildren'); - - // generator - $gen = new HTMLPurifier_Generator($config, $context); - - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) { - $result[] = $token; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - $is_child = ($nesting == 0); - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - $is_deleting = false; - if (!isset($this->elements[$token->name])) { - $is_deleting = true; - if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = new HTMLPurifier_Token_Text( - $gen->generateFromToken($token) - ); - } - continue; - } - } - if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = - new HTMLPurifier_Token_Text( - $gen->generateFromToken($token) - ); - } else { - // drop silently - } - } - if (empty($result)) return false; - if ($all_whitespace) { - $this->whitespace = true; - return false; - } - if ($tokens_of_children == $result) return true; - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/library/HTMLPurifier/ChildDef/StrictBlockquote.php deleted file mode 100644 index dfae8a6e5e..0000000000 --- a/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ /dev/null @@ -1,88 +0,0 @@ -init($config); - return $this->fake_elements; - } - - public function validateChildren($tokens_of_children, $config, $context) { - - $this->init($config); - - // trick the parent class into thinking it allows more - $this->elements = $this->fake_elements; - $result = parent::validateChildren($tokens_of_children, $config, $context); - $this->elements = $this->real_elements; - - if ($result === false) return array(); - if ($result === true) $result = $tokens_of_children; - - $def = $config->getHTMLDefinition(); - $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper); - $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper); - $is_inline = false; - $depth = 0; - $ret = array(); - - // assuming that there are no comment tokens - foreach ($result as $i => $token) { - $token = $result[$i]; - // ifs are nested for readability - if (!$is_inline) { - if (!$depth) { - if ( - ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) || - (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name])) - ) { - $is_inline = true; - $ret[] = $block_wrap_start; - } - } - } else { - if (!$depth) { - // starting tokens have been inline text / empty - if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) { - if (isset($this->elements[$token->name])) { - // ended - $ret[] = $block_wrap_end; - $is_inline = false; - } - } - } - } - $ret[] = $token; - if ($token instanceof HTMLPurifier_Token_Start) $depth++; - if ($token instanceof HTMLPurifier_Token_End) $depth--; - } - if ($is_inline) $ret[] = $block_wrap_end; - return $ret; - } - - private function init($config) { - if (!$this->init) { - $def = $config->getHTMLDefinition(); - // allow all inline elements - $this->real_elements = $this->elements; - $this->fake_elements = $def->info_content_sets['Flow']; - $this->fake_elements['#PCDATA'] = true; - $this->init = true; - } - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ChildDef/Table.php b/library/HTMLPurifier/ChildDef/Table.php deleted file mode 100644 index 34f0227dd2..0000000000 --- a/library/HTMLPurifier/ChildDef/Table.php +++ /dev/null @@ -1,142 +0,0 @@ - true, 'tbody' => true, 'thead' => true, - 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true); - public function __construct() {} - public function validateChildren($tokens_of_children, $config, $context) { - if (empty($tokens_of_children)) return false; - - // this ensures that the loop gets run one last time before closing - // up. It's a little bit of a hack, but it works! Just make sure you - // get rid of the token later. - $tokens_of_children[] = false; - - // only one of these elements is allowed in a table - $caption = false; - $thead = false; - $tfoot = false; - - // as many of these as you want - $cols = array(); - $content = array(); - - $nesting = 0; // current depth so we can determine nodes - $is_collecting = false; // are we globbing together tokens to package - // into one of the collectors? - $collection = array(); // collected nodes - $tag_index = 0; // the first node might be whitespace, - // so this tells us where the start tag is - - foreach ($tokens_of_children as $token) { - $is_child = ($nesting == 0); - - if ($token === false) { - // terminating sequence started - } elseif ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - // handle node collection - if ($is_collecting) { - if ($is_child) { - // okay, let's stash the tokens away - // first token tells us the type of the collection - switch ($collection[$tag_index]->name) { - case 'tr': - case 'tbody': - $content[] = $collection; - break; - case 'caption': - if ($caption !== false) break; - $caption = $collection; - break; - case 'thead': - case 'tfoot': - // access the appropriate variable, $thead or $tfoot - $var = $collection[$tag_index]->name; - if ($$var === false) { - $$var = $collection; - } else { - // transmutate the first and less entries into - // tbody tags, and then put into content - $collection[$tag_index]->name = 'tbody'; - $collection[count($collection)-1]->name = 'tbody'; - $content[] = $collection; - } - break; - case 'colgroup': - $cols[] = $collection; - break; - } - $collection = array(); - $is_collecting = false; - $tag_index = 0; - } else { - // add the node to the collection - $collection[] = $token; - } - } - - // terminate - if ($token === false) break; - - if ($is_child) { - // determine what we're dealing with - if ($token->name == 'col') { - // the only empty tag in the possie, we can handle it - // immediately - $cols[] = array_merge($collection, array($token)); - $collection = array(); - $tag_index = 0; - continue; - } - switch($token->name) { - case 'caption': - case 'colgroup': - case 'thead': - case 'tfoot': - case 'tbody': - case 'tr': - $is_collecting = true; - $collection[] = $token; - continue; - default: - if (!empty($token->is_whitespace)) { - $collection[] = $token; - $tag_index++; - } - continue; - } - } - } - - if (empty($content)) return false; - - $ret = array(); - if ($caption !== false) $ret = array_merge($ret, $caption); - if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); - if ($thead !== false) $ret = array_merge($ret, $thead); - if ($tfoot !== false) $ret = array_merge($ret, $tfoot); - foreach ($content as $token_array) $ret = array_merge($ret, $token_array); - if (!empty($collection) && $is_collecting == false){ - // grab the trailing space - $ret = array_merge($ret, $collection); - } - - array_pop($tokens_of_children); // remove phantom token - - return ($ret === $tokens_of_children) ? true : $ret; - - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php deleted file mode 100644 index 2a334b0d83..0000000000 --- a/library/HTMLPurifier/Config.php +++ /dev/null @@ -1,580 +0,0 @@ -defaultPlist; - $this->plist = new HTMLPurifier_PropertyList($parent); - $this->def = $definition; // keep a copy around for checking - $this->parser = new HTMLPurifier_VarParser_Flexible(); - } - - /** - * Convenience constructor that creates a config object based on a mixed var - * @param mixed $config Variable that defines the state of the config - * object. Can be: a HTMLPurifier_Config() object, - * an array of directives based on loadArray(), - * or a string filename of an ini file. - * @param HTMLPurifier_ConfigSchema Schema object - * @return Configured HTMLPurifier_Config object - */ - public static function create($config, $schema = null) { - if ($config instanceof HTMLPurifier_Config) { - // pass-through - return $config; - } - if (!$schema) { - $ret = HTMLPurifier_Config::createDefault(); - } else { - $ret = new HTMLPurifier_Config($schema); - } - if (is_string($config)) $ret->loadIni($config); - elseif (is_array($config)) $ret->loadArray($config); - return $ret; - } - - /** - * Creates a new config object that inherits from a previous one. - * @param HTMLPurifier_Config $config Configuration object to inherit - * from. - * @return HTMLPurifier_Config object with $config as its parent. - */ - public static function inherit(HTMLPurifier_Config $config) { - return new HTMLPurifier_Config($config->def, $config->plist); - } - - /** - * Convenience constructor that creates a default configuration object. - * @return Default HTMLPurifier_Config object. - */ - public static function createDefault() { - $definition = HTMLPurifier_ConfigSchema::instance(); - $config = new HTMLPurifier_Config($definition); - return $config; - } - - /** - * Retreives a value from the configuration. - * @param $key String key - */ - public function get($key, $a = null) { - if ($a !== null) { - $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); - $key = "$key.$a"; - } - if (!$this->finalized) $this->autoFinalize(); - if (!isset($this->def->info[$key])) { - // can't add % due to SimpleTest bug - $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key), - E_USER_WARNING); - return; - } - if (isset($this->def->info[$key]->isAlias)) { - $d = $this->def->info[$key]; - $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key, - E_USER_ERROR); - return; - } - if ($this->lock) { - list($ns) = explode('.', $key); - if ($ns !== $this->lock) { - $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); - return; - } - } - return $this->plist->get($key); - } - - /** - * Retreives an array of directives to values from a given namespace - * @param $namespace String namespace - */ - public function getBatch($namespace) { - if (!$this->finalized) $this->autoFinalize(); - $full = $this->getAll(); - if (!isset($full[$namespace])) { - $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), - E_USER_WARNING); - return; - } - return $full[$namespace]; - } - - /** - * Returns a md5 signature of a segment of the configuration object - * that uniquely identifies that particular configuration - * @note Revision is handled specially and is removed from the batch - * before processing! - * @param $namespace Namespace to get serial for - */ - public function getBatchSerial($namespace) { - if (empty($this->serials[$namespace])) { - $batch = $this->getBatch($namespace); - unset($batch['DefinitionRev']); - $this->serials[$namespace] = md5(serialize($batch)); - } - return $this->serials[$namespace]; - } - - /** - * Returns a md5 signature for the entire configuration object - * that uniquely identifies that particular configuration - */ - public function getSerial() { - if (empty($this->serial)) { - $this->serial = md5(serialize($this->getAll())); - } - return $this->serial; - } - - /** - * Retrieves all directives, organized by namespace - * @warning This is a pretty inefficient function, avoid if you can - */ - public function getAll() { - if (!$this->finalized) $this->autoFinalize(); - $ret = array(); - foreach ($this->plist->squash() as $name => $value) { - list($ns, $key) = explode('.', $name, 2); - $ret[$ns][$key] = $value; - } - return $ret; - } - - /** - * Sets a value to configuration. - * @param $key String key - * @param $value Mixed value - */ - public function set($key, $value, $a = null) { - if (strpos($key, '.') === false) { - $namespace = $key; - $directive = $value; - $value = $a; - $key = "$key.$directive"; - $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); - } else { - list($namespace) = explode('.', $key); - } - if ($this->isFinalized('Cannot set directive after finalization')) return; - if (!isset($this->def->info[$key])) { - $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value', - E_USER_WARNING); - return; - } - $def = $this->def->info[$key]; - - if (isset($def->isAlias)) { - if ($this->aliasMode) { - $this->triggerError('Double-aliases not allowed, please fix '. - 'ConfigSchema bug with' . $key, E_USER_ERROR); - return; - } - $this->aliasMode = true; - $this->set($def->key, $value); - $this->aliasMode = false; - $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); - return; - } - - // Raw type might be negative when using the fully optimized form - // of stdclass, which indicates allow_null == true - $rtype = is_int($def) ? $def : $def->type; - if ($rtype < 0) { - $type = -$rtype; - $allow_null = true; - } else { - $type = $rtype; - $allow_null = isset($def->allow_null); - } - - try { - $value = $this->parser->parse($value, $type, $allow_null); - } catch (HTMLPurifier_VarParserException $e) { - $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); - return; - } - if (is_string($value) && is_object($def)) { - // resolve value alias if defined - if (isset($def->aliases[$value])) { - $value = $def->aliases[$value]; - } - // check to see if the value is allowed - if (isset($def->allowed) && !isset($def->allowed[$value])) { - $this->triggerError('Value not supported, valid values are: ' . - $this->_listify($def->allowed), E_USER_WARNING); - return; - } - } - $this->plist->set($key, $value); - - // reset definitions if the directives they depend on changed - // this is a very costly process, so it's discouraged - // with finalization - if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { - $this->definitions[$namespace] = null; - } - - $this->serials[$namespace] = false; - } - - /** - * Convenience function for error reporting - */ - private function _listify($lookup) { - $list = array(); - foreach ($lookup as $name => $b) $list[] = $name; - return implode(', ', $list); - } - - /** - * Retrieves object reference to the HTML definition. - * @param $raw Return a copy that has not been setup yet. Must be - * called before it's been setup, otherwise won't work. - */ - public function getHTMLDefinition($raw = false) { - return $this->getDefinition('HTML', $raw); - } - - /** - * Retrieves object reference to the CSS definition - * @param $raw Return a copy that has not been setup yet. Must be - * called before it's been setup, otherwise won't work. - */ - public function getCSSDefinition($raw = false) { - return $this->getDefinition('CSS', $raw); - } - - /** - * Retrieves a definition - * @param $type Type of definition: HTML, CSS, etc - * @param $raw Whether or not definition should be returned raw - */ - public function getDefinition($type, $raw = false) { - if (!$this->finalized) $this->autoFinalize(); - // temporarily suspend locks, so we can handle recursive definition calls - $lock = $this->lock; - $this->lock = null; - $factory = HTMLPurifier_DefinitionCacheFactory::instance(); - $cache = $factory->create($type, $this); - $this->lock = $lock; - if (!$raw) { - // see if we can quickly supply a definition - if (!empty($this->definitions[$type])) { - if (!$this->definitions[$type]->setup) { - $this->definitions[$type]->setup($this); - $cache->set($this->definitions[$type], $this); - } - return $this->definitions[$type]; - } - // memory check missed, try cache - $this->definitions[$type] = $cache->get($this); - if ($this->definitions[$type]) { - // definition in cache, return it - return $this->definitions[$type]; - } - } elseif ( - !empty($this->definitions[$type]) && - !$this->definitions[$type]->setup - ) { - // raw requested, raw in memory, quick return - return $this->definitions[$type]; - } - // quick checks failed, let's create the object - if ($type == 'HTML') { - $this->definitions[$type] = new HTMLPurifier_HTMLDefinition(); - } elseif ($type == 'CSS') { - $this->definitions[$type] = new HTMLPurifier_CSSDefinition(); - } elseif ($type == 'URI') { - $this->definitions[$type] = new HTMLPurifier_URIDefinition(); - } else { - throw new HTMLPurifier_Exception("Definition of $type type not supported"); - } - // quick abort if raw - if ($raw) { - if (is_null($this->get($type . '.DefinitionID'))) { - // fatally error out if definition ID not set - throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); - } - return $this->definitions[$type]; - } - // set it up - $this->lock = $type; - $this->definitions[$type]->setup($this); - $this->lock = null; - // save in cache - $cache->set($this->definitions[$type], $this); - return $this->definitions[$type]; - } - - /** - * Loads configuration values from an array with the following structure: - * Namespace.Directive => Value - * @param $config_array Configuration associative array - */ - public function loadArray($config_array) { - if ($this->isFinalized('Cannot load directives after finalization')) return; - foreach ($config_array as $key => $value) { - $key = str_replace('_', '.', $key); - if (strpos($key, '.') !== false) { - $this->set($key, $value); - } else { - $namespace = $key; - $namespace_values = $value; - foreach ($namespace_values as $directive => $value) { - $this->set($namespace .'.'. $directive, $value); - } - } - } - } - - /** - * Returns a list of array(namespace, directive) for all directives - * that are allowed in a web-form context as per an allowed - * namespaces/directives list. - * @param $allowed List of allowed namespaces/directives - */ - public static function getAllowedDirectivesForForm($allowed, $schema = null) { - if (!$schema) { - $schema = HTMLPurifier_ConfigSchema::instance(); - } - if ($allowed !== true) { - if (is_string($allowed)) $allowed = array($allowed); - $allowed_ns = array(); - $allowed_directives = array(); - $blacklisted_directives = array(); - foreach ($allowed as $ns_or_directive) { - if (strpos($ns_or_directive, '.') !== false) { - // directive - if ($ns_or_directive[0] == '-') { - $blacklisted_directives[substr($ns_or_directive, 1)] = true; - } else { - $allowed_directives[$ns_or_directive] = true; - } - } else { - // namespace - $allowed_ns[$ns_or_directive] = true; - } - } - } - $ret = array(); - foreach ($schema->info as $key => $def) { - list($ns, $directive) = explode('.', $key, 2); - if ($allowed !== true) { - if (isset($blacklisted_directives["$ns.$directive"])) continue; - if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; - } - if (isset($def->isAlias)) continue; - if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; - $ret[] = array($ns, $directive); - } - return $ret; - } - - /** - * Loads configuration values from $_GET/$_POST that were posted - * via ConfigForm - * @param $array $_GET or $_POST array to import - * @param $index Index/name that the config variables are in - * @param $allowed List of allowed namespaces/directives - * @param $mq_fix Boolean whether or not to enable magic quotes fix - * @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy - */ - public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { - $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema); - $config = HTMLPurifier_Config::create($ret, $schema); - return $config; - } - - /** - * Merges in configuration values from $_GET/$_POST to object. NOT STATIC. - * @note Same parameters as loadArrayFromForm - */ - public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) { - $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def); - $this->loadArray($ret); - } - - /** - * Prepares an array from a form into something usable for the more - * strict parts of HTMLPurifier_Config - */ - public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { - if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); - $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); - - $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); - $ret = array(); - foreach ($allowed as $key) { - list($ns, $directive) = $key; - $skey = "$ns.$directive"; - if (!empty($array["Null_$skey"])) { - $ret[$ns][$directive] = null; - continue; - } - if (!isset($array[$skey])) continue; - $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; - $ret[$ns][$directive] = $value; - } - return $ret; - } - - /** - * Loads configuration values from an ini file - * @param $filename Name of ini file - */ - public function loadIni($filename) { - if ($this->isFinalized('Cannot load directives after finalization')) return; - $array = parse_ini_file($filename, true); - $this->loadArray($array); - } - - /** - * Checks whether or not the configuration object is finalized. - * @param $error String error message, or false for no error - */ - public function isFinalized($error = false) { - if ($this->finalized && $error) { - $this->triggerError($error, E_USER_ERROR); - } - return $this->finalized; - } - - /** - * Finalizes configuration only if auto finalize is on and not - * already finalized - */ - public function autoFinalize() { - if ($this->autoFinalize) { - $this->finalize(); - } else { - $this->plist->squash(true); - } - } - - /** - * Finalizes a configuration object, prohibiting further change - */ - public function finalize() { - $this->finalized = true; - unset($this->parser); - } - - /** - * Produces a nicely formatted error message by supplying the - * stack frame information from two levels up and OUTSIDE of - * HTMLPurifier_Config. - */ - protected function triggerError($msg, $no) { - // determine previous stack frame - $backtrace = debug_backtrace(); - if ($this->chatty && isset($backtrace[1])) { - $frame = $backtrace[1]; - $extra = " on line {$frame['line']} in file {$frame['file']}"; - } else { - $extra = ''; - } - trigger_error($msg . $extra, $no); - } - - /** - * Returns a serialized form of the configuration object that can - * be reconstituted. - */ - public function serialize() { - $this->getDefinition('HTML'); - $this->getDefinition('CSS'); - $this->getDefinition('URI'); - return serialize($this); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php b/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php deleted file mode 100644 index b95aea18cc..0000000000 --- a/library/HTMLPurifier/ConfigSchema/ValidatorAtom.php +++ /dev/null @@ -1,66 +0,0 @@ -context = $context; - $this->obj = $obj; - $this->member = $member; - $this->contents =& $obj->$member; - } - - public function assertIsString() { - if (!is_string($this->contents)) $this->error('must be a string'); - return $this; - } - - public function assertIsBool() { - if (!is_bool($this->contents)) $this->error('must be a boolean'); - return $this; - } - - public function assertIsArray() { - if (!is_array($this->contents)) $this->error('must be an array'); - return $this; - } - - public function assertNotNull() { - if ($this->contents === null) $this->error('must not be null'); - return $this; - } - - public function assertAlnum() { - $this->assertIsString(); - if (!ctype_alnum($this->contents)) $this->error('must be alphanumeric'); - return $this; - } - - public function assertNotEmpty() { - if (empty($this->contents)) $this->error('must not be empty'); - return $this; - } - - public function assertIsLookup() { - $this->assertIsArray(); - foreach ($this->contents as $v) { - if ($v !== true) $this->error('must be a lookup array'); - } - return $this; - } - - protected function error($msg) { - throw new HTMLPurifier_ConfigSchema_Exception(ucfirst($this->member) . ' in ' . $this->context . ' ' . $msg); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser deleted file mode 100644 index 22b8d54a59f17f73071055bc601d5a5f3a3f6b31..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13244 zcmeHOOK%%T63$OC6zpXI6C`DC9Qq_vB227a5M?BL3Wj8h>eX*Q#3Jo;9S zK8%i*<}RtzsyzCbj0XKazYcyK9i3EF(K|`g{Hs}x)1)j7FfKoqqv5-4;G{^_<~Au- z#?b(U?;wHAV-hze zoqdXqJn%C0u=9M8Kp~AxsMNa3!Q48jdi?P^DUwx@Z0`LD29#TaGp@@jUq}2=;|=)K zmYXzrKDXJ!mz!yzK}H%RLhaqNhaOXS4b&U1V)ah*#h06NmG{qogFQm5l-bTDp76Wv z3WkFRg%GC6bgL==xt#a z4?)hf#Q-wP>muKnqO#t@sc3>>=Qgtx$c=%6S`oqk0w?eyd77?Q=O$ZGCglj92g81U zBW6eMClVKK+>UVw2)&+STGUFVIb$`3E{?E z*6X<`X3yDTnpLL2Jwp3u9A}sn{IhJ4FKxCWBjCQS?%saj&x}Y8pB086Ob}}1Jr{jCXOh`g5s@v~ejXQ|=&#Ed0|fuC|w z*iASxxGk$^niNIA%i5pm9F|+_bq5BWF$|zIp%2-Z!uhIC1mycgVzUbWHG;$Dg26aL zEx356qCnX2!e@#SA((!cWhww{aR#h4&urf5FVYQkFlHIDou zRv32-Z!|6fgE_>|i+ow9KC}-ZO$TIpcZtlc*JN0FZ@|X{MdbJ!G5}l-G;GLkBmcsn z1c90h)Dr~AZ{-M^&#IzcRCS^DB_W5ol2ry*VkI+EgCgDx8(xt`44WL(9$AwJml!de84mr`B zw_1}y1jyC=-w2NZg-fnIX51f{^f--VEKgNvGcfvvS!+8Svjn1nCMc0_Rz zBU&FQ7f@QnqxXg1xzwB35URSWt4&?u;$JfywnK}vGg^Y;O;F?919_7)hXl_Gy)<7_ zTnlOlU{t@)kaJO^K`t;K1@ROvosNOGrmTASyfByfbdhIJj|v;CAW`;_K&mu)Z2}>E z6Cb*-D)^sc931dhxdZ496l8)ZnR^8I`CWsKgJjwEujZ>RhC}GqpOVs_5XT@hTIo1u zs3Q<^Y&e@z9L*Agqcp2K@<&*XEVCsGq;P!rbMzVU+!PsaUA-=qMN}1B<-s?m%xlO} zjv`eS%poH40tgZhbsS2<>T>y$@`(omul=# z72F+tJ$(ps~MiZ%4%G?Aq@FNlQ>xfjvHg^;#W!Z2Sv1#+!_Q^);z!+5>@) zSAjE+5H;j|+ws+=y>=%dyLhkAqrajI%gT*19HHX-CNgvnziuraNfP($La7P{9tfnb z?;hRoTT#iA6dq&v8iNnAS<~ycBc3?C)j9xi=rvtDfW!kIfjX&4zs;5>sZE;4x;=vg zswuoB30qrZ1a|Z*MU4lfHp~wZs5Zi>{H%ai0pG85ts5}VAp$2oQDW1-W-B|IAZX+g z^|*aMM_I$0W&~`{4=4rl(Cng<0pyT!Z?)ui%?~(+kD>2|6nS0uS}Xj(!G6X4;I7@u zq^_FtwD$Ms`t#!p&JtRjMNl8d-(=s2f{K^U)iCSG?HW;N^c5xzkWl*(1%*Q8 z0qRE-9B9vog2ppC85bjHY`-$Ur!zsW1h$(c5^p-E6Led|s_y9oAFGJKmS(f9;J@bG z%^*Xw&=nZnb`kh!&pmiP;K;&9$LK>OdUDzteRv6tM<3uDd`JJ0{<%sFH<4px!ReVv zoulwYUx3FBZ zUNPVr5uL7Py~^Dqwwv`DyFH^#C*DP?t|mN9!QY>lj@Z{8rkVm>gvokXJkf42$YCUJ{h=toPbH9_$I zVFPN}eFmLMP@kt$?!;BpS0wf$yLF2T*0En)e>DSpmEFA8c&9jiRlm)eSA_P1J9o;B z6D~V-=C9ta0TB%sh&=KNqe(qiynV;O)dDav^V5Mqk%S-S60dD$3KW4ALyQn+IXnS4 zse?1QZw-qmf4GDNWqVjeV-|v|02x*y;=X2Zu9aCdqSvM0gWq+k zq^0cFE4_DAD+pyd2Ry=jMQT^~VP**`^@Az_;oeBQL|XH#2K}kXi@QTp<{KUpBUAbVngLjE<%3n1^v&hjeHM)_zuApStN}Zr2cwN&}i{ z58?PQ0Exet!_if+=8icW1!A2Dn40n0xT*BATm Q8uHP_ug&>z-|yf52WIevWdHyG diff --git a/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt b/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt deleted file mode 100644 index 888d558196..0000000000 --- a/library/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt +++ /dev/null @@ -1,18 +0,0 @@ -HTML.AllowedElements -TYPE: lookup/null -VERSION: 1.3.0 -DEFAULT: NULL ---DESCRIPTION-- -

- If HTML Purifier's tag set is unsatisfactory for your needs, you - can overload it with your own list of tags to allow. Note that this - method is subtractive: it does its job by taking away from HTML Purifier - usual feature set, so you cannot add a tag that HTML Purifier never - supported in the first place (like embed, form or head). If you - change this, you probably also want to change %HTML.AllowedAttributes. -

-

- Warning: If another directive conflicts with the - elements here, that directive will win and override. -

---# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Context.php b/library/HTMLPurifier/Context.php deleted file mode 100644 index 9ddf0c5476..0000000000 --- a/library/HTMLPurifier/Context.php +++ /dev/null @@ -1,82 +0,0 @@ -_storage[$name])) { - trigger_error("Name $name produces collision, cannot re-register", - E_USER_ERROR); - return; - } - $this->_storage[$name] =& $ref; - } - - /** - * Retrieves a variable reference from the context. - * @param $name String name - * @param $ignore_error Boolean whether or not to ignore error - */ - public function &get($name, $ignore_error = false) { - if (!isset($this->_storage[$name])) { - if (!$ignore_error) { - trigger_error("Attempted to retrieve non-existent variable $name", - E_USER_ERROR); - } - $var = null; // so we can return by reference - return $var; - } - return $this->_storage[$name]; - } - - /** - * Destorys a variable in the context. - * @param $name String name - */ - public function destroy($name) { - if (!isset($this->_storage[$name])) { - trigger_error("Attempted to destroy non-existent variable $name", - E_USER_ERROR); - return; - } - unset($this->_storage[$name]); - } - - /** - * Checks whether or not the variable exists. - * @param $name String name - */ - public function exists($name) { - return isset($this->_storage[$name]); - } - - /** - * Loads a series of variables from an associative array - * @param $context_array Assoc array of variables to load - */ - public function loadArray($context_array) { - foreach ($context_array as $key => $discard) { - $this->register($key, $context_array[$key]); - } - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Definition.php b/library/HTMLPurifier/Definition.php deleted file mode 100644 index a7408c9749..0000000000 --- a/library/HTMLPurifier/Definition.php +++ /dev/null @@ -1,39 +0,0 @@ -setup) return; - $this->setup = true; - $this->doSetup($config); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/DefinitionCache/Decorator.php b/library/HTMLPurifier/DefinitionCache/Decorator.php deleted file mode 100644 index b0fb6d0cd6..0000000000 --- a/library/HTMLPurifier/DefinitionCache/Decorator.php +++ /dev/null @@ -1,62 +0,0 @@ -copy(); - // reference is necessary for mocks in PHP 4 - $decorator->cache =& $cache; - $decorator->type = $cache->type; - return $decorator; - } - - /** - * Cross-compatible clone substitute - */ - public function copy() { - return new HTMLPurifier_DefinitionCache_Decorator(); - } - - public function add($def, $config) { - return $this->cache->add($def, $config); - } - - public function set($def, $config) { - return $this->cache->set($def, $config); - } - - public function replace($def, $config) { - return $this->cache->replace($def, $config); - } - - public function get($config) { - return $this->cache->get($config); - } - - public function remove($config) { - return $this->cache->remove($config); - } - - public function flush($config) { - return $this->cache->flush($config); - } - - public function cleanup($config) { - return $this->cache->cleanup($config); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php b/library/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php deleted file mode 100644 index d4cc35c4bc..0000000000 --- a/library/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php +++ /dev/null @@ -1,43 +0,0 @@ -definitions[$this->generateKey($config)] = $def; - return $status; - } - - public function set($def, $config) { - $status = parent::set($def, $config); - if ($status) $this->definitions[$this->generateKey($config)] = $def; - return $status; - } - - public function replace($def, $config) { - $status = parent::replace($def, $config); - if ($status) $this->definitions[$this->generateKey($config)] = $def; - return $status; - } - - public function get($config) { - $key = $this->generateKey($config); - if (isset($this->definitions[$key])) return $this->definitions[$key]; - $this->definitions[$key] = parent::get($config); - return $this->definitions[$key]; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/DefinitionCache/Decorator/Template.php.in b/library/HTMLPurifier/DefinitionCache/Decorator/Template.php.in deleted file mode 100644 index 21a8fcfda2..0000000000 --- a/library/HTMLPurifier/DefinitionCache/Decorator/Template.php.in +++ /dev/null @@ -1,47 +0,0 @@ -checkDefType($def)) return; - $file = $this->generateFilePath($config); - if (file_exists($file)) return false; - if (!$this->_prepareDir($config)) return false; - return $this->_write($file, serialize($def)); - } - - public function set($def, $config) { - if (!$this->checkDefType($def)) return; - $file = $this->generateFilePath($config); - if (!$this->_prepareDir($config)) return false; - return $this->_write($file, serialize($def)); - } - - public function replace($def, $config) { - if (!$this->checkDefType($def)) return; - $file = $this->generateFilePath($config); - if (!file_exists($file)) return false; - if (!$this->_prepareDir($config)) return false; - return $this->_write($file, serialize($def)); - } - - public function get($config) { - $file = $this->generateFilePath($config); - if (!file_exists($file)) return false; - return unserialize(file_get_contents($file)); - } - - public function remove($config) { - $file = $this->generateFilePath($config); - if (!file_exists($file)) return false; - return unlink($file); - } - - public function flush($config) { - if (!$this->_prepareDir($config)) return false; - $dir = $this->generateDirectoryPath($config); - $dh = opendir($dir); - while (false !== ($filename = readdir($dh))) { - if (empty($filename)) continue; - if ($filename[0] === '.') continue; - unlink($dir . '/' . $filename); - } - } - - public function cleanup($config) { - if (!$this->_prepareDir($config)) return false; - $dir = $this->generateDirectoryPath($config); - $dh = opendir($dir); - while (false !== ($filename = readdir($dh))) { - if (empty($filename)) continue; - if ($filename[0] === '.') continue; - $key = substr($filename, 0, strlen($filename) - 4); - if ($this->isOld($key, $config)) unlink($dir . '/' . $filename); - } - } - - /** - * Generates the file path to the serial file corresponding to - * the configuration and definition name - * @todo Make protected - */ - public function generateFilePath($config) { - $key = $this->generateKey($config); - return $this->generateDirectoryPath($config) . '/' . $key . '.ser'; - } - - /** - * Generates the path to the directory contain this cache's serial files - * @note No trailing slash - * @todo Make protected - */ - public function generateDirectoryPath($config) { - $base = $this->generateBaseDirectoryPath($config); - return $base . '/' . $this->type; - } - - /** - * Generates path to base directory that contains all definition type - * serials - * @todo Make protected - */ - public function generateBaseDirectoryPath($config) { - $base = $config->get('Cache.SerializerPath'); - $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base; - return $base; - } - - /** - * Convenience wrapper function for file_put_contents - * @param $file File name to write to - * @param $data Data to write into file - * @return Number of bytes written if success, or false if failure. - */ - private function _write($file, $data) { - return file_put_contents($file, $data); - } - - /** - * Prepares the directory that this type stores the serials in - * @return True if successful - */ - private function _prepareDir($config) { - $directory = $this->generateDirectoryPath($config); - if (!is_dir($directory)) { - $base = $this->generateBaseDirectoryPath($config); - if (!is_dir($base)) { - trigger_error('Base directory '.$base.' does not exist, - please create or change using %Cache.SerializerPath', - E_USER_WARNING); - return false; - } elseif (!$this->_testPermissions($base)) { - return false; - } - $old = umask(0022); // disable group and world writes - mkdir($directory); - umask($old); - } elseif (!$this->_testPermissions($directory)) { - return false; - } - return true; - } - - /** - * Tests permissions on a directory and throws out friendly - * error messages and attempts to chmod it itself if possible - */ - private function _testPermissions($dir) { - // early abort, if it is writable, everything is hunky-dory - if (is_writable($dir)) return true; - if (!is_dir($dir)) { - // generally, you'll want to handle this beforehand - // so a more specific error message can be given - trigger_error('Directory '.$dir.' does not exist', - E_USER_WARNING); - return false; - } - if (function_exists('posix_getuid')) { - // POSIX system, we can give more specific advice - if (fileowner($dir) === posix_getuid()) { - // we can chmod it ourselves - chmod($dir, 0755); - return true; - } elseif (filegroup($dir) === posix_getgid()) { - $chmod = '775'; - } else { - // PHP's probably running as nobody, so we'll - // need to give global permissions - $chmod = '777'; - } - trigger_error('Directory '.$dir.' not writable, '. - 'please chmod to ' . $chmod, - E_USER_WARNING); - } else { - // generic error message - trigger_error('Directory '.$dir.' not writable, '. - 'please alter file permissions', - E_USER_WARNING); - } - return false; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/EntityLookup/entities.ser b/library/HTMLPurifier/EntityLookup/entities.ser deleted file mode 100644 index f2b8b8f2db..0000000000 --- a/library/HTMLPurifier/EntityLookup/entities.ser +++ /dev/null @@ -1 +0,0 @@ -a:246:{s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"­";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"‌";s:3:"zwj";s:3:"‍";s:3:"lrm";s:3:"‎";s:3:"rlm";s:3:"‏";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ";s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";} \ No newline at end of file diff --git a/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/library/HTMLPurifier/Filter/ExtractStyleBlocks.php deleted file mode 100644 index bbf78a6630..0000000000 --- a/library/HTMLPurifier/Filter/ExtractStyleBlocks.php +++ /dev/null @@ -1,135 +0,0 @@ - blocks from input HTML, cleans them up - * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks') - * so they can be used elsewhere in the document. - * - * @note - * See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for - * sample usage. - * - * @note - * This filter can also be used on stylesheets not included in the - * document--something purists would probably prefer. Just directly - * call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS() - */ -class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter -{ - - public $name = 'ExtractStyleBlocks'; - private $_styleMatches = array(); - private $_tidy; - - public function __construct() { - $this->_tidy = new csstidy(); - } - - /** - * Save the contents of CSS blocks to style matches - * @param $matches preg_replace style $matches array - */ - protected function styleCallback($matches) { - $this->_styleMatches[] = $matches[1]; - } - - /** - * Removes inline #isU', array($this, 'styleCallback'), $html); - $style_blocks = $this->_styleMatches; - $this->_styleMatches = array(); // reset - $context->register('StyleBlocks', $style_blocks); // $context must not be reused - if ($this->_tidy) { - foreach ($style_blocks as &$style) { - $style = $this->cleanCSS($style, $config, $context); - } - } - return $html; - } - - /** - * Takes CSS (the stuff found in in a font-family prop). - if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { - $css = str_replace( - array('<', '>', '&'), - array('\3C ', '\3E ', '\26 '), - $css - ); - } - return $css; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Filter/YouTube.php b/library/HTMLPurifier/Filter/YouTube.php deleted file mode 100644 index 23df221eaa..0000000000 --- a/library/HTMLPurifier/Filter/YouTube.php +++ /dev/null @@ -1,39 +0,0 @@ -]+>.+?'. - 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s'; - $pre_replace = '\1'; - return preg_replace($pre_regex, $pre_replace, $html); - } - - public function postFilter($html, $config, $context) { - $post_regex = '#((?:v|cp)/[A-Za-z0-9\-_=]+)#'; - return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); - } - - protected function armorUrl($url) { - return str_replace('--', '--', $url); - } - - protected function postFilterCallback($matches) { - $url = $this->armorUrl($matches[1]); - return ''. - ''. - ''. - ''; - - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Forms.php b/library/HTMLPurifier/HTMLModule/Forms.php deleted file mode 100644 index 44c22f6f8b..0000000000 --- a/library/HTMLPurifier/HTMLModule/Forms.php +++ /dev/null @@ -1,118 +0,0 @@ - 'Form', - 'Inline' => 'Formctrl', - ); - - public function setup($config) { - $form = $this->addElement('form', 'Form', - 'Required: Heading | List | Block | fieldset', 'Common', array( - 'accept' => 'ContentTypes', - 'accept-charset' => 'Charsets', - 'action*' => 'URI', - 'method' => 'Enum#get,post', - // really ContentType, but these two are the only ones used today - 'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data', - )); - $form->excludes = array('form' => true); - - $input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', array( - 'accept' => 'ContentTypes', - 'accesskey' => 'Character', - 'alt' => 'Text', - 'checked' => 'Bool#checked', - 'disabled' => 'Bool#disabled', - 'maxlength' => 'Number', - 'name' => 'CDATA', - 'readonly' => 'Bool#readonly', - 'size' => 'Number', - 'src' => 'URI#embeds', - 'tabindex' => 'Number', - 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', - 'value' => 'CDATA', - )); - $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input(); - - $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', array( - 'disabled' => 'Bool#disabled', - 'multiple' => 'Bool#multiple', - 'name' => 'CDATA', - 'size' => 'Number', - 'tabindex' => 'Number', - )); - - $this->addElement('option', false, 'Optional: #PCDATA', 'Common', array( - 'disabled' => 'Bool#disabled', - 'label' => 'Text', - 'selected' => 'Bool#selected', - 'value' => 'CDATA', - )); - // It's illegal for there to be more than one selected, but not - // be multiple. Also, no selected means undefined behavior. This might - // be difficult to implement; perhaps an injector, or a context variable. - - $textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array( - 'accesskey' => 'Character', - 'cols*' => 'Number', - 'disabled' => 'Bool#disabled', - 'name' => 'CDATA', - 'readonly' => 'Bool#readonly', - 'rows*' => 'Number', - 'tabindex' => 'Number', - )); - $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea(); - - $button = $this->addElement('button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array( - 'accesskey' => 'Character', - 'disabled' => 'Bool#disabled', - 'name' => 'CDATA', - 'tabindex' => 'Number', - 'type' => 'Enum#button,submit,reset', - 'value' => 'CDATA', - )); - - // For exclusions, ideally we'd specify content sets, not literal elements - $button->excludes = $this->makeLookup( - 'form', 'fieldset', // Form - 'input', 'select', 'textarea', 'label', 'button', // Formctrl - 'a' // as per HTML 4.01 spec, this is omitted by modularization - ); - - // Extra exclusion: img usemap="" is not permitted within this element. - // We'll omit this for now, since we don't have any good way of - // indicating it yet. - - // This is HIGHLY user-unfriendly; we need a custom child-def for this - $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common'); - - $label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array( - 'accesskey' => 'Character', - // 'for' => 'IDREF', // IDREF not implemented, cannot allow - )); - $label->excludes = array('label' => true); - - $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', array( - 'accesskey' => 'Character', - )); - - $this->addElement('optgroup', false, 'Required: option', 'Common', array( - 'disabled' => 'Bool#disabled', - 'label*' => 'Text', - )); - - // Don't forget an injector for . This one's a little complex - // because it maps to multiple elements. - - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/Tidy/Strict.php b/library/HTMLPurifier/HTMLModule/Tidy/Strict.php deleted file mode 100644 index c73dc3c4d1..0000000000 --- a/library/HTMLPurifier/HTMLModule/Tidy/Strict.php +++ /dev/null @@ -1,21 +0,0 @@ -content_model_type != 'strictblockquote') return parent::getChildDef($def); - return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/RemoveEmpty.php b/library/HTMLPurifier/Injector/RemoveEmpty.php deleted file mode 100644 index 638bfca03b..0000000000 --- a/library/HTMLPurifier/Injector/RemoveEmpty.php +++ /dev/null @@ -1,51 +0,0 @@ -config = $config; - $this->context = $context; - $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); - $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); - $this->attrValidator = new HTMLPurifier_AttrValidator(); - } - - public function handleElement(&$token) { - if (!$token instanceof HTMLPurifier_Token_Start) return; - $next = false; - for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { - $next = $this->inputTokens[$i]; - if ($next instanceof HTMLPurifier_Token_Text) { - if ($next->is_whitespace) continue; - if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { - $plain = str_replace("\xC2\xA0", "", $next->data); - $isWsOrNbsp = $plain === '' || ctype_space($plain); - if ($isWsOrNbsp) continue; - } - } - break; - } - if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { - if ($token->name == 'colgroup') return; - $this->attrValidator->validateToken($token, $this->config, $this->context); - $token->armor['ValidateAttributes'] = true; - if (isset($token->attr['id']) || isset($token->attr['name'])) return; - $token = $i - $this->inputIndex + 1; - for ($b = $this->inputIndex - 1; $b > 0; $b--) { - $prev = $this->inputTokens[$b]; - if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; - break; - } - // This is safe because we removed the token that triggered this. - $this->rewind($b - 1); - return; - } - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php deleted file mode 100644 index 8d7b5736bb..0000000000 --- a/library/HTMLPurifier/Language/messages/en.php +++ /dev/null @@ -1,63 +0,0 @@ - 'HTML Purifier', - -// for unit testing purposes -'LanguageFactoryTest: Pizza' => 'Pizza', -'LanguageTest: List' => '$1', -'LanguageTest: Hash' => '$1.Keys; $1.Values', - -'Item separator' => ', ', -'Item separator last' => ' and ', // non-Harvard style - -'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.', -'ErrorCollector: At line' => ' at line $line', -'ErrorCollector: Incidental errors' => 'Incidental errors', - -'Lexer: Unclosed comment' => 'Unclosed comment', -'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', -'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', -'Lexer: Missing attribute key' => 'Attribute declaration has no key', -'Lexer: Missing end quote' => 'Attribute declaration has no end quote', -'Lexer: Extracted body' => 'Removed document metadata tags', - -'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', -'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', -'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', -'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', -'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', -'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed', -'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end', -'Strategy_RemoveForeignElements: Trailing hyphen in comment removed' => 'Trailing hyphen(s) in comment removed', -'Strategy_RemoveForeignElements: Hyphens in comment collapsed' => 'Double hyphens in comments are not allowed, and were collapsed into single hyphens', - -'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', -'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text', -'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', -'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact', -'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', -'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', -'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', -'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document', - -'Strategy_FixNesting: Node removed' => '$CurrentToken.Compact node removed', -'Strategy_FixNesting: Node excluded' => '$CurrentToken.Compact node removed due to descendant exclusion by ancestor element', -'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model', -'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed', - -'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys', -'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed', - -); - -$errorNames = array( - E_ERROR => 'Error', - E_WARNING => 'Warning', - E_NOTICE => 'Notice' -); - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Lexer/PEARSax3.php b/library/HTMLPurifier/Lexer/PEARSax3.php deleted file mode 100644 index 1d358c7b6b..0000000000 --- a/library/HTMLPurifier/Lexer/PEARSax3.php +++ /dev/null @@ -1,139 +0,0 @@ -tokens = array(); - $this->last_token_was_empty = false; - - $string = $this->normalize($string, $config, $context); - - $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler')); - - $parser = new XML_HTMLSax3(); - $parser->set_object($this); - $parser->set_element_handler('openHandler','closeHandler'); - $parser->set_data_handler('dataHandler'); - $parser->set_escape_handler('escapeHandler'); - - // doesn't seem to work correctly for attributes - $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1); - - $parser->parse($string); - - restore_error_handler(); - - return $this->tokens; - - } - - /** - * Open tag event handler, interface is defined by PEAR package. - */ - public function openHandler(&$parser, $name, $attrs, $closed) { - // entities are not resolved in attrs - foreach ($attrs as $key => $attr) { - $attrs[$key] = $this->parseData($attr); - } - if ($closed) { - $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); - $this->last_token_was_empty = true; - } else { - $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); - } - $this->stack[] = $name; - return true; - } - - /** - * Close tag event handler, interface is defined by PEAR package. - */ - public function closeHandler(&$parser, $name) { - // HTMLSax3 seems to always send empty tags an extra close tag - // check and ignore if you see it: - // [TESTME] to make sure it doesn't overreach - if ($this->last_token_was_empty) { - $this->last_token_was_empty = false; - return true; - } - $this->tokens[] = new HTMLPurifier_Token_End($name); - if (!empty($this->stack)) array_pop($this->stack); - return true; - } - - /** - * Data event handler, interface is defined by PEAR package. - */ - public function dataHandler(&$parser, $data) { - $this->last_token_was_empty = false; - $this->tokens[] = new HTMLPurifier_Token_Text($data); - return true; - } - - /** - * Escaped text handler, interface is defined by PEAR package. - */ - public function escapeHandler(&$parser, $data) { - if (strpos($data, '--') === 0) { - // remove trailing and leading double-dashes - $data = substr($data, 2); - if (strlen($data) >= 2 && substr($data, -2) == "--") { - $data = substr($data, 0, -2); - } - if (isset($this->stack[sizeof($this->stack) - 1]) && - $this->stack[sizeof($this->stack) - 1] == "style") { - $this->tokens[] = new HTMLPurifier_Token_Text($data); - } else { - $this->tokens[] = new HTMLPurifier_Token_Comment($data); - } - $this->last_token_was_empty = false; - } - // CDATA is handled elsewhere, but if it was handled here: - //if (strpos($data, '[CDATA[') === 0) { - // $this->tokens[] = new HTMLPurifier_Token_Text( - // substr($data, 7, strlen($data) - 9) ); - //} - return true; - } - - /** - * An error handler that mutes strict errors - */ - public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) { - if ($errno == E_STRICT) return; - return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php deleted file mode 100644 index fa1bf973e0..0000000000 --- a/library/HTMLPurifier/Lexer/PH5P.php +++ /dev/null @@ -1,3906 +0,0 @@ -normalize($html, $config, $context); - $new_html = $this->wrapHTML($new_html, $config, $context); - try { - $parser = new HTML5($new_html); - $doc = $parser->save(); - } catch (DOMException $e) { - // Uh oh, it failed. Punt to DirectLex. - $lexer = new HTMLPurifier_Lexer_DirectLex(); - $context->register('PH5PError', $e); // save the error, so we can detect it - return $lexer->tokenizeHTML($html, $config, $context); // use original HTML - } - $tokens = array(); - $this->tokenizeDOM( - $doc->getElementsByTagName('html')->item(0)-> // - getElementsByTagName('body')->item(0)-> // - getElementsByTagName('div')->item(0) //
- , $tokens); - return $tokens; - } - -} - -/* - -Copyright 2007 Jeroen van der Meer - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -*/ - -class HTML5 { - private $data; - private $char; - private $EOF; - private $state; - private $tree; - private $token; - private $content_model; - private $escape = false; - private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', - 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', - 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', - 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', - 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', - 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', - 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', - 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', - 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', - 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', - 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', - 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', - 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', - 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', - 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', - 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', - 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', - 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', - 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', - 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', - 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', - 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', - 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', - 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', - 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', - 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', - 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', - 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', - 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', - 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', - 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', - 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', - 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', - 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', - 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', - 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', - 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', - 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', - 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', - 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', - 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', - 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); - - const PCDATA = 0; - const RCDATA = 1; - const CDATA = 2; - const PLAINTEXT = 3; - - const DOCTYPE = 0; - const STARTTAG = 1; - const ENDTAG = 2; - const COMMENT = 3; - const CHARACTR = 4; - const EOF = 5; - - public function __construct($data) { - $data = str_replace("\r\n", "\n", $data); - $data = str_replace("\r", null, $data); - - $this->data = $data; - $this->char = -1; - $this->EOF = strlen($data); - $this->tree = new HTML5TreeConstructer; - $this->content_model = self::PCDATA; - - $this->state = 'data'; - - while($this->state !== null) { - $this->{$this->state.'State'}(); - } - } - - public function save() { - return $this->tree->save(); - } - - private function char() { - return ($this->char < $this->EOF) - ? $this->data[$this->char] - : false; - } - - private function character($s, $l = 0) { - if($s + $l < $this->EOF) { - if($l === 0) { - return $this->data[$s]; - } else { - return substr($this->data, $s, $l); - } - } - } - - private function characters($char_class, $start) { - return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); - } - - private function dataState() { - // Consume the next input character - $this->char++; - $char = $this->char(); - - if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { - /* U+0026 AMPERSAND (&) - When the content model flag is set to one of the PCDATA or RCDATA - states: switch to the entity data state. Otherwise: treat it as per - the "anything else" entry below. */ - $this->state = 'entityData'; - - } elseif($char === '-') { - /* If the content model flag is set to either the RCDATA state or - the CDATA state, and the escape flag is false, and there are at - least three characters before this one in the input stream, and the - last four characters in the input stream, including this one, are - U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, - and U+002D HYPHEN-MINUS (""), - set the escape flag to false. */ - if(($this->content_model === self::RCDATA || - $this->content_model === self::CDATA) && $this->escape === true && - $this->character($this->char, 3) === '-->') { - $this->escape = false; - } - - /* In any case, emit the input character as a character token. - Stay in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => $char - )); - - } elseif($this->char === $this->EOF) { - /* EOF - Emit an end-of-file token. */ - $this->EOF(); - - } elseif($this->content_model === self::PLAINTEXT) { - /* When the content model flag is set to the PLAINTEXT state - THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of - the text and emit it as a character token. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => substr($this->data, $this->char) - )); - - $this->EOF(); - - } else { - /* Anything else - THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that - otherwise would also be treated as a character token and emit it - as a single character token. Stay in the data state. */ - $len = strcspn($this->data, '<&', $this->char); - $char = substr($this->data, $this->char, $len); - $this->char += $len - 1; - - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => $char - )); - - $this->state = 'data'; - } - } - - private function entityDataState() { - // Attempt to consume an entity. - $entity = $this->entity(); - - // If nothing is returned, emit a U+0026 AMPERSAND character token. - // Otherwise, emit the character token that was returned. - $char = (!$entity) ? '&' : $entity; - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => $char - )); - - // Finally, switch to the data state. - $this->state = 'data'; - } - - private function tagOpenState() { - switch($this->content_model) { - case self::RCDATA: - case self::CDATA: - /* If the next input character is a U+002F SOLIDUS (/) character, - consume it and switch to the close tag open state. If the next - input character is not a U+002F SOLIDUS (/) character, emit a - U+003C LESS-THAN SIGN character token and switch to the data - state to process the next input character. */ - if($this->character($this->char + 1) === '/') { - $this->char++; - $this->state = 'closeTagOpen'; - - } else { - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '<' - )); - - $this->state = 'data'; - } - break; - - case self::PCDATA: - // If the content model flag is set to the PCDATA state - // Consume the next input character: - $this->char++; - $char = $this->char(); - - if($char === '!') { - /* U+0021 EXCLAMATION MARK (!) - Switch to the markup declaration open state. */ - $this->state = 'markupDeclarationOpen'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the close tag open state. */ - $this->state = 'closeTagOpen'; - - } elseif(preg_match('/^[A-Za-z]$/', $char)) { - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z - Create a new start tag token, set its tag name to the lowercase - version of the input character (add 0x0020 to the character's code - point), then switch to the tag name state. (Don't emit the token - yet; further details will be filled in before it is emitted.) */ - $this->token = array( - 'name' => strtolower($char), - 'type' => self::STARTTAG, - 'attr' => array() - ); - - $this->state = 'tagName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Emit a U+003C LESS-THAN SIGN character token and a - U+003E GREATER-THAN SIGN character token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '<>' - )); - - $this->state = 'data'; - - } elseif($char === '?') { - /* U+003F QUESTION MARK (?) - Parse error. Switch to the bogus comment state. */ - $this->state = 'bogusComment'; - - } else { - /* Anything else - Parse error. Emit a U+003C LESS-THAN SIGN character token and - reconsume the current input character in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => '<' - )); - - $this->char--; - $this->state = 'data'; - } - break; - } - } - - private function closeTagOpenState() { - $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); - $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; - - if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && - (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', - $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { - /* If the content model flag is set to the RCDATA or CDATA states then - examine the next few characters. If they do not match the tag name of - the last start tag token emitted (case insensitively), or if they do but - they are not immediately followed by one of the following characters: - * U+0009 CHARACTER TABULATION - * U+000A LINE FEED (LF) - * U+000B LINE TABULATION - * U+000C FORM FEED (FF) - * U+0020 SPACE - * U+003E GREATER-THAN SIGN (>) - * U+002F SOLIDUS (/) - * EOF - ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character - token, a U+002F SOLIDUS character token, and switch to the data state - to process the next input character. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => 'state = 'data'; - - } else { - /* Otherwise, if the content model flag is set to the PCDATA state, - or if the next few characters do match that tag name, consume the - next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[A-Za-z]$/', $char)) { - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z - Create a new end tag token, set its tag name to the lowercase version - of the input character (add 0x0020 to the character's code point), then - switch to the tag name state. (Don't emit the token yet; further details - will be filled in before it is emitted.) */ - $this->token = array( - 'name' => strtolower($char), - 'type' => self::ENDTAG - ); - - $this->state = 'tagName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Switch to the data state. */ - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F - SOLIDUS character token. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTR, - 'data' => 'char--; - $this->state = 'data'; - - } else { - /* Parse error. Switch to the bogus comment state. */ - $this->state = 'bogusComment'; - } - } - } - - private function tagNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Switch to the before - attribute name state. */ - $this->state = 'beforeAttributeName'; - - } else { - /* Anything else - Append the current input character to the current tag token's tag name. - Stay in the tag name state. */ - $this->token['name'] .= strtolower($char); - $this->state = 'tagName'; - } - } - - private function beforeAttributeNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Stay in the before - attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Start a new attribute in the current tag token. Set that attribute's - name to the current input character, and its value to the empty string. - Switch to the attribute name state. */ - $this->token['attr'][] = array( - 'name' => strtolower($char), - 'value' => null - ); - - $this->state = 'attributeName'; - } - } - - private function attributeNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute name state. */ - $this->state = 'afterAttributeName'; - - } elseif($char === '=') { - /* U+003D EQUALS SIGN (=) - Switch to the before attribute value state. */ - $this->state = 'beforeAttributeValue'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Switch to the before - attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's name. - Stay in the attribute name state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['name'] .= strtolower($char); - - $this->state = 'attributeName'; - } - } - - private function afterAttributeNameState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after attribute name state. */ - $this->state = 'afterAttributeName'; - - } elseif($char === '=') { - /* U+003D EQUALS SIGN (=) - Switch to the before attribute value state. */ - $this->state = 'beforeAttributeValue'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '/' && $this->character($this->char + 1) !== '>') { - /* U+002F SOLIDUS (/) - Parse error unless this is a permitted slash. Switch to the - before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the EOF - character in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Start a new attribute in the current tag token. Set that attribute's - name to the current input character, and its value to the empty string. - Switch to the attribute name state. */ - $this->token['attr'][] = array( - 'name' => strtolower($char), - 'value' => null - ); - - $this->state = 'attributeName'; - } - } - - private function beforeAttributeValueState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute value state. */ - $this->state = 'beforeAttributeValue'; - - } elseif($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the attribute value (double-quoted) state. */ - $this->state = 'attributeValueDoubleQuoted'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the attribute value (unquoted) state and reconsume - this input character. */ - $this->char--; - $this->state = 'attributeValueUnquoted'; - - } elseif($char === '\'') { - /* U+0027 APOSTROPHE (') - Switch to the attribute value (single-quoted) state. */ - $this->state = 'attributeValueSingleQuoted'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Switch to the attribute value (unquoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueUnquoted'; - } - } - - private function attributeValueDoubleQuotedState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state. */ - $this->entityInAttributeValueState('double'); - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (double-quoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueDoubleQuoted'; - } - } - - private function attributeValueSingleQuotedState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if($char === '\'') { - /* U+0022 QUOTATION MARK (') - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state. */ - $this->entityInAttributeValueState('single'); - - } elseif($this->char === $this->EOF) { - /* EOF - Parse error. Emit the current tag token. Reconsume the character - in the data state. */ - $this->emitToken($this->token); - - $this->char--; - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (single-quoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueSingleQuoted'; - } - } - - private function attributeValueUnquotedState() { - // Consume the next input character: - $this->char++; - $char = $this->character($this->char); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000B LINE TABULATION - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. */ - $this->state = 'beforeAttributeName'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state. */ - $this->entityInAttributeValueState(); - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $this->state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (unquoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $this->state = 'attributeValueUnquoted'; - } - } - - private function entityInAttributeValueState() { - // Attempt to consume an entity. - $entity = $this->entity(); - - // If nothing is returned, append a U+0026 AMPERSAND character to the - // current attribute's value. Otherwise, emit the character token that - // was returned. - $char = (!$entity) - ? '&' - : $entity; - - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - } - - private function bogusCommentState() { - /* Consume every character up to the first U+003E GREATER-THAN SIGN - character (>) or the end of the file (EOF), whichever comes first. Emit - a comment token whose data is the concatenation of all the characters - starting from and including the character that caused the state machine - to switch into the bogus comment state, up to and including the last - consumed character before the U+003E character, if any, or up to the - end of the file otherwise. (If the comment was started by the end of - the file (EOF), the token is empty.) */ - $data = $this->characters('^>', $this->char); - $this->emitToken(array( - 'data' => $data, - 'type' => self::COMMENT - )); - - $this->char += strlen($data); - - /* Switch to the data state. */ - $this->state = 'data'; - - /* If the end of the file was reached, reconsume the EOF character. */ - if($this->char === $this->EOF) { - $this->char = $this->EOF - 1; - } - } - - private function markupDeclarationOpenState() { - /* If the next two characters are both U+002D HYPHEN-MINUS (-) - characters, consume those two characters, create a comment token whose - data is the empty string, and switch to the comment state. */ - if($this->character($this->char + 1, 2) === '--') { - $this->char += 2; - $this->state = 'comment'; - $this->token = array( - 'data' => null, - 'type' => self::COMMENT - ); - - /* Otherwise if the next seven chacacters are a case-insensitive match - for the word "DOCTYPE", then consume those characters and switch to the - DOCTYPE state. */ - } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { - $this->char += 7; - $this->state = 'doctype'; - - /* Otherwise, is is a parse error. Switch to the bogus comment state. - The next character that is consumed, if any, is the first character - that will be in the comment. */ - } else { - $this->char++; - $this->state = 'bogusComment'; - } - } - - private function commentState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - /* U+002D HYPHEN-MINUS (-) */ - if($char === '-') { - /* Switch to the comment dash state */ - $this->state = 'commentDash'; - - /* EOF */ - } elseif($this->char === $this->EOF) { - /* Parse error. Emit the comment token. Reconsume the EOF character - in the data state. */ - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - /* Anything else */ - } else { - /* Append the input character to the comment token's data. Stay in - the comment state. */ - $this->token['data'] .= $char; - } - } - - private function commentDashState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - /* U+002D HYPHEN-MINUS (-) */ - if($char === '-') { - /* Switch to the comment end state */ - $this->state = 'commentEnd'; - - /* EOF */ - } elseif($this->char === $this->EOF) { - /* Parse error. Emit the comment token. Reconsume the EOF character - in the data state. */ - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - /* Anything else */ - } else { - /* Append a U+002D HYPHEN-MINUS (-) character and the input - character to the comment token's data. Switch to the comment state. */ - $this->token['data'] .= '-'.$char; - $this->state = 'comment'; - } - } - - private function commentEndState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($char === '-') { - $this->token['data'] .= '-'; - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - $this->token['data'] .= '--'.$char; - $this->state = 'comment'; - } - } - - private function doctypeState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - $this->state = 'beforeDoctypeName'; - - } else { - $this->char--; - $this->state = 'beforeDoctypeName'; - } - } - - private function beforeDoctypeNameState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - // Stay in the before DOCTYPE name state. - - } elseif(preg_match('/^[a-z]$/', $char)) { - $this->token = array( - 'name' => strtoupper($char), - 'type' => self::DOCTYPE, - 'error' => true - ); - - $this->state = 'doctypeName'; - - } elseif($char === '>') { - $this->emitToken(array( - 'name' => null, - 'type' => self::DOCTYPE, - 'error' => true - )); - - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - $this->emitToken(array( - 'name' => null, - 'type' => self::DOCTYPE, - 'error' => true - )); - - $this->char--; - $this->state = 'data'; - - } else { - $this->token = array( - 'name' => $char, - 'type' => self::DOCTYPE, - 'error' => true - ); - - $this->state = 'doctypeName'; - } - } - - private function doctypeNameState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - $this->state = 'AfterDoctypeName'; - - } elseif($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif(preg_match('/^[a-z]$/', $char)) { - $this->token['name'] .= strtoupper($char); - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - $this->token['name'] .= $char; - } - - $this->token['error'] = ($this->token['name'] === 'HTML') - ? false - : true; - } - - private function afterDoctypeNameState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { - // Stay in the DOCTYPE name state. - - } elseif($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - $this->token['error'] = true; - $this->state = 'bogusDoctype'; - } - } - - private function bogusDoctypeState() { - /* Consume the next input character: */ - $this->char++; - $char = $this->char(); - - if($char === '>') { - $this->emitToken($this->token); - $this->state = 'data'; - - } elseif($this->char === $this->EOF) { - $this->emitToken($this->token); - $this->char--; - $this->state = 'data'; - - } else { - // Stay in the bogus DOCTYPE state. - } - } - - private function entity() { - $start = $this->char; - - // This section defines how to consume an entity. This definition is - // used when parsing entities in text and in attributes. - - // The behaviour depends on the identity of the next character (the - // one immediately after the U+0026 AMPERSAND character): - - switch($this->character($this->char + 1)) { - // U+0023 NUMBER SIGN (#) - case '#': - - // The behaviour further depends on the character after the - // U+0023 NUMBER SIGN: - switch($this->character($this->char + 1)) { - // U+0078 LATIN SMALL LETTER X - // U+0058 LATIN CAPITAL LETTER X - case 'x': - case 'X': - // Follow the steps below, but using the range of - // characters U+0030 DIGIT ZERO through to U+0039 DIGIT - // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 - // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER - // A, through to U+0046 LATIN CAPITAL LETTER F (in other - // words, 0-9, A-F, a-f). - $char = 1; - $char_class = '0-9A-Fa-f'; - break; - - // Anything else - default: - // Follow the steps below, but using the range of - // characters U+0030 DIGIT ZERO through to U+0039 DIGIT - // NINE (i.e. just 0-9). - $char = 0; - $char_class = '0-9'; - break; - } - - // Consume as many characters as match the range of characters - // given above. - $this->char++; - $e_name = $this->characters($char_class, $this->char + $char + 1); - $entity = $this->character($start, $this->char); - $cond = strlen($e_name) > 0; - - // The rest of the parsing happens bellow. - break; - - // Anything else - default: - // Consume the maximum number of characters possible, with the - // consumed characters case-sensitively matching one of the - // identifiers in the first column of the entities table. - $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); - $len = strlen($e_name); - - for($c = 1; $c <= $len; $c++) { - $id = substr($e_name, 0, $c); - $this->char++; - - if(in_array($id, $this->entities)) { - if ($e_name[$c-1] !== ';') { - if ($c < $len && $e_name[$c] == ';') { - $this->char++; // consume extra semicolon - } - } - $entity = $id; - break; - } - } - - $cond = isset($entity); - // The rest of the parsing happens bellow. - break; - } - - if(!$cond) { - // If no match can be made, then this is a parse error. No - // characters are consumed, and nothing is returned. - $this->char = $start; - return false; - } - - // Return a character token for the character corresponding to the - // entity name (as given by the second column of the entities table). - return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); - } - - private function emitToken($token) { - $emit = $this->tree->emitToken($token); - - if(is_int($emit)) { - $this->content_model = $emit; - - } elseif($token['type'] === self::ENDTAG) { - $this->content_model = self::PCDATA; - } - } - - private function EOF() { - $this->state = null; - $this->tree->emitToken(array( - 'type' => self::EOF - )); - } -} - -class HTML5TreeConstructer { - public $stack = array(); - - private $phase; - private $mode; - private $dom; - private $foster_parent = null; - private $a_formatting = array(); - - private $head_pointer = null; - private $form_pointer = null; - - private $scoping = array('button','caption','html','marquee','object','table','td','th'); - private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); - private $special = array('address','area','base','basefont','bgsound', - 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', - 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', - 'h6','head','hr','iframe','image','img','input','isindex','li','link', - 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', - 'option','p','param','plaintext','pre','script','select','spacer','style', - 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); - - // The different phases. - const INIT_PHASE = 0; - const ROOT_PHASE = 1; - const MAIN_PHASE = 2; - const END_PHASE = 3; - - // The different insertion modes for the main phase. - const BEFOR_HEAD = 0; - const IN_HEAD = 1; - const AFTER_HEAD = 2; - const IN_BODY = 3; - const IN_TABLE = 4; - const IN_CAPTION = 5; - const IN_CGROUP = 6; - const IN_TBODY = 7; - const IN_ROW = 8; - const IN_CELL = 9; - const IN_SELECT = 10; - const AFTER_BODY = 11; - const IN_FRAME = 12; - const AFTR_FRAME = 13; - - // The different types of elements. - const SPECIAL = 0; - const SCOPING = 1; - const FORMATTING = 2; - const PHRASING = 3; - - const MARKER = 0; - - public function __construct() { - $this->phase = self::INIT_PHASE; - $this->mode = self::BEFOR_HEAD; - $this->dom = new DOMDocument; - - $this->dom->encoding = 'UTF-8'; - $this->dom->preserveWhiteSpace = true; - $this->dom->substituteEntities = true; - $this->dom->strictErrorChecking = false; - } - - // Process tag tokens - public function emitToken($token) { - switch($this->phase) { - case self::INIT_PHASE: return $this->initPhase($token); break; - case self::ROOT_PHASE: return $this->rootElementPhase($token); break; - case self::MAIN_PHASE: return $this->mainPhase($token); break; - case self::END_PHASE : return $this->trailingEndPhase($token); break; - } - } - - private function initPhase($token) { - /* Initially, the tree construction stage must handle each token - emitted from the tokenisation stage as follows: */ - - /* A DOCTYPE token that is marked as being in error - A comment token - A start tag token - An end tag token - A character token that is not one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE - An end-of-file token */ - if((isset($token['error']) && $token['error']) || - $token['type'] === HTML5::COMMENT || - $token['type'] === HTML5::STARTTAG || - $token['type'] === HTML5::ENDTAG || - $token['type'] === HTML5::EOF || - ($token['type'] === HTML5::CHARACTR && isset($token['data']) && - !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { - /* This specification does not define how to handle this case. In - particular, user agents may ignore the entirety of this specification - altogether for such documents, and instead invoke special parse modes - with a greater emphasis on backwards compatibility. */ - - $this->phase = self::ROOT_PHASE; - return $this->rootElementPhase($token); - - /* A DOCTYPE token marked as being correct */ - } elseif(isset($token['error']) && !$token['error']) { - /* Append a DocumentType node to the Document node, with the name - attribute set to the name given in the DOCTYPE token (which will be - "HTML"), and the other attributes specific to DocumentType objects - set to null, empty lists, or the empty string as appropriate. */ - $doctype = new DOMDocumentType(null, null, 'HTML'); - - /* Then, switch to the root element phase of the tree construction - stage. */ - $this->phase = self::ROOT_PHASE; - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', - $token['data'])) { - /* Append that character to the Document node. */ - $text = $this->dom->createTextNode($token['data']); - $this->dom->appendChild($text); - } - } - - private function rootElementPhase($token) { - /* After the initial phase, as each token is emitted from the tokenisation - stage, it must be processed as described in this section. */ - - /* A DOCTYPE token */ - if($token['type'] === HTML5::DOCTYPE) { - // Parse error. Ignore the token. - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - $this->dom->appendChild($comment); - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - } elseif($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append that character to the Document node. */ - $text = $this->dom->createTextNode($token['data']); - $this->dom->appendChild($text); - - /* A character token that is not one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED - (FF), or U+0020 SPACE - A start tag token - An end tag token - An end-of-file token */ - } elseif(($token['type'] === HTML5::CHARACTR && - !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || - $token['type'] === HTML5::STARTTAG || - $token['type'] === HTML5::ENDTAG || - $token['type'] === HTML5::EOF) { - /* Create an HTMLElement node with the tag name html, in the HTML - namespace. Append it to the Document object. Switch to the main - phase and reprocess the current token. */ - $html = $this->dom->createElement('html'); - $this->dom->appendChild($html); - $this->stack[] = $html; - - $this->phase = self::MAIN_PHASE; - return $this->mainPhase($token); - } - } - - private function mainPhase($token) { - /* Tokens in the main phase must be handled as follows: */ - - /* A DOCTYPE token */ - if($token['type'] === HTML5::DOCTYPE) { - // Parse error. Ignore the token. - - /* A start tag token with the tag name "html" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { - /* If this start tag token was not the first start tag token, then - it is a parse error. */ - - /* For each attribute on the token, check to see if the attribute - is already present on the top element of the stack of open elements. - If it is not, add the attribute and its corresponding value to that - element. */ - foreach($token['attr'] as $attr) { - if(!$this->stack[0]->hasAttribute($attr['name'])) { - $this->stack[0]->setAttribute($attr['name'], $attr['value']); - } - } - - /* An end-of-file token */ - } elseif($token['type'] === HTML5::EOF) { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* Anything else. */ - } else { - /* Depends on the insertion mode: */ - switch($this->mode) { - case self::BEFOR_HEAD: return $this->beforeHead($token); break; - case self::IN_HEAD: return $this->inHead($token); break; - case self::AFTER_HEAD: return $this->afterHead($token); break; - case self::IN_BODY: return $this->inBody($token); break; - case self::IN_TABLE: return $this->inTable($token); break; - case self::IN_CAPTION: return $this->inCaption($token); break; - case self::IN_CGROUP: return $this->inColumnGroup($token); break; - case self::IN_TBODY: return $this->inTableBody($token); break; - case self::IN_ROW: return $this->inRow($token); break; - case self::IN_CELL: return $this->inCell($token); break; - case self::IN_SELECT: return $this->inSelect($token); break; - case self::AFTER_BODY: return $this->afterBody($token); break; - case self::IN_FRAME: return $this->inFrameset($token); break; - case self::AFTR_FRAME: return $this->afterFrameset($token); break; - case self::END_PHASE: return $this->trailingEndPhase($token); break; - } - } - } - - private function beforeHead($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag token with the tag name "head" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { - /* Create an element for the token, append the new element to the - current node and push it onto the stack of open elements. */ - $element = $this->insertElement($token); - - /* Set the head element pointer to this new element node. */ - $this->head_pointer = $element; - - /* Change the insertion mode to "in head". */ - $this->mode = self::IN_HEAD; - - /* A start tag token whose tag name is one of: "base", "link", "meta", - "script", "style", "title". Or an end tag with the tag name "html". - Or a character token that is not one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE. Or any other start tag token */ - } elseif($token['type'] === HTML5::STARTTAG || - ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || - ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', - $token['data']))) { - /* Act as if a start tag token with the tag name "head" and no - attributes had been seen, then reprocess the current token. */ - $this->beforeHead(array( - 'name' => 'head', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inHead($token); - - /* Any other end tag */ - } elseif($token['type'] === HTML5::ENDTAG) { - /* Parse error. Ignore the token. */ - } - } - - private function inHead($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE. - - THIS DIFFERS FROM THE SPEC: If the current node is either a title, style - or script element, append the character to the current node regardless - of its content. */ - if(($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( - $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, - array('title', 'style', 'script')))) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - } elseif($token['type'] === HTML5::ENDTAG && - in_array($token['name'], array('title', 'style', 'script'))) { - array_pop($this->stack); - return HTML5::PCDATA; - - /* A start tag with the tag name "title" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { - /* Create an element for the token and append the new element to the - node pointed to by the head element pointer, or, if that is null - (innerHTML case), to the current node. */ - if($this->head_pointer !== null) { - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - - } else { - $element = $this->insertElement($token); - } - - /* Switch the tokeniser's content model flag to the RCDATA state. */ - return HTML5::RCDATA; - - /* A start tag with the tag name "style" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { - /* Create an element for the token and append the new element to the - node pointed to by the head element pointer, or, if that is null - (innerHTML case), to the current node. */ - if($this->head_pointer !== null) { - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - - } else { - $this->insertElement($token); - } - - /* Switch the tokeniser's content model flag to the CDATA state. */ - return HTML5::CDATA; - - /* A start tag with the tag name "script" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { - /* Create an element for the token. */ - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - - /* Switch the tokeniser's content model flag to the CDATA state. */ - return HTML5::CDATA; - - /* A start tag with the tag name "base", "link", or "meta" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('base', 'link', 'meta'))) { - /* Create an element for the token and append the new element to the - node pointed to by the head element pointer, or, if that is null - (innerHTML case), to the current node. */ - if($this->head_pointer !== null) { - $element = $this->insertElement($token, false); - $this->head_pointer->appendChild($element); - array_pop($this->stack); - - } else { - $this->insertElement($token); - } - - /* An end tag with the tag name "head" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { - /* If the current node is a head element, pop the current node off - the stack of open elements. */ - if($this->head_pointer->isSameNode(end($this->stack))) { - array_pop($this->stack); - - /* Otherwise, this is a parse error. */ - } else { - // k - } - - /* Change the insertion mode to "after head". */ - $this->mode = self::AFTER_HEAD; - - /* A start tag with the tag name "head" or an end tag except "html". */ - } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || - ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { - // Parse error. Ignore the token. - - /* Anything else */ - } else { - /* If the current node is a head element, act as if an end tag - token with the tag name "head" had been seen. */ - if($this->head_pointer->isSameNode(end($this->stack))) { - $this->inHead(array( - 'name' => 'head', - 'type' => HTML5::ENDTAG - )); - - /* Otherwise, change the insertion mode to "after head". */ - } else { - $this->mode = self::AFTER_HEAD; - } - - /* Then, reprocess the current token. */ - return $this->afterHead($token); - } - } - - private function afterHead($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag token with the tag name "body" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { - /* Insert a body element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in body". */ - $this->mode = self::IN_BODY; - - /* A start tag token with the tag name "frameset" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { - /* Insert a frameset element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in frameset". */ - $this->mode = self::IN_FRAME; - - /* A start tag token whose tag name is one of: "base", "link", "meta", - "script", "style", "title" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('base', 'link', 'meta', 'script', 'style', 'title'))) { - /* Parse error. Switch the insertion mode back to "in head" and - reprocess the token. */ - $this->mode = self::IN_HEAD; - return $this->inHead($token); - - /* Anything else */ - } else { - /* Act as if a start tag token with the tag name "body" and no - attributes had been seen, and then reprocess the current token. */ - $this->afterHead(array( - 'name' => 'body', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inBody($token); - } - } - - private function inBody($token) { - /* Handle the token as follows: */ - - switch($token['type']) { - /* A character token */ - case HTML5::CHARACTR: - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Append the token's character to the current node. */ - $this->insertText($token['data']); - break; - - /* A comment token */ - case HTML5::COMMENT: - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - break; - - case HTML5::STARTTAG: - switch($token['name']) { - /* A start tag token whose tag name is one of: "script", - "style" */ - case 'script': case 'style': - /* Process the token as if the insertion mode had been "in - head". */ - return $this->inHead($token); - break; - - /* A start tag token whose tag name is one of: "base", "link", - "meta", "title" */ - case 'base': case 'link': case 'meta': case 'title': - /* Parse error. Process the token as if the insertion mode - had been "in head". */ - return $this->inHead($token); - break; - - /* A start tag token with the tag name "body" */ - case 'body': - /* Parse error. If the second element on the stack of open - elements is not a body element, or, if the stack of open - elements has only one node on it, then ignore the token. - (innerHTML case) */ - if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { - // Ignore - - /* Otherwise, for each attribute on the token, check to see - if the attribute is already present on the body element (the - second element) on the stack of open elements. If it is not, - add the attribute and its corresponding value to that - element. */ - } else { - foreach($token['attr'] as $attr) { - if(!$this->stack[1]->hasAttribute($attr['name'])) { - $this->stack[1]->setAttribute($attr['name'], $attr['value']); - } - } - } - break; - - /* A start tag whose tag name is one of: "address", - "blockquote", "center", "dir", "div", "dl", "fieldset", - "listing", "menu", "ol", "p", "ul" */ - case 'address': case 'blockquote': case 'center': case 'dir': - case 'div': case 'dl': case 'fieldset': case 'listing': - case 'menu': case 'ol': case 'p': case 'ul': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - break; - - /* A start tag whose tag name is "form" */ - case 'form': - /* If the form element pointer is not null, ignore the - token with a parse error. */ - if($this->form_pointer !== null) { - // Ignore. - - /* Otherwise: */ - } else { - /* If the stack of open elements has a p element in - scope, then act as if an end tag with the tag name p - had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token, and set the - form element pointer to point to the element created. */ - $element = $this->insertElement($token); - $this->form_pointer = $element; - } - break; - - /* A start tag whose tag name is "li", "dd" or "dt" */ - case 'li': case 'dd': case 'dt': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - $stack_length = count($this->stack) - 1; - - for($n = $stack_length; 0 <= $n; $n--) { - /* 1. Initialise node to be the current node (the - bottommost node of the stack). */ - $stop = false; - $node = $this->stack[$n]; - $cat = $this->getElementCategory($node->tagName); - - /* 2. If node is an li, dd or dt element, then pop all - the nodes from the current node up to node, including - node, then stop this algorithm. */ - if($token['name'] === $node->tagName || ($token['name'] !== 'li' - && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { - for($x = $stack_length; $x >= $n ; $x--) { - array_pop($this->stack); - } - - break; - } - - /* 3. If node is not in the formatting category, and is - not in the phrasing category, and is not an address or - div element, then stop this algorithm. */ - if($cat !== self::FORMATTING && $cat !== self::PHRASING && - $node->tagName !== 'address' && $node->tagName !== 'div') { - break; - } - } - - /* Finally, insert an HTML element with the same tag - name as the token's. */ - $this->insertElement($token); - break; - - /* A start tag token whose tag name is "plaintext" */ - case 'plaintext': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - return HTML5::PLAINTEXT; - break; - - /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", - "h5", "h6" */ - case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* If the stack of open elements has in scope an element whose - tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then - this is a parse error; pop elements from the stack until an - element with one of those tag names has been popped from the - stack. */ - while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { - array_pop($this->stack); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - break; - - /* A start tag whose tag name is "a" */ - case 'a': - /* If the list of active formatting elements contains - an element whose tag name is "a" between the end of the - list and the last marker on the list (or the start of - the list if there is no marker on the list), then this - is a parse error; act as if an end tag with the tag name - "a" had been seen, then remove that element from the list - of active formatting elements and the stack of open - elements if the end tag didn't already remove it (it - might not have if the element is not in table scope). */ - $leng = count($this->a_formatting); - - for($n = $leng - 1; $n >= 0; $n--) { - if($this->a_formatting[$n] === self::MARKER) { - break; - - } elseif($this->a_formatting[$n]->nodeName === 'a') { - $this->emitToken(array( - 'name' => 'a', - 'type' => HTML5::ENDTAG - )); - break; - } - } - - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $el = $this->insertElement($token); - - /* Add that element to the list of active formatting - elements. */ - $this->a_formatting[] = $el; - break; - - /* A start tag whose tag name is one of: "b", "big", "em", "font", - "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ - case 'b': case 'big': case 'em': case 'font': case 'i': - case 'nobr': case 's': case 'small': case 'strike': - case 'strong': case 'tt': case 'u': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $el = $this->insertElement($token); - - /* Add that element to the list of active formatting - elements. */ - $this->a_formatting[] = $el; - break; - - /* A start tag token whose tag name is "button" */ - case 'button': - /* If the stack of open elements has a button element in scope, - then this is a parse error; act as if an end tag with the tag - name "button" had been seen, then reprocess the token. (We don't - do that. Unnecessary.) */ - if($this->elementInScope('button')) { - $this->inBody(array( - 'name' => 'button', - 'type' => HTML5::ENDTAG - )); - } - - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - break; - - /* A start tag token whose tag name is one of: "marquee", "object" */ - case 'marquee': case 'object': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - break; - - /* A start tag token whose tag name is "xmp" */ - case 'xmp': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Switch the content model flag to the CDATA state. */ - return HTML5::CDATA; - break; - - /* A start tag whose tag name is "table" */ - case 'table': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in table". */ - $this->mode = self::IN_TABLE; - break; - - /* A start tag whose tag name is one of: "area", "basefont", - "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ - case 'area': case 'basefont': case 'bgsound': case 'br': - case 'embed': case 'img': case 'param': case 'spacer': - case 'wbr': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - break; - - /* A start tag whose tag name is "hr" */ - case 'hr': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - break; - - /* A start tag whose tag name is "image" */ - case 'image': - /* Parse error. Change the token's tag name to "img" and - reprocess it. (Don't ask.) */ - $token['name'] = 'img'; - return $this->inBody($token); - break; - - /* A start tag whose tag name is "input" */ - case 'input': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an input element for the token. */ - $element = $this->insertElement($token, false); - - /* If the form element pointer is not null, then associate the - input element with the form element pointed to by the form - element pointer. */ - $this->form_pointer !== null - ? $this->form_pointer->appendChild($element) - : end($this->stack)->appendChild($element); - - /* Pop that input element off the stack of open elements. */ - array_pop($this->stack); - break; - - /* A start tag whose tag name is "isindex" */ - case 'isindex': - /* Parse error. */ - // w/e - - /* If the form element pointer is not null, - then ignore the token. */ - if($this->form_pointer === null) { - /* Act as if a start tag token with the tag name "form" had - been seen. */ - $this->inBody(array( - 'name' => 'body', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a start tag token with the tag name "hr" had - been seen. */ - $this->inBody(array( - 'name' => 'hr', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a start tag token with the tag name "p" had - been seen. */ - $this->inBody(array( - 'name' => 'p', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a start tag token with the tag name "label" - had been seen. */ - $this->inBody(array( - 'name' => 'label', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - /* Act as if a stream of character tokens had been seen. */ - $this->insertText('This is a searchable index. '. - 'Insert your search keywords here: '); - - /* Act as if a start tag token with the tag name "input" - had been seen, with all the attributes from the "isindex" - token, except with the "name" attribute set to the value - "isindex" (ignoring any explicit "name" attribute). */ - $attr = $token['attr']; - $attr[] = array('name' => 'name', 'value' => 'isindex'); - - $this->inBody(array( - 'name' => 'input', - 'type' => HTML5::STARTTAG, - 'attr' => $attr - )); - - /* Act as if a stream of character tokens had been seen - (see below for what they should say). */ - $this->insertText('This is a searchable index. '. - 'Insert your search keywords here: '); - - /* Act as if an end tag token with the tag name "label" - had been seen. */ - $this->inBody(array( - 'name' => 'label', - 'type' => HTML5::ENDTAG - )); - - /* Act as if an end tag token with the tag name "p" had - been seen. */ - $this->inBody(array( - 'name' => 'p', - 'type' => HTML5::ENDTAG - )); - - /* Act as if a start tag token with the tag name "hr" had - been seen. */ - $this->inBody(array( - 'name' => 'hr', - 'type' => HTML5::ENDTAG - )); - - /* Act as if an end tag token with the tag name "form" had - been seen. */ - $this->inBody(array( - 'name' => 'form', - 'type' => HTML5::ENDTAG - )); - } - break; - - /* A start tag whose tag name is "textarea" */ - case 'textarea': - $this->insertElement($token); - - /* Switch the tokeniser's content model flag to the - RCDATA state. */ - return HTML5::RCDATA; - break; - - /* A start tag whose tag name is one of: "iframe", "noembed", - "noframes" */ - case 'iframe': case 'noembed': case 'noframes': - $this->insertElement($token); - - /* Switch the tokeniser's content model flag to the CDATA state. */ - return HTML5::CDATA; - break; - - /* A start tag whose tag name is "select" */ - case 'select': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in select". */ - $this->mode = self::IN_SELECT; - break; - - /* A start or end tag whose tag name is one of: "caption", "col", - "colgroup", "frame", "frameset", "head", "option", "optgroup", - "tbody", "td", "tfoot", "th", "thead", "tr". */ - case 'caption': case 'col': case 'colgroup': case 'frame': - case 'frameset': case 'head': case 'option': case 'optgroup': - case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': - case 'tr': - // Parse error. Ignore the token. - break; - - /* A start or end tag whose tag name is one of: "event-source", - "section", "nav", "article", "aside", "header", "footer", - "datagrid", "command" */ - case 'event-source': case 'section': case 'nav': case 'article': - case 'aside': case 'header': case 'footer': case 'datagrid': - case 'command': - // Work in progress! - break; - - /* A start tag token not covered by the previous entries */ - default: - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - $this->insertElement($token, true, true); - break; - } - break; - - case HTML5::ENDTAG: - switch($token['name']) { - /* An end tag with the tag name "body" */ - case 'body': - /* If the second element in the stack of open elements is - not a body element, this is a parse error. Ignore the token. - (innerHTML case) */ - if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { - // Ignore. - - /* If the current node is not the body element, then this - is a parse error. */ - } elseif(end($this->stack)->nodeName !== 'body') { - // Parse error. - } - - /* Change the insertion mode to "after body". */ - $this->mode = self::AFTER_BODY; - break; - - /* An end tag with the tag name "html" */ - case 'html': - /* Act as if an end tag with tag name "body" had been seen, - then, if that token wasn't ignored, reprocess the current - token. */ - $this->inBody(array( - 'name' => 'body', - 'type' => HTML5::ENDTAG - )); - - return $this->afterBody($token); - break; - - /* An end tag whose tag name is one of: "address", "blockquote", - "center", "dir", "div", "dl", "fieldset", "listing", "menu", - "ol", "pre", "ul" */ - case 'address': case 'blockquote': case 'center': case 'dir': - case 'div': case 'dl': case 'fieldset': case 'listing': - case 'menu': case 'ol': case 'pre': case 'ul': - /* If the stack of open elements has an element in scope - with the same tag name as that of the token, then generate - implied end tags. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(); - - /* Now, if the current node is not an element with - the same tag name as that of the token, then this - is a parse error. */ - // w/e - - /* If the stack of open elements has an element in - scope with the same tag name as that of the token, - then pop elements from this stack until an element - with that tag name has been popped from the stack. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === $token['name']) { - $n = -1; - } - - array_pop($this->stack); - } - } - break; - - /* An end tag whose tag name is "form" */ - case 'form': - /* If the stack of open elements has an element in scope - with the same tag name as that of the token, then generate - implied end tags. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(); - - } - - if(end($this->stack)->nodeName !== $token['name']) { - /* Now, if the current node is not an element with the - same tag name as that of the token, then this is a parse - error. */ - // w/e - - } else { - /* Otherwise, if the current node is an element with - the same tag name as that of the token pop that element - from the stack. */ - array_pop($this->stack); - } - - /* In any case, set the form element pointer to null. */ - $this->form_pointer = null; - break; - - /* An end tag whose tag name is "p" */ - case 'p': - /* If the stack of open elements has a p element in scope, - then generate implied end tags, except for p elements. */ - if($this->elementInScope('p')) { - $this->generateImpliedEndTags(array('p')); - - /* If the current node is not a p element, then this is - a parse error. */ - // k - - /* If the stack of open elements has a p element in - scope, then pop elements from this stack until the stack - no longer has a p element in scope. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->elementInScope('p')) { - array_pop($this->stack); - - } else { - break; - } - } - } - break; - - /* An end tag whose tag name is "dd", "dt", or "li" */ - case 'dd': case 'dt': case 'li': - /* If the stack of open elements has an element in scope - whose tag name matches the tag name of the token, then - generate implied end tags, except for elements with the - same tag name as the token. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(array($token['name'])); - - /* If the current node is not an element with the same - tag name as the token, then this is a parse error. */ - // w/e - - /* If the stack of open elements has an element in scope - whose tag name matches the tag name of the token, then - pop elements from this stack until an element with that - tag name has been popped from the stack. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === $token['name']) { - $n = -1; - } - - array_pop($this->stack); - } - } - break; - - /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", - "h5", "h6" */ - case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': - $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); - - /* If the stack of open elements has in scope an element whose - tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then - generate implied end tags. */ - if($this->elementInScope($elements)) { - $this->generateImpliedEndTags(); - - /* Now, if the current node is not an element with the same - tag name as that of the token, then this is a parse error. */ - // w/e - - /* If the stack of open elements has in scope an element - whose tag name is one of "h1", "h2", "h3", "h4", "h5", or - "h6", then pop elements from the stack until an element - with one of those tag names has been popped from the stack. */ - while($this->elementInScope($elements)) { - array_pop($this->stack); - } - } - break; - - /* An end tag whose tag name is one of: "a", "b", "big", "em", - "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ - case 'a': case 'b': case 'big': case 'em': case 'font': - case 'i': case 'nobr': case 's': case 'small': case 'strike': - case 'strong': case 'tt': case 'u': - /* 1. Let the formatting element be the last element in - the list of active formatting elements that: - * is between the end of the list and the last scope - marker in the list, if any, or the start of the list - otherwise, and - * has the same tag name as the token. - */ - while(true) { - for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { - if($this->a_formatting[$a] === self::MARKER) { - break; - - } elseif($this->a_formatting[$a]->tagName === $token['name']) { - $formatting_element = $this->a_formatting[$a]; - $in_stack = in_array($formatting_element, $this->stack, true); - $fe_af_pos = $a; - break; - } - } - - /* If there is no such node, or, if that node is - also in the stack of open elements but the element - is not in scope, then this is a parse error. Abort - these steps. The token is ignored. */ - if(!isset($formatting_element) || ($in_stack && - !$this->elementInScope($token['name']))) { - break; - - /* Otherwise, if there is such a node, but that node - is not in the stack of open elements, then this is a - parse error; remove the element from the list, and - abort these steps. */ - } elseif(isset($formatting_element) && !$in_stack) { - unset($this->a_formatting[$fe_af_pos]); - $this->a_formatting = array_merge($this->a_formatting); - break; - } - - /* 2. Let the furthest block be the topmost node in the - stack of open elements that is lower in the stack - than the formatting element, and is not an element in - the phrasing or formatting categories. There might - not be one. */ - $fe_s_pos = array_search($formatting_element, $this->stack, true); - $length = count($this->stack); - - for($s = $fe_s_pos + 1; $s < $length; $s++) { - $category = $this->getElementCategory($this->stack[$s]->nodeName); - - if($category !== self::PHRASING && $category !== self::FORMATTING) { - $furthest_block = $this->stack[$s]; - } - } - - /* 3. If there is no furthest block, then the UA must - skip the subsequent steps and instead just pop all - the nodes from the bottom of the stack of open - elements, from the current node up to the formatting - element, and remove the formatting element from the - list of active formatting elements. */ - if(!isset($furthest_block)) { - for($n = $length - 1; $n >= $fe_s_pos; $n--) { - array_pop($this->stack); - } - - unset($this->a_formatting[$fe_af_pos]); - $this->a_formatting = array_merge($this->a_formatting); - break; - } - - /* 4. Let the common ancestor be the element - immediately above the formatting element in the stack - of open elements. */ - $common_ancestor = $this->stack[$fe_s_pos - 1]; - - /* 5. If the furthest block has a parent node, then - remove the furthest block from its parent node. */ - if($furthest_block->parentNode !== null) { - $furthest_block->parentNode->removeChild($furthest_block); - } - - /* 6. Let a bookmark note the position of the - formatting element in the list of active formatting - elements relative to the elements on either side - of it in the list. */ - $bookmark = $fe_af_pos; - - /* 7. Let node and last node be the furthest block. - Follow these steps: */ - $node = $furthest_block; - $last_node = $furthest_block; - - while(true) { - for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { - /* 7.1 Let node be the element immediately - prior to node in the stack of open elements. */ - $node = $this->stack[$n]; - - /* 7.2 If node is not in the list of active - formatting elements, then remove node from - the stack of open elements and then go back - to step 1. */ - if(!in_array($node, $this->a_formatting, true)) { - unset($this->stack[$n]); - $this->stack = array_merge($this->stack); - - } else { - break; - } - } - - /* 7.3 Otherwise, if node is the formatting - element, then go to the next step in the overall - algorithm. */ - if($node === $formatting_element) { - break; - - /* 7.4 Otherwise, if last node is the furthest - block, then move the aforementioned bookmark to - be immediately after the node in the list of - active formatting elements. */ - } elseif($last_node === $furthest_block) { - $bookmark = array_search($node, $this->a_formatting, true) + 1; - } - - /* 7.5 If node has any children, perform a - shallow clone of node, replace the entry for - node in the list of active formatting elements - with an entry for the clone, replace the entry - for node in the stack of open elements with an - entry for the clone, and let node be the clone. */ - if($node->hasChildNodes()) { - $clone = $node->cloneNode(); - $s_pos = array_search($node, $this->stack, true); - $a_pos = array_search($node, $this->a_formatting, true); - - $this->stack[$s_pos] = $clone; - $this->a_formatting[$a_pos] = $clone; - $node = $clone; - } - - /* 7.6 Insert last node into node, first removing - it from its previous parent node if any. */ - if($last_node->parentNode !== null) { - $last_node->parentNode->removeChild($last_node); - } - - $node->appendChild($last_node); - - /* 7.7 Let last node be node. */ - $last_node = $node; - } - - /* 8. Insert whatever last node ended up being in - the previous step into the common ancestor node, - first removing it from its previous parent node if - any. */ - if($last_node->parentNode !== null) { - $last_node->parentNode->removeChild($last_node); - } - - $common_ancestor->appendChild($last_node); - - /* 9. Perform a shallow clone of the formatting - element. */ - $clone = $formatting_element->cloneNode(); - - /* 10. Take all of the child nodes of the furthest - block and append them to the clone created in the - last step. */ - while($furthest_block->hasChildNodes()) { - $child = $furthest_block->firstChild; - $furthest_block->removeChild($child); - $clone->appendChild($child); - } - - /* 11. Append that clone to the furthest block. */ - $furthest_block->appendChild($clone); - - /* 12. Remove the formatting element from the list - of active formatting elements, and insert the clone - into the list of active formatting elements at the - position of the aforementioned bookmark. */ - $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); - unset($this->a_formatting[$fe_af_pos]); - $this->a_formatting = array_merge($this->a_formatting); - - $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); - $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); - $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); - - /* 13. Remove the formatting element from the stack - of open elements, and insert the clone into the stack - of open elements immediately after (i.e. in a more - deeply nested position than) the position of the - furthest block in that stack. */ - $fe_s_pos = array_search($formatting_element, $this->stack, true); - $fb_s_pos = array_search($furthest_block, $this->stack, true); - unset($this->stack[$fe_s_pos]); - - $s_part1 = array_slice($this->stack, 0, $fb_s_pos); - $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); - $this->stack = array_merge($s_part1, array($clone), $s_part2); - - /* 14. Jump back to step 1 in this series of steps. */ - unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); - } - break; - - /* An end tag token whose tag name is one of: "button", - "marquee", "object" */ - case 'button': case 'marquee': case 'object': - /* If the stack of open elements has an element in scope whose - tag name matches the tag name of the token, then generate implied - tags. */ - if($this->elementInScope($token['name'])) { - $this->generateImpliedEndTags(); - - /* Now, if the current node is not an element with the same - tag name as the token, then this is a parse error. */ - // k - - /* Now, if the stack of open elements has an element in scope - whose tag name matches the tag name of the token, then pop - elements from the stack until that element has been popped from - the stack, and clear the list of active formatting elements up - to the last marker. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === $token['name']) { - $n = -1; - } - - array_pop($this->stack); - } - - $marker = end(array_keys($this->a_formatting, self::MARKER, true)); - - for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { - array_pop($this->a_formatting); - } - } - break; - - /* Or an end tag whose tag name is one of: "area", "basefont", - "bgsound", "br", "embed", "hr", "iframe", "image", "img", - "input", "isindex", "noembed", "noframes", "param", "select", - "spacer", "table", "textarea", "wbr" */ - case 'area': case 'basefont': case 'bgsound': case 'br': - case 'embed': case 'hr': case 'iframe': case 'image': - case 'img': case 'input': case 'isindex': case 'noembed': - case 'noframes': case 'param': case 'select': case 'spacer': - case 'table': case 'textarea': case 'wbr': - // Parse error. Ignore the token. - break; - - /* An end tag token not covered by the previous entries */ - default: - for($n = count($this->stack) - 1; $n >= 0; $n--) { - /* Initialise node to be the current node (the bottommost - node of the stack). */ - $node = end($this->stack); - - /* If node has the same tag name as the end tag token, - then: */ - if($token['name'] === $node->nodeName) { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* If the tag name of the end tag token does not - match the tag name of the current node, this is a - parse error. */ - // k - - /* Pop all the nodes from the current node up to - node, including node, then stop this algorithm. */ - for($x = count($this->stack) - $n; $x >= $n; $x--) { - array_pop($this->stack); - } - - } else { - $category = $this->getElementCategory($node); - - if($category !== self::SPECIAL && $category !== self::SCOPING) { - /* Otherwise, if node is in neither the formatting - category nor the phrasing category, then this is a - parse error. Stop this algorithm. The end tag token - is ignored. */ - return false; - } - } - } - break; - } - break; - } - } - - private function inTable($token) { - $clear = array('html', 'table'); - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $text = $this->dom->createTextNode($token['data']); - end($this->stack)->appendChild($text); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - end($this->stack)->appendChild($comment); - - /* A start tag whose tag name is "caption" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'caption') { - /* Clear the stack back to a table context. */ - $this->clearStackToTableContext($clear); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - - /* Insert an HTML element for the token, then switch the - insertion mode to "in caption". */ - $this->insertElement($token); - $this->mode = self::IN_CAPTION; - - /* A start tag whose tag name is "colgroup" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'colgroup') { - /* Clear the stack back to a table context. */ - $this->clearStackToTableContext($clear); - - /* Insert an HTML element for the token, then switch the - insertion mode to "in column group". */ - $this->insertElement($token); - $this->mode = self::IN_CGROUP; - - /* A start tag whose tag name is "col" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'col') { - $this->inTable(array( - 'name' => 'colgroup', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - $this->inColumnGroup($token); - - /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('tbody', 'tfoot', 'thead'))) { - /* Clear the stack back to a table context. */ - $this->clearStackToTableContext($clear); - - /* Insert an HTML element for the token, then switch the insertion - mode to "in table body". */ - $this->insertElement($token); - $this->mode = self::IN_TBODY; - - /* A start tag whose tag name is one of: "td", "th", "tr" */ - } elseif($token['type'] === HTML5::STARTTAG && - in_array($token['name'], array('td', 'th', 'tr'))) { - /* Act as if a start tag token with the tag name "tbody" had been - seen, then reprocess the current token. */ - $this->inTable(array( - 'name' => 'tbody', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inTableBody($token); - - /* A start tag whose tag name is "table" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'table') { - /* Parse error. Act as if an end tag token with the tag name "table" - had been seen, then, if that token wasn't ignored, reprocess the - current token. */ - $this->inTable(array( - 'name' => 'table', - 'type' => HTML5::ENDTAG - )); - - return $this->mainPhase($token); - - /* An end tag whose tag name is "table" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'table') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - return false; - - /* Otherwise: */ - } else { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* Now, if the current node is not a table element, then this - is a parse error. */ - // w/e - - /* Pop elements from this stack until a table element has been - popped from the stack. */ - while(true) { - $current = end($this->stack)->nodeName; - array_pop($this->stack); - - if($current === 'table') { - break; - } - } - - /* Reset the insertion mode appropriately. */ - $this->resetInsertionMode(); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', - 'tfoot', 'th', 'thead', 'tr'))) { - // Parse error. Ignore the token. - - /* Anything else */ - } else { - /* Parse error. Process the token as if the insertion mode was "in - body", with the following exception: */ - - /* If the current node is a table, tbody, tfoot, thead, or tr - element, then, whenever a node would be inserted into the current - node, it must instead be inserted into the foster parent element. */ - if(in_array(end($this->stack)->nodeName, - array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { - /* The foster parent element is the parent element of the last - table element in the stack of open elements, if there is a - table element and it has such a parent element. If there is no - table element in the stack of open elements (innerHTML case), - then the foster parent element is the first element in the - stack of open elements (the html element). Otherwise, if there - is a table element in the stack of open elements, but the last - table element in the stack of open elements has no parent, or - its parent node is not an element, then the foster parent - element is the element before the last table element in the - stack of open elements. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === 'table') { - $table = $this->stack[$n]; - break; - } - } - - if(isset($table) && $table->parentNode !== null) { - $this->foster_parent = $table->parentNode; - - } elseif(!isset($table)) { - $this->foster_parent = $this->stack[0]; - - } elseif(isset($table) && ($table->parentNode === null || - $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { - $this->foster_parent = $this->stack[$n - 1]; - } - } - - $this->inBody($token); - } - } - - private function inCaption($token) { - /* An end tag whose tag name is "caption" */ - if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore - - /* Otherwise: */ - } else { - /* Generate implied end tags. */ - $this->generateImpliedEndTags(); - - /* Now, if the current node is not a caption element, then this - is a parse error. */ - // w/e - - /* Pop elements from this stack until a caption element has - been popped from the stack. */ - while(true) { - $node = end($this->stack)->nodeName; - array_pop($this->stack); - - if($node === 'caption') { - break; - } - } - - /* Clear the list of active formatting elements up to the last - marker. */ - $this->clearTheActiveFormattingElementsUpToTheLastMarker(); - - /* Switch the insertion mode to "in table". */ - $this->mode = self::IN_TABLE; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag - name is "table" */ - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', - 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && - $token['name'] === 'table')) { - /* Parse error. Act as if an end tag with the tag name "caption" - had been seen, then, if that token wasn't ignored, reprocess the - current token. */ - $this->inCaption(array( - 'name' => 'caption', - 'type' => HTML5::ENDTAG - )); - - return $this->inTable($token); - - /* An end tag whose tag name is one of: "body", "col", "colgroup", - "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', - 'thead', 'tr'))) { - // Parse error. Ignore the token. - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in body". */ - $this->inBody($token); - } - } - - private function inColumnGroup($token) { - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $text = $this->dom->createTextNode($token['data']); - end($this->stack)->appendChild($text); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - end($this->stack)->appendChild($comment); - - /* A start tag whose tag name is "col" */ - } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { - /* Insert a col element for the token. Immediately pop the current - node off the stack of open elements. */ - $this->insertElement($token); - array_pop($this->stack); - - /* An end tag whose tag name is "colgroup" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'colgroup') { - /* If the current node is the root html element, then this is a - parse error, ignore the token. (innerHTML case) */ - if(end($this->stack)->nodeName === 'html') { - // Ignore - - /* Otherwise, pop the current node (which will be a colgroup - element) from the stack of open elements. Switch the insertion - mode to "in table". */ - } else { - array_pop($this->stack); - $this->mode = self::IN_TABLE; - } - - /* An end tag whose tag name is "col" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { - /* Parse error. Ignore the token. */ - - /* Anything else */ - } else { - /* Act as if an end tag with the tag name "colgroup" had been seen, - and then, if that token wasn't ignored, reprocess the current token. */ - $this->inColumnGroup(array( - 'name' => 'colgroup', - 'type' => HTML5::ENDTAG - )); - - return $this->inTable($token); - } - } - - private function inTableBody($token) { - $clear = array('tbody', 'tfoot', 'thead', 'html'); - - /* A start tag whose tag name is "tr" */ - if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { - /* Clear the stack back to a table body context. */ - $this->clearStackToTableContext($clear); - - /* Insert a tr element for the token, then switch the insertion - mode to "in row". */ - $this->insertElement($token); - $this->mode = self::IN_ROW; - - /* A start tag whose tag name is one of: "th", "td" */ - } elseif($token['type'] === HTML5::STARTTAG && - ($token['name'] === 'th' || $token['name'] === 'td')) { - /* Parse error. Act as if a start tag with the tag name "tr" had - been seen, then reprocess the current token. */ - $this->inTableBody(array( - 'name' => 'tr', - 'type' => HTML5::STARTTAG, - 'attr' => array() - )); - - return $this->inRow($token); - - /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ - } elseif($token['type'] === HTML5::ENDTAG && - in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore - - /* Otherwise: */ - } else { - /* Clear the stack back to a table body context. */ - $this->clearStackToTableContext($clear); - - /* Pop the current node from the stack of open elements. Switch - the insertion mode to "in table". */ - array_pop($this->stack); - $this->mode = self::IN_TABLE; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ - } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || - ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { - /* If the stack of open elements does not have a tbody, thead, or - tfoot element in table scope, this is a parse error. Ignore the - token. (innerHTML case) */ - if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Clear the stack back to a table body context. */ - $this->clearStackToTableContext($clear); - - /* Act as if an end tag with the same tag name as the current - node ("tbody", "tfoot", or "thead") had been seen, then - reprocess the current token. */ - $this->inTableBody(array( - 'name' => end($this->stack)->nodeName, - 'type' => HTML5::ENDTAG - )); - - return $this->mainPhase($token); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { - /* Parse error. Ignore the token. */ - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in table". */ - $this->inTable($token); - } - } - - private function inRow($token) { - $clear = array('tr', 'html'); - - /* A start tag whose tag name is one of: "th", "td" */ - if($token['type'] === HTML5::STARTTAG && - ($token['name'] === 'th' || $token['name'] === 'td')) { - /* Clear the stack back to a table row context. */ - $this->clearStackToTableContext($clear); - - /* Insert an HTML element for the token, then switch the insertion - mode to "in cell". */ - $this->insertElement($token); - $this->mode = self::IN_CELL; - - /* Insert a marker at the end of the list of active formatting - elements. */ - $this->a_formatting[] = self::MARKER; - - /* An end tag whose tag name is "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Clear the stack back to a table row context. */ - $this->clearStackToTableContext($clear); - - /* Pop the current node (which will be a tr element) from the - stack of open elements. Switch the insertion mode to "in table - body". */ - array_pop($this->stack); - $this->mode = self::IN_TBODY; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { - /* Act as if an end tag with the tag name "tr" had been seen, then, - if that token wasn't ignored, reprocess the current token. */ - $this->inRow(array( - 'name' => 'tr', - 'type' => HTML5::ENDTAG - )); - - return $this->inCell($token); - - /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ - } elseif($token['type'] === HTML5::ENDTAG && - in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Otherwise, act as if an end tag with the tag name "tr" had - been seen, then reprocess the current token. */ - $this->inRow(array( - 'name' => 'tr', - 'type' => HTML5::ENDTAG - )); - - return $this->inCell($token); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html", "td", "th" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { - /* Parse error. Ignore the token. */ - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in table". */ - $this->inTable($token); - } - } - - private function inCell($token) { - /* An end tag whose tag name is one of: "td", "th" */ - if($token['type'] === HTML5::ENDTAG && - ($token['name'] === 'td' || $token['name'] === 'th')) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as that of the token, then this is a - parse error and the token must be ignored. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise: */ - } else { - /* Generate implied end tags, except for elements with the same - tag name as the token. */ - $this->generateImpliedEndTags(array($token['name'])); - - /* Now, if the current node is not an element with the same tag - name as the token, then this is a parse error. */ - // k - - /* Pop elements from this stack until an element with the same - tag name as the token has been popped from the stack. */ - while(true) { - $node = end($this->stack)->nodeName; - array_pop($this->stack); - - if($node === $token['name']) { - break; - } - } - - /* Clear the list of active formatting elements up to the last - marker. */ - $this->clearTheActiveFormattingElementsUpToTheLastMarker(); - - /* Switch the insertion mode to "in row". (The current node - will be a tr element at this point.) */ - $this->mode = self::IN_ROW; - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', - 'thead', 'tr'))) { - /* If the stack of open elements does not have a td or th element - in table scope, then this is a parse error; ignore the token. - (innerHTML case) */ - if(!$this->elementInScope(array('td', 'th'), true)) { - // Ignore. - - /* Otherwise, close the cell (see below) and reprocess the current - token. */ - } else { - $this->closeCell(); - return $this->inRow($token); - } - - /* A start tag whose tag name is one of: "caption", "col", "colgroup", - "tbody", "td", "tfoot", "th", "thead", "tr" */ - } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], - array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', - 'thead', 'tr'))) { - /* If the stack of open elements does not have a td or th element - in table scope, then this is a parse error; ignore the token. - (innerHTML case) */ - if(!$this->elementInScope(array('td', 'th'), true)) { - // Ignore. - - /* Otherwise, close the cell (see below) and reprocess the current - token. */ - } else { - $this->closeCell(); - return $this->inRow($token); - } - - /* An end tag whose tag name is one of: "body", "caption", "col", - "colgroup", "html" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('body', 'caption', 'col', 'colgroup', 'html'))) { - /* Parse error. Ignore the token. */ - - /* An end tag whose tag name is one of: "table", "tbody", "tfoot", - "thead", "tr" */ - } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], - array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { - /* If the stack of open elements does not have an element in table - scope with the same tag name as that of the token (which can only - happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), - then this is a parse error and the token must be ignored. */ - if(!$this->elementInScope($token['name'], true)) { - // Ignore. - - /* Otherwise, close the cell (see below) and reprocess the current - token. */ - } else { - $this->closeCell(); - return $this->inRow($token); - } - - /* Anything else */ - } else { - /* Process the token as if the insertion mode was "in body". */ - $this->inBody($token); - } - } - - private function inSelect($token) { - /* Handle the token as follows: */ - - /* A character token */ - if($token['type'] === HTML5::CHARACTR) { - /* Append the token's character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag token whose tag name is "option" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'option') { - /* If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. */ - if(end($this->stack)->nodeName === 'option') { - $this->inSelect(array( - 'name' => 'option', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* A start tag token whose tag name is "optgroup" */ - } elseif($token['type'] === HTML5::STARTTAG && - $token['name'] === 'optgroup') { - /* If the current node is an option element, act as if an end tag - with the tag name "option" had been seen. */ - if(end($this->stack)->nodeName === 'option') { - $this->inSelect(array( - 'name' => 'option', - 'type' => HTML5::ENDTAG - )); - } - - /* If the current node is an optgroup element, act as if an end tag - with the tag name "optgroup" had been seen. */ - if(end($this->stack)->nodeName === 'optgroup') { - $this->inSelect(array( - 'name' => 'optgroup', - 'type' => HTML5::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* An end tag token whose tag name is "optgroup" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'optgroup') { - /* First, if the current node is an option element, and the node - immediately before it in the stack of open elements is an optgroup - element, then act as if an end tag with the tag name "option" had - been seen. */ - $elements_in_stack = count($this->stack); - - if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && - $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { - $this->inSelect(array( - 'name' => 'option', - 'type' => HTML5::ENDTAG - )); - } - - /* If the current node is an optgroup element, then pop that node - from the stack of open elements. Otherwise, this is a parse error, - ignore the token. */ - if($this->stack[$elements_in_stack - 1] === 'optgroup') { - array_pop($this->stack); - } - - /* An end tag token whose tag name is "option" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'option') { - /* If the current node is an option element, then pop that node - from the stack of open elements. Otherwise, this is a parse error, - ignore the token. */ - if(end($this->stack)->nodeName === 'option') { - array_pop($this->stack); - } - - /* An end tag whose tag name is "select" */ - } elseif($token['type'] === HTML5::ENDTAG && - $token['name'] === 'select') { - /* If the stack of open elements does not have an element in table - scope with the same tag name as the token, this is a parse error. - Ignore the token. (innerHTML case) */ - if(!$this->elementInScope($token['name'], true)) { - // w/e - - /* Otherwise: */ - } else { - /* Pop elements from the stack of open elements until a select - element has been popped from the stack. */ - while(true) { - $current = end($this->stack)->nodeName; - array_pop($this->stack); - - if($current === 'select') { - break; - } - } - - /* Reset the insertion mode appropriately. */ - $this->resetInsertionMode(); - } - - /* A start tag whose tag name is "select" */ - } elseif($token['name'] === 'select' && - $token['type'] === HTML5::STARTTAG) { - /* Parse error. Act as if the token had been an end tag with the - tag name "select" instead. */ - $this->inSelect(array( - 'name' => 'select', - 'type' => HTML5::ENDTAG - )); - - /* An end tag whose tag name is one of: "caption", "table", "tbody", - "tfoot", "thead", "tr", "td", "th" */ - } elseif(in_array($token['name'], array('caption', 'table', 'tbody', - 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { - /* Parse error. */ - // w/e - - /* If the stack of open elements has an element in table scope with - the same tag name as that of the token, then act as if an end tag - with the tag name "select" had been seen, and reprocess the token. - Otherwise, ignore the token. */ - if($this->elementInScope($token['name'], true)) { - $this->inSelect(array( - 'name' => 'select', - 'type' => HTML5::ENDTAG - )); - - $this->mainPhase($token); - } - - /* Anything else */ - } else { - /* Parse error. Ignore the token. */ - } - } - - private function afterBody($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Process the token as it would be processed if the insertion mode - was "in body". */ - $this->inBody($token); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the first element in the stack of open - elements (the html element), with the data attribute set to the - data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - $this->stack[0]->appendChild($comment); - - /* An end tag with the tag name "html" */ - } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { - /* If the parser was originally created in order to handle the - setting of an element's innerHTML attribute, this is a parse error; - ignore the token. (The element will be an html element in this - case.) (innerHTML case) */ - - /* Otherwise, switch to the trailing end phase. */ - $this->phase = self::END_PHASE; - - /* Anything else */ - } else { - /* Parse error. Set the insertion mode to "in body" and reprocess - the token. */ - $this->mode = self::IN_BODY; - return $this->inBody($token); - } - } - - private function inFrameset($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A start tag with the tag name "frameset" */ - } elseif($token['name'] === 'frameset' && - $token['type'] === HTML5::STARTTAG) { - $this->insertElement($token); - - /* An end tag with the tag name "frameset" */ - } elseif($token['name'] === 'frameset' && - $token['type'] === HTML5::ENDTAG) { - /* If the current node is the root html element, then this is a - parse error; ignore the token. (innerHTML case) */ - if(end($this->stack)->nodeName === 'html') { - // Ignore - - } else { - /* Otherwise, pop the current node from the stack of open - elements. */ - array_pop($this->stack); - - /* If the parser was not originally created in order to handle - the setting of an element's innerHTML attribute (innerHTML case), - and the current node is no longer a frameset element, then change - the insertion mode to "after frameset". */ - $this->mode = self::AFTR_FRAME; - } - - /* A start tag with the tag name "frame" */ - } elseif($token['name'] === 'frame' && - $token['type'] === HTML5::STARTTAG) { - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - - /* A start tag with the tag name "noframes" */ - } elseif($token['name'] === 'noframes' && - $token['type'] === HTML5::STARTTAG) { - /* Process the token as if the insertion mode had been "in body". */ - $this->inBody($token); - - /* Anything else */ - } else { - /* Parse error. Ignore the token. */ - } - } - - private function afterFrameset($token) { - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ - if($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* An end tag with the tag name "html" */ - } elseif($token['name'] === 'html' && - $token['type'] === HTML5::ENDTAG) { - /* Switch to the trailing end phase. */ - $this->phase = self::END_PHASE; - - /* A start tag with the tag name "noframes" */ - } elseif($token['name'] === 'noframes' && - $token['type'] === HTML5::STARTTAG) { - /* Process the token as if the insertion mode had been "in body". */ - $this->inBody($token); - - /* Anything else */ - } else { - /* Parse error. Ignore the token. */ - } - } - - private function trailingEndPhase($token) { - /* After the main phase, as each token is emitted from the tokenisation - stage, it must be processed as described in this section. */ - - /* A DOCTYPE token */ - if($token['type'] === HTML5::DOCTYPE) { - // Parse error. Ignore the token. - - /* A comment token */ - } elseif($token['type'] === HTML5::COMMENT) { - /* Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. */ - $comment = $this->dom->createComment($token['data']); - $this->dom->appendChild($comment); - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - } elseif($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { - /* Process the token as it would be processed in the main phase. */ - $this->mainPhase($token); - - /* A character token that is not one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE. Or a start tag token. Or an end tag token. */ - } elseif(($token['type'] === HTML5::CHARACTR && - preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || - $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { - /* Parse error. Switch back to the main phase and reprocess the - token. */ - $this->phase = self::MAIN_PHASE; - return $this->mainPhase($token); - - /* An end-of-file token */ - } elseif($token['type'] === HTML5::EOF) { - /* OMG DONE!! */ - } - } - - private function insertElement($token, $append = true, $check = false) { - // Proprietary workaround for libxml2's limitations with tag names - if ($check) { - // Slightly modified HTML5 tag-name modification, - // removing anything that's not an ASCII letter, digit, or hyphen - $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); - // Remove leading hyphens and numbers - $token['name'] = ltrim($token['name'], '-0..9'); - // In theory, this should ever be needed, but just in case - if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice - } - - $el = $this->dom->createElement($token['name']); - - foreach($token['attr'] as $attr) { - if(!$el->hasAttribute($attr['name'])) { - $el->setAttribute($attr['name'], $attr['value']); - } - } - - $this->appendToRealParent($el); - $this->stack[] = $el; - - return $el; - } - - private function insertText($data) { - $text = $this->dom->createTextNode($data); - $this->appendToRealParent($text); - } - - private function insertComment($data) { - $comment = $this->dom->createComment($data); - $this->appendToRealParent($comment); - } - - private function appendToRealParent($node) { - if($this->foster_parent === null) { - end($this->stack)->appendChild($node); - - } elseif($this->foster_parent !== null) { - /* If the foster parent element is the parent element of the - last table element in the stack of open elements, then the new - node must be inserted immediately before the last table element - in the stack of open elements in the foster parent element; - otherwise, the new node must be appended to the foster parent - element. */ - for($n = count($this->stack) - 1; $n >= 0; $n--) { - if($this->stack[$n]->nodeName === 'table' && - $this->stack[$n]->parentNode !== null) { - $table = $this->stack[$n]; - break; - } - } - - if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) - $this->foster_parent->insertBefore($node, $table); - else - $this->foster_parent->appendChild($node); - - $this->foster_parent = null; - } - } - - private function elementInScope($el, $table = false) { - if(is_array($el)) { - foreach($el as $element) { - if($this->elementInScope($element, $table)) { - return true; - } - } - - return false; - } - - $leng = count($this->stack); - - for($n = 0; $n < $leng; $n++) { - /* 1. Initialise node to be the current node (the bottommost node of - the stack). */ - $node = $this->stack[$leng - 1 - $n]; - - if($node->tagName === $el) { - /* 2. If node is the target node, terminate in a match state. */ - return true; - - } elseif($node->tagName === 'table') { - /* 3. Otherwise, if node is a table element, terminate in a failure - state. */ - return false; - - } elseif($table === true && in_array($node->tagName, array('caption', 'td', - 'th', 'button', 'marquee', 'object'))) { - /* 4. Otherwise, if the algorithm is the "has an element in scope" - variant (rather than the "has an element in table scope" variant), - and node is one of the following, terminate in a failure state. */ - return false; - - } elseif($node === $node->ownerDocument->documentElement) { - /* 5. Otherwise, if node is an html element (root element), terminate - in a failure state. (This can only happen if the node is the topmost - node of the stack of open elements, and prevents the next step from - being invoked if there are no more elements in the stack.) */ - return false; - } - - /* Otherwise, set node to the previous entry in the stack of open - elements and return to step 2. (This will never fail, since the loop - will always terminate in the previous step if the top of the stack - is reached.) */ - } - } - - private function reconstructActiveFormattingElements() { - /* 1. If there are no entries in the list of active formatting elements, - then there is nothing to reconstruct; stop this algorithm. */ - $formatting_elements = count($this->a_formatting); - - if($formatting_elements === 0) { - return false; - } - - /* 3. Let entry be the last (most recently added) element in the list - of active formatting elements. */ - $entry = end($this->a_formatting); - - /* 2. If the last (most recently added) entry in the list of active - formatting elements is a marker, or if it is an element that is in the - stack of open elements, then there is nothing to reconstruct; stop this - algorithm. */ - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { - return false; - } - - for($a = $formatting_elements - 1; $a >= 0; true) { - /* 4. If there are no entries before entry in the list of active - formatting elements, then jump to step 8. */ - if($a === 0) { - $step_seven = false; - break; - } - - /* 5. Let entry be the entry one earlier than entry in the list of - active formatting elements. */ - $a--; - $entry = $this->a_formatting[$a]; - - /* 6. If entry is neither a marker nor an element that is also in - thetack of open elements, go to step 4. */ - if($entry === self::MARKER || in_array($entry, $this->stack, true)) { - break; - } - } - - while(true) { - /* 7. Let entry be the element one later than entry in the list of - active formatting elements. */ - if(isset($step_seven) && $step_seven === true) { - $a++; - $entry = $this->a_formatting[$a]; - } - - /* 8. Perform a shallow clone of the element entry to obtain clone. */ - $clone = $entry->cloneNode(); - - /* 9. Append clone to the current node and push it onto the stack - of open elements so that it is the new current node. */ - end($this->stack)->appendChild($clone); - $this->stack[] = $clone; - - /* 10. Replace the entry for entry in the list with an entry for - clone. */ - $this->a_formatting[$a] = $clone; - - /* 11. If the entry for clone in the list of active formatting - elements is not the last entry in the list, return to step 7. */ - if(end($this->a_formatting) !== $clone) { - $step_seven = true; - } else { - break; - } - } - } - - private function clearTheActiveFormattingElementsUpToTheLastMarker() { - /* When the steps below require the UA to clear the list of active - formatting elements up to the last marker, the UA must perform the - following steps: */ - - while(true) { - /* 1. Let entry be the last (most recently added) entry in the list - of active formatting elements. */ - $entry = end($this->a_formatting); - - /* 2. Remove entry from the list of active formatting elements. */ - array_pop($this->a_formatting); - - /* 3. If entry was a marker, then stop the algorithm at this point. - The list has been cleared up to the last marker. */ - if($entry === self::MARKER) { - break; - } - } - } - - private function generateImpliedEndTags($exclude = array()) { - /* When the steps below require the UA to generate implied end tags, - then, if the current node is a dd element, a dt element, an li element, - a p element, a td element, a th element, or a tr element, the UA must - act as if an end tag with the respective tag name had been seen and - then generate implied end tags again. */ - $node = end($this->stack); - $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); - - while(in_array(end($this->stack)->nodeName, $elements)) { - array_pop($this->stack); - } - } - - private function getElementCategory($node) { - $name = $node->tagName; - if(in_array($name, $this->special)) - return self::SPECIAL; - - elseif(in_array($name, $this->scoping)) - return self::SCOPING; - - elseif(in_array($name, $this->formatting)) - return self::FORMATTING; - - else - return self::PHRASING; - } - - private function clearStackToTableContext($elements) { - /* When the steps above require the UA to clear the stack back to a - table context, it means that the UA must, while the current node is not - a table element or an html element, pop elements from the stack of open - elements. If this causes any elements to be popped from the stack, then - this is a parse error. */ - while(true) { - $node = end($this->stack)->nodeName; - - if(in_array($node, $elements)) { - break; - } else { - array_pop($this->stack); - } - } - } - - private function resetInsertionMode() { - /* 1. Let last be false. */ - $last = false; - $leng = count($this->stack); - - for($n = $leng - 1; $n >= 0; $n--) { - /* 2. Let node be the last node in the stack of open elements. */ - $node = $this->stack[$n]; - - /* 3. If node is the first node in the stack of open elements, then - set last to true. If the element whose innerHTML attribute is being - set is neither a td element nor a th element, then set node to the - element whose innerHTML attribute is being set. (innerHTML case) */ - if($this->stack[0]->isSameNode($node)) { - $last = true; - } - - /* 4. If node is a select element, then switch the insertion mode to - "in select" and abort these steps. (innerHTML case) */ - if($node->nodeName === 'select') { - $this->mode = self::IN_SELECT; - break; - - /* 5. If node is a td or th element, then switch the insertion mode - to "in cell" and abort these steps. */ - } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { - $this->mode = self::IN_CELL; - break; - - /* 6. If node is a tr element, then switch the insertion mode to - "in row" and abort these steps. */ - } elseif($node->nodeName === 'tr') { - $this->mode = self::IN_ROW; - break; - - /* 7. If node is a tbody, thead, or tfoot element, then switch the - insertion mode to "in table body" and abort these steps. */ - } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { - $this->mode = self::IN_TBODY; - break; - - /* 8. If node is a caption element, then switch the insertion mode - to "in caption" and abort these steps. */ - } elseif($node->nodeName === 'caption') { - $this->mode = self::IN_CAPTION; - break; - - /* 9. If node is a colgroup element, then switch the insertion mode - to "in column group" and abort these steps. (innerHTML case) */ - } elseif($node->nodeName === 'colgroup') { - $this->mode = self::IN_CGROUP; - break; - - /* 10. If node is a table element, then switch the insertion mode - to "in table" and abort these steps. */ - } elseif($node->nodeName === 'table') { - $this->mode = self::IN_TABLE; - break; - - /* 11. If node is a head element, then switch the insertion mode - to "in body" ("in body"! not "in head"!) and abort these steps. - (innerHTML case) */ - } elseif($node->nodeName === 'head') { - $this->mode = self::IN_BODY; - break; - - /* 12. If node is a body element, then switch the insertion mode to - "in body" and abort these steps. */ - } elseif($node->nodeName === 'body') { - $this->mode = self::IN_BODY; - break; - - /* 13. If node is a frameset element, then switch the insertion - mode to "in frameset" and abort these steps. (innerHTML case) */ - } elseif($node->nodeName === 'frameset') { - $this->mode = self::IN_FRAME; - break; - - /* 14. If node is an html element, then: if the head element - pointer is null, switch the insertion mode to "before head", - otherwise, switch the insertion mode to "after head". In either - case, abort these steps. (innerHTML case) */ - } elseif($node->nodeName === 'html') { - $this->mode = ($this->head_pointer === null) - ? self::BEFOR_HEAD - : self::AFTER_HEAD; - - break; - - /* 15. If last is true, then set the insertion mode to "in body" - and abort these steps. (innerHTML case) */ - } elseif($last) { - $this->mode = self::IN_BODY; - break; - } - } - } - - private function closeCell() { - /* If the stack of open elements has a td or th element in table scope, - then act as if an end tag token with that tag name had been seen. */ - foreach(array('td', 'th') as $cell) { - if($this->elementInScope($cell, true)) { - $this->inCell(array( - 'name' => $cell, - 'type' => HTML5::ENDTAG - )); - - break; - } - } - } - - public function save() { - return $this->dom; - } -} -?> diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php deleted file mode 100644 index f81802391b..0000000000 --- a/library/HTMLPurifier/Strategy/FixNesting.php +++ /dev/null @@ -1,328 +0,0 @@ -getHTMLDefinition(); - - // insert implicit "parent" node, will be removed at end. - // DEFINITION CALL - $parent_name = $definition->info_parent; - array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name)); - $tokens[] = new HTMLPurifier_Token_End($parent_name); - - // setup the context variable 'IsInline', for chameleon processing - // is 'false' when we are not inline, 'true' when it must always - // be inline, and an integer when it is inline for a certain - // branch of the document tree - $is_inline = $definition->info_parent_def->descendants_are_inline; - $context->register('IsInline', $is_inline); - - // setup error collector - $e =& $context->get('ErrorCollector', true); - - //####################################################################// - // Loop initialization - - // stack that contains the indexes of all parents, - // $stack[count($stack)-1] being the current parent - $stack = array(); - - // stack that contains all elements that are excluded - // it is organized by parent elements, similar to $stack, - // but it is only populated when an element with exclusions is - // processed, i.e. there won't be empty exclusions. - $exclude_stack = array(); - - // variable that contains the start token while we are processing - // nodes. This enables error reporting to do its job - $start_token = false; - $context->register('CurrentToken', $start_token); - - //####################################################################// - // Loop - - // iterate through all start nodes. Determining the start node - // is complicated so it has been omitted from the loop construct - for ($i = 0, $size = count($tokens) ; $i < $size; ) { - - //################################################################// - // Gather information on children - - // child token accumulator - $child_tokens = array(); - - // scroll to the end of this node, report number, and collect - // all children - for ($j = $i, $depth = 0; ; $j++) { - if ($tokens[$j] instanceof HTMLPurifier_Token_Start) { - $depth++; - // skip token assignment on first iteration, this is the - // token we currently are on - if ($depth == 1) continue; - } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) { - $depth--; - // skip token assignment on last iteration, this is the - // end token of the token we're currently on - if ($depth == 0) break; - } - $child_tokens[] = $tokens[$j]; - } - - // $i is index of start token - // $j is index of end token - - $start_token = $tokens[$i]; // to make token available via CurrentToken - - //################################################################// - // Gather information on parent - - // calculate parent information - if ($count = count($stack)) { - $parent_index = $stack[$count-1]; - $parent_name = $tokens[$parent_index]->name; - if ($parent_index == 0) { - $parent_def = $definition->info_parent_def; - } else { - $parent_def = $definition->info[$parent_name]; - } - } else { - // processing as if the parent were the "root" node - // unknown info, it won't be used anyway, in the future, - // we may want to enforce one element only (this is - // necessary for HTML Purifier to clean entire documents - $parent_index = $parent_name = $parent_def = null; - } - - // calculate context - if ($is_inline === false) { - // check if conditions make it inline - if (!empty($parent_def) && $parent_def->descendants_are_inline) { - $is_inline = $count - 1; - } - } else { - // check if we're out of inline - if ($count === $is_inline) { - $is_inline = false; - } - } - - //################################################################// - // Determine whether element is explicitly excluded SGML-style - - // determine whether or not element is excluded by checking all - // parent exclusions. The array should not be very large, two - // elements at most. - $excluded = false; - if (!empty($exclude_stack)) { - foreach ($exclude_stack as $lookup) { - if (isset($lookup[$tokens[$i]->name])) { - $excluded = true; - // no need to continue processing - break; - } - } - } - - //################################################################// - // Perform child validation - - if ($excluded) { - // there is an exclusion, remove the entire node - $result = false; - $excludes = array(); // not used, but good to initialize anyway - } else { - // DEFINITION CALL - if ($i === 0) { - // special processing for the first node - $def = $definition->info_parent_def; - } else { - $def = $definition->info[$tokens[$i]->name]; - - } - - if (!empty($def->child)) { - // have DTD child def validate children - $result = $def->child->validateChildren( - $child_tokens, $config, $context); - } else { - // weird, no child definition, get rid of everything - $result = false; - } - - // determine whether or not this element has any exclusions - $excludes = $def->excludes; - } - - // $result is now a bool or array - - //################################################################// - // Process result by interpreting $result - - if ($result === true || $child_tokens === $result) { - // leave the node as is - - // register start token as a parental node start - $stack[] = $i; - - // register exclusions if there are any - if (!empty($excludes)) $exclude_stack[] = $excludes; - - // move cursor to next possible start node - $i++; - - } elseif($result === false) { - // remove entire node - - if ($e) { - if ($excluded) { - $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); - } else { - $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); - } - } - - // calculate length of inner tokens and current tokens - $length = $j - $i + 1; - - // perform removal - array_splice($tokens, $i, $length); - - // update size - $size -= $length; - - // there is no start token to register, - // current node is now the next possible start node - // unless it turns out that we need to do a double-check - - // this is a rought heuristic that covers 100% of HTML's - // cases and 99% of all other cases. A child definition - // that would be tricked by this would be something like: - // ( | a b c) where it's all or nothing. Fortunately, - // our current implementation claims that that case would - // not allow empty, even if it did - if (!$parent_def->child->allow_empty) { - // we need to do a double-check - $i = $parent_index; - array_pop($stack); - } - - // PROJECTED OPTIMIZATION: Process all children elements before - // reprocessing parent node. - - } else { - // replace node with $result - - // calculate length of inner tokens - $length = $j - $i - 1; - - if ($e) { - if (empty($result) && $length) { - $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); - } else { - $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); - } - } - - // perform replacement - array_splice($tokens, $i + 1, $length, $result); - - // update size - $size -= $length; - $size += count($result); - - // register start token as a parental node start - $stack[] = $i; - - // register exclusions if there are any - if (!empty($excludes)) $exclude_stack[] = $excludes; - - // move cursor to next possible start node - $i++; - - } - - //################################################################// - // Scroll to next start node - - // We assume, at this point, that $i is the index of the token - // that is the first possible new start point for a node. - - // Test if the token indeed is a start tag, if not, move forward - // and test again. - $size = count($tokens); - while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) { - if ($tokens[$i] instanceof HTMLPurifier_Token_End) { - // pop a token index off the stack if we ended a node - array_pop($stack); - // pop an exclusion lookup off exclusion stack if - // we ended node and that node had exclusions - if ($i == 0 || $i == $size - 1) { - // use specialized var if it's the super-parent - $s_excludes = $definition->info_parent_def->excludes; - } else { - $s_excludes = $definition->info[$tokens[$i]->name]->excludes; - } - if ($s_excludes) { - array_pop($exclude_stack); - } - } - $i++; - } - - } - - //####################################################################// - // Post-processing - - // remove implicit parent tokens at the beginning and end - array_shift($tokens); - array_pop($tokens); - - // remove context variables - $context->destroy('IsInline'); - $context->destroy('CurrentToken'); - - //####################################################################// - // Return - - return $tokens; - - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Token.php b/library/HTMLPurifier/Token.php deleted file mode 100644 index 7900e6cb10..0000000000 --- a/library/HTMLPurifier/Token.php +++ /dev/null @@ -1,57 +0,0 @@ -line = $l; - $this->col = $c; - } - - /** - * Convenience function for DirectLex settings line/col position. - */ - public function rawPosition($l, $c) { - if ($c === -1) $l++; - $this->line = $l; - $this->col = $c; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Token/Comment.php b/library/HTMLPurifier/Token/Comment.php deleted file mode 100644 index dc6bdcabb8..0000000000 --- a/library/HTMLPurifier/Token/Comment.php +++ /dev/null @@ -1,22 +0,0 @@ -data = $data; - $this->line = $line; - $this->col = $col; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/TokenFactory.php b/library/HTMLPurifier/TokenFactory.php deleted file mode 100644 index 7cf48fb41c..0000000000 --- a/library/HTMLPurifier/TokenFactory.php +++ /dev/null @@ -1,94 +0,0 @@ -p_start = new HTMLPurifier_Token_Start('', array()); - $this->p_end = new HTMLPurifier_Token_End(''); - $this->p_empty = new HTMLPurifier_Token_Empty('', array()); - $this->p_text = new HTMLPurifier_Token_Text(''); - $this->p_comment= new HTMLPurifier_Token_Comment(''); - } - - /** - * Creates a HTMLPurifier_Token_Start. - * @param $name Tag name - * @param $attr Associative array of attributes - * @return Generated HTMLPurifier_Token_Start - */ - public function createStart($name, $attr = array()) { - $p = clone $this->p_start; - $p->__construct($name, $attr); - return $p; - } - - /** - * Creates a HTMLPurifier_Token_End. - * @param $name Tag name - * @return Generated HTMLPurifier_Token_End - */ - public function createEnd($name) { - $p = clone $this->p_end; - $p->__construct($name); - return $p; - } - - /** - * Creates a HTMLPurifier_Token_Empty. - * @param $name Tag name - * @param $attr Associative array of attributes - * @return Generated HTMLPurifier_Token_Empty - */ - public function createEmpty($name, $attr = array()) { - $p = clone $this->p_empty; - $p->__construct($name, $attr); - return $p; - } - - /** - * Creates a HTMLPurifier_Token_Text. - * @param $data Data of text token - * @return Generated HTMLPurifier_Token_Text - */ - public function createText($data) { - $p = clone $this->p_text; - $p->__construct($data); - return $p; - } - - /** - * Creates a HTMLPurifier_Token_Comment. - * @param $data Data of comment token - * @return Generated HTMLPurifier_Token_Comment - */ - public function createComment($data) { - $p = clone $this->p_comment; - $p->__construct($data); - return $p; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URI.php b/library/HTMLPurifier/URI.php deleted file mode 100644 index 8b50d0d18d..0000000000 --- a/library/HTMLPurifier/URI.php +++ /dev/null @@ -1,173 +0,0 @@ -scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme); - $this->userinfo = $userinfo; - $this->host = $host; - $this->port = is_null($port) ? $port : (int) $port; - $this->path = $path; - $this->query = $query; - $this->fragment = $fragment; - } - - /** - * Retrieves a scheme object corresponding to the URI's scheme/default - * @param $config Instance of HTMLPurifier_Config - * @param $context Instance of HTMLPurifier_Context - * @return Scheme object appropriate for validating this URI - */ - public function getSchemeObj($config, $context) { - $registry = HTMLPurifier_URISchemeRegistry::instance(); - if ($this->scheme !== null) { - $scheme_obj = $registry->getScheme($this->scheme, $config, $context); - if (!$scheme_obj) return false; // invalid scheme, clean it out - } else { - // no scheme: retrieve the default one - $def = $config->getDefinition('URI'); - $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context); - if (!$scheme_obj) { - // something funky happened to the default scheme object - trigger_error( - 'Default scheme object "' . $def->defaultScheme . '" was not readable', - E_USER_WARNING - ); - return false; - } - } - return $scheme_obj; - } - - /** - * Generic validation method applicable for all schemes. May modify - * this URI in order to get it into a compliant form. - * @param $config Instance of HTMLPurifier_Config - * @param $context Instance of HTMLPurifier_Context - * @return True if validation/filtering succeeds, false if failure - */ - public function validate($config, $context) { - - // ABNF definitions from RFC 3986 - $chars_sub_delims = '!$&\'()*+,;='; - $chars_gen_delims = ':/?#[]@'; - $chars_pchar = $chars_sub_delims . ':@'; - - // validate scheme (MUST BE FIRST!) - if (!is_null($this->scheme) && is_null($this->host)) { - $def = $config->getDefinition('URI'); - if ($def->defaultScheme === $this->scheme) { - $this->scheme = null; - } - } - - // validate host - if (!is_null($this->host)) { - $host_def = new HTMLPurifier_AttrDef_URI_Host(); - $this->host = $host_def->validate($this->host, $config, $context); - if ($this->host === false) $this->host = null; - } - - // validate username - if (!is_null($this->userinfo)) { - $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':'); - $this->userinfo = $encoder->encode($this->userinfo); - } - - // validate port - if (!is_null($this->port)) { - if ($this->port < 1 || $this->port > 65535) $this->port = null; - } - - // validate path - $path_parts = array(); - $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/'); - if (!is_null($this->host)) { - // path-abempty (hier and relative) - $this->path = $segments_encoder->encode($this->path); - } elseif ($this->path !== '' && $this->path[0] === '/') { - // path-absolute (hier and relative) - if (strlen($this->path) >= 2 && $this->path[1] === '/') { - // This shouldn't ever happen! - $this->path = ''; - } else { - $this->path = $segments_encoder->encode($this->path); - } - } elseif (!is_null($this->scheme) && $this->path !== '') { - // path-rootless (hier) - // Short circuit evaluation means we don't need to check nz - $this->path = $segments_encoder->encode($this->path); - } elseif (is_null($this->scheme) && $this->path !== '') { - // path-noscheme (relative) - // (once again, not checking nz) - $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); - $c = strpos($this->path, '/'); - if ($c !== false) { - $this->path = - $segment_nc_encoder->encode(substr($this->path, 0, $c)) . - $segments_encoder->encode(substr($this->path, $c)); - } else { - $this->path = $segment_nc_encoder->encode($this->path); - } - } else { - // path-empty (hier and relative) - $this->path = ''; // just to be safe - } - - // qf = query and fragment - $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?'); - - if (!is_null($this->query)) { - $this->query = $qf_encoder->encode($this->query); - } - - if (!is_null($this->fragment)) { - $this->fragment = $qf_encoder->encode($this->fragment); - } - - return true; - - } - - /** - * Convert URI back to string - * @return String URI appropriate for output - */ - public function toString() { - // reconstruct authority - $authority = null; - if (!is_null($this->host)) { - $authority = ''; - if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@'; - $authority .= $this->host; - if(!is_null($this->port)) $authority .= ':' . $this->port; - } - - // reconstruct the result - $result = ''; - if (!is_null($this->scheme)) $result .= $this->scheme . ':'; - if (!is_null($authority)) $result .= '//' . $authority; - $result .= $this->path; - if (!is_null($this->query)) $result .= '?' . $this->query; - if (!is_null($this->fragment)) $result .= '#' . $this->fragment; - - return $result; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php deleted file mode 100644 index c116f93dff..0000000000 --- a/library/HTMLPurifier/URIFilter.php +++ /dev/null @@ -1,45 +0,0 @@ -getDefinition('URI')->host; - if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host)); - } - public function filter(&$uri, $config, $context) { - if (is_null($uri->host)) return true; - if ($this->ourHostParts === false) return false; - $host_parts = array_reverse(explode('.', $uri->host)); - foreach ($this->ourHostParts as $i => $x) { - if (!isset($host_parts[$i])) return false; - if ($host_parts[$i] != $this->ourHostParts[$i]) return false; - } - return true; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URIFilter/DisableExternalResources.php b/library/HTMLPurifier/URIFilter/DisableExternalResources.php deleted file mode 100644 index 881abc43cf..0000000000 --- a/library/HTMLPurifier/URIFilter/DisableExternalResources.php +++ /dev/null @@ -1,12 +0,0 @@ -get('EmbeddedURI', true)) return true; - return parent::filter($uri, $config, $context); - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URIFilter/HostBlacklist.php b/library/HTMLPurifier/URIFilter/HostBlacklist.php deleted file mode 100644 index 045aa0992c..0000000000 --- a/library/HTMLPurifier/URIFilter/HostBlacklist.php +++ /dev/null @@ -1,21 +0,0 @@ -blacklist = $config->get('URI.HostBlacklist'); - return true; - } - public function filter(&$uri, $config, $context) { - foreach($this->blacklist as $blacklisted_host_fragment) { - if (strpos($uri->host, $blacklisted_host_fragment) !== false) { - return false; - } - } - return true; - } -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URIFilter/Munge.php b/library/HTMLPurifier/URIFilter/Munge.php deleted file mode 100644 index efa10a6458..0000000000 --- a/library/HTMLPurifier/URIFilter/Munge.php +++ /dev/null @@ -1,58 +0,0 @@ -target = $config->get('URI.' . $this->name); - $this->parser = new HTMLPurifier_URIParser(); - $this->doEmbed = $config->get('URI.MungeResources'); - $this->secretKey = $config->get('URI.MungeSecretKey'); - return true; - } - public function filter(&$uri, $config, $context) { - if ($context->get('EmbeddedURI', true) && !$this->doEmbed) return true; - - $scheme_obj = $uri->getSchemeObj($config, $context); - if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it - if (is_null($uri->host) || empty($scheme_obj->browsable)) { - return true; - } - // don't redirect if target host is our host - if ($uri->host === $config->getDefinition('URI')->host) { - return true; - } - - $this->makeReplace($uri, $config, $context); - $this->replace = array_map('rawurlencode', $this->replace); - - $new_uri = strtr($this->target, $this->replace); - $new_uri = $this->parser->parse($new_uri); - // don't redirect if the target host is the same as the - // starting host - if ($uri->host === $new_uri->host) return true; - $uri = $new_uri; // overwrite - return true; - } - - protected function makeReplace($uri, $config, $context) { - $string = $uri->toString(); - // always available - $this->replace['%s'] = $string; - $this->replace['%r'] = $context->get('EmbeddedURI', true); - $token = $context->get('CurrentToken', true); - $this->replace['%n'] = $token ? $token->name : null; - $this->replace['%m'] = $context->get('CurrentAttr', true); - $this->replace['%p'] = $context->get('CurrentCSSProperty', true); - // not always available - if ($this->secretKey) $this->replace['%t'] = sha1($this->secretKey . ':' . $string); - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URIScheme.php b/library/HTMLPurifier/URIScheme.php deleted file mode 100644 index 039710fd15..0000000000 --- a/library/HTMLPurifier/URIScheme.php +++ /dev/null @@ -1,42 +0,0 @@ -, resolves edge cases - * with making relative URIs absolute - */ - public $hierarchical = false; - - /** - * Validates the components of a URI - * @note This implementation should be called by children if they define - * a default port, as it does port processing. - * @param $uri Instance of HTMLPurifier_URI - * @param $config HTMLPurifier_Config object - * @param $context HTMLPurifier_Context object - * @return Bool success or failure - */ - public function validate(&$uri, $config, $context) { - if ($this->default_port == $uri->port) $uri->port = null; - return true; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URIScheme/news.php b/library/HTMLPurifier/URIScheme/news.php deleted file mode 100644 index f5f54f4f56..0000000000 --- a/library/HTMLPurifier/URIScheme/news.php +++ /dev/null @@ -1,22 +0,0 @@ -userinfo = null; - $uri->host = null; - $uri->port = null; - $uri->query = null; - // typecode check needed on path - return true; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/URIScheme/nntp.php b/library/HTMLPurifier/URIScheme/nntp.php deleted file mode 100644 index 5bf93ea784..0000000000 --- a/library/HTMLPurifier/URIScheme/nntp.php +++ /dev/null @@ -1,20 +0,0 @@ -userinfo = null; - $uri->query = null; - return true; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/VarParser.php b/library/HTMLPurifier/VarParser.php deleted file mode 100644 index 68e72ae869..0000000000 --- a/library/HTMLPurifier/VarParser.php +++ /dev/null @@ -1,154 +0,0 @@ - self::STRING, - 'istring' => self::ISTRING, - 'text' => self::TEXT, - 'itext' => self::ITEXT, - 'int' => self::INT, - 'float' => self::FLOAT, - 'bool' => self::BOOL, - 'lookup' => self::LOOKUP, - 'list' => self::ALIST, - 'hash' => self::HASH, - 'mixed' => self::MIXED - ); - - /** - * Lookup table of types that are string, and can have aliases or - * allowed value lists. - */ - static public $stringTypes = array( - self::STRING => true, - self::ISTRING => true, - self::TEXT => true, - self::ITEXT => true, - ); - - /** - * Validate a variable according to type. Throws - * HTMLPurifier_VarParserException if invalid. - * It may return NULL as a valid type if $allow_null is true. - * - * @param $var Variable to validate - * @param $type Type of variable, see HTMLPurifier_VarParser->types - * @param $allow_null Whether or not to permit null as a value - * @return Validated and type-coerced variable - */ - final public function parse($var, $type, $allow_null = false) { - if (is_string($type)) { - if (!isset(HTMLPurifier_VarParser::$types[$type])) { - throw new HTMLPurifier_VarParserException("Invalid type '$type'"); - } else { - $type = HTMLPurifier_VarParser::$types[$type]; - } - } - $var = $this->parseImplementation($var, $type, $allow_null); - if ($allow_null && $var === null) return null; - // These are basic checks, to make sure nothing horribly wrong - // happened in our implementations. - switch ($type) { - case (self::STRING): - case (self::ISTRING): - case (self::TEXT): - case (self::ITEXT): - if (!is_string($var)) break; - if ($type == self::ISTRING || $type == self::ITEXT) $var = strtolower($var); - return $var; - case (self::INT): - if (!is_int($var)) break; - return $var; - case (self::FLOAT): - if (!is_float($var)) break; - return $var; - case (self::BOOL): - if (!is_bool($var)) break; - return $var; - case (self::LOOKUP): - case (self::ALIST): - case (self::HASH): - if (!is_array($var)) break; - if ($type === self::LOOKUP) { - foreach ($var as $k) if ($k !== true) $this->error('Lookup table contains value other than true'); - } elseif ($type === self::ALIST) { - $keys = array_keys($var); - if (array_keys($keys) !== $keys) $this->error('Indices for list are not uniform'); - } - return $var; - case (self::MIXED): - return $var; - default: - $this->errorInconsistent(get_class($this), $type); - } - $this->errorGeneric($var, $type); - } - - /** - * Actually implements the parsing. Base implementation is to not - * do anything to $var. Subclasses should overload this! - */ - protected function parseImplementation($var, $type, $allow_null) { - return $var; - } - - /** - * Throws an exception. - */ - protected function error($msg) { - throw new HTMLPurifier_VarParserException($msg); - } - - /** - * Throws an inconsistency exception. - * @note This should not ever be called. It would be called if we - * extend the allowed values of HTMLPurifier_VarParser without - * updating subclasses. - */ - protected function errorInconsistent($class, $type) { - throw new HTMLPurifier_Exception("Inconsistency in $class: ".HTMLPurifier_VarParser::getTypeName($type)." not implemented"); - } - - /** - * Generic error for if a type didn't work. - */ - protected function errorGeneric($var, $type) { - $vtype = gettype($var); - $this->error("Expected type ".HTMLPurifier_VarParser::getTypeName($type).", got $vtype"); - } - - static public function getTypeName($type) { - static $lookup; - if (!$lookup) { - // Lazy load the alternative lookup table - $lookup = array_flip(HTMLPurifier_VarParser::$types); - } - if (!isset($lookup[$type])) return 'unknown'; - return $lookup[$type]; - } - -} - -// vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/CREDITS b/library/ezyang/htmlpurifier/CREDITS new file mode 100644 index 0000000000..7921b45af7 --- /dev/null +++ b/library/ezyang/htmlpurifier/CREDITS @@ -0,0 +1,9 @@ + +CREDITS + +Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks +to the DevNetwork Community for their help (see docs/ref-devnetwork.html for +more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake +for letting me package his fantastic XSS cheatsheet for a smoketest. + + vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/INSTALL b/library/ezyang/htmlpurifier/INSTALL new file mode 100644 index 0000000000..677c04aa04 --- /dev/null +++ b/library/ezyang/htmlpurifier/INSTALL @@ -0,0 +1,374 @@ + +Install + How to install HTML Purifier + +HTML Purifier is designed to run out of the box, so actually using the +library is extremely easy. (Although... if you were looking for a +step-by-step installation GUI, you've downloaded the wrong software!) + +While the impatient can get going immediately with some of the sample +code at the bottom of this library, it's well worth reading this entire +document--most of the other documentation assumes that you are familiar +with these contents. + + +--------------------------------------------------------------------------- +1. Compatibility + +HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and +up. It has no core dependencies with other libraries. PHP +4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0. +HTML Purifier is not compatible with zend.ze1_compatibility_mode. + +These optional extensions can enhance the capabilities of HTML Purifier: + + * iconv : Converts text to and from non-UTF-8 encodings + * bcmath : Used for unit conversion and imagecrash protection + * tidy : Used for pretty-printing HTML + +These optional libraries can enhance the capabilities of HTML Purifier: + + * CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks + * Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA + +--------------------------------------------------------------------------- +2. Reconnaissance + +A big plus of HTML Purifier is its inerrant support of standards, so +your web-pages should be standards-compliant. (They should also use +semantic markup, but that's another issue altogether, one HTML Purifier +cannot fix without reading your mind.) + +HTML Purifier can process these doctypes: + +* XHTML 1.0 Transitional (default) +* XHTML 1.0 Strict +* HTML 4.01 Transitional +* HTML 4.01 Strict +* XHTML 1.1 + +...and these character encodings: + +* UTF-8 (default) +* Any encoding iconv supports (with crippled internationalization support) + +These defaults reflect what my choices would be if I were authoring an +HTML document, however, what you choose depends on the nature of your +codebase. If you don't know what doctype you are using, you can determine +the doctype from this identifier at the top of your source code: + + + +...and the character encoding from this code: + + + +If the character encoding declaration is missing, STOP NOW, and +read 'docs/enduser-utf8.html' (web accessible at +http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is +present, read this document anyway, as many websites specify their +document's character encoding incorrectly. + + +--------------------------------------------------------------------------- +3. Including the library + +The procedure is quite simple: + + require_once '/path/to/library/HTMLPurifier.auto.php'; + +This will setup an autoloader, so the library's files are only included +when you use them. + +Only the contents in the library/ folder are necessary, so you can remove +everything else when using HTML Purifier in a production environment. + +If you installed HTML Purifier via PEAR, all you need to do is: + + require_once 'HTMLPurifier.auto.php'; + +Please note that the usual PEAR practice of including just the classes you +want will not work with HTML Purifier's autoloading scheme. + +Advanced users, read on; other users can skip to section 4. + +Autoload compatibility +---------------------- + + HTML Purifier attempts to be as smart as possible when registering an + autoloader, but there are some cases where you will need to change + your own code to accomodate HTML Purifier. These are those cases: + + PHP VERSION IS LESS THAN 5.1.2, AND YOU'VE DEFINED __autoload + Because spl_autoload_register() doesn't exist in early versions + of PHP 5, HTML Purifier has no way of adding itself to the autoload + stack. Modify your __autoload function to test + HTMLPurifier_Bootstrap::autoload($class) + + For example, suppose your autoload function looks like this: + + function __autoload($class) { + require str_replace('_', '/', $class) . '.php'; + return true; + } + + A modified version with HTML Purifier would look like this: + + function __autoload($class) { + if (HTMLPurifier_Bootstrap::autoload($class)) return true; + require str_replace('_', '/', $class) . '.php'; + return true; + } + + Note that there *is* some custom behavior in our autoloader; the + original autoloader in our example would work for 99% of the time, + but would fail when including language files. + + AN __autoload FUNCTION IS DECLARED AFTER OUR AUTOLOADER IS REGISTERED + spl_autoload_register() has the curious behavior of disabling + the existing __autoload() handler. Users need to explicitly + spl_autoload_register('__autoload'). Because we use SPL when it + is available, __autoload() will ALWAYS be disabled. If __autoload() + is declared before HTML Purifier is loaded, this is not a problem: + HTML Purifier will register the function for you. But if it is + declared afterwards, it will mysteriously not work. This + snippet of code (after your autoloader is defined) will fix it: + + spl_autoload_register('__autoload') + + Users should also be on guard if they use a version of PHP previous + to 5.1.2 without an autoloader--HTML Purifier will define __autoload() + for you, which can collide with an autoloader that was added by *you* + later. + + +For better performance +---------------------- + + Opcode caches, which greatly speed up PHP initialization for scripts + with large amounts of code (HTML Purifier included), don't like + autoloaders. We offer an include file that includes all of HTML Purifier's + files in one go in an opcode cache friendly manner: + + // If /path/to/library isn't already in your include path, uncomment + // the below line: + // require '/path/to/library/HTMLPurifier.path.php'; + + require 'HTMLPurifier.includes.php'; + + Optional components still need to be included--you'll know if you try to + use a feature and you get a class doesn't exists error! The autoloader + can be used in conjunction with this approach to catch classes that are + missing. Simply add this afterwards: + + require 'HTMLPurifier.autoload.php'; + +Standalone version +------------------ + + HTML Purifier has a standalone distribution; you can also generate + a standalone file from the full version by running the script + maintenance/generate-standalone.php . The standalone version has the + benefit of having most of its code in one file, so parsing is much + faster and the library is easier to manage. + + If HTMLPurifier.standalone.php exists in the library directory, you + can use it like this: + + require '/path/to/HTMLPurifier.standalone.php'; + + This is equivalent to including HTMLPurifier.includes.php, except that + the contents of standalone/ will be added to your path. To override this + behavior, specify a new HTMLPURIFIER_PREFIX where standalone files can + be found (usually, this will be one directory up, the "true" library + directory in full distributions). Don't forget to set your path too! + + The autoloader can be added to the end to ensure the classes are + loaded when necessary; otherwise you can manually include them. + To use the autoloader, use this: + + require 'HTMLPurifier.autoload.php'; + +For advanced users +------------------ + + HTMLPurifier.auto.php performs a number of operations that can be done + individually. These are: + + HTMLPurifier.path.php + Puts /path/to/library in the include path. For high performance, + this should be done in php.ini. + + HTMLPurifier.autoload.php + Registers our autoload handler HTMLPurifier_Bootstrap::autoload($class). + + You can do these operations by yourself--in fact, you must modify your own + autoload handler if you are using a version of PHP earlier than PHP 5.1.2 + (See "Autoload compatibility" above). + + +--------------------------------------------------------------------------- +4. Configuration + +HTML Purifier is designed to run out-of-the-box, but occasionally HTML +Purifier needs to be told what to do. If you answer no to any of these +questions, read on; otherwise, you can skip to the next section (or, if you're +into configuring things just for the heck of it, skip to 4.3). + +* Am I using UTF-8? +* Am I using XHTML 1.0 Transitional? + +If you answered no to any of these questions, instantiate a configuration +object and read on: + + $config = HTMLPurifier_Config::createDefault(); + + +4.1. Setting a different character encoding + +You really shouldn't use any other encoding except UTF-8, especially if you +plan to support multilingual websites (read section three for more details). +However, switching to UTF-8 is not always immediately feasible, so we can +adapt. + +HTML Purifier uses iconv to support other character encodings, as such, +any encoding that iconv supports +HTML Purifier supports with this code: + + $config->set('Core.Encoding', /* put your encoding here */); + +An example usage for Latin-1 websites (the most common encoding for English +websites): + + $config->set('Core.Encoding', 'ISO-8859-1'); + +Note that HTML Purifier's support for non-Unicode encodings is crippled by the +fact that any character not supported by that encoding will be silently +dropped, EVEN if it is ampersand escaped. If you want to work around +this, you are welcome to read docs/enduser-utf8.html for a fix, +but please be cognizant of the issues the "solution" creates (for this +reason, I do not include the solution in this document). + + +4.2. Setting a different doctype + +For those of you using HTML 4.01 Transitional, you can disable +XHTML output like this: + + $config->set('HTML.Doctype', 'HTML 4.01 Transitional'); + +Other supported doctypes include: + + * HTML 4.01 Strict + * HTML 4.01 Transitional + * XHTML 1.0 Strict + * XHTML 1.0 Transitional + * XHTML 1.1 + + +4.3. Other settings + +There are more configuration directives which can be read about +here: They're a bit boring, +but they can help out for those of you who like to exert maximum control over +your code. Some of the more interesting ones are configurable at the +demo and are well worth looking into +for your own system. + +For example, you can fine tune allowed elements and attributes, convert +relative URLs to absolute ones, and even autoparagraph input text! These +are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and +%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention +translates to: + + $config->set('Namespace.Directive', $value); + +E.g. + + $config->set('HTML.Allowed', 'p,b,a[href],i'); + $config->set('URI.Base', 'http://www.example.com'); + $config->set('URI.MakeAbsolute', true); + $config->set('AutoFormat.AutoParagraph', true); + + +--------------------------------------------------------------------------- +5. Caching + +HTML Purifier generates some cache files (generally one or two) to speed up +its execution. For maximum performance, make sure that +library/HTMLPurifier/DefinitionCache/Serializer is writeable by the webserver. + +If you are in the library/ folder of HTML Purifier, you can set the +appropriate permissions using: + + chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer + +If the above command doesn't work, you may need to assign write permissions +to all. This may be necessary if your webserver runs as nobody, but is +not recommended since it means any other user can write files in the +directory. Use: + + chmod -R 0777 HTMLPurifier/DefinitionCache/Serializer + +You can also chmod files via your FTP client; this option +is usually accessible by right clicking the corresponding directory and +then selecting "chmod" or "file permissions". + +Starting with 2.0.1, HTML Purifier will generate friendly error messages +that will tell you exactly what you have to chmod the directory to, if in doubt, +follow its advice. + +If you are unable or unwilling to give write permissions to the cache +directory, you can either disable the cache (and suffer a performance +hit): + + $config->set('Core.DefinitionCache', null); + +Or move the cache directory somewhere else (no trailing slash): + + $config->set('Cache.SerializerPath', '/home/user/absolute/path'); + + +--------------------------------------------------------------------------- +6. Using the code + +The interface is mind-numbingly simple: + + $purifier = new HTMLPurifier($config); + $clean_html = $purifier->purify( $dirty_html ); + +That's it! For more examples, check out docs/examples/ (they aren't very +different though). Also, docs/enduser-slow.html gives advice on what to +do if HTML Purifier is slowing down your application. + + +--------------------------------------------------------------------------- +7. Quick install + +First, make sure library/HTMLPurifier/DefinitionCache/Serializer is +writable by the webserver (see Section 5: Caching above for details). +If your website is in UTF-8 and XHTML Transitional, use this code: + +purify($dirty_html); +?> + +If your website is in a different encoding or doctype, use this code: + +set('Core.Encoding', 'ISO-8859-1'); // replace with your encoding + $config->set('HTML.Doctype', 'HTML 4.01 Transitional'); // replace with your doctype + $purifier = new HTMLPurifier($config); + + $clean_html = $purifier->purify($dirty_html); +?> + + vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/INSTALL.fr.utf8 b/library/ezyang/htmlpurifier/INSTALL.fr.utf8 new file mode 100644 index 0000000000..06e628cc96 --- /dev/null +++ b/library/ezyang/htmlpurifier/INSTALL.fr.utf8 @@ -0,0 +1,60 @@ + +Installation + Comment installer HTML Purifier + +Attention : Ce document est encodé en UTF-8, si les lettres avec des accents +ne s'affichent pas, prenez un meilleur éditeur de texte. + +L'installation de HTML Purifier est très simple, parce qu'il n'a pas besoin +de configuration. Pour les utilisateurs impatients, le code se trouve dans le +pied de page, mais je recommande de lire le document. + +1. Compatibilité + +HTML Purifier fonctionne avec PHP 5. PHP 5.0.5 est la dernière version testée. +Il ne dépend pas d'autres librairies. + +Les extensions optionnelles sont iconv (généralement déjà installée) et tidy +(répendue aussi). Si vous utilisez UTF-8 et que vous ne voulez pas l'indentation, +vous pouvez utiliser HTML Purifier sans ces extensions. + + +2. Inclure la librairie + +Quand vous devez l'utilisez, incluez le : + + require_once('/path/to/library/HTMLPurifier.auto.php'); + +Ne pas l'inclure si ce n'est pas nécessaire, car HTML Purifier est lourd. + +HTML Purifier utilise "autoload". Si vous avez défini la fonction __autoload, +vous devez ajouter cette fonction : + + spl_autoload_register('__autoload') + +Plus d'informations dans le document "INSTALL". + +3. Installation rapide + +Si votre site Web est en UTF-8 et XHTML Transitional, utilisez : + +purify($html_a_purifier); +?> + +Sinon, utilisez : + +set('Core', 'Encoding', 'ISO-8859-1'); //Remplacez par votre + encodage + $config->set('Core', 'XHTML', true); //Remplacer par false si HTML 4.01 + $purificateur = new HTMLPurifier($config); + $html_propre = $purificateur->purify($html_a_purifier); +?> + + + vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/LICENSE b/library/ezyang/htmlpurifier/LICENSE new file mode 100644 index 0000000000..8c88a20d45 --- /dev/null +++ b/library/ezyang/htmlpurifier/LICENSE @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/NEWS b/library/ezyang/htmlpurifier/NEWS new file mode 100644 index 0000000000..a9124af1a1 --- /dev/null +++ b/library/ezyang/htmlpurifier/NEWS @@ -0,0 +1,1094 @@ +NEWS ( CHANGELOG and HISTORY ) HTMLPurifier +||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + += KEY ==================== + # Breaks back-compat + ! Feature + - Bugfix + + Sub-comment + . Internal change +========================== + +4.7.0, released 2015-08-04 +# opacity is now considered a "tricky" CSS property rather than a + proprietary one. +! %AutoFormat.RemoveEmpty.Predicate for specifying exactly when + an element should be considered "empty" (maybe preserve if it + has attributes), and modify iframe support so that the iframe + is removed if it is missing a src attribute. Thanks meeva for + reporting. +- Don't truncate upon encountering
when using DOMLex. Thanks + Myrto Christina for finally convincing me to fix this. +- Update YouTube filter for new code. +- Fix parsing of rgb() values with spaces in them for 'border' + attribute. +- Don't remove foo="" attributes if foo is a boolean attribute. Thanks + valME for reporting. + +4.6.0, released 2013-11-30 +# Secure URI munge hashing algorithm has changed to hash_hmac("sha256", $url, $secret). + Please update any verification scripts you may have. +# URI parsing algorithm was made more strict, so only prefixes which + looks like schemes will actually be schemes. Thanks + Michael Gusev for fixing. +# %Core.EscapeInvalidChildren is no longer supported, and no longer does + anything. +! New directive %Core.AllowHostnameUnderscore which allows underscores + in hostnames. +- Eliminate quadratic behavior in DOMLex by using a proper queue. + Thanks Ole Laursen for noticing this. +- Rewritten MakeWellFormed/FixNesting implementation eliminates quadratic + behavior in the rest of the purificaiton pipeline. Thanks Chedburn + Networks for sponsoring this work. +- Made Linkify URL parser a bit less permissive, so that non-breaking + spaces and commas are not included as part of URL. Thanks nAS for fixing. +- Fix some bad interactions with %HTML.Allowed and injectors. Thanks + David Hirtz for reporting. +- Fix infinite loop in DirectLex. Thanks Ashar Javed (@soaj1664ashar) + for reporting. + +4.5.0, released 2013-02-17 +# Fix bug where stacked attribute transforms clobber each other; + this also means it's no longer possible to override attribute + transforms in later modules. No internal code was using this + but this may break some clients. +# We now use SHA-1 to identify cached definitions, instead of MD5. +! Support display:inline-block +! Support for more white-space CSS values. +! Permit underscores in font families +! Support for page-break-* CSS3 properties when proprietary properties + are enabled. +! New directive %Core.DisableExcludes; can be set to 'true' to turn off + SGML excludes checking. If HTML Purifier is removing too much text + and you don't care about full standards compliance, try setting this to + 'true'. +- Use prepend for SPL autoloading on PHP 5.3 and later. +- Fix bug with nofollow transform when pre-existing rel exists. +- Fix bug where background:url() always gets lower-cased + (but not background-image:url()) +- Fix bug with non lower-case color names in HTML +- Fix bug where data URI validation doesn't remove temporary files. + Thanks Javier Marín Ros for reporting. +- Don't remove certain empty tags on RemoveEmpty. + +4.4.0, released 2012-01-18 +# Removed PEARSax3 handler. +# URI.Munge now munges URIs inside the same host that go from https + to http. Reported by Neike Taika-Tessaro. +# Core.EscapeNonASCIICharacters now always transforms entities to + entities, even if target encoding is UTF-8. +# Tighten up selector validation in ExtractStyleBlocks. + Non-syntactically valid selectors are now rejected, along with + some of the more obscure ones such as attribute selectors, the + :lang pseudoselector, and anything not in CSS2.1. Furthermore, + ID and class selectors now work properly with the relevant + configuration attributes. Also, mute errors when parsing CSS + with CSS Tidy. Reported by Mario Heiderich and Norman Hippert. +! Added support for 'scope' attribute on tables. +! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links. +! Properly handle sub-lists directly nested inside of lists in + a standards compliant way, by moving them into the preceding
  • +! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for + limited allowed comments in untrusted situations. +! Implement iframes, and allow them to be used in untrusted mode with + %HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle + for submitting an initial version of the patch. +! The Forms module now works properly for transitional doctypes. +! Added support for internationalized domain names. You need the PEAR + Net_IDNA2 module to be in your path; if it is installed, ensure the + class can be loaded and then set %Core.EnableIDNA to true. +- Color keywords are now case insensitive. Thanks Yzmir Ramirez + for reporting. +- Explicitly initialize anonModule variable to null. +- Do not duplicate nofollow if already present. Thanks 178 + for reporting. +- Do not add nofollow if hostname matches our current host. Thanks 178 + for reporting, and Neike Taika-Tessaro for helping diagnose. +- Do not unset parser variable; this fixes intermittent serialization + problems. Thanks Neike Taika-Tessaro for reporting, bill + <10010tiger@gmail.com> for diagnosing. +- Fix iconv truncation bug, where non-UTF-8 target encodings see + output truncated after around 8000 characters. Thanks Jörg Ludwig + for reporting. +- Fix broken table content model for XHTML1.1 (and also earlier + versions, although the W3C validator doesn't catch those violations). + Thanks GlitchMr for reporting. + +4.3.0, released 2011-03-27 +# Fixed broken caching of customized raw definitions, but requires an + API change. The old API still works but will emit a warning, + see http://htmlpurifier.org/docs/enduser-customize.html#optimized + for how to upgrade your code. +# Protect against Internet Explorer innerHTML behavior by specially + treating attributes with backticks but no angled brackets, quotes or + spaces. This constitutes a slight semantic change, which can be + reverted using %Output.FixInnerHTML. Reported by Neike Taika-Tessaro + and Mario Heiderich. +# Protect against cssText/innerHTML by restricting allowed characters + used in fonts further than mandated by the specification and encoding + some extra special characters in URLs. Reported by Neike + Taika-Tessaro and Mario Heiderich. +! Added %HTML.Nofollow to add rel="nofollow" to external links. +! More types of SPL autoloaders allowed on later versions of PHP. +! Implementations for position, top, left, right, bottom, z-index + when %CSS.Trusted is on. +! Add %Cache.SerializerPermissions option for custom serializer + directory/file permissions +! Fix longstanding bug in Flash support for non-IE browsers, and + allow more wmode attributes. +! Add %CSS.AllowedFonts to restrict permissible font names. +- Switch to an iterative traversal of the DOM, which prevents us + from running out of stack space for deeply nested documents. + Thanks Maxim Krizhanovsky for contributing a patch. +- Make removal of conditional IE comments ungreedy; thanks Bernd + for reporting. +- Escape CDATA before removing Internet Explorer comments. +- Fix removal of id attributes under certain conditions by ensuring + armor attributes are preserved when recreating tags. +- Check if schema.ser was corrupted. +- Check if zend.ze1_compatibility_mode is on, and error out if it is. + This safety check is only done for HTMLPurifier.auto.php; if you + are using standalone or the specialized includes files, you're + expected to know what you're doing. +- Stop repeatedly writing the cache file after I'm done customizing a + raw definition. Reported by ajh. +- Switch to using require_once in the Bootstrap to work around bad + interaction with Zend Debugger and APC. Reported by Antonio Parraga. +- Fix URI handling when hostname is missing but scheme is present. + Reported by Neike Taika-Tessaro. +- Fix missing numeric entities on DirectLex; thanks Neike Taika-Tessaro + for reporting. +- Fix harmless notice from indexing into empty string. Thanks Matthijs + Kooijman for reporting. +- Don't autoclose no parent elements are able to support the element + that triggered the autoclose. In particular fixes strange behavior + of stray
  • tags. Thanks pkuliga@gmail.com for reporting and + Neike Taika-Tessaro for debugging assistance. + +4.2.0, released 2010-09-15 +! Added %Core.RemoveProcessingInstructions, which lets you remove + statements. +! Added %URI.DisableResources functionality; the directive originally + did nothing. Thanks David Rothstein for reporting. +! Add documentation about configuration directive types. +! Add %CSS.ForbiddenProperties configuration directive. +! Add %HTML.FlashAllowFullScreen to permit embedded Flash objects + to utilize full-screen mode. +! Add optional support for the file URI scheme, enable + by explicitly setting %URI.AllowedSchemes. +! Add %Core.NormalizeNewlines options to allow turning off newline + normalization. +- Fix improper handling of Internet Explorer conditional comments + by parser. Thanks zmonteca for reporting. +- Fix missing attributes bug when running on Mac Snow Leopard and APC. + Thanks sidepodcast for the fix. +- Warn if an element is allowed, but an attribute it requires is + not allowed. + +4.1.1, released 2010-05-31 +- Fix undefined index warnings in maintenance scripts. +- Fix bug in DirectLex for parsing elements with a single attribute + with entities. +- Rewrite CSS output logic for font-family and url(). Thanks Mario + Heiderich for reporting and Takeshi + Terada for suggesting the fix. +- Emit an error for CollectErrors if a body is extracted +- Fix bug where in background-position for center keyword handling. +- Fix infinite loop when a wrapper element is inserted in a context + where it's not allowed. Thanks Lars for reporting. +- Remove +x bit and shebang from index.php; only supported mode is to + explicitly call it with php. +- Make test script less chatty when log_errors is on. + +4.1.0, released 2010-04-26 +! Support proprietary height attribute on table element +! Support YouTube slideshows that contain /cp/ in their URL. +! Support for data: URI scheme; not enabled by default, add it using + %URI.AllowedSchemes +! Support flashvars when using %HTML.SafeObject and %HTML.SafeEmbed. +! Support for Internet Explorer compatibility with %HTML.SafeObject + using %Output.FlashCompat. +! Handle
        properly, by inserting the necessary
      1. tag. +- Always quote the insides of url(...) in CSS. + +4.0.0, released 2009-07-07 +# APIs for ConfigSchema subsystem have substantially changed. See + docs/dev-config-bcbreaks.txt for details; in essence, anything that + had both namespace and directive now have a single unified key. +# Some configuration directives were renamed, specifically: + %AutoFormatParam.PurifierLinkifyDocURL -> %AutoFormat.PurifierLinkify.DocURL + %FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping + %FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope + %FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl + As usual, the old directive names will still work, but will throw E_NOTICE + errors. +# The allowed values for class have been relaxed to allow all of CDATA for + doctypes that are not XHTML 1.1 or XHTML 2.0. For old behavior, set + %Attr.ClassUseCDATA to false. +# Instead of appending the content model to an old content model, a blank + element will replace the old content model. You can use #SUPER to get + the old content model. +! More robust support for name="" and id="" +! HTMLPurifier_Config::inherit($config) allows you to inherit one + configuration, and have changes to that configuration be propagated + to all of its children. +! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on + the name attribute when set. Use with care. Thanks Ian Cook for + sponsoring. +! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty + tags that contain non-breaking spaces as well other whitespace. You + can also modify which tags should have   maintained with + %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions. +! Implement %Attr.AllowedClasses, which allows administrators to restrict + classes users can use to a specified finite set of classes, and + %Attr.ForbiddenClasses, which is the logical inverse. +! You can now maintain your own configuration schema directories by + creating a config-schema.php file or passing an extra argument. Check + docs/dev-config-schema.html for more details. +! Added HTMLPurifier_Config->serialize() method, which lets you save away + your configuration in a compact serial file, which you can unserialize + and use directly without having to go through the overhead of setup. +- Fix bug where URIDefinition would not get cleared if it's directives got + changed. +- Fix fatal error in HTMLPurifier_Encoder on certain platforms (probably NetBSD 5.0) +- Fix bug in Linkify autoformatter involving http://foo +- Make %URI.Munge not apply to links that have the same host as your host. +- Prevent stray tag from truncating output, if a second + is present. +. Created script maintenance/rename-config.php for renaming a configuration + directive while maintaining its alias. This script does not change source code. +. Implement namespace locking for definition construction, to prevent + bugs where a directive is used for definition construction but is not + used to construct the cache hash. + +3.3.0, released 2009-02-16 +! Implement CSS property 'overflow' when %CSS.AllowTricky is true. +! Implement generic property list classess +- Fix bug with testEncodingSupportsASCII() algorithm when iconv() implementation + does not do the "right thing" with characters not supported in the output + set. +- Spellcheck UTF-8: The Secret To Character Encoding +- Fix improper removal of the contents of elements with only whitespace. Thanks + Eric Wald for reporting. +- Fix broken test suite in versions of PHP without spl_autoload_register() +- Fix degenerate case with YouTube filter involving double hyphens. + Thanks Pierre Attar for reporting. +- Fix YouTube rendering problem on certain versions of Firefox. +- Fix CSSDefinition Printer problems with decorators +- Add text parameter to unit tests, forces text output +. Add verbose mode to command line test runner, use (--verbose) +. Turn on unit tests for UnitConverter +. Fix missing version number in configuration %Attr.DefaultImageAlt (added 3.2.0) +. Fix newline errors that caused spurious failures when CRLF HTML Purifier was + tested on Linux. +. Removed trailing whitespace from all text files, see + remote-trailing-whitespace.php maintenance script. +. Convert configuration to use property list backend. + +3.2.0, released 2008-10-31 +# Using %Core.CollectErrors forces line number/column tracking on, whereas + previously you could theoretically turn it off. +# HTMLPurifier_Injector->notifyEnd() is formally deprecated. Please + use handleEnd() instead. +! %Output.AttrSort for when you need your attributes in alphabetical order to + deal with a bug in FCKEditor. Requested by frank farmer. +! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith. +! Proper support for name attribute. It is now allowed and equivalent to the id + attribute in a and img tags, and is only converted to id when %HTML.TidyLevel + is heavy (for all doctypes). +! %AutoFormat.RemoveEmpty to remove some empty tags from documents. Please don't + use on hand-written HTML. +! Add error-cases for unsupported elements in MakeWellFormed. This enables + the strategy to be used, standalone, on untrusted input. +! %Core.AggressivelyFixLt is on by default. This causes more sensible + processing of left angled brackets in smileys and other whatnot. +! Test scripts now have a 'type' parameter, which lets you say 'htmlpurifier', + 'phpt', 'vtest', etc. in order to only execute those tests. This supercedes + the --only-phpt parameter, although for backwards-compatibility the flag + will still work. +! AutoParagraph auto-formatter will now preserve double-newlines upon output. + Users who are not performing inbound filtering, this may seem a little + useless, but as a bonus, the test suite and handling of edge cases is also + improved. +! Experimental implementation of forms for %HTML.Trusted +! Track column numbers when maintain line numbers is on +! Proprietary 'background' attribute on table-related elements converted into + corresponding CSS. Thanks Fusemail for sponsoring this feature! +! Add forward(), forwardUntilEndToken(), backward() and current() to Injector + supertype. +! HTMLPurifier_Injector->handleEnd() permits modification to end tokens. The + time of operation varies slightly from notifyEnd() as *all* end tokens are + processed by the injector before they are subject to the well-formedness rules. +! %Attr.DefaultImageAlt allows overriding default behavior of setting alt to + basename of image when not present. +! %AutoFormat.DisplayLinkURI neuters tags into plain text URLs. +- Fix two bugs in %URI.MakeAbsolute; one involving empty paths in base URLs, + the other involving an undefined $is_folder error. +- Throw error when %Core.Encoding is set to a spurious value. Previously, + this errored silently and returned false. +- Redirected stderr to stdout for flush error output. +- %URI.DisableExternal will now use the host in %URI.Base if %URI.Host is not + available. +- Do not re-munge URL if the output URL has the same host as the input URL. + Requested by Chris. +- Fix error in documentation regarding %Filter.ExtractStyleBlocks +- Prevent ]]> from triggering %Core.ConvertDocumentToFragment +- Fix bug with inline elements in blockquotes conflicting with strict doctype +- Detect if HTML support is disabled for DOM by checking for loadHTML() method. +- Fix bug where dots and double-dots in absolute URLs without hostname were + not collapsed by URIFilter_MakeAbsolute. +- Fix bug with anonymous modules operating on SafeEmbed or SafeObject elements + by reordering their addition. +- Will now throw exception on many error conditions during lexer creation; also + throw an exception when MaintainLineNumbers is true, but a non-tracksLineNumbers + is being used. +- Detect if domxml extension is loaded, and use DirectLEx accordingly. +- Improve handling of big numbers with floating point arithmetic in UnitConverter. + Reported by David Morton. +. Strategy_MakeWellFormed now operates in-place, saving memory and allowing + for more interesting filter-backtracking +. New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind + index to reprocess tokens. +. StringHashParser now allows for multiline sections with "empty" content; + previously the section would remain undefined. +. Added --quick option to multitest.php, which tests only the most recent + release for each series. +. Added --distro option to multitest.php, which accepts either 'normal' or + 'standalone'. This supercedes --exclude-normal and --exclude-standalone + +3.1.1, released 2008-06-19 +# %URI.Munge now, by default, does not munge resources (for example, ) + In order to enable this again, please set %URI.MungeResources to true. +! More robust imagecrash protection with height/width CSS with %CSS.MaxImgLength, + and height/width HTML with %HTML.MaxImgLength. +! %URI.MungeSecretKey for secure URI munging. Thanks Chris + for sponsoring this feature. Check out the corresponding documentation + for details. (Att Nightly testers: The API for this feature changed before + the general release. Namely, rename your directives %URI.SecureMungeSecretKey => + %URI.MungeSecretKey and and %URI.SecureMunge => %URI.Munge) +! Implemented post URI filtering. Set member variable $post to true to set + a URIFilter as such. +! Allow modules to define injectors via $info_injector. Injectors are + automatically disabled if injector's needed elements are not found. +! Support for "safe" objects added, use %HTML.SafeObject and %HTML.SafeEmbed. + Thanks Chris for sponsoring. If you've been using ad hoc code from the + forums, PLEASE use this instead. +! Added substitutions for %e, %n, %a and %p in %URI.Munge (in order, + embedded, tag name, attribute name, CSS property name). See %URI.Munge + for more details. Requested by Jochem Blok. +- Disable percent height/width attributes for img. +- AttrValidator operations are now atomic; updates to attributes are not + manifest in token until end of operations. This prevents naughty internal + code from directly modifying CurrentToken when they're not supposed to. + This semantics change was requested by frank farmer. +- Percent encoding checks enabled for URI query and fragment +- Fix stray backslashes in font-family; CSS Unicode character escapes are + now properly resolved (although *only* in font-family). Thanks Takeshi Terada + for reporting. +- Improve parseCDATA algorithm to take into account newline normalization +- Account for browser confusion between Yen character and backslash in + Shift_JIS encoding. This fix generalizes to any other encoding which is not + a strict superset of printable ASCII. Thanks Takeshi Terada for reporting. +- Fix missing configuration parameter in Generator calls. Thanks vs for the + partial patch. +- Improved adherence to Unicode by checking for non-character codepoints. + Thanks Geoffrey Sneddon for reporting. This may result in degraded + performance for extremely large inputs. +- Allow CSS property-value pair ''text-decoration: none''. Thanks Jochem Blok + for reporting. +. Added HTMLPurifier_UnitConverter and HTMLPurifier_Length for convenient + handling of CSS-style lengths. HTMLPurifier_AttrDef_CSS_Length now uses + this class. +. API of HTMLPurifier_AttrDef_CSS_Length changed from __construct($disable_negative) + to __construct($min, $max). __construct(true) is equivalent to + __construct('0'). +. Added HTMLPurifier_AttrDef_Switch class +. Rename HTMLPurifier_HTMLModule_Tidy->construct() to setup() and bubble method + up inheritance hierarchy to HTMLPurifier_HTMLModule. All HTMLModules + get this called with the configuration object. All modules now + use this rather than __construct(), although legacy code using constructors + will still work--the new format, however, lets modules access the + configuration object for HTML namespace dependant tweaks. +. AttrDef_HTML_Pixels now takes a single construction parameter, pixels. +. ConfigSchema data-structure heavily optimized; on average it uses a third + the memory it did previously. The interface has changed accordingly, + consult changes to HTMLPurifier_Config for details. +. Variable parsing types now are magic integers instead of strings +. Added benchmark for ConfigSchema +. HTMLPurifier_Generator requires $config and $context parameters. If you + don't know what they should be, use HTMLPurifier_Config::createDefault() + and new HTMLPurifier_Context(). +. Printers now properly distinguish between output configuration, and + target configuration. This is not applicable to scripts using + the Printers for HTML Purifier related tasks. +. HTML/CSS Printers must be primed with prepareGenerator($gen_config), otherwise + fatal errors will ensue. +. URIFilter->prepare can return false in order to abort loading of the filter +. Factory for AttrDef_URI implemented, URI#embedded to indicate URI that embeds + an external resource. +. %URI.Munge functionality factored out into a post-filter class. +. Added CurrentCSSProperty context variable during CSS validation + +3.1.0, released 2008-05-18 +# Unnecessary references to objects (vestiges of PHP4) removed from method + signatures. The following methods do not need references when assigning from + them and will result in E_STRICT errors if you try: + + HTMLPurifier_Config->get*Definition() [* = HTML, CSS] + + HTMLPurifier_ConfigSchema::instance() + + HTMLPurifier_DefinitionCacheFactory::instance() + + HTMLPurifier_DefinitionCacheFactory->create() + + HTMLPurifier_DoctypeRegistry->register() + + HTMLPurifier_DoctypeRegistry->get() + + HTMLPurifier_HTMLModule->addElement() + + HTMLPurifier_HTMLModule->addBlankElement() + + HTMLPurifier_LanguageFactory::instance() +# Printer_ConfigForm's get*() functions were static-ified +# %HTML.ForbiddenAttributes requires attribute declarations to be in the + form of tag@attr, NOT tag.attr (which will throw an error and won't do + anything). This is for forwards compatibility with XML; you'd do best + to migrate an %HTML.AllowedAttributes directives to this syntax too. +! Allow index to be false for config from form creation +! Added HTMLPurifier::VERSION constant +! Commas, not dashes, used for serializer IDs. This change is forwards-compatible + and allows for version numbers like "3.1.0-dev". +! %HTML.Allowed deals gracefully with whitespace anywhere, anytime! +! HTML Purifier's URI handling is a lot more robust, with much stricter + validation checks and better percent encoding handling. Thanks Gareth Heyes + for indicating security vulnerabilities from lax percent encoding. +! Bootstrap autoloader deals more robustly with classes that don't exist, + preventing class_exists($class, true) from barfing. +- InterchangeBuilder now alphabetizes its lists +- Validation error in configdoc output fixed +- Iconv and other encoding errors muted even with custom error handlers that + do not honor error_reporting +- Add protection against imagecrash attack with CSS height/width +- HTMLPurifier::instance() created for consistency, is equivalent to getInstance() +- Fixed and revamped broken ConfigForm smoketest +- Bug with bool/null fields in Printer_ConfigForm fixed +- Bug with global forbidden attributes fixed +- Improved error messages for allowed and forbidden HTML elements and attributes +- Missing (or null) in configdoc documentation restored +- If DOM throws and exception during parsing with PH5P (occurs in newer versions + of DOM), HTML Purifier punts to DirectLex +- Fatal error with unserialization of ScriptRequired +- Created directories are now chmod'ed properly +- Fixed bug with fallback languages in LanguageFactory +- Standalone testing setup properly with autoload +. Out-of-date documentation revised +. UTF-8 encoding check optimization as suggested by Diego +. HTMLPurifier_Error removed in favor of exceptions +. More copy() function removed; should use clone instead +. More extensive unit tests for HTMLDefinition +. assertPurification moved to central harness +. HTMLPurifier_Generator accepts $config and $context parameters during + instantiation, not runtime +. Double-quotes outside of attribute values are now unescaped + +3.1.0rc1, released 2008-04-22 +# Autoload support added. Internal require_once's removed in favor of an + explicit require list or autoloading. To use HTML Purifier, + you must now either use HTMLPurifier.auto.php + or HTMLPurifier.includes.php; setting the include path and including + HTMLPurifier.php is insufficient--in such cases include HTMLPurifier.autoload.php + as well to register our autoload handler (or modify your autoload function + to check HTMLPurifier_Bootstrap::getPath($class)). You can also use + HTMLPurifier.safe-includes.php for a less performance friendly but more + user-friendly library load. +# HTMLPurifier_ConfigSchema static functions are officially deprecated. Schema + information is stored in the ConfigSchema directory, and the + maintenance/generate-schema-cache.php generates the schema.ser file, which + is now instantiated. Support for userland schema changes coming soon! +# HTMLPurifier_Config will now throw E_USER_NOTICE when you use a directive + alias; to get rid of these errors just modify your configuration to use + the new directive name. +# HTMLPurifier->addFilter is deprecated; built-in filters can now be + enabled using %Filter.$filter_name or by setting your own filters using + %Filter.Custom +# Directive-level safety properties superceded in favor of module-level + safety. Internal method HTMLModule->addElement() has changed, although + the externally visible HTMLDefinition->addElement has *not* changed. +! Extra utility classes for testing and non-library operations can + be found in extras/. Specifically, these are FSTools and ConfigDoc. + You may find a use for these in your own project, but right now they + are highly experimental and volatile. +! Integration with PHPT allows for automated smoketests +! Limited support for proprietary HTML elements, namely , sponsored + by Chris. You can enable them with %HTML.Proprietary if your client + demands them. +! Support for !important CSS cascade modifier. By default, this will be stripped + from CSS, but you can enable it using %CSS.AllowImportant +! Support for display and visibility CSS properties added, set %CSS.AllowTricky + to true to use them. +! HTML Purifier now has its own Exception hierarchy under HTMLPurifier_Exception. + Developer error (not enduser error) can cause these to be triggered. +! Experimental kses() wrapper introduced with HTMLPurifier.kses.php +! Finally %CSS.AllowedProperties for tweaking allowed CSS properties without + mucking around with HTMLPurifier_CSSDefinition +! ConfigDoc output has been enhanced with version and deprecation info. +! %HTML.ForbiddenAttributes and %HTML.ForbiddenElements implemented. +- Autoclose now operates iteratively, i.e.
        now has + both span tags closed. +- Various HTMLPurifier_Config convenience functions now accept another parameter + $schema which defines what HTMLPurifier_ConfigSchema to use besides the + global default. +- Fix bug with trusted script handling in libxml versions later than 2.6.28. +- Fix bug in ExtractStyleBlocks with comments in style tags +- Fix bug in comment parsing for DirectLex +- Flush output now displayed when in command line mode for unit tester +- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax +- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times + on the same element without emitting errors. +- Fixed fatal error in PH5P lexer with invalid tag names +. Plugins now get their own changelogs according to project conventions. +. Convert tokens to use instanceof, reducing memory footprint and + improving comparison speed. +. Dry runs now supported in SimpleTest; testing facilities improved +. Bootstrap class added for handling autoloading functionality +. Implemented recursive glob at FSTools->globr +. ConfigSchema now has instance methods for all corresponding define* + static methods. +. A couple of new historical maintenance scripts were added. +. HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php split into two files +. tests/index.php can now be run from any directory. +. HTMLPurifier_Token subclasses split into seperate files +. HTMLPURIFIER_PREFIX now is defined in Bootstrap.php, NOT HTMLPurifier.php +. HTMLPURIFIER_PREFIX can now be defined outside of HTML Purifier +. New --php=php flag added, allows PHP executable to be specified (command + line only!) +. htmlpurifier_add_test() preferred method to translate test files in to + classes, because it handles PHPT files too. +. Debugger class is deprecated and will be removed soon. +. Command line argument parsing for testing scripts revamped, now --opt value + format is supported. +. Smoketests now cleanup after magic quotes +. Generator now can output comments (however, comments are still stripped + from HTML Purifier output) +. HTMLPurifier_ConfigSchema->validate() deprecated in favor of + HTMLPurifier_VarParser->parse() +. Integers auto-cast into float type by VarParser. +. HTMLPURIFIER_STRICT removed; no validation is performed on runtime, only + during cache generation +. Reordered script calls in maintenance/flush.php +. Command line scripts now honor exit codes +. When --flush fails in unit testers, abort tests and print message +. Improved documentation in docs/dev-flush.html about the maintenance scripts +. copy() methods removed in favor of clone keyword + +3.0.0, released 2008-01-06 +# HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained + until PHP 4 is completely deprecated, but no new features will be added + to it. + + Visibility declarations added + + Constructor methods renamed to __construct() + + PHP4 reference cruft removed (in progress) +! CSS properties are now case-insensitive +! DefinitionCacheFactory now can register new implementations +! New HTMLPurifier_Filter_ExtractStyleBlocks for extracting #isU', array($this, 'styleCallback'), $html); + $style_blocks = $this->_styleMatches; + $this->_styleMatches = array(); // reset + $context->register('StyleBlocks', $style_blocks); // $context must not be reused + if ($this->_tidy) { + foreach ($style_blocks as &$style) { + $style = $this->cleanCSS($style, $config, $context); + } + } + return $html; + } + + /** + * Takes CSS (the stuff found in in a font-family prop). + if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { + $css = str_replace( + array('<', '>', '&'), + array('\3C ', '\3E ', '\26 '), + $css + ); + } + return $css; + } +} + +// vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php new file mode 100644 index 0000000000..276d8362fa --- /dev/null +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Filter/YouTube.php @@ -0,0 +1,65 @@ +]+>.+?' . + '(?:http:)?//www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s'; + $pre_replace = '\1'; + return preg_replace($pre_regex, $pre_replace, $html); + } + + /** + * @param string $html + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return string + */ + public function postFilter($html, $config, $context) + { + $post_regex = '#((?:v|cp)/[A-Za-z0-9\-_=]+)#'; + return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); + } + + /** + * @param $url + * @return string + */ + protected function armorUrl($url) + { + return str_replace('--', '--', $url); + } + + /** + * @param array $matches + * @return string + */ + protected function postFilterCallback($matches) + { + $url = $this->armorUrl($matches[1]); + return '' . + '' . + '' . + ''; + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Generator.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Generator.php similarity index 56% rename from library/HTMLPurifier/Generator.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Generator.php index 4a62417271..6fb5687146 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Generator.php @@ -11,49 +11,64 @@ class HTMLPurifier_Generator { /** - * Whether or not generator should produce XML output + * Whether or not generator should produce XML output. + * @type bool */ private $_xhtml = true; /** - * :HACK: Whether or not generator should comment the insides of )#si', - array($this, 'scriptCallback'), $html); + $html = preg_replace_callback( + '#(]*>)(\s*[^<].+?)()#si', + array($this, 'scriptCallback'), + $html + ); } $html = $this->normalize($html, $config, $context); @@ -55,15 +69,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if ($maintain_line_numbers) { $current_line = 1; - $current_col = 0; + $current_col = 0; $length = strlen($html); } else { $current_line = false; - $current_col = false; + $current_col = false; $length = false; } $context->register('CurrentLine', $current_line); - $context->register('CurrentCol', $current_col); + $context->register('CurrentCol', $current_col); $nl = "\n"; // how often to manually recalculate. This will ALWAYS be right, // but it's pretty wasteful. Set to 0 to turn off @@ -77,16 +91,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // for testing synchronization $loops = 0; - while(++$loops) { - + while (++$loops) { // $cursor is either at the start of a token, or inside of // a tag (i.e. there was a < immediately before it), as indicated // by $inside_tag if ($maintain_line_numbers) { - // $rcursor, however, is always at the start of a token. - $rcursor = $cursor - (int) $inside_tag; + $rcursor = $cursor - (int)$inside_tag; // Column number is cheap, so we calculate it every round. // We're interested at the *end* of the newline string, so @@ -96,14 +108,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); // recalculate lines - if ( - $synchronize_interval && // synchronization is on - $cursor > 0 && // cursor is further than zero - $loops % $synchronize_interval === 0 // time to synchronize! - ) { + if ($synchronize_interval && // synchronization is on + $cursor > 0 && // cursor is further than zero + $loops % $synchronize_interval === 0) { // time to synchronize! $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); } - } $position_next_lt = strpos($html, '<', $cursor); @@ -119,35 +128,42 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if (!$inside_tag && $position_next_lt !== false) { // We are not inside tag and there still is another tag to parse $token = new - HTMLPurifier_Token_Text( - $this->parseData( - substr( - $html, $cursor, $position_next_lt - $cursor - ) + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, + $cursor, + $position_next_lt - $cursor ) - ); + ) + ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); } $array[] = $token; - $cursor = $position_next_lt + 1; + $cursor = $position_next_lt + 1; $inside_tag = true; continue; } elseif (!$inside_tag) { // We are not inside tag but there are no more tags // If we're already at the end, break - if ($cursor === strlen($html)) break; + if ($cursor === strlen($html)) { + break; + } // Create Text of rest of string $token = new - HTMLPurifier_Token_Text( - $this->parseData( - substr( - $html, $cursor - ) + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, + $cursor ) - ); - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); + ) + ); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + } $array[] = $token; break; } elseif ($inside_tag && $position_next_gt !== false) { @@ -171,16 +187,16 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } // Check if it's a comment - if ( - substr($segment, 0, 3) === '!--' - ) { + if (substr($segment, 0, 3) === '!--') { // re-determine segment length, looking for --> $position_comment_end = strpos($html, '-->', $cursor); if ($position_comment_end === false) { // uh oh, we have a comment that extends to // infinity. Can't be helped: set comment // end position to end of string - if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); + if ($e) { + $e->send(E_WARNING, 'Lexer: Unclosed comment'); + } $position_comment_end = strlen($html); $end = true; } else { @@ -189,11 +205,13 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $strlen_segment = $position_comment_end - $cursor; $segment = substr($html, $cursor, $strlen_segment); $token = new - HTMLPurifier_Token_Comment( - substr( - $segment, 3, $strlen_segment - 3 - ) - ); + HTMLPurifier_Token_Comment( + substr( + $segment, + 3, + $strlen_segment - 3 + ) + ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); @@ -205,7 +223,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } // Check if it's an end tag - $is_end_tag = (strpos($segment,'/') === 0); + $is_end_tag = (strpos($segment, '/') === 0); if ($is_end_tag) { $type = substr($segment, 1); $token = new HTMLPurifier_Token_End($type); @@ -224,7 +242,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // text and go our merry way if (!ctype_alpha($segment[0])) { // XML: $segment[0] !== '_' && $segment[0] !== ':' - if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); + if ($e) { + $e->send(E_NOTICE, 'Lexer: Unescaped lt'); + } $token = new HTMLPurifier_Token_Text('<'); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); @@ -239,7 +259,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // trailing slash. Remember, we could have a tag like
        , so // any later token processing scripts must convert improperly // classified EmptyTags from StartTags. - $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); + $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1); if ($is_self_closing) { $strlen_segment--; $segment = substr($segment, 0, $strlen_segment); @@ -269,14 +289,16 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $attribute_string = trim( substr( - $segment, $position_first_space + $segment, + $position_first_space ) ); if ($attribute_string) { $attr = $this->parseAttributeString( - $attribute_string - , $config, $context - ); + $attribute_string, + $config, + $context + ); } else { $attr = array(); } @@ -296,15 +318,19 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer continue; } else { // inside tag, but there's no ending > sign - if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); + if ($e) { + $e->send(E_WARNING, 'Lexer: Missing gt'); + } $token = new - HTMLPurifier_Token_Text( - '<' . - $this->parseData( - substr($html, $cursor) - ) - ); - if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); + HTMLPurifier_Token_Text( + '<' . + $this->parseData( + substr($html, $cursor) + ) + ); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + } // no cursor scroll? Hmm... $array[] = $token; break; @@ -319,8 +345,14 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer /** * PHP 5.0.x compatible substr_count that implements offset and length + * @param string $haystack + * @param string $needle + * @param int $offset + * @param int $length + * @return int */ - protected function substrCount($haystack, $needle, $offset, $length) { + protected function substrCount($haystack, $needle, $offset, $length) + { static $oldVersion; if ($oldVersion === null) { $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); @@ -336,13 +368,18 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer /** * Takes the inside of an HTML tag and makes an assoc array of attributes. * - * @param $string Inside of tag excluding name. - * @returns Assoc array of attributes. + * @param string $string Inside of tag excluding name. + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return array Assoc array of attributes. */ - public function parseAttributeString($string, $config, $context) { - $string = (string) $string; // quick typecast + public function parseAttributeString($string, $config, $context) + { + $string = (string)$string; // quick typecast - if ($string == '') return array(); // no attributes + if ($string == '') { + return array(); + } // no attributes $e = false; if ($config->get('Core.CollectErrors')) { @@ -361,46 +398,55 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer list($key, $quoted_value) = explode('=', $string); $quoted_value = trim($quoted_value); if (!$key) { - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing attribute key'); + } return array(); } - if (!$quoted_value) return array($key => ''); + if (!$quoted_value) { + return array($key => ''); + } $first_char = @$quoted_value[0]; - $last_char = @$quoted_value[strlen($quoted_value)-1]; + $last_char = @$quoted_value[strlen($quoted_value) - 1]; $same_quote = ($first_char == $last_char); $open_quote = ($first_char == '"' || $first_char == "'"); - if ( $same_quote && $open_quote) { + if ($same_quote && $open_quote) { // well behaved $value = substr($quoted_value, 1, strlen($quoted_value) - 2); } else { // not well behaved if ($open_quote) { - if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing end quote'); + } $value = substr($quoted_value, 1); } else { $value = $quoted_value; } } - if ($value === false) $value = ''; + if ($value === false) { + $value = ''; + } return array($key => $this->parseData($value)); } // setup loop environment - $array = array(); // return assoc array of attributes + $array = array(); // return assoc array of attributes $cursor = 0; // current position in string (moves forward) - $size = strlen($string); // size of the string (stays the same) + $size = strlen($string); // size of the string (stays the same) // if we have unquoted attributes, the parser expects a terminating // space, so let's guarantee that there's always a terminating space. $string .= ' '; - while(true) { - - if ($cursor >= $size) { - break; + $old_cursor = -1; + while ($cursor < $size) { + if ($old_cursor >= $cursor) { + throw new Exception("Infinite loop detected"); } + $old_cursor = $cursor; $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); // grab the key @@ -415,8 +461,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $key = substr($string, $key_begin, $key_end - $key_begin); if (!$key) { - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); - $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing attribute key'); + } + $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop continue; // empty key } @@ -467,24 +515,25 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } $value = substr($string, $value_begin, $value_end - $value_begin); - if ($value === false) $value = ''; + if ($value === false) { + $value = ''; + } $array[$key] = $this->parseData($value); $cursor++; - } else { // boolattr if ($key !== '') { $array[$key] = $key; } else { // purely theoretical - if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + if ($e) { + $e->send(E_ERROR, 'Lexer: Missing attribute key'); + } } - } } return $array; } - } // vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php new file mode 100644 index 0000000000..ff4fa218fb --- /dev/null +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Lexer/PH5P.php @@ -0,0 +1,4787 @@ +normalize($html, $config, $context); + $new_html = $this->wrapHTML($new_html, $config, $context); + try { + $parser = new HTML5($new_html); + $doc = $parser->save(); + } catch (DOMException $e) { + // Uh oh, it failed. Punt to DirectLex. + $lexer = new HTMLPurifier_Lexer_DirectLex(); + $context->register('PH5PError', $e); // save the error, so we can detect it + return $lexer->tokenizeHTML($html, $config, $context); // use original HTML + } + $tokens = array(); + $this->tokenizeDOM( + $doc->getElementsByTagName('html')->item(0)-> // + getElementsByTagName('body')->item(0) // + , + $tokens + ); + return $tokens; + } +} + +/* + +Copyright 2007 Jeroen van der Meer + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +*/ + +class HTML5 +{ + private $data; + private $char; + private $EOF; + private $state; + private $tree; + private $token; + private $content_model; + private $escape = false; + private $entities = array( + 'AElig;', + 'AElig', + 'AMP;', + 'AMP', + 'Aacute;', + 'Aacute', + 'Acirc;', + 'Acirc', + 'Agrave;', + 'Agrave', + 'Alpha;', + 'Aring;', + 'Aring', + 'Atilde;', + 'Atilde', + 'Auml;', + 'Auml', + 'Beta;', + 'COPY;', + 'COPY', + 'Ccedil;', + 'Ccedil', + 'Chi;', + 'Dagger;', + 'Delta;', + 'ETH;', + 'ETH', + 'Eacute;', + 'Eacute', + 'Ecirc;', + 'Ecirc', + 'Egrave;', + 'Egrave', + 'Epsilon;', + 'Eta;', + 'Euml;', + 'Euml', + 'GT;', + 'GT', + 'Gamma;', + 'Iacute;', + 'Iacute', + 'Icirc;', + 'Icirc', + 'Igrave;', + 'Igrave', + 'Iota;', + 'Iuml;', + 'Iuml', + 'Kappa;', + 'LT;', + 'LT', + 'Lambda;', + 'Mu;', + 'Ntilde;', + 'Ntilde', + 'Nu;', + 'OElig;', + 'Oacute;', + 'Oacute', + 'Ocirc;', + 'Ocirc', + 'Ograve;', + 'Ograve', + 'Omega;', + 'Omicron;', + 'Oslash;', + 'Oslash', + 'Otilde;', + 'Otilde', + 'Ouml;', + 'Ouml', + 'Phi;', + 'Pi;', + 'Prime;', + 'Psi;', + 'QUOT;', + 'QUOT', + 'REG;', + 'REG', + 'Rho;', + 'Scaron;', + 'Sigma;', + 'THORN;', + 'THORN', + 'TRADE;', + 'Tau;', + 'Theta;', + 'Uacute;', + 'Uacute', + 'Ucirc;', + 'Ucirc', + 'Ugrave;', + 'Ugrave', + 'Upsilon;', + 'Uuml;', + 'Uuml', + 'Xi;', + 'Yacute;', + 'Yacute', + 'Yuml;', + 'Zeta;', + 'aacute;', + 'aacute', + 'acirc;', + 'acirc', + 'acute;', + 'acute', + 'aelig;', + 'aelig', + 'agrave;', + 'agrave', + 'alefsym;', + 'alpha;', + 'amp;', + 'amp', + 'and;', + 'ang;', + 'apos;', + 'aring;', + 'aring', + 'asymp;', + 'atilde;', + 'atilde', + 'auml;', + 'auml', + 'bdquo;', + 'beta;', + 'brvbar;', + 'brvbar', + 'bull;', + 'cap;', + 'ccedil;', + 'ccedil', + 'cedil;', + 'cedil', + 'cent;', + 'cent', + 'chi;', + 'circ;', + 'clubs;', + 'cong;', + 'copy;', + 'copy', + 'crarr;', + 'cup;', + 'curren;', + 'curren', + 'dArr;', + 'dagger;', + 'darr;', + 'deg;', + 'deg', + 'delta;', + 'diams;', + 'divide;', + 'divide', + 'eacute;', + 'eacute', + 'ecirc;', + 'ecirc', + 'egrave;', + 'egrave', + 'empty;', + 'emsp;', + 'ensp;', + 'epsilon;', + 'equiv;', + 'eta;', + 'eth;', + 'eth', + 'euml;', + 'euml', + 'euro;', + 'exist;', + 'fnof;', + 'forall;', + 'frac12;', + 'frac12', + 'frac14;', + 'frac14', + 'frac34;', + 'frac34', + 'frasl;', + 'gamma;', + 'ge;', + 'gt;', + 'gt', + 'hArr;', + 'harr;', + 'hearts;', + 'hellip;', + 'iacute;', + 'iacute', + 'icirc;', + 'icirc', + 'iexcl;', + 'iexcl', + 'igrave;', + 'igrave', + 'image;', + 'infin;', + 'int;', + 'iota;', + 'iquest;', + 'iquest', + 'isin;', + 'iuml;', + 'iuml', + 'kappa;', + 'lArr;', + 'lambda;', + 'lang;', + 'laquo;', + 'laquo', + 'larr;', + 'lceil;', + 'ldquo;', + 'le;', + 'lfloor;', + 'lowast;', + 'loz;', + 'lrm;', + 'lsaquo;', + 'lsquo;', + 'lt;', + 'lt', + 'macr;', + 'macr', + 'mdash;', + 'micro;', + 'micro', + 'middot;', + 'middot', + 'minus;', + 'mu;', + 'nabla;', + 'nbsp;', + 'nbsp', + 'ndash;', + 'ne;', + 'ni;', + 'not;', + 'not', + 'notin;', + 'nsub;', + 'ntilde;', + 'ntilde', + 'nu;', + 'oacute;', + 'oacute', + 'ocirc;', + 'ocirc', + 'oelig;', + 'ograve;', + 'ograve', + 'oline;', + 'omega;', + 'omicron;', + 'oplus;', + 'or;', + 'ordf;', + 'ordf', + 'ordm;', + 'ordm', + 'oslash;', + 'oslash', + 'otilde;', + 'otilde', + 'otimes;', + 'ouml;', + 'ouml', + 'para;', + 'para', + 'part;', + 'permil;', + 'perp;', + 'phi;', + 'pi;', + 'piv;', + 'plusmn;', + 'plusmn', + 'pound;', + 'pound', + 'prime;', + 'prod;', + 'prop;', + 'psi;', + 'quot;', + 'quot', + 'rArr;', + 'radic;', + 'rang;', + 'raquo;', + 'raquo', + 'rarr;', + 'rceil;', + 'rdquo;', + 'real;', + 'reg;', + 'reg', + 'rfloor;', + 'rho;', + 'rlm;', + 'rsaquo;', + 'rsquo;', + 'sbquo;', + 'scaron;', + 'sdot;', + 'sect;', + 'sect', + 'shy;', + 'shy', + 'sigma;', + 'sigmaf;', + 'sim;', + 'spades;', + 'sub;', + 'sube;', + 'sum;', + 'sup1;', + 'sup1', + 'sup2;', + 'sup2', + 'sup3;', + 'sup3', + 'sup;', + 'supe;', + 'szlig;', + 'szlig', + 'tau;', + 'there4;', + 'theta;', + 'thetasym;', + 'thinsp;', + 'thorn;', + 'thorn', + 'tilde;', + 'times;', + 'times', + 'trade;', + 'uArr;', + 'uacute;', + 'uacute', + 'uarr;', + 'ucirc;', + 'ucirc', + 'ugrave;', + 'ugrave', + 'uml;', + 'uml', + 'upsih;', + 'upsilon;', + 'uuml;', + 'uuml', + 'weierp;', + 'xi;', + 'yacute;', + 'yacute', + 'yen;', + 'yen', + 'yuml;', + 'yuml', + 'zeta;', + 'zwj;', + 'zwnj;' + ); + + const PCDATA = 0; + const RCDATA = 1; + const CDATA = 2; + const PLAINTEXT = 3; + + const DOCTYPE = 0; + const STARTTAG = 1; + const ENDTAG = 2; + const COMMENT = 3; + const CHARACTR = 4; + const EOF = 5; + + public function __construct($data) + { + $this->data = $data; + $this->char = -1; + $this->EOF = strlen($data); + $this->tree = new HTML5TreeConstructer; + $this->content_model = self::PCDATA; + + $this->state = 'data'; + + while ($this->state !== null) { + $this->{$this->state . 'State'}(); + } + } + + public function save() + { + return $this->tree->save(); + } + + private function char() + { + return ($this->char < $this->EOF) + ? $this->data[$this->char] + : false; + } + + private function character($s, $l = 0) + { + if ($s + $l < $this->EOF) { + if ($l === 0) { + return $this->data[$s]; + } else { + return substr($this->data, $s, $l); + } + } + } + + private function characters($char_class, $start) + { + return preg_replace('#^([' . $char_class . ']+).*#s', '\\1', substr($this->data, $start)); + } + + private function dataState() + { + // Consume the next input character + $this->char++; + $char = $this->char(); + + if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { + /* U+0026 AMPERSAND (&) + When the content model flag is set to one of the PCDATA or RCDATA + states: switch to the entity data state. Otherwise: treat it as per + the "anything else" entry below. */ + $this->state = 'entityData'; + + } elseif ($char === '-') { + /* If the content model flag is set to either the RCDATA state or + the CDATA state, and the escape flag is false, and there are at + least three characters before this one in the input stream, and the + last four characters in the input stream, including this one, are + U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, + and U+002D HYPHEN-MINUS (""), + set the escape flag to false. */ + if (($this->content_model === self::RCDATA || + $this->content_model === self::CDATA) && $this->escape === true && + $this->character($this->char, 3) === '-->' + ) { + $this->escape = false; + } + + /* In any case, emit the input character as a character token. + Stay in the data state. */ + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => $char + ) + ); + + } elseif ($this->char === $this->EOF) { + /* EOF + Emit an end-of-file token. */ + $this->EOF(); + + } elseif ($this->content_model === self::PLAINTEXT) { + /* When the content model flag is set to the PLAINTEXT state + THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of + the text and emit it as a character token. */ + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => substr($this->data, $this->char) + ) + ); + + $this->EOF(); + + } else { + /* Anything else + THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that + otherwise would also be treated as a character token and emit it + as a single character token. Stay in the data state. */ + $len = strcspn($this->data, '<&', $this->char); + $char = substr($this->data, $this->char, $len); + $this->char += $len - 1; + + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => $char + ) + ); + + $this->state = 'data'; + } + } + + private function entityDataState() + { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, emit a U+0026 AMPERSAND character token. + // Otherwise, emit the character token that was returned. + $char = (!$entity) ? '&' : $entity; + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => $char + ) + ); + + // Finally, switch to the data state. + $this->state = 'data'; + } + + private function tagOpenState() + { + switch ($this->content_model) { + case self::RCDATA: + case self::CDATA: + /* If the next input character is a U+002F SOLIDUS (/) character, + consume it and switch to the close tag open state. If the next + input character is not a U+002F SOLIDUS (/) character, emit a + U+003C LESS-THAN SIGN character token and switch to the data + state to process the next input character. */ + if ($this->character($this->char + 1) === '/') { + $this->char++; + $this->state = 'closeTagOpen'; + + } else { + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => '<' + ) + ); + + $this->state = 'data'; + } + break; + + case self::PCDATA: + // If the content model flag is set to the PCDATA state + // Consume the next input character: + $this->char++; + $char = $this->char(); + + if ($char === '!') { + /* U+0021 EXCLAMATION MARK (!) + Switch to the markup declaration open state. */ + $this->state = 'markupDeclarationOpen'; + + } elseif ($char === '/') { + /* U+002F SOLIDUS (/) + Switch to the close tag open state. */ + $this->state = 'closeTagOpen'; + + } elseif (preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new start tag token, set its tag name to the lowercase + version of the input character (add 0x0020 to the character's code + point), then switch to the tag name state. (Don't emit the token + yet; further details will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::STARTTAG, + 'attr' => array() + ); + + $this->state = 'tagName'; + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Emit a U+003C LESS-THAN SIGN character token and a + U+003E GREATER-THAN SIGN character token. Switch to the data state. */ + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => '<>' + ) + ); + + $this->state = 'data'; + + } elseif ($char === '?') { + /* U+003F QUESTION MARK (?) + Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + + } else { + /* Anything else + Parse error. Emit a U+003C LESS-THAN SIGN character token and + reconsume the current input character in the data state. */ + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => '<' + ) + ); + + $this->char--; + $this->state = 'data'; + } + break; + } + } + + private function closeTagOpenState() + { + $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); + $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; + + if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && + (!$the_same || ($the_same && (!preg_match( + '/[\t\n\x0b\x0c >\/]/', + $this->character($this->char + 1 + strlen($next_node)) + ) || $this->EOF === $this->char))) + ) { + /* If the content model flag is set to the RCDATA or CDATA states then + examine the next few characters. If they do not match the tag name of + the last start tag token emitted (case insensitively), or if they do but + they are not immediately followed by one of the following characters: + * U+0009 CHARACTER TABULATION + * U+000A LINE FEED (LF) + * U+000B LINE TABULATION + * U+000C FORM FEED (FF) + * U+0020 SPACE + * U+003E GREATER-THAN SIGN (>) + * U+002F SOLIDUS (/) + * EOF + ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character + token, a U+002F SOLIDUS character token, and switch to the data state + to process the next input character. */ + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => 'state = 'data'; + + } else { + /* Otherwise, if the content model flag is set to the PCDATA state, + or if the next few characters do match that tag name, consume the + next input character: */ + $this->char++; + $char = $this->char(); + + if (preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new end tag token, set its tag name to the lowercase version + of the input character (add 0x0020 to the character's code point), then + switch to the tag name state. (Don't emit the token yet; further details + will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::ENDTAG + ); + + $this->state = 'tagName'; + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Switch to the data state. */ + $this->state = 'data'; + + } elseif ($this->char === $this->EOF) { + /* EOF + Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F + SOLIDUS character token. Reconsume the EOF character in the data state. */ + $this->emitToken( + array( + 'type' => self::CHARACTR, + 'data' => 'char--; + $this->state = 'data'; + + } else { + /* Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + } + } + } + + private function tagNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif ($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } elseif ($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } else { + /* Anything else + Append the current input character to the current tag token's tag name. + Stay in the tag name state. */ + $this->token['name'] .= strtolower($char); + $this->state = 'tagName'; + } + } + + private function beforeAttributeNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif ($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Stay in the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function attributeNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif ($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif ($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's name. + Stay in the attribute name state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['name'] .= strtolower($char); + + $this->state = 'attributeName'; + } + } + + private function afterAttributeNameState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif ($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif ($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the + before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function beforeAttributeValueState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif ($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the attribute value (double-quoted) state. */ + $this->state = 'attributeValueDoubleQuoted'; + + } elseif ($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the attribute value (unquoted) state and reconsume + this input character. */ + $this->char--; + $this->state = 'attributeValueUnquoted'; + + } elseif ($char === '\'') { + /* U+0027 APOSTROPHE (') + Switch to the attribute value (single-quoted) state. */ + $this->state = 'attributeValueSingleQuoted'; + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Switch to the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function attributeValueDoubleQuotedState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if ($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('double'); + + } elseif ($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (double-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueDoubleQuoted'; + } + } + + private function attributeValueSingleQuotedState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if ($char === '\'') { + /* U+0022 QUOTATION MARK (') + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('single'); + + } elseif ($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (single-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueSingleQuoted'; + } + } + + private function attributeValueUnquotedState() + { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif ($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState(); + + } elseif ($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function entityInAttributeValueState() + { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, append a U+0026 AMPERSAND character to the + // current attribute's value. Otherwise, emit the character token that + // was returned. + $char = (!$entity) + ? '&' + : $entity; + + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + } + + private function bogusCommentState() + { + /* Consume every character up to the first U+003E GREATER-THAN SIGN + character (>) or the end of the file (EOF), whichever comes first. Emit + a comment token whose data is the concatenation of all the characters + starting from and including the character that caused the state machine + to switch into the bogus comment state, up to and including the last + consumed character before the U+003E character, if any, or up to the + end of the file otherwise. (If the comment was started by the end of + the file (EOF), the token is empty.) */ + $data = $this->characters('^>', $this->char); + $this->emitToken( + array( + 'data' => $data, + 'type' => self::COMMENT + ) + ); + + $this->char += strlen($data); + + /* Switch to the data state. */ + $this->state = 'data'; + + /* If the end of the file was reached, reconsume the EOF character. */ + if ($this->char === $this->EOF) { + $this->char = $this->EOF - 1; + } + } + + private function markupDeclarationOpenState() + { + /* If the next two characters are both U+002D HYPHEN-MINUS (-) + characters, consume those two characters, create a comment token whose + data is the empty string, and switch to the comment state. */ + if ($this->character($this->char + 1, 2) === '--') { + $this->char += 2; + $this->state = 'comment'; + $this->token = array( + 'data' => null, + 'type' => self::COMMENT + ); + + /* Otherwise if the next seven chacacters are a case-insensitive match + for the word "DOCTYPE", then consume those characters and switch to the + DOCTYPE state. */ + } elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') { + $this->char += 7; + $this->state = 'doctype'; + + /* Otherwise, is is a parse error. Switch to the bogus comment state. + The next character that is consumed, if any, is the first character + that will be in the comment. */ + } else { + $this->char++; + $this->state = 'bogusComment'; + } + } + + private function commentState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if ($char === '-') { + /* Switch to the comment dash state */ + $this->state = 'commentDash'; + + /* EOF */ + } elseif ($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append the input character to the comment token's data. Stay in + the comment state. */ + $this->token['data'] .= $char; + } + } + + private function commentDashState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if ($char === '-') { + /* Switch to the comment end state */ + $this->state = 'commentEnd'; + + /* EOF */ + } elseif ($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append a U+002D HYPHEN-MINUS (-) character and the input + character to the comment token's data. Switch to the comment state. */ + $this->token['data'] .= '-' . $char; + $this->state = 'comment'; + } + } + + private function commentEndState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if ($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif ($char === '-') { + $this->token['data'] .= '-'; + + } elseif ($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['data'] .= '--' . $char; + $this->state = 'comment'; + } + } + + private function doctypeState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'beforeDoctypeName'; + + } else { + $this->char--; + $this->state = 'beforeDoctypeName'; + } + } + + private function beforeDoctypeNameState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the before DOCTYPE name state. + + } elseif (preg_match('/^[a-z]$/', $char)) { + $this->token = array( + 'name' => strtoupper($char), + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + + } elseif ($char === '>') { + $this->emitToken( + array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + ) + ); + + $this->state = 'data'; + + } elseif ($this->char === $this->EOF) { + $this->emitToken( + array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + ) + ); + + $this->char--; + $this->state = 'data'; + + } else { + $this->token = array( + 'name' => $char, + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + } + } + + private function doctypeNameState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'AfterDoctypeName'; + + } elseif ($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif (preg_match('/^[a-z]$/', $char)) { + $this->token['name'] .= strtoupper($char); + + } elseif ($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['name'] .= $char; + } + + $this->token['error'] = ($this->token['name'] === 'HTML') + ? false + : true; + } + + private function afterDoctypeNameState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the DOCTYPE name state. + + } elseif ($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif ($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['error'] = true; + $this->state = 'bogusDoctype'; + } + } + + private function bogusDoctypeState() + { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if ($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif ($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + // Stay in the bogus DOCTYPE state. + } + } + + private function entity() + { + $start = $this->char; + + // This section defines how to consume an entity. This definition is + // used when parsing entities in text and in attributes. + + // The behaviour depends on the identity of the next character (the + // one immediately after the U+0026 AMPERSAND character): + + switch ($this->character($this->char + 1)) { + // U+0023 NUMBER SIGN (#) + case '#': + + // The behaviour further depends on the character after the + // U+0023 NUMBER SIGN: + switch ($this->character($this->char + 1)) { + // U+0078 LATIN SMALL LETTER X + // U+0058 LATIN CAPITAL LETTER X + case 'x': + case 'X': + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 + // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER + // A, through to U+0046 LATIN CAPITAL LETTER F (in other + // words, 0-9, A-F, a-f). + $char = 1; + $char_class = '0-9A-Fa-f'; + break; + + // Anything else + default: + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE (i.e. just 0-9). + $char = 0; + $char_class = '0-9'; + break; + } + + // Consume as many characters as match the range of characters + // given above. + $this->char++; + $e_name = $this->characters($char_class, $this->char + $char + 1); + $entity = $this->character($start, $this->char); + $cond = strlen($e_name) > 0; + + // The rest of the parsing happens bellow. + break; + + // Anything else + default: + // Consume the maximum number of characters possible, with the + // consumed characters case-sensitively matching one of the + // identifiers in the first column of the entities table. + $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); + $len = strlen($e_name); + + for ($c = 1; $c <= $len; $c++) { + $id = substr($e_name, 0, $c); + $this->char++; + + if (in_array($id, $this->entities)) { + if ($e_name[$c - 1] !== ';') { + if ($c < $len && $e_name[$c] == ';') { + $this->char++; // consume extra semicolon + } + } + $entity = $id; + break; + } + } + + $cond = isset($entity); + // The rest of the parsing happens bellow. + break; + } + + if (!$cond) { + // If no match can be made, then this is a parse error. No + // characters are consumed, and nothing is returned. + $this->char = $start; + return false; + } + + // Return a character token for the character corresponding to the + // entity name (as given by the second column of the entities table). + return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8'); + } + + private function emitToken($token) + { + $emit = $this->tree->emitToken($token); + + if (is_int($emit)) { + $this->content_model = $emit; + + } elseif ($token['type'] === self::ENDTAG) { + $this->content_model = self::PCDATA; + } + } + + private function EOF() + { + $this->state = null; + $this->tree->emitToken( + array( + 'type' => self::EOF + ) + ); + } +} + +class HTML5TreeConstructer +{ + public $stack = array(); + + private $phase; + private $mode; + private $dom; + private $foster_parent = null; + private $a_formatting = array(); + + private $head_pointer = null; + private $form_pointer = null; + + private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th'); + private $formatting = array( + 'a', + 'b', + 'big', + 'em', + 'font', + 'i', + 'nobr', + 's', + 'small', + 'strike', + 'strong', + 'tt', + 'u' + ); + private $special = array( + 'address', + 'area', + 'base', + 'basefont', + 'bgsound', + 'blockquote', + 'body', + 'br', + 'center', + 'col', + 'colgroup', + 'dd', + 'dir', + 'div', + 'dl', + 'dt', + 'embed', + 'fieldset', + 'form', + 'frame', + 'frameset', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'head', + 'hr', + 'iframe', + 'image', + 'img', + 'input', + 'isindex', + 'li', + 'link', + 'listing', + 'menu', + 'meta', + 'noembed', + 'noframes', + 'noscript', + 'ol', + 'optgroup', + 'option', + 'p', + 'param', + 'plaintext', + 'pre', + 'script', + 'select', + 'spacer', + 'style', + 'tbody', + 'textarea', + 'tfoot', + 'thead', + 'title', + 'tr', + 'ul', + 'wbr' + ); + + // The different phases. + const INIT_PHASE = 0; + const ROOT_PHASE = 1; + const MAIN_PHASE = 2; + const END_PHASE = 3; + + // The different insertion modes for the main phase. + const BEFOR_HEAD = 0; + const IN_HEAD = 1; + const AFTER_HEAD = 2; + const IN_BODY = 3; + const IN_TABLE = 4; + const IN_CAPTION = 5; + const IN_CGROUP = 6; + const IN_TBODY = 7; + const IN_ROW = 8; + const IN_CELL = 9; + const IN_SELECT = 10; + const AFTER_BODY = 11; + const IN_FRAME = 12; + const AFTR_FRAME = 13; + + // The different types of elements. + const SPECIAL = 0; + const SCOPING = 1; + const FORMATTING = 2; + const PHRASING = 3; + + const MARKER = 0; + + public function __construct() + { + $this->phase = self::INIT_PHASE; + $this->mode = self::BEFOR_HEAD; + $this->dom = new DOMDocument; + + $this->dom->encoding = 'UTF-8'; + $this->dom->preserveWhiteSpace = true; + $this->dom->substituteEntities = true; + $this->dom->strictErrorChecking = false; + } + + // Process tag tokens + public function emitToken($token) + { + switch ($this->phase) { + case self::INIT_PHASE: + return $this->initPhase($token); + break; + case self::ROOT_PHASE: + return $this->rootElementPhase($token); + break; + case self::MAIN_PHASE: + return $this->mainPhase($token); + break; + case self::END_PHASE : + return $this->trailingEndPhase($token); + break; + } + } + + private function initPhase($token) + { + /* Initially, the tree construction stage must handle each token + emitted from the tokenisation stage as follows: */ + + /* A DOCTYPE token that is marked as being in error + A comment token + A start tag token + An end tag token + A character token that is not one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE + An end-of-file token */ + if ((isset($token['error']) && $token['error']) || + $token['type'] === HTML5::COMMENT || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF || + ($token['type'] === HTML5::CHARACTR && isset($token['data']) && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) + ) { + /* This specification does not define how to handle this case. In + particular, user agents may ignore the entirety of this specification + altogether for such documents, and instead invoke special parse modes + with a greater emphasis on backwards compatibility. */ + + $this->phase = self::ROOT_PHASE; + return $this->rootElementPhase($token); + + /* A DOCTYPE token marked as being correct */ + } elseif (isset($token['error']) && !$token['error']) { + /* Append a DocumentType node to the Document node, with the name + attribute set to the name given in the DOCTYPE token (which will be + "HTML"), and the other attributes specific to DocumentType objects + set to null, empty lists, or the empty string as appropriate. */ + $doctype = new DOMDocumentType(null, null, 'HTML'); + + /* Then, switch to the root element phase of the tree construction + stage. */ + $this->phase = self::ROOT_PHASE; + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif (isset($token['data']) && preg_match( + '/^[\t\n\x0b\x0c ]+$/', + $token['data'] + ) + ) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + } + } + + private function rootElementPhase($token) + { + /* After the initial phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if ($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED + (FF), or U+0020 SPACE + A start tag token + An end tag token + An end-of-file token */ + } elseif (($token['type'] === HTML5::CHARACTR && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF + ) { + /* Create an HTMLElement node with the tag name html, in the HTML + namespace. Append it to the Document object. Switch to the main + phase and reprocess the current token. */ + $html = $this->dom->createElement('html'); + $this->dom->appendChild($html); + $this->stack[] = $html; + + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + } + } + + private function mainPhase($token) + { + /* Tokens in the main phase must be handled as follows: */ + + /* A DOCTYPE token */ + if ($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A start tag token with the tag name "html" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { + /* If this start tag token was not the first start tag token, then + it is a parse error. */ + + /* For each attribute on the token, check to see if the attribute + is already present on the top element of the stack of open elements. + If it is not, add the attribute and its corresponding value to that + element. */ + foreach ($token['attr'] as $attr) { + if (!$this->stack[0]->hasAttribute($attr['name'])) { + $this->stack[0]->setAttribute($attr['name'], $attr['value']); + } + } + + /* An end-of-file token */ + } elseif ($token['type'] === HTML5::EOF) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Anything else. */ + } else { + /* Depends on the insertion mode: */ + switch ($this->mode) { + case self::BEFOR_HEAD: + return $this->beforeHead($token); + break; + case self::IN_HEAD: + return $this->inHead($token); + break; + case self::AFTER_HEAD: + return $this->afterHead($token); + break; + case self::IN_BODY: + return $this->inBody($token); + break; + case self::IN_TABLE: + return $this->inTable($token); + break; + case self::IN_CAPTION: + return $this->inCaption($token); + break; + case self::IN_CGROUP: + return $this->inColumnGroup($token); + break; + case self::IN_TBODY: + return $this->inTableBody($token); + break; + case self::IN_ROW: + return $this->inRow($token); + break; + case self::IN_CELL: + return $this->inCell($token); + break; + case self::IN_SELECT: + return $this->inSelect($token); + break; + case self::AFTER_BODY: + return $this->afterBody($token); + break; + case self::IN_FRAME: + return $this->inFrameset($token); + break; + case self::AFTR_FRAME: + return $this->afterFrameset($token); + break; + case self::END_PHASE: + return $this->trailingEndPhase($token); + break; + } + } + } + + private function beforeHead($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "head" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { + /* Create an element for the token, append the new element to the + current node and push it onto the stack of open elements. */ + $element = $this->insertElement($token); + + /* Set the head element pointer to this new element node. */ + $this->head_pointer = $element; + + /* Change the insertion mode to "in head". */ + $this->mode = self::IN_HEAD; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title". Or an end tag with the tag name "html". + Or a character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or any other start tag token */ + } elseif ($token['type'] === HTML5::STARTTAG || + ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || + ($token['type'] === HTML5::CHARACTR && !preg_match( + '/^[\t\n\x0b\x0c ]$/', + $token['data'] + )) + ) { + /* Act as if a start tag token with the tag name "head" and no + attributes had been seen, then reprocess the current token. */ + $this->beforeHead( + array( + 'name' => 'head', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + return $this->inHead($token); + + /* Any other end tag */ + } elseif ($token['type'] === HTML5::ENDTAG) { + /* Parse error. Ignore the token. */ + } + } + + private function inHead($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. + + THIS DIFFERS FROM THE SPEC: If the current node is either a title, style + or script element, append the character to the current node regardless + of its content. */ + if (($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( + $token['type'] === HTML5::CHARACTR && in_array( + end($this->stack)->nodeName, + array('title', 'style', 'script') + )) + ) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + } elseif ($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('title', 'style', 'script')) + ) { + array_pop($this->stack); + return HTML5::PCDATA; + + /* A start tag with the tag name "title" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if ($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $element = $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the RCDATA state. */ + return HTML5::RCDATA; + + /* A start tag with the tag name "style" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if ($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + + /* A start tag with the tag name "script" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { + /* Create an element for the token. */ + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + + /* A start tag with the tag name "base", "link", or "meta" */ + } elseif ($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array('base', 'link', 'meta') + ) + ) { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if ($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + array_pop($this->stack); + + } else { + $this->insertElement($token); + } + + /* An end tag with the tag name "head" */ + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { + /* If the current node is a head element, pop the current node off + the stack of open elements. */ + if ($this->head_pointer->isSameNode(end($this->stack))) { + array_pop($this->stack); + + /* Otherwise, this is a parse error. */ + } else { + // k + } + + /* Change the insertion mode to "after head". */ + $this->mode = self::AFTER_HEAD; + + /* A start tag with the tag name "head" or an end tag except "html". */ + } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || + ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html') + ) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* If the current node is a head element, act as if an end tag + token with the tag name "head" had been seen. */ + if ($this->head_pointer->isSameNode(end($this->stack))) { + $this->inHead( + array( + 'name' => 'head', + 'type' => HTML5::ENDTAG + ) + ); + + /* Otherwise, change the insertion mode to "after head". */ + } else { + $this->mode = self::AFTER_HEAD; + } + + /* Then, reprocess the current token. */ + return $this->afterHead($token); + } + } + + private function afterHead($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "body" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { + /* Insert a body element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in body". */ + $this->mode = self::IN_BODY; + + /* A start tag token with the tag name "frameset" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { + /* Insert a frameset element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in frameset". */ + $this->mode = self::IN_FRAME; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title" */ + } elseif ($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array('base', 'link', 'meta', 'script', 'style', 'title') + ) + ) { + /* Parse error. Switch the insertion mode back to "in head" and + reprocess the token. */ + $this->mode = self::IN_HEAD; + return $this->inHead($token); + + /* Anything else */ + } else { + /* Act as if a start tag token with the tag name "body" and no + attributes had been seen, and then reprocess the current token. */ + $this->afterHead( + array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + return $this->inBody($token); + } + } + + private function inBody($token) + { + /* Handle the token as follows: */ + + switch ($token['type']) { + /* A character token */ + case HTML5::CHARACTR: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + break; + + /* A comment token */ + case HTML5::COMMENT: + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + break; + + case HTML5::STARTTAG: + switch ($token['name']) { + /* A start tag token whose tag name is one of: "script", + "style" */ + case 'script': + case 'style': + /* Process the token as if the insertion mode had been "in + head". */ + return $this->inHead($token); + break; + + /* A start tag token whose tag name is one of: "base", "link", + "meta", "title" */ + case 'base': + case 'link': + case 'meta': + case 'title': + /* Parse error. Process the token as if the insertion mode + had been "in head". */ + return $this->inHead($token); + break; + + /* A start tag token with the tag name "body" */ + case 'body': + /* Parse error. If the second element on the stack of open + elements is not a body element, or, if the stack of open + elements has only one node on it, then ignore the token. + (innerHTML case) */ + if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { + // Ignore + + /* Otherwise, for each attribute on the token, check to see + if the attribute is already present on the body element (the + second element) on the stack of open elements. If it is not, + add the attribute and its corresponding value to that + element. */ + } else { + foreach ($token['attr'] as $attr) { + if (!$this->stack[1]->hasAttribute($attr['name'])) { + $this->stack[1]->setAttribute($attr['name'], $attr['value']); + } + } + } + break; + + /* A start tag whose tag name is one of: "address", + "blockquote", "center", "dir", "div", "dl", "fieldset", + "listing", "menu", "ol", "p", "ul" */ + case 'address': + case 'blockquote': + case 'center': + case 'dir': + case 'div': + case 'dl': + case 'fieldset': + case 'listing': + case 'menu': + case 'ol': + case 'p': + case 'ul': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if ($this->elementInScope('p')) { + $this->emitToken( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "form" */ + case 'form': + /* If the form element pointer is not null, ignore the + token with a parse error. */ + if ($this->form_pointer !== null) { + // Ignore. + + /* Otherwise: */ + } else { + /* If the stack of open elements has a p element in + scope, then act as if an end tag with the tag name p + had been seen. */ + if ($this->elementInScope('p')) { + $this->emitToken( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Insert an HTML element for the token, and set the + form element pointer to point to the element created. */ + $element = $this->insertElement($token); + $this->form_pointer = $element; + } + break; + + /* A start tag whose tag name is "li", "dd" or "dt" */ + case 'li': + case 'dd': + case 'dt': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if ($this->elementInScope('p')) { + $this->emitToken( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + } + + $stack_length = count($this->stack) - 1; + + for ($n = $stack_length; 0 <= $n; $n--) { + /* 1. Initialise node to be the current node (the + bottommost node of the stack). */ + $stop = false; + $node = $this->stack[$n]; + $cat = $this->getElementCategory($node->tagName); + + /* 2. If node is an li, dd or dt element, then pop all + the nodes from the current node up to node, including + node, then stop this algorithm. */ + if ($token['name'] === $node->tagName || ($token['name'] !== 'li' + && ($node->tagName === 'dd' || $node->tagName === 'dt')) + ) { + for ($x = $stack_length; $x >= $n; $x--) { + array_pop($this->stack); + } + + break; + } + + /* 3. If node is not in the formatting category, and is + not in the phrasing category, and is not an address or + div element, then stop this algorithm. */ + if ($cat !== self::FORMATTING && $cat !== self::PHRASING && + $node->tagName !== 'address' && $node->tagName !== 'div' + ) { + break; + } + } + + /* Finally, insert an HTML element with the same tag + name as the token's. */ + $this->insertElement($token); + break; + + /* A start tag token whose tag name is "plaintext" */ + case 'plaintext': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if ($this->elementInScope('p')) { + $this->emitToken( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + return HTML5::PLAINTEXT; + break; + + /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': + case 'h2': + case 'h3': + case 'h4': + case 'h5': + case 'h6': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if ($this->elementInScope('p')) { + $this->emitToken( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + this is a parse error; pop elements from the stack until an + element with one of those tag names has been popped from the + stack. */ + while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { + array_pop($this->stack); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "a" */ + case 'a': + /* If the list of active formatting elements contains + an element whose tag name is "a" between the end of the + list and the last marker on the list (or the start of + the list if there is no marker on the list), then this + is a parse error; act as if an end tag with the tag name + "a" had been seen, then remove that element from the list + of active formatting elements and the stack of open + elements if the end tag didn't already remove it (it + might not have if the element is not in table scope). */ + $leng = count($this->a_formatting); + + for ($n = $leng - 1; $n >= 0; $n--) { + if ($this->a_formatting[$n] === self::MARKER) { + break; + + } elseif ($this->a_formatting[$n]->nodeName === 'a') { + $this->emitToken( + array( + 'name' => 'a', + 'type' => HTML5::ENDTAG + ) + ); + break; + } + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag whose tag name is one of: "b", "big", "em", "font", + "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'b': + case 'big': + case 'em': + case 'font': + case 'i': + case 'nobr': + case 's': + case 'small': + case 'strike': + case 'strong': + case 'tt': + case 'u': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag token whose tag name is "button" */ + case 'button': + /* If the stack of open elements has a button element in scope, + then this is a parse error; act as if an end tag with the tag + name "button" had been seen, then reprocess the token. (We don't + do that. Unnecessary.) */ + if ($this->elementInScope('button')) { + $this->inBody( + array( + 'name' => 'button', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is one of: "marquee", "object" */ + case 'marquee': + case 'object': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is "xmp" */ + case 'xmp': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Switch the content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "table" */ + case 'table': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if ($this->elementInScope('p')) { + $this->emitToken( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + break; + + /* A start tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ + case 'area': + case 'basefont': + case 'bgsound': + case 'br': + case 'embed': + case 'img': + case 'param': + case 'spacer': + case 'wbr': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "hr" */ + case 'hr': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if ($this->elementInScope('p')) { + $this->emitToken( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "image" */ + case 'image': + /* Parse error. Change the token's tag name to "img" and + reprocess it. (Don't ask.) */ + $token['name'] = 'img'; + return $this->inBody($token); + break; + + /* A start tag whose tag name is "input" */ + case 'input': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an input element for the token. */ + $element = $this->insertElement($token, false); + + /* If the form element pointer is not null, then associate the + input element with the form element pointed to by the form + element pointer. */ + $this->form_pointer !== null + ? $this->form_pointer->appendChild($element) + : end($this->stack)->appendChild($element); + + /* Pop that input element off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "isindex" */ + case 'isindex': + /* Parse error. */ + // w/e + + /* If the form element pointer is not null, + then ignore the token. */ + if ($this->form_pointer === null) { + /* Act as if a start tag token with the tag name "form" had + been seen. */ + $this->inBody( + array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody( + array( + 'name' => 'hr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + /* Act as if a start tag token with the tag name "p" had + been seen. */ + $this->inBody( + array( + 'name' => 'p', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + /* Act as if a start tag token with the tag name "label" + had been seen. */ + $this->inBody( + array( + 'name' => 'label', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + /* Act as if a stream of character tokens had been seen. */ + $this->insertText( + 'This is a searchable index. ' . + 'Insert your search keywords here: ' + ); + + /* Act as if a start tag token with the tag name "input" + had been seen, with all the attributes from the "isindex" + token, except with the "name" attribute set to the value + "isindex" (ignoring any explicit "name" attribute). */ + $attr = $token['attr']; + $attr[] = array('name' => 'name', 'value' => 'isindex'); + + $this->inBody( + array( + 'name' => 'input', + 'type' => HTML5::STARTTAG, + 'attr' => $attr + ) + ); + + /* Act as if a stream of character tokens had been seen + (see below for what they should say). */ + $this->insertText( + 'This is a searchable index. ' . + 'Insert your search keywords here: ' + ); + + /* Act as if an end tag token with the tag name "label" + had been seen. */ + $this->inBody( + array( + 'name' => 'label', + 'type' => HTML5::ENDTAG + ) + ); + + /* Act as if an end tag token with the tag name "p" had + been seen. */ + $this->inBody( + array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + ) + ); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody( + array( + 'name' => 'hr', + 'type' => HTML5::ENDTAG + ) + ); + + /* Act as if an end tag token with the tag name "form" had + been seen. */ + $this->inBody( + array( + 'name' => 'form', + 'type' => HTML5::ENDTAG + ) + ); + } + break; + + /* A start tag whose tag name is "textarea" */ + case 'textarea': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the + RCDATA state. */ + return HTML5::RCDATA; + break; + + /* A start tag whose tag name is one of: "iframe", "noembed", + "noframes" */ + case 'iframe': + case 'noembed': + case 'noframes': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "select" */ + case 'select': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in select". */ + $this->mode = self::IN_SELECT; + break; + + /* A start or end tag whose tag name is one of: "caption", "col", + "colgroup", "frame", "frameset", "head", "option", "optgroup", + "tbody", "td", "tfoot", "th", "thead", "tr". */ + case 'caption': + case 'col': + case 'colgroup': + case 'frame': + case 'frameset': + case 'head': + case 'option': + case 'optgroup': + case 'tbody': + case 'td': + case 'tfoot': + case 'th': + case 'thead': + case 'tr': + // Parse error. Ignore the token. + break; + + /* A start or end tag whose tag name is one of: "event-source", + "section", "nav", "article", "aside", "header", "footer", + "datagrid", "command" */ + case 'event-source': + case 'section': + case 'nav': + case 'article': + case 'aside': + case 'header': + case 'footer': + case 'datagrid': + case 'command': + // Work in progress! + break; + + /* A start tag token not covered by the previous entries */ + default: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + $this->insertElement($token, true, true); + break; + } + break; + + case HTML5::ENDTAG: + switch ($token['name']) { + /* An end tag with the tag name "body" */ + case 'body': + /* If the second element in the stack of open elements is + not a body element, this is a parse error. Ignore the token. + (innerHTML case) */ + if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { + // Ignore. + + /* If the current node is not the body element, then this + is a parse error. */ + } elseif (end($this->stack)->nodeName !== 'body') { + // Parse error. + } + + /* Change the insertion mode to "after body". */ + $this->mode = self::AFTER_BODY; + break; + + /* An end tag with the tag name "html" */ + case 'html': + /* Act as if an end tag with tag name "body" had been seen, + then, if that token wasn't ignored, reprocess the current + token. */ + $this->inBody( + array( + 'name' => 'body', + 'type' => HTML5::ENDTAG + ) + ); + + return $this->afterBody($token); + break; + + /* An end tag whose tag name is one of: "address", "blockquote", + "center", "dir", "div", "dl", "fieldset", "listing", "menu", + "ol", "pre", "ul" */ + case 'address': + case 'blockquote': + case 'center': + case 'dir': + case 'div': + case 'dl': + case 'fieldset': + case 'listing': + case 'menu': + case 'ol': + case 'pre': + case 'ul': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if ($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with + the same tag name as that of the token, then this + is a parse error. */ + // w/e + + /* If the stack of open elements has an element in + scope with the same tag name as that of the token, + then pop elements from this stack until an element + with that tag name has been popped from the stack. */ + for ($n = count($this->stack) - 1; $n >= 0; $n--) { + if ($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is "form" */ + case 'form': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if ($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + } + + if (end($this->stack)->nodeName !== $token['name']) { + /* Now, if the current node is not an element with the + same tag name as that of the token, then this is a parse + error. */ + // w/e + + } else { + /* Otherwise, if the current node is an element with + the same tag name as that of the token pop that element + from the stack. */ + array_pop($this->stack); + } + + /* In any case, set the form element pointer to null. */ + $this->form_pointer = null; + break; + + /* An end tag whose tag name is "p" */ + case 'p': + /* If the stack of open elements has a p element in scope, + then generate implied end tags, except for p elements. */ + if ($this->elementInScope('p')) { + $this->generateImpliedEndTags(array('p')); + + /* If the current node is not a p element, then this is + a parse error. */ + // k + + /* If the stack of open elements has a p element in + scope, then pop elements from this stack until the stack + no longer has a p element in scope. */ + for ($n = count($this->stack) - 1; $n >= 0; $n--) { + if ($this->elementInScope('p')) { + array_pop($this->stack); + + } else { + break; + } + } + } + break; + + /* An end tag whose tag name is "dd", "dt", or "li" */ + case 'dd': + case 'dt': + case 'li': + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + generate implied end tags, except for elements with the + same tag name as the token. */ + if ($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(array($token['name'])); + + /* If the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + pop elements from this stack until an element with that + tag name has been popped from the stack. */ + for ($n = count($this->stack) - 1; $n >= 0; $n--) { + if ($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': + case 'h2': + case 'h3': + case 'h4': + case 'h5': + case 'h6': + $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + generate implied end tags. */ + if ($this->elementInScope($elements)) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as that of the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has in scope an element + whose tag name is one of "h1", "h2", "h3", "h4", "h5", or + "h6", then pop elements from the stack until an element + with one of those tag names has been popped from the stack. */ + while ($this->elementInScope($elements)) { + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "a", "b", "big", "em", + "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'a': + case 'b': + case 'big': + case 'em': + case 'font': + case 'i': + case 'nobr': + case 's': + case 'small': + case 'strike': + case 'strong': + case 'tt': + case 'u': + /* 1. Let the formatting element be the last element in + the list of active formatting elements that: + * is between the end of the list and the last scope + marker in the list, if any, or the start of the list + otherwise, and + * has the same tag name as the token. + */ + while (true) { + for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) { + if ($this->a_formatting[$a] === self::MARKER) { + break; + + } elseif ($this->a_formatting[$a]->tagName === $token['name']) { + $formatting_element = $this->a_formatting[$a]; + $in_stack = in_array($formatting_element, $this->stack, true); + $fe_af_pos = $a; + break; + } + } + + /* If there is no such node, or, if that node is + also in the stack of open elements but the element + is not in scope, then this is a parse error. Abort + these steps. The token is ignored. */ + if (!isset($formatting_element) || ($in_stack && + !$this->elementInScope($token['name'])) + ) { + break; + + /* Otherwise, if there is such a node, but that node + is not in the stack of open elements, then this is a + parse error; remove the element from the list, and + abort these steps. */ + } elseif (isset($formatting_element) && !$in_stack) { + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 2. Let the furthest block be the topmost node in the + stack of open elements that is lower in the stack + than the formatting element, and is not an element in + the phrasing or formatting categories. There might + not be one. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $length = count($this->stack); + + for ($s = $fe_s_pos + 1; $s < $length; $s++) { + $category = $this->getElementCategory($this->stack[$s]->nodeName); + + if ($category !== self::PHRASING && $category !== self::FORMATTING) { + $furthest_block = $this->stack[$s]; + } + } + + /* 3. If there is no furthest block, then the UA must + skip the subsequent steps and instead just pop all + the nodes from the bottom of the stack of open + elements, from the current node up to the formatting + element, and remove the formatting element from the + list of active formatting elements. */ + if (!isset($furthest_block)) { + for ($n = $length - 1; $n >= $fe_s_pos; $n--) { + array_pop($this->stack); + } + + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 4. Let the common ancestor be the element + immediately above the formatting element in the stack + of open elements. */ + $common_ancestor = $this->stack[$fe_s_pos - 1]; + + /* 5. If the furthest block has a parent node, then + remove the furthest block from its parent node. */ + if ($furthest_block->parentNode !== null) { + $furthest_block->parentNode->removeChild($furthest_block); + } + + /* 6. Let a bookmark note the position of the + formatting element in the list of active formatting + elements relative to the elements on either side + of it in the list. */ + $bookmark = $fe_af_pos; + + /* 7. Let node and last node be the furthest block. + Follow these steps: */ + $node = $furthest_block; + $last_node = $furthest_block; + + while (true) { + for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { + /* 7.1 Let node be the element immediately + prior to node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 7.2 If node is not in the list of active + formatting elements, then remove node from + the stack of open elements and then go back + to step 1. */ + if (!in_array($node, $this->a_formatting, true)) { + unset($this->stack[$n]); + $this->stack = array_merge($this->stack); + + } else { + break; + } + } + + /* 7.3 Otherwise, if node is the formatting + element, then go to the next step in the overall + algorithm. */ + if ($node === $formatting_element) { + break; + + /* 7.4 Otherwise, if last node is the furthest + block, then move the aforementioned bookmark to + be immediately after the node in the list of + active formatting elements. */ + } elseif ($last_node === $furthest_block) { + $bookmark = array_search($node, $this->a_formatting, true) + 1; + } + + /* 7.5 If node has any children, perform a + shallow clone of node, replace the entry for + node in the list of active formatting elements + with an entry for the clone, replace the entry + for node in the stack of open elements with an + entry for the clone, and let node be the clone. */ + if ($node->hasChildNodes()) { + $clone = $node->cloneNode(); + $s_pos = array_search($node, $this->stack, true); + $a_pos = array_search($node, $this->a_formatting, true); + + $this->stack[$s_pos] = $clone; + $this->a_formatting[$a_pos] = $clone; + $node = $clone; + } + + /* 7.6 Insert last node into node, first removing + it from its previous parent node if any. */ + if ($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $node->appendChild($last_node); + + /* 7.7 Let last node be node. */ + $last_node = $node; + } + + /* 8. Insert whatever last node ended up being in + the previous step into the common ancestor node, + first removing it from its previous parent node if + any. */ + if ($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $common_ancestor->appendChild($last_node); + + /* 9. Perform a shallow clone of the formatting + element. */ + $clone = $formatting_element->cloneNode(); + + /* 10. Take all of the child nodes of the furthest + block and append them to the clone created in the + last step. */ + while ($furthest_block->hasChildNodes()) { + $child = $furthest_block->firstChild; + $furthest_block->removeChild($child); + $clone->appendChild($child); + } + + /* 11. Append that clone to the furthest block. */ + $furthest_block->appendChild($clone); + + /* 12. Remove the formatting element from the list + of active formatting elements, and insert the clone + into the list of active formatting elements at the + position of the aforementioned bookmark. */ + $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + + $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); + $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); + $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); + + /* 13. Remove the formatting element from the stack + of open elements, and insert the clone into the stack + of open elements immediately after (i.e. in a more + deeply nested position than) the position of the + furthest block in that stack. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $fb_s_pos = array_search($furthest_block, $this->stack, true); + unset($this->stack[$fe_s_pos]); + + $s_part1 = array_slice($this->stack, 0, $fb_s_pos); + $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); + $this->stack = array_merge($s_part1, array($clone), $s_part2); + + /* 14. Jump back to step 1 in this series of steps. */ + unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); + } + break; + + /* An end tag token whose tag name is one of: "button", + "marquee", "object" */ + case 'button': + case 'marquee': + case 'object': + /* If the stack of open elements has an element in scope whose + tag name matches the tag name of the token, then generate implied + tags. */ + if ($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // k + + /* Now, if the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then pop + elements from the stack until that element has been popped from + the stack, and clear the list of active formatting elements up + to the last marker. */ + for ($n = count($this->stack) - 1; $n >= 0; $n--) { + if ($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + + $marker = end(array_keys($this->a_formatting, self::MARKER, true)); + + for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) { + array_pop($this->a_formatting); + } + } + break; + + /* Or an end tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "hr", "iframe", "image", "img", + "input", "isindex", "noembed", "noframes", "param", "select", + "spacer", "table", "textarea", "wbr" */ + case 'area': + case 'basefont': + case 'bgsound': + case 'br': + case 'embed': + case 'hr': + case 'iframe': + case 'image': + case 'img': + case 'input': + case 'isindex': + case 'noembed': + case 'noframes': + case 'param': + case 'select': + case 'spacer': + case 'table': + case 'textarea': + case 'wbr': + // Parse error. Ignore the token. + break; + + /* An end tag token not covered by the previous entries */ + default: + for ($n = count($this->stack) - 1; $n >= 0; $n--) { + /* Initialise node to be the current node (the bottommost + node of the stack). */ + $node = end($this->stack); + + /* If node has the same tag name as the end tag token, + then: */ + if ($token['name'] === $node->nodeName) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* If the tag name of the end tag token does not + match the tag name of the current node, this is a + parse error. */ + // k + + /* Pop all the nodes from the current node up to + node, including node, then stop this algorithm. */ + for ($x = count($this->stack) - $n; $x >= $n; $x--) { + array_pop($this->stack); + } + + } else { + $category = $this->getElementCategory($node); + + if ($category !== self::SPECIAL && $category !== self::SCOPING) { + /* Otherwise, if node is in neither the formatting + category nor the phrasing category, then this is a + parse error. Stop this algorithm. The end tag token + is ignored. */ + return false; + } + } + } + break; + } + break; + } + } + + private function inTable($token) + { + $clear = array('html', 'table'); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "caption" */ + } elseif ($token['type'] === HTML5::STARTTAG && + $token['name'] === 'caption' + ) { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + + /* Insert an HTML element for the token, then switch the + insertion mode to "in caption". */ + $this->insertElement($token); + $this->mode = self::IN_CAPTION; + + /* A start tag whose tag name is "colgroup" */ + } elseif ($token['type'] === HTML5::STARTTAG && + $token['name'] === 'colgroup' + ) { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the + insertion mode to "in column group". */ + $this->insertElement($token); + $this->mode = self::IN_CGROUP; + + /* A start tag whose tag name is "col" */ + } elseif ($token['type'] === HTML5::STARTTAG && + $token['name'] === 'col' + ) { + $this->inTable( + array( + 'name' => 'colgroup', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + $this->inColumnGroup($token); + + /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif ($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array('tbody', 'tfoot', 'thead') + ) + ) { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in table body". */ + $this->insertElement($token); + $this->mode = self::IN_TBODY; + + /* A start tag whose tag name is one of: "td", "th", "tr" */ + } elseif ($token['type'] === HTML5::STARTTAG && + in_array($token['name'], array('td', 'th', 'tr')) + ) { + /* Act as if a start tag token with the tag name "tbody" had been + seen, then reprocess the current token. */ + $this->inTable( + array( + 'name' => 'tbody', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + return $this->inTableBody($token); + + /* A start tag whose tag name is "table" */ + } elseif ($token['type'] === HTML5::STARTTAG && + $token['name'] === 'table' + ) { + /* Parse error. Act as if an end tag token with the tag name "table" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inTable( + array( + 'name' => 'table', + 'type' => HTML5::ENDTAG + ) + ); + + return $this->mainPhase($token); + + /* An end tag whose tag name is "table" */ + } elseif ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table' + ) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if (!$this->elementInScope($token['name'], true)) { + return false; + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a table element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a table element has been + popped from the stack. */ + while (true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if ($current === 'table') { + break; + } + } + + /* Reset the insertion mode appropriately. */ + $this->resetInsertionMode(); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif ($token['type'] === HTML5::ENDTAG && in_array( + $token['name'], + array( + 'body', + 'caption', + 'col', + 'colgroup', + 'html', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr' + ) + ) + ) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Parse error. Process the token as if the insertion mode was "in + body", with the following exception: */ + + /* If the current node is a table, tbody, tfoot, thead, or tr + element, then, whenever a node would be inserted into the current + node, it must instead be inserted into the foster parent element. */ + if (in_array( + end($this->stack)->nodeName, + array('table', 'tbody', 'tfoot', 'thead', 'tr') + ) + ) { + /* The foster parent element is the parent element of the last + table element in the stack of open elements, if there is a + table element and it has such a parent element. If there is no + table element in the stack of open elements (innerHTML case), + then the foster parent element is the first element in the + stack of open elements (the html element). Otherwise, if there + is a table element in the stack of open elements, but the last + table element in the stack of open elements has no parent, or + its parent node is not an element, then the foster parent + element is the element before the last table element in the + stack of open elements. */ + for ($n = count($this->stack) - 1; $n >= 0; $n--) { + if ($this->stack[$n]->nodeName === 'table') { + $table = $this->stack[$n]; + break; + } + } + + if (isset($table) && $table->parentNode !== null) { + $this->foster_parent = $table->parentNode; + + } elseif (!isset($table)) { + $this->foster_parent = $this->stack[0]; + + } elseif (isset($table) && ($table->parentNode === null || + $table->parentNode->nodeType !== XML_ELEMENT_NODE) + ) { + $this->foster_parent = $this->stack[$n - 1]; + } + } + + $this->inBody($token); + } + } + + private function inCaption($token) + { + /* An end tag whose tag name is "caption" */ + if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if (!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a caption element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a caption element has + been popped from the stack. */ + while (true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if ($node === 'caption') { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag + name is "table" */ + } elseif (($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr' + ) + )) || ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table') + ) { + /* Parse error. Act as if an end tag with the tag name "caption" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inCaption( + array( + 'name' => 'caption', + 'type' => HTML5::ENDTAG + ) + ); + + return $this->inTable($token); + + /* An end tag whose tag name is one of: "body", "col", "colgroup", + "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif ($token['type'] === HTML5::ENDTAG && in_array( + $token['name'], + array( + 'body', + 'col', + 'colgroup', + 'html', + 'tbody', + 'tfoot', + 'th', + 'thead', + 'tr' + ) + ) + ) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". */ + $this->inBody($token); + } + } + + private function inColumnGroup($token) + { + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "col" */ + } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { + /* Insert a col element for the token. Immediately pop the current + node off the stack of open elements. */ + $this->insertElement($token); + array_pop($this->stack); + + /* An end tag whose tag name is "colgroup" */ + } elseif ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'colgroup' + ) { + /* If the current node is the root html element, then this is a + parse error, ignore the token. (innerHTML case) */ + if (end($this->stack)->nodeName === 'html') { + // Ignore + + /* Otherwise, pop the current node (which will be a colgroup + element) from the stack of open elements. Switch the insertion + mode to "in table". */ + } else { + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* An end tag whose tag name is "col" */ + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Act as if an end tag with the tag name "colgroup" had been seen, + and then, if that token wasn't ignored, reprocess the current token. */ + $this->inColumnGroup( + array( + 'name' => 'colgroup', + 'type' => HTML5::ENDTAG + ) + ); + + return $this->inTable($token); + } + } + + private function inTableBody($token) + { + $clear = array('tbody', 'tfoot', 'thead', 'html'); + + /* A start tag whose tag name is "tr" */ + if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Insert a tr element for the token, then switch the insertion + mode to "in row". */ + $this->insertElement($token); + $this->mode = self::IN_ROW; + + /* A start tag whose tag name is one of: "th", "td" */ + } elseif ($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td') + ) { + /* Parse error. Act as if a start tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inTableBody( + array( + 'name' => 'tr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + ) + ); + + return $this->inRow($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif ($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead')) + ) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if (!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node from the stack of open elements. Switch + the insertion mode to "in table". */ + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ + } elseif (($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead') + )) || + ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table') + ) { + /* If the stack of open elements does not have a tbody, thead, or + tfoot element in table scope, this is a parse error. Ignore the + token. (innerHTML case) */ + if (!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Act as if an end tag with the same tag name as the current + node ("tbody", "tfoot", or "thead") had been seen, then + reprocess the current token. */ + $this->inTableBody( + array( + 'name' => end($this->stack)->nodeName, + 'type' => HTML5::ENDTAG + ) + ); + + return $this->mainPhase($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th", "tr" */ + } elseif ($token['type'] === HTML5::ENDTAG && in_array( + $token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr') + ) + ) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". */ + $this->inTable($token); + } + } + + private function inRow($token) + { + $clear = array('tr', 'html'); + + /* A start tag whose tag name is one of: "th", "td" */ + if ($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td') + ) { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in cell". */ + $this->insertElement($token); + $this->mode = self::IN_CELL; + + /* Insert a marker at the end of the list of active formatting + elements. */ + $this->a_formatting[] = self::MARKER; + + /* An end tag whose tag name is "tr" */ + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if (!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node (which will be a tr element) from the + stack of open elements. Switch the insertion mode to "in table + body". */ + array_pop($this->stack); + $this->mode = self::IN_TBODY; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ + } elseif ($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr') + ) + ) { + /* Act as if an end tag with the tag name "tr" had been seen, then, + if that token wasn't ignored, reprocess the current token. */ + $this->inRow( + array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + ) + ); + + return $this->inCell($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif ($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead')) + ) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if (!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Otherwise, act as if an end tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inRow( + array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + ) + ); + + return $this->inCell($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th" */ + } elseif ($token['type'] === HTML5::ENDTAG && in_array( + $token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr') + ) + ) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". */ + $this->inTable($token); + } + } + + private function inCell($token) + { + /* An end tag whose tag name is one of: "td", "th" */ + if ($token['type'] === HTML5::ENDTAG && + ($token['name'] === 'td' || $token['name'] === 'th') + ) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token, then this is a + parse error and the token must be ignored. */ + if (!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Generate implied end tags, except for elements with the same + tag name as the token. */ + $this->generateImpliedEndTags(array($token['name'])); + + /* Now, if the current node is not an element with the same tag + name as the token, then this is a parse error. */ + // k + + /* Pop elements from this stack until an element with the same + tag name as the token has been popped from the stack. */ + while (true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if ($node === $token['name']) { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in row". (The current node + will be a tr element at this point.) */ + $this->mode = self::IN_ROW; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif ($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr' + ) + ) + ) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if (!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif ($token['type'] === HTML5::STARTTAG && in_array( + $token['name'], + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr' + ) + ) + ) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if (!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html" */ + } elseif ($token['type'] === HTML5::ENDTAG && in_array( + $token['name'], + array('body', 'caption', 'col', 'colgroup', 'html') + ) + ) { + /* Parse error. Ignore the token. */ + + /* An end tag whose tag name is one of: "table", "tbody", "tfoot", + "thead", "tr" */ + } elseif ($token['type'] === HTML5::ENDTAG && in_array( + $token['name'], + array('table', 'tbody', 'tfoot', 'thead', 'tr') + ) + ) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token (which can only + happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), + then this is a parse error and the token must be ignored. */ + if (!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". */ + $this->inBody($token); + } + } + + private function inSelect($token) + { + /* Handle the token as follows: */ + + /* A character token */ + if ($token['type'] === HTML5::CHARACTR) { + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token whose tag name is "option" */ + } elseif ($token['type'] === HTML5::STARTTAG && + $token['name'] === 'option' + ) { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if (end($this->stack)->nodeName === 'option') { + $this->inSelect( + array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* A start tag token whose tag name is "optgroup" */ + } elseif ($token['type'] === HTML5::STARTTAG && + $token['name'] === 'optgroup' + ) { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if (end($this->stack)->nodeName === 'option') { + $this->inSelect( + array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* If the current node is an optgroup element, act as if an end tag + with the tag name "optgroup" had been seen. */ + if (end($this->stack)->nodeName === 'optgroup') { + $this->inSelect( + array( + 'name' => 'optgroup', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* An end tag token whose tag name is "optgroup" */ + } elseif ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'optgroup' + ) { + /* First, if the current node is an option element, and the node + immediately before it in the stack of open elements is an optgroup + element, then act as if an end tag with the tag name "option" had + been seen. */ + $elements_in_stack = count($this->stack); + + if ($this->stack[$elements_in_stack - 1]->nodeName === 'option' && + $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup' + ) { + $this->inSelect( + array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + ) + ); + } + + /* If the current node is an optgroup element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if ($this->stack[$elements_in_stack - 1] === 'optgroup') { + array_pop($this->stack); + } + + /* An end tag token whose tag name is "option" */ + } elseif ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'option' + ) { + /* If the current node is an option element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if (end($this->stack)->nodeName === 'option') { + array_pop($this->stack); + } + + /* An end tag whose tag name is "select" */ + } elseif ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'select' + ) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if (!$this->elementInScope($token['name'], true)) { + // w/e + + /* Otherwise: */ + } else { + /* Pop elements from the stack of open elements until a select + element has been popped from the stack. */ + while (true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if ($current === 'select') { + break; + } + } + + /* Reset the insertion mode appropriately. */ + $this->resetInsertionMode(); + } + + /* A start tag whose tag name is "select" */ + } elseif ($token['name'] === 'select' && + $token['type'] === HTML5::STARTTAG + ) { + /* Parse error. Act as if the token had been an end tag with the + tag name "select" instead. */ + $this->inSelect( + array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + ) + ); + + /* An end tag whose tag name is one of: "caption", "table", "tbody", + "tfoot", "thead", "tr", "td", "th" */ + } elseif (in_array( + $token['name'], + array( + 'caption', + 'table', + 'tbody', + 'tfoot', + 'thead', + 'tr', + 'td', + 'th' + ) + ) && $token['type'] === HTML5::ENDTAG + ) { + /* Parse error. */ + // w/e + + /* If the stack of open elements has an element in table scope with + the same tag name as that of the token, then act as if an end tag + with the tag name "select" had been seen, and reprocess the token. + Otherwise, ignore the token. */ + if ($this->elementInScope($token['name'], true)) { + $this->inSelect( + array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + ) + ); + + $this->mainPhase($token); + } + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterBody($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Process the token as it would be processed if the insertion mode + was "in body". */ + $this->inBody($token); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the first element in the stack of open + elements (the html element), with the data attribute set to the + data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->stack[0]->appendChild($comment); + + /* An end tag with the tag name "html" */ + } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { + /* If the parser was originally created in order to handle the + setting of an element's innerHTML attribute, this is a parse error; + ignore the token. (The element will be an html element in this + case.) (innerHTML case) */ + + /* Otherwise, switch to the trailing end phase. */ + $this->phase = self::END_PHASE; + + /* Anything else */ + } else { + /* Parse error. Set the insertion mode to "in body" and reprocess + the token. */ + $this->mode = self::IN_BODY; + return $this->inBody($token); + } + } + + private function inFrameset($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag with the tag name "frameset" */ + } elseif ($token['name'] === 'frameset' && + $token['type'] === HTML5::STARTTAG + ) { + $this->insertElement($token); + + /* An end tag with the tag name "frameset" */ + } elseif ($token['name'] === 'frameset' && + $token['type'] === HTML5::ENDTAG + ) { + /* If the current node is the root html element, then this is a + parse error; ignore the token. (innerHTML case) */ + if (end($this->stack)->nodeName === 'html') { + // Ignore + + } else { + /* Otherwise, pop the current node from the stack of open + elements. */ + array_pop($this->stack); + + /* If the parser was not originally created in order to handle + the setting of an element's innerHTML attribute (innerHTML case), + and the current node is no longer a frameset element, then change + the insertion mode to "after frameset". */ + $this->mode = self::AFTR_FRAME; + } + + /* A start tag with the tag name "frame" */ + } elseif ($token['name'] === 'frame' && + $token['type'] === HTML5::STARTTAG + ) { + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + + /* A start tag with the tag name "noframes" */ + } elseif ($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG + ) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterFrameset($token) + { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* An end tag with the tag name "html" */ + } elseif ($token['name'] === 'html' && + $token['type'] === HTML5::ENDTAG + ) { + /* Switch to the trailing end phase. */ + $this->phase = self::END_PHASE; + + /* A start tag with the tag name "noframes" */ + } elseif ($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG + ) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function trailingEndPhase($token) + { + /* After the main phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if ($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif ($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif ($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) + ) { + /* Process the token as it would be processed in the main phase. */ + $this->mainPhase($token); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or a start tag token. Or an end tag token. */ + } elseif (($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG + ) { + /* Parse error. Switch back to the main phase and reprocess the + token. */ + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + + /* An end-of-file token */ + } elseif ($token['type'] === HTML5::EOF) { + /* OMG DONE!! */ + } + } + + private function insertElement($token, $append = true, $check = false) + { + // Proprietary workaround for libxml2's limitations with tag names + if ($check) { + // Slightly modified HTML5 tag-name modification, + // removing anything that's not an ASCII letter, digit, or hyphen + $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); + // Remove leading hyphens and numbers + $token['name'] = ltrim($token['name'], '-0..9'); + // In theory, this should ever be needed, but just in case + if ($token['name'] === '') { + $token['name'] = 'span'; + } // arbitrary generic choice + } + + $el = $this->dom->createElement($token['name']); + + foreach ($token['attr'] as $attr) { + if (!$el->hasAttribute($attr['name'])) { + $el->setAttribute($attr['name'], $attr['value']); + } + } + + $this->appendToRealParent($el); + $this->stack[] = $el; + + return $el; + } + + private function insertText($data) + { + $text = $this->dom->createTextNode($data); + $this->appendToRealParent($text); + } + + private function insertComment($data) + { + $comment = $this->dom->createComment($data); + $this->appendToRealParent($comment); + } + + private function appendToRealParent($node) + { + if ($this->foster_parent === null) { + end($this->stack)->appendChild($node); + + } elseif ($this->foster_parent !== null) { + /* If the foster parent element is the parent element of the + last table element in the stack of open elements, then the new + node must be inserted immediately before the last table element + in the stack of open elements in the foster parent element; + otherwise, the new node must be appended to the foster parent + element. */ + for ($n = count($this->stack) - 1; $n >= 0; $n--) { + if ($this->stack[$n]->nodeName === 'table' && + $this->stack[$n]->parentNode !== null + ) { + $table = $this->stack[$n]; + break; + } + } + + if (isset($table) && $this->foster_parent->isSameNode($table->parentNode)) { + $this->foster_parent->insertBefore($node, $table); + } else { + $this->foster_parent->appendChild($node); + } + + $this->foster_parent = null; + } + } + + private function elementInScope($el, $table = false) + { + if (is_array($el)) { + foreach ($el as $element) { + if ($this->elementInScope($element, $table)) { + return true; + } + } + + return false; + } + + $leng = count($this->stack); + + for ($n = 0; $n < $leng; $n++) { + /* 1. Initialise node to be the current node (the bottommost node of + the stack). */ + $node = $this->stack[$leng - 1 - $n]; + + if ($node->tagName === $el) { + /* 2. If node is the target node, terminate in a match state. */ + return true; + + } elseif ($node->tagName === 'table') { + /* 3. Otherwise, if node is a table element, terminate in a failure + state. */ + return false; + + } elseif ($table === true && in_array( + $node->tagName, + array( + 'caption', + 'td', + 'th', + 'button', + 'marquee', + 'object' + ) + ) + ) { + /* 4. Otherwise, if the algorithm is the "has an element in scope" + variant (rather than the "has an element in table scope" variant), + and node is one of the following, terminate in a failure state. */ + return false; + + } elseif ($node === $node->ownerDocument->documentElement) { + /* 5. Otherwise, if node is an html element (root element), terminate + in a failure state. (This can only happen if the node is the topmost + node of the stack of open elements, and prevents the next step from + being invoked if there are no more elements in the stack.) */ + return false; + } + + /* Otherwise, set node to the previous entry in the stack of open + elements and return to step 2. (This will never fail, since the loop + will always terminate in the previous step if the top of the stack + is reached.) */ + } + } + + private function reconstructActiveFormattingElements() + { + /* 1. If there are no entries in the list of active formatting elements, + then there is nothing to reconstruct; stop this algorithm. */ + $formatting_elements = count($this->a_formatting); + + if ($formatting_elements === 0) { + return false; + } + + /* 3. Let entry be the last (most recently added) element in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. If the last (most recently added) entry in the list of active + formatting elements is a marker, or if it is an element that is in the + stack of open elements, then there is nothing to reconstruct; stop this + algorithm. */ + if ($entry === self::MARKER || in_array($entry, $this->stack, true)) { + return false; + } + + for ($a = $formatting_elements - 1; $a >= 0; true) { + /* 4. If there are no entries before entry in the list of active + formatting elements, then jump to step 8. */ + if ($a === 0) { + $step_seven = false; + break; + } + + /* 5. Let entry be the entry one earlier than entry in the list of + active formatting elements. */ + $a--; + $entry = $this->a_formatting[$a]; + + /* 6. If entry is neither a marker nor an element that is also in + thetack of open elements, go to step 4. */ + if ($entry === self::MARKER || in_array($entry, $this->stack, true)) { + break; + } + } + + while (true) { + /* 7. Let entry be the element one later than entry in the list of + active formatting elements. */ + if (isset($step_seven) && $step_seven === true) { + $a++; + $entry = $this->a_formatting[$a]; + } + + /* 8. Perform a shallow clone of the element entry to obtain clone. */ + $clone = $entry->cloneNode(); + + /* 9. Append clone to the current node and push it onto the stack + of open elements so that it is the new current node. */ + end($this->stack)->appendChild($clone); + $this->stack[] = $clone; + + /* 10. Replace the entry for entry in the list with an entry for + clone. */ + $this->a_formatting[$a] = $clone; + + /* 11. If the entry for clone in the list of active formatting + elements is not the last entry in the list, return to step 7. */ + if (end($this->a_formatting) !== $clone) { + $step_seven = true; + } else { + break; + } + } + } + + private function clearTheActiveFormattingElementsUpToTheLastMarker() + { + /* When the steps below require the UA to clear the list of active + formatting elements up to the last marker, the UA must perform the + following steps: */ + + while (true) { + /* 1. Let entry be the last (most recently added) entry in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. Remove entry from the list of active formatting elements. */ + array_pop($this->a_formatting); + + /* 3. If entry was a marker, then stop the algorithm at this point. + The list has been cleared up to the last marker. */ + if ($entry === self::MARKER) { + break; + } + } + } + + private function generateImpliedEndTags($exclude = array()) + { + /* When the steps below require the UA to generate implied end tags, + then, if the current node is a dd element, a dt element, an li element, + a p element, a td element, a th element, or a tr element, the UA must + act as if an end tag with the respective tag name had been seen and + then generate implied end tags again. */ + $node = end($this->stack); + $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); + + while (in_array(end($this->stack)->nodeName, $elements)) { + array_pop($this->stack); + } + } + + private function getElementCategory($node) + { + $name = $node->tagName; + if (in_array($name, $this->special)) { + return self::SPECIAL; + } elseif (in_array($name, $this->scoping)) { + return self::SCOPING; + } elseif (in_array($name, $this->formatting)) { + return self::FORMATTING; + } else { + return self::PHRASING; + } + } + + private function clearStackToTableContext($elements) + { + /* When the steps above require the UA to clear the stack back to a + table context, it means that the UA must, while the current node is not + a table element or an html element, pop elements from the stack of open + elements. If this causes any elements to be popped from the stack, then + this is a parse error. */ + while (true) { + $node = end($this->stack)->nodeName; + + if (in_array($node, $elements)) { + break; + } else { + array_pop($this->stack); + } + } + } + + private function resetInsertionMode() + { + /* 1. Let last be false. */ + $last = false; + $leng = count($this->stack); + + for ($n = $leng - 1; $n >= 0; $n--) { + /* 2. Let node be the last node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 3. If node is the first node in the stack of open elements, then + set last to true. If the element whose innerHTML attribute is being + set is neither a td element nor a th element, then set node to the + element whose innerHTML attribute is being set. (innerHTML case) */ + if ($this->stack[0]->isSameNode($node)) { + $last = true; + } + + /* 4. If node is a select element, then switch the insertion mode to + "in select" and abort these steps. (innerHTML case) */ + if ($node->nodeName === 'select') { + $this->mode = self::IN_SELECT; + break; + + /* 5. If node is a td or th element, then switch the insertion mode + to "in cell" and abort these steps. */ + } elseif ($node->nodeName === 'td' || $node->nodeName === 'th') { + $this->mode = self::IN_CELL; + break; + + /* 6. If node is a tr element, then switch the insertion mode to + "in row" and abort these steps. */ + } elseif ($node->nodeName === 'tr') { + $this->mode = self::IN_ROW; + break; + + /* 7. If node is a tbody, thead, or tfoot element, then switch the + insertion mode to "in table body" and abort these steps. */ + } elseif (in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { + $this->mode = self::IN_TBODY; + break; + + /* 8. If node is a caption element, then switch the insertion mode + to "in caption" and abort these steps. */ + } elseif ($node->nodeName === 'caption') { + $this->mode = self::IN_CAPTION; + break; + + /* 9. If node is a colgroup element, then switch the insertion mode + to "in column group" and abort these steps. (innerHTML case) */ + } elseif ($node->nodeName === 'colgroup') { + $this->mode = self::IN_CGROUP; + break; + + /* 10. If node is a table element, then switch the insertion mode + to "in table" and abort these steps. */ + } elseif ($node->nodeName === 'table') { + $this->mode = self::IN_TABLE; + break; + + /* 11. If node is a head element, then switch the insertion mode + to "in body" ("in body"! not "in head"!) and abort these steps. + (innerHTML case) */ + } elseif ($node->nodeName === 'head') { + $this->mode = self::IN_BODY; + break; + + /* 12. If node is a body element, then switch the insertion mode to + "in body" and abort these steps. */ + } elseif ($node->nodeName === 'body') { + $this->mode = self::IN_BODY; + break; + + /* 13. If node is a frameset element, then switch the insertion + mode to "in frameset" and abort these steps. (innerHTML case) */ + } elseif ($node->nodeName === 'frameset') { + $this->mode = self::IN_FRAME; + break; + + /* 14. If node is an html element, then: if the head element + pointer is null, switch the insertion mode to "before head", + otherwise, switch the insertion mode to "after head". In either + case, abort these steps. (innerHTML case) */ + } elseif ($node->nodeName === 'html') { + $this->mode = ($this->head_pointer === null) + ? self::BEFOR_HEAD + : self::AFTER_HEAD; + + break; + + /* 15. If last is true, then set the insertion mode to "in body" + and abort these steps. (innerHTML case) */ + } elseif ($last) { + $this->mode = self::IN_BODY; + break; + } + } + } + + private function closeCell() + { + /* If the stack of open elements has a td or th element in table scope, + then act as if an end tag token with that tag name had been seen. */ + foreach (array('td', 'th') as $cell) { + if ($this->elementInScope($cell, true)) { + $this->inCell( + array( + 'name' => $cell, + 'type' => HTML5::ENDTAG + ) + ); + + break; + } + } + } + + public function save() + { + return $this->dom; + } +} diff --git a/library/ezyang/htmlpurifier/library/HTMLPurifier/Node.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Node.php new file mode 100644 index 0000000000..3995fec9fe --- /dev/null +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Node.php @@ -0,0 +1,49 @@ +data = $data; + $this->line = $line; + $this->col = $col; + } + + public function toTokenPair() { + return array(new HTMLPurifier_Token_Comment($this->data, $this->line, $this->col), null); + } +} diff --git a/library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Element.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Element.php new file mode 100644 index 0000000000..6cbf56dada --- /dev/null +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Element.php @@ -0,0 +1,59 @@ + form or the form, i.e. + * is it a pair of start/end tokens or an empty token. + * @bool + */ + public $empty = false; + + public $endCol = null, $endLine = null, $endArmor = array(); + + public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) { + $this->name = $name; + $this->attr = $attr; + $this->line = $line; + $this->col = $col; + $this->armor = $armor; + } + + public function toTokenPair() { + // XXX inefficiency here, normalization is not necessary + if ($this->empty) { + return array(new HTMLPurifier_Token_Empty($this->name, $this->attr, $this->line, $this->col, $this->armor), null); + } else { + $start = new HTMLPurifier_Token_Start($this->name, $this->attr, $this->line, $this->col, $this->armor); + $end = new HTMLPurifier_Token_End($this->name, array(), $this->endLine, $this->endCol, $this->endArmor); + //$end->start = $start; + return array($start, $end); + } + } +} + diff --git a/library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Text.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Text.php new file mode 100644 index 0000000000..aec9166473 --- /dev/null +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Node/Text.php @@ -0,0 +1,54 @@ +data = $data; + $this->is_whitespace = $is_whitespace; + $this->line = $line; + $this->col = $col; + } + + public function toTokenPair() { + return array(new HTMLPurifier_Token_Text($this->data, $this->line, $this->col), null); + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/PercentEncoder.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/PercentEncoder.php similarity index 78% rename from library/HTMLPurifier/PercentEncoder.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/PercentEncoder.php index a43c44f4c5..18c8bbb00a 100644 --- a/library/HTMLPurifier/PercentEncoder.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/PercentEncoder.php @@ -13,17 +13,26 @@ class HTMLPurifier_PercentEncoder /** * Reserved characters to preserve when using encode(). + * @type array */ protected $preserve = array(); /** * String of characters that should be preserved while using encode(). + * @param bool $preserve */ - public function __construct($preserve = false) { + public function __construct($preserve = false) + { // unreserved letters, ought to const-ify - for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits - for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case - for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case + for ($i = 48; $i <= 57; $i++) { // digits + $this->preserve[$i] = true; + } + for ($i = 65; $i <= 90; $i++) { // upper-case + $this->preserve[$i] = true; + } + for ($i = 97; $i <= 122; $i++) { // lower-case + $this->preserve[$i] = true; + } $this->preserve[45] = true; // Dash - $this->preserve[46] = true; // Period . $this->preserve[95] = true; // Underscore _ @@ -44,13 +53,14 @@ class HTMLPurifier_PercentEncoder * Assumes that the string has already been normalized, making any * and all percent escape sequences valid. Percents will not be * re-escaped, regardless of their status in $preserve - * @param $string String to be encoded - * @return Encoded string. + * @param string $string String to be encoded + * @return string Encoded string. */ - public function encode($string) { + public function encode($string) + { $ret = ''; for ($i = 0, $c = strlen($string); $i < $c; $i++) { - if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) { + if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])])) { $ret .= '%' . sprintf('%02X', $int); } else { $ret .= $string[$i]; @@ -64,10 +74,14 @@ class HTMLPurifier_PercentEncoder * @warning This function is affected by $preserve, even though the * usual desired behavior is for this not to preserve those * characters. Be careful when reusing instances of PercentEncoder! - * @param $string String to normalize + * @param string $string String to normalize + * @return string */ - public function normalize($string) { - if ($string == '') return ''; + public function normalize($string) + { + if ($string == '') { + return ''; + } $parts = explode('%', $string); $ret = array_shift($parts); foreach ($parts as $part) { @@ -92,7 +106,6 @@ class HTMLPurifier_PercentEncoder } return $ret; } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Printer.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer.php similarity index 54% rename from library/HTMLPurifier/Printer.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Printer.php index e7eb82e83e..549e4cea1e 100644 --- a/library/HTMLPurifier/Printer.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer.php @@ -7,25 +7,30 @@ class HTMLPurifier_Printer { /** - * Instance of HTMLPurifier_Generator for HTML generation convenience funcs + * For HTML generation convenience funcs. + * @type HTMLPurifier_Generator */ protected $generator; /** - * Instance of HTMLPurifier_Config, for easy access + * For easy access. + * @type HTMLPurifier_Config */ protected $config; /** * Initialize $generator. */ - public function __construct() { + public function __construct() + { } /** * Give generator necessary configuration if possible + * @param HTMLPurifier_Config $config */ - public function prepareGenerator($config) { + public function prepareGenerator($config) + { $all = $config->getAll(); $context = new HTMLPurifier_Context(); $this->generator = new HTMLPurifier_Generator($config, $context); @@ -39,45 +44,62 @@ class HTMLPurifier_Printer /** * Returns a start tag - * @param $tag Tag name - * @param $attr Attribute array + * @param string $tag Tag name + * @param array $attr Attribute array + * @return string */ - protected function start($tag, $attr = array()) { + protected function start($tag, $attr = array()) + { return $this->generator->generateFromToken( - new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) - ); + new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) + ); } /** - * Returns an end teg - * @param $tag Tag name + * Returns an end tag + * @param string $tag Tag name + * @return string */ - protected function end($tag) { + protected function end($tag) + { return $this->generator->generateFromToken( - new HTMLPurifier_Token_End($tag) - ); + new HTMLPurifier_Token_End($tag) + ); } /** * Prints a complete element with content inside - * @param $tag Tag name - * @param $contents Element contents - * @param $attr Tag attributes - * @param $escape Bool whether or not to escape contents + * @param string $tag Tag name + * @param string $contents Element contents + * @param array $attr Tag attributes + * @param bool $escape whether or not to escape contents + * @return string */ - protected function element($tag, $contents, $attr = array(), $escape = true) { + protected function element($tag, $contents, $attr = array(), $escape = true) + { return $this->start($tag, $attr) . - ($escape ? $this->escape($contents) : $contents) . - $this->end($tag); + ($escape ? $this->escape($contents) : $contents) . + $this->end($tag); } - protected function elementEmpty($tag, $attr = array()) { + /** + * @param string $tag + * @param array $attr + * @return string + */ + protected function elementEmpty($tag, $attr = array()) + { return $this->generator->generateFromToken( new HTMLPurifier_Token_Empty($tag, $attr) ); } - protected function text($text) { + /** + * @param string $text + * @return string + */ + protected function text($text) + { return $this->generator->generateFromToken( new HTMLPurifier_Token_Text($text) ); @@ -85,24 +107,29 @@ class HTMLPurifier_Printer /** * Prints a simple key/value row in a table. - * @param $name Key - * @param $value Value + * @param string $name Key + * @param mixed $value Value + * @return string */ - protected function row($name, $value) { - if (is_bool($value)) $value = $value ? 'On' : 'Off'; + protected function row($name, $value) + { + if (is_bool($value)) { + $value = $value ? 'On' : 'Off'; + } return $this->start('tr') . "\n" . - $this->element('th', $name) . "\n" . - $this->element('td', $value) . "\n" . - $this->end('tr') - ; + $this->element('th', $name) . "\n" . + $this->element('td', $value) . "\n" . + $this->end('tr'); } /** * Escapes a string for HTML output. - * @param $string String to escape + * @param string $string String to escape + * @return string */ - protected function escape($string) { + protected function escape($string) + { $string = HTMLPurifier_Encoder::cleanUTF8($string); $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); return $string; @@ -110,32 +137,46 @@ class HTMLPurifier_Printer /** * Takes a list of strings and turns them into a single list - * @param $array List of strings - * @param $polite Bool whether or not to add an end before the last + * @param string[] $array List of strings + * @param bool $polite Bool whether or not to add an end before the last + * @return string */ - protected function listify($array, $polite = false) { - if (empty($array)) return 'None'; + protected function listify($array, $polite = false) + { + if (empty($array)) { + return 'None'; + } $ret = ''; $i = count($array); foreach ($array as $value) { $i--; $ret .= $value; - if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; - if ($polite && $i == 1) $ret .= 'and '; + if ($i > 0 && !($polite && $i == 1)) { + $ret .= ', '; + } + if ($polite && $i == 1) { + $ret .= 'and '; + } } return $ret; } /** * Retrieves the class of an object without prefixes, as well as metadata - * @param $obj Object to determine class of - * @param $prefix Further prefix to remove + * @param object $obj Object to determine class of + * @param string $sec_prefix Further prefix to remove + * @return string */ - protected function getClass($obj, $sec_prefix = '') { + protected function getClass($obj, $sec_prefix = '') + { static $five = null; - if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); + if ($five === null) { + $five = version_compare(PHP_VERSION, '5', '>='); + } $prefix = 'HTMLPurifier_' . $sec_prefix; - if (!$five) $prefix = strtolower($prefix); + if (!$five) { + $prefix = strtolower($prefix); + } $class = str_replace($prefix, '', get_class($obj)); $lclass = strtolower($class); $class .= '('; @@ -164,13 +205,14 @@ class HTMLPurifier_Printer break; case 'css_importantdecorator': $class .= $this->getClass($obj->def, $sec_prefix); - if ($obj->allow) $class .= ', !important'; + if ($obj->allow) { + $class .= ', !important'; + } break; } $class .= ')'; return $class; } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Printer/CSSDefinition.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/CSSDefinition.php similarity index 85% rename from library/HTMLPurifier/Printer/CSSDefinition.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/CSSDefinition.php index 81f9865901..29505fe12d 100644 --- a/library/HTMLPurifier/Printer/CSSDefinition.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/CSSDefinition.php @@ -2,10 +2,17 @@ class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer { - + /** + * @type HTMLPurifier_CSSDefinition + */ protected $def; - public function render($config) { + /** + * @param HTMLPurifier_Config $config + * @return string + */ + public function render($config) + { $this->def = $config->getCSSDefinition(); $ret = ''; @@ -32,7 +39,6 @@ class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer return $ret; } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Printer/ConfigForm.css b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.css similarity index 100% rename from library/HTMLPurifier/Printer/ConfigForm.css rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.css diff --git a/library/HTMLPurifier/Printer/ConfigForm.js b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.js similarity index 100% rename from library/HTMLPurifier/Printer/ConfigForm.js rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.js diff --git a/library/HTMLPurifier/Printer/ConfigForm.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php similarity index 60% rename from library/HTMLPurifier/Printer/ConfigForm.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php index 02aa656894..36100ce738 100644 --- a/library/HTMLPurifier/Printer/ConfigForm.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/ConfigForm.php @@ -7,17 +7,20 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer { /** - * Printers for specific fields + * Printers for specific fields. + * @type HTMLPurifier_Printer[] */ protected $fields = array(); /** - * Documentation URL, can have fragment tagged on end + * Documentation URL, can have fragment tagged on end. + * @type string */ protected $docURL; /** - * Name of form element to stuff config in + * Name of form element to stuff config in. + * @type string */ protected $name; @@ -25,24 +28,27 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer * Whether or not to compress directive names, clipping them off * after a certain amount of letters. False to disable or integer letters * before clipping. + * @type bool */ protected $compress = false; /** - * @param $name Form element name for directives to be stuffed into - * @param $doc_url String documentation URL, will have fragment tagged on - * @param $compress Integer max length before compressing a directive name, set to false to turn off + * @param string $name Form element name for directives to be stuffed into + * @param string $doc_url String documentation URL, will have fragment tagged on + * @param bool $compress Integer max length before compressing a directive name, set to false to turn off */ public function __construct( - $name, $doc_url = null, $compress = false + $name, + $doc_url = null, + $compress = false ) { parent::__construct(); $this->docURL = $doc_url; - $this->name = $name; + $this->name = $name; $this->compress = $compress; // initialize sub-printers - $this->fields[0] = new HTMLPurifier_Printer_ConfigForm_default(); - $this->fields[HTMLPurifier_VarParser::BOOL] = new HTMLPurifier_Printer_ConfigForm_bool(); + $this->fields[0] = new HTMLPurifier_Printer_ConfigForm_default(); + $this->fields[HTMLPurifier_VarParser::BOOL] = new HTMLPurifier_Printer_ConfigForm_bool(); } /** @@ -50,32 +56,42 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer * @param $cols Integer columns of textarea, null to use default * @param $rows Integer rows of textarea, null to use default */ - public function setTextareaDimensions($cols = null, $rows = null) { - if ($cols) $this->fields['default']->cols = $cols; - if ($rows) $this->fields['default']->rows = $rows; + public function setTextareaDimensions($cols = null, $rows = null) + { + if ($cols) { + $this->fields['default']->cols = $cols; + } + if ($rows) { + $this->fields['default']->rows = $rows; + } } /** * Retrieves styling, in case it is not accessible by webserver */ - public static function getCSS() { + public static function getCSS() + { return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css'); } /** * Retrieves JavaScript, in case it is not accessible by webserver */ - public static function getJavaScript() { + public static function getJavaScript() + { return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js'); } /** * Returns HTML output for a configuration form - * @param $config Configuration object of current form state, or an array + * @param HTMLPurifier_Config|array $config Configuration object of current form state, or an array * where [0] has an HTML namespace and [1] is being rendered. - * @param $allowed Optional namespace(s) and directives to restrict form to. + * @param array|bool $allowed Optional namespace(s) and directives to restrict form to. + * @param bool $render_controls + * @return string */ - public function render($config, $allowed = true, $render_controls = true) { + public function render($config, $allowed = true, $render_controls = true) + { if (is_array($config) && isset($config[0])) { $gen_config = $config[0]; $config = $config[1]; @@ -91,29 +107,29 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer $all = array(); foreach ($allowed as $key) { list($ns, $directive) = $key; - $all[$ns][$directive] = $config->get($ns .'.'. $directive); + $all[$ns][$directive] = $config->get($ns . '.' . $directive); } $ret = ''; $ret .= $this->start('table', array('class' => 'hp-config')); $ret .= $this->start('thead'); $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Directive', array('class' => 'hp-directive')); - $ret .= $this->element('th', 'Value', array('class' => 'hp-value')); + $ret .= $this->element('th', 'Directive', array('class' => 'hp-directive')); + $ret .= $this->element('th', 'Value', array('class' => 'hp-value')); $ret .= $this->end('tr'); $ret .= $this->end('thead'); foreach ($all as $ns => $directives) { $ret .= $this->renderNamespace($ns, $directives); } if ($render_controls) { - $ret .= $this->start('tbody'); - $ret .= $this->start('tr'); - $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls')); - $ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit')); - $ret .= '[Reset]'; - $ret .= $this->end('td'); - $ret .= $this->end('tr'); - $ret .= $this->end('tbody'); + $ret .= $this->start('tbody'); + $ret .= $this->start('tr'); + $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls')); + $ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit')); + $ret .= '[Reset]'; + $ret .= $this->end('td'); + $ret .= $this->end('tr'); + $ret .= $this->end('tbody'); } $ret .= $this->end('table'); return $ret; @@ -122,13 +138,15 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer /** * Renders a single namespace * @param $ns String namespace name - * @param $directive Associative array of directives to values + * @param array $directives array of directives to values + * @return string */ - protected function renderNamespace($ns, $directives) { + protected function renderNamespace($ns, $directives) + { $ret = ''; $ret .= $this->start('tbody', array('class' => 'namespace')); $ret .= $this->start('tr'); - $ret .= $this->element('th', $ns, array('colspan' => 2)); + $ret .= $this->element('th', $ns, array('colspan' => 2)); $ret .= $this->end('tr'); $ret .= $this->end('tbody'); $ret .= $this->start('tbody'); @@ -139,40 +157,44 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer $url = str_replace('%s', urlencode("$ns.$directive"), $this->docURL); $ret .= $this->start('a', array('href' => $url)); } - $attr = array('for' => "{$this->name}:$ns.$directive"); + $attr = array('for' => "{$this->name}:$ns.$directive"); - // crop directive name if it's too long - if (!$this->compress || (strlen($directive) < $this->compress)) { - $directive_disp = $directive; - } else { - $directive_disp = substr($directive, 0, $this->compress - 2) . '...'; - $attr['title'] = $directive; - } + // crop directive name if it's too long + if (!$this->compress || (strlen($directive) < $this->compress)) { + $directive_disp = $directive; + } else { + $directive_disp = substr($directive, 0, $this->compress - 2) . '...'; + $attr['title'] = $directive; + } - $ret .= $this->element( - 'label', - $directive_disp, - // component printers must create an element with this id - $attr - ); - if ($this->docURL) $ret .= $this->end('a'); + $ret .= $this->element( + 'label', + $directive_disp, + // component printers must create an element with this id + $attr + ); + if ($this->docURL) { + $ret .= $this->end('a'); + } $ret .= $this->end('th'); $ret .= $this->start('td'); - $def = $this->config->def->info["$ns.$directive"]; - if (is_int($def)) { - $allow_null = $def < 0; - $type = abs($def); - } else { - $type = $def->type; - $allow_null = isset($def->allow_null); - } - if (!isset($this->fields[$type])) $type = 0; // default - $type_obj = $this->fields[$type]; - if ($allow_null) { - $type_obj = new HTMLPurifier_Printer_ConfigForm_NullDecorator($type_obj); - } - $ret .= $type_obj->render($ns, $directive, $value, $this->name, array($this->genConfig, $this->config)); + $def = $this->config->def->info["$ns.$directive"]; + if (is_int($def)) { + $allow_null = $def < 0; + $type = abs($def); + } else { + $type = $def->type; + $allow_null = isset($def->allow_null); + } + if (!isset($this->fields[$type])) { + $type = 0; + } // default + $type_obj = $this->fields[$type]; + if ($allow_null) { + $type_obj = new HTMLPurifier_Printer_ConfigForm_NullDecorator($type_obj); + } + $ret .= $type_obj->render($ns, $directive, $value, $this->name, array($this->genConfig, $this->config)); $ret .= $this->end('td'); $ret .= $this->end('tr'); } @@ -185,19 +207,33 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer /** * Printer decorator for directives that accept null */ -class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer { +class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer +{ /** * Printer being decorated + * @type HTMLPurifier_Printer */ protected $obj; + /** - * @param $obj Printer to decorate + * @param HTMLPurifier_Printer $obj Printer to decorate */ - public function __construct($obj) { + public function __construct($obj) + { parent::__construct(); $this->obj = $obj; } - public function render($ns, $directive, $value, $name, $config) { + + /** + * @param string $ns + * @param string $directive + * @param string $value + * @param string $name + * @param HTMLPurifier_Config|array $config + * @return string + */ + public function render($ns, $directive, $value, $name, $config) + { if (is_array($config) && isset($config[0])) { $gen_config = $config[0]; $config = $config[1]; @@ -215,15 +251,19 @@ class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer 'type' => 'checkbox', 'value' => '1', 'class' => 'null-toggle', - 'name' => "$name"."[Null_$ns.$directive]", + 'name' => "$name" . "[Null_$ns.$directive]", 'id' => "$name:Null_$ns.$directive", 'onclick' => "toggleWriteability('$name:$ns.$directive',checked)" // INLINE JAVASCRIPT!!!! ); if ($this->obj instanceof HTMLPurifier_Printer_ConfigForm_bool) { // modify inline javascript slightly - $attr['onclick'] = "toggleWriteability('$name:Yes_$ns.$directive',checked);toggleWriteability('$name:No_$ns.$directive',checked)"; + $attr['onclick'] = + "toggleWriteability('$name:Yes_$ns.$directive',checked);" . + "toggleWriteability('$name:No_$ns.$directive',checked)"; + } + if ($value === null) { + $attr['checked'] = 'checked'; } - if ($value === null) $attr['checked'] = 'checked'; $ret .= $this->elementEmpty('input', $attr); $ret .= $this->text(' or '); $ret .= $this->elementEmpty('br'); @@ -235,10 +275,28 @@ class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer /** * Swiss-army knife configuration form field printer */ -class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer { +class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer +{ + /** + * @type int + */ public $cols = 18; + + /** + * @type int + */ public $rows = 5; - public function render($ns, $directive, $value, $name, $config) { + + /** + * @param string $ns + * @param string $directive + * @param string $value + * @param string $name + * @param HTMLPurifier_Config|array $config + * @return string + */ + public function render($ns, $directive, $value, $name, $config) + { if (is_array($config) && isset($config[0])) { $gen_config = $config[0]; $config = $config[1]; @@ -262,6 +320,7 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer { foreach ($array as $val => $b) { $value[] = $val; } + //TODO does this need a break? case HTMLPurifier_VarParser::ALIST: $value = implode(PHP_EOL, $value); break; @@ -281,25 +340,27 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer { $value = serialize($value); } $attr = array( - 'name' => "$name"."[$ns.$directive]", + 'name' => "$name" . "[$ns.$directive]", 'id' => "$name:$ns.$directive" ); - if ($value === null) $attr['disabled'] = 'disabled'; + if ($value === null) { + $attr['disabled'] = 'disabled'; + } if (isset($def->allowed)) { $ret .= $this->start('select', $attr); foreach ($def->allowed as $val => $b) { $attr = array(); - if ($value == $val) $attr['selected'] = 'selected'; + if ($value == $val) { + $attr['selected'] = 'selected'; + } $ret .= $this->element('option', $val, $attr); } $ret .= $this->end('select'); - } elseif ( - $type === HTMLPurifier_VarParser::TEXT || - $type === HTMLPurifier_VarParser::ITEXT || - $type === HTMLPurifier_VarParser::ALIST || - $type === HTMLPurifier_VarParser::HASH || - $type === HTMLPurifier_VarParser::LOOKUP - ) { + } elseif ($type === HTMLPurifier_VarParser::TEXT || + $type === HTMLPurifier_VarParser::ITEXT || + $type === HTMLPurifier_VarParser::ALIST || + $type === HTMLPurifier_VarParser::HASH || + $type === HTMLPurifier_VarParser::LOOKUP) { $attr['cols'] = $this->cols; $attr['rows'] = $this->rows; $ret .= $this->start('textarea', $attr); @@ -317,8 +378,18 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer { /** * Bool form field printer */ -class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer { - public function render($ns, $directive, $value, $name, $config) { +class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer +{ + /** + * @param string $ns + * @param string $directive + * @param string $value + * @param string $name + * @param HTMLPurifier_Config|array $config + * @return string + */ + public function render($ns, $directive, $value, $name, $config) + { if (is_array($config) && isset($config[0])) { $gen_config = $config[0]; $config = $config[1]; @@ -336,12 +407,16 @@ class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer { $attr = array( 'type' => 'radio', - 'name' => "$name"."[$ns.$directive]", + 'name' => "$name" . "[$ns.$directive]", 'id' => "$name:Yes_$ns.$directive", 'value' => '1' ); - if ($value === true) $attr['checked'] = 'checked'; - if ($value === null) $attr['disabled'] = 'disabled'; + if ($value === true) { + $attr['checked'] = 'checked'; + } + if ($value === null) { + $attr['disabled'] = 'disabled'; + } $ret .= $this->elementEmpty('input', $attr); $ret .= $this->start('label', array('for' => "$name:No_$ns.$directive")); @@ -351,12 +426,16 @@ class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer { $attr = array( 'type' => 'radio', - 'name' => "$name"."[$ns.$directive]", + 'name' => "$name" . "[$ns.$directive]", 'id' => "$name:No_$ns.$directive", 'value' => '0' ); - if ($value === false) $attr['checked'] = 'checked'; - if ($value === null) $attr['disabled'] = 'disabled'; + if ($value === false) { + $attr['checked'] = 'checked'; + } + if ($value === null) { + $attr['disabled'] = 'disabled'; + } $ret .= $this->elementEmpty('input', $attr); $ret .= $this->end('div'); diff --git a/library/HTMLPurifier/Printer/HTMLDefinition.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/HTMLDefinition.php similarity index 53% rename from library/HTMLPurifier/Printer/HTMLDefinition.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/HTMLDefinition.php index 8a8f126b81..5f2f2f8a7c 100644 --- a/library/HTMLPurifier/Printer/HTMLDefinition.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Printer/HTMLDefinition.php @@ -4,11 +4,16 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer { /** - * Instance of HTMLPurifier_HTMLDefinition, for easy access + * @type HTMLPurifier_HTMLDefinition, for easy access */ protected $def; - public function render($config) { + /** + * @param HTMLPurifier_Config $config + * @return string + */ + public function render($config) + { $ret = ''; $this->config =& $config; @@ -28,8 +33,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Renders the Doctype table + * @return string */ - protected function renderDoctype() { + protected function renderDoctype() + { $doctype = $this->def->doctype; $ret = ''; $ret .= $this->start('table'); @@ -45,8 +52,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Renders environment table, which is miscellaneous info + * @return string */ - protected function renderEnvironment() { + protected function renderEnvironment() + { $def = $this->def; $ret = ''; @@ -59,28 +68,28 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $ret .= $this->row('Block wrap name', $def->info_block_wrapper); $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Global attributes'); - $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0); + $ret .= $this->element('th', 'Global attributes'); + $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr), null, 0); $ret .= $this->end('tr'); $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Tag transforms'); - $list = array(); - foreach ($def->info_tag_transform as $old => $new) { - $new = $this->getClass($new, 'TagTransform_'); - $list[] = "<$old> with $new"; - } - $ret .= $this->element('td', $this->listify($list)); + $ret .= $this->element('th', 'Tag transforms'); + $list = array(); + foreach ($def->info_tag_transform as $old => $new) { + $new = $this->getClass($new, 'TagTransform_'); + $list[] = "<$old> with $new"; + } + $ret .= $this->element('td', $this->listify($list)); $ret .= $this->end('tr'); $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Pre-AttrTransform'); - $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre)); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre)); $ret .= $this->end('tr'); $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Post-AttrTransform'); - $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post)); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post)); $ret .= $this->end('tr'); $ret .= $this->end('table'); @@ -89,8 +98,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Renders the Content Sets table + * @return string */ - protected function renderContentSets() { + protected function renderContentSets() + { $ret = ''; $ret .= $this->start('table'); $ret .= $this->element('caption', 'Content Sets'); @@ -106,8 +117,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Renders the Elements ($info) table + * @return string */ - protected function renderInfo() { + protected function renderInfo() + { $ret = ''; $ret .= $this->start('table'); $ret .= $this->element('caption', 'Elements ($info)'); @@ -118,39 +131,39 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $ret .= $this->end('tr'); foreach ($this->def->info as $name => $def) { $ret .= $this->start('tr'); - $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); + $ret .= $this->element('th', "<$name>", array('class' => 'heavy', 'colspan' => 2)); $ret .= $this->end('tr'); $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Inline content'); - $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No'); + $ret .= $this->element('th', 'Inline content'); + $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No'); $ret .= $this->end('tr'); if (!empty($def->excludes)) { $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Excludes'); - $ret .= $this->element('td', $this->listifyTagLookup($def->excludes)); + $ret .= $this->element('th', 'Excludes'); + $ret .= $this->element('td', $this->listifyTagLookup($def->excludes)); $ret .= $this->end('tr'); } if (!empty($def->attr_transform_pre)) { $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Pre-AttrTransform'); - $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre)); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre)); $ret .= $this->end('tr'); } if (!empty($def->attr_transform_post)) { $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Post-AttrTransform'); - $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post)); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post)); $ret .= $this->end('tr'); } if (!empty($def->auto_close)) { $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Auto closed by'); - $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close)); + $ret .= $this->element('th', 'Auto closed by'); + $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close)); $ret .= $this->end('tr'); } $ret .= $this->start('tr'); - $ret .= $this->element('th', 'Allowed attributes'); - $ret .= $this->element('td',$this->listifyAttr($def->attr), array(), 0); + $ret .= $this->element('th', 'Allowed attributes'); + $ret .= $this->element('td', $this->listifyAttr($def->attr), array(), 0); $ret .= $this->end('tr'); if (!empty($def->required_attr)) { @@ -165,64 +178,94 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Renders a row describing the allowed children of an element - * @param $def HTMLPurifier_ChildDef of pertinent element + * @param HTMLPurifier_ChildDef $def HTMLPurifier_ChildDef of pertinent element + * @return string */ - protected function renderChildren($def) { + protected function renderChildren($def) + { $context = new HTMLPurifier_Context(); $ret = ''; $ret .= $this->start('tr'); + $elements = array(); + $attr = array(); + if (isset($def->elements)) { + if ($def->type == 'strictblockquote') { + $def->validateChildren(array(), $this->config, $context); + } + $elements = $def->elements; + } + if ($def->type == 'chameleon') { + $attr['rowspan'] = 2; + } elseif ($def->type == 'empty') { $elements = array(); - $attr = array(); - if (isset($def->elements)) { - if ($def->type == 'strictblockquote') { - $def->validateChildren(array(), $this->config, $context); - } - $elements = $def->elements; - } - if ($def->type == 'chameleon') { - $attr['rowspan'] = 2; - } elseif ($def->type == 'empty') { - $elements = array(); - } elseif ($def->type == 'table') { - $elements = array_flip(array('col', 'caption', 'colgroup', 'thead', - 'tfoot', 'tbody', 'tr')); - } - $ret .= $this->element('th', 'Allowed children', $attr); + } elseif ($def->type == 'table') { + $elements = array_flip( + array( + 'col', + 'caption', + 'colgroup', + 'thead', + 'tfoot', + 'tbody', + 'tr' + ) + ); + } + $ret .= $this->element('th', 'Allowed children', $attr); - if ($def->type == 'chameleon') { + if ($def->type == 'chameleon') { - $ret .= $this->element('td', - 'Block: ' . - $this->escape($this->listifyTagLookup($def->block->elements)),0,0); - $ret .= $this->end('tr'); - $ret .= $this->start('tr'); - $ret .= $this->element('td', - 'Inline: ' . - $this->escape($this->listifyTagLookup($def->inline->elements)),0,0); + $ret .= $this->element( + 'td', + 'Block: ' . + $this->escape($this->listifyTagLookup($def->block->elements)), + null, + 0 + ); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element( + 'td', + 'Inline: ' . + $this->escape($this->listifyTagLookup($def->inline->elements)), + null, + 0 + ); - } elseif ($def->type == 'custom') { + } elseif ($def->type == 'custom') { - $ret .= $this->element('td', ''.ucfirst($def->type).': ' . - $def->dtd_regex); + $ret .= $this->element( + 'td', + '' . ucfirst($def->type) . ': ' . + $def->dtd_regex + ); - } else { - $ret .= $this->element('td', - ''.ucfirst($def->type).': ' . - $this->escape($this->listifyTagLookup($elements)),0,0); - } + } else { + $ret .= $this->element( + 'td', + '' . ucfirst($def->type) . ': ' . + $this->escape($this->listifyTagLookup($elements)), + null, + 0 + ); + } $ret .= $this->end('tr'); return $ret; } /** * Listifies a tag lookup table. - * @param $array Tag lookup array in form of array('tagname' => true) + * @param array $array Tag lookup array in form of array('tagname' => true) + * @return string */ - protected function listifyTagLookup($array) { + protected function listifyTagLookup($array) + { ksort($array); $list = array(); foreach ($array as $name => $discard) { - if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue; + if ($name !== '#PCDATA' && !isset($this->def->info[$name])) { + continue; + } $list[] = $name; } return $this->listify($list); @@ -230,13 +273,15 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Listifies a list of objects by retrieving class names and internal state - * @param $array List of objects + * @param array $array List of objects + * @return string * @todo Also add information about internal state */ - protected function listifyObjectList($array) { + protected function listifyObjectList($array) + { ksort($array); $list = array(); - foreach ($array as $discard => $obj) { + foreach ($array as $obj) { $list[] = $this->getClass($obj, 'AttrTransform_'); } return $this->listify($list); @@ -244,13 +289,17 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Listifies a hash of attributes to AttrDef classes - * @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef) + * @param array $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef) + * @return string */ - protected function listifyAttr($array) { + protected function listifyAttr($array) + { ksort($array); $list = array(); foreach ($array as $name => $obj) { - if ($obj === false) continue; + if ($obj === false) { + continue; + } $list[] = "$name = " . $this->getClass($obj, 'AttrDef_') . ''; } return $this->listify($list); @@ -258,15 +307,18 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer /** * Creates a heavy header row + * @param string $text + * @param int $num + * @return string */ - protected function heavyHeader($text, $num = 1) { + protected function heavyHeader($text, $num = 1) + { $ret = ''; $ret .= $this->start('tr'); $ret .= $this->element('th', $text, array('colspan' => $num, 'class' => 'heavy')); $ret .= $this->end('tr'); return $ret; } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/PropertyList.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/PropertyList.php similarity index 50% rename from library/HTMLPurifier/PropertyList.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/PropertyList.php index 2b99fb7bc3..189348fd9e 100644 --- a/library/HTMLPurifier/PropertyList.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/PropertyList.php @@ -6,61 +6,93 @@ class HTMLPurifier_PropertyList { /** - * Internal data-structure for properties + * Internal data-structure for properties. + * @type array */ protected $data = array(); /** - * Parent plist + * Parent plist. + * @type HTMLPurifier_PropertyList */ protected $parent; + /** + * Cache. + * @type array + */ protected $cache; - public function __construct($parent = null) { + /** + * @param HTMLPurifier_PropertyList $parent Parent plist + */ + public function __construct($parent = null) + { $this->parent = $parent; } /** * Recursively retrieves the value for a key + * @param string $name + * @throws HTMLPurifier_Exception */ - public function get($name) { - if ($this->has($name)) return $this->data[$name]; + public function get($name) + { + if ($this->has($name)) { + return $this->data[$name]; + } // possible performance bottleneck, convert to iterative if necessary - if ($this->parent) return $this->parent->get($name); + if ($this->parent) { + return $this->parent->get($name); + } throw new HTMLPurifier_Exception("Key '$name' not found"); } /** * Sets the value of a key, for this plist + * @param string $name + * @param mixed $value */ - public function set($name, $value) { + public function set($name, $value) + { $this->data[$name] = $value; } /** * Returns true if a given key exists + * @param string $name + * @return bool */ - public function has($name) { + public function has($name) + { return array_key_exists($name, $this->data); } /** * Resets a value to the value of it's parent, usually the default. If * no value is specified, the entire plist is reset. + * @param string $name */ - public function reset($name = null) { - if ($name == null) $this->data = array(); - else unset($this->data[$name]); + public function reset($name = null) + { + if ($name == null) { + $this->data = array(); + } else { + unset($this->data[$name]); + } } /** * Squashes this property list and all of its property lists into a single * array, and returns the array. This value is cached by default. - * @param $force If true, ignores the cache and regenerates the array. + * @param bool $force If true, ignores the cache and regenerates the array. + * @return array */ - public function squash($force = false) { - if ($this->cache !== null && !$force) return $this->cache; + public function squash($force = false) + { + if ($this->cache !== null && !$force) { + return $this->cache; + } if ($this->parent) { return $this->cache = array_merge($this->parent->squash($force), $this->data); } else { @@ -70,15 +102,19 @@ class HTMLPurifier_PropertyList /** * Returns the parent plist. + * @return HTMLPurifier_PropertyList */ - public function getParent() { + public function getParent() + { return $this->parent; } /** * Sets the parent plist. + * @param HTMLPurifier_PropertyList $plist Parent plist */ - public function setParent($plist) { + public function setParent($plist) + { $this->parent = $plist; } } diff --git a/library/HTMLPurifier/PropertyListIterator.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/PropertyListIterator.php similarity index 60% rename from library/HTMLPurifier/PropertyListIterator.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/PropertyListIterator.php index 8f250443e4..15b330ea30 100644 --- a/library/HTMLPurifier/PropertyListIterator.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/PropertyListIterator.php @@ -6,27 +6,37 @@ class HTMLPurifier_PropertyListIterator extends FilterIterator { + /** + * @type int + */ protected $l; + /** + * @type string + */ protected $filter; /** - * @param $data Array of data to iterate over - * @param $filter Optional prefix to only allow values of + * @param Iterator $iterator Array of data to iterate over + * @param string $filter Optional prefix to only allow values of */ - public function __construct(Iterator $iterator, $filter = null) { + public function __construct(Iterator $iterator, $filter = null) + { parent::__construct($iterator); $this->l = strlen($filter); $this->filter = $filter; } - public function accept() { + /** + * @return bool + */ + public function accept() + { $key = $this->getInnerIterator()->key(); - if( strncmp($key, $this->filter, $this->l) !== 0 ) { + if (strncmp($key, $this->filter, $this->l) !== 0) { return false; } return true; } - } // vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/library/HTMLPurifier/Queue.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Queue.php new file mode 100644 index 0000000000..f58db9042a --- /dev/null +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Queue.php @@ -0,0 +1,56 @@ +input = $input; + $this->output = array(); + } + + /** + * Shifts an element off the front of the queue. + */ + public function shift() { + if (empty($this->output)) { + $this->output = array_reverse($this->input); + $this->input = array(); + } + if (empty($this->output)) { + return NULL; + } + return array_pop($this->output); + } + + /** + * Pushes an element onto the front of the queue. + */ + public function push($x) { + array_push($this->input, $x); + } + + /** + * Checks if it's empty. + */ + public function isEmpty() { + return empty($this->input) && empty($this->output); + } +} diff --git a/library/HTMLPurifier/Strategy.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy.php similarity index 66% rename from library/HTMLPurifier/Strategy.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy.php index 2462865210..e1ff3b72df 100644 --- a/library/HTMLPurifier/Strategy.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy.php @@ -15,12 +15,12 @@ abstract class HTMLPurifier_Strategy /** * Executes the strategy on the tokens. * - * @param $tokens Array of HTMLPurifier_Token objects to be operated on. - * @param $config Configuration options - * @returns Processed array of token objects. + * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token objects to be operated on. + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return HTMLPurifier_Token[] Processed array of token objects. */ abstract public function execute($tokens, $config, $context); - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Strategy/Composite.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php similarity index 61% rename from library/HTMLPurifier/Strategy/Composite.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php index 816490b799..d7d35ce7d1 100644 --- a/library/HTMLPurifier/Strategy/Composite.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/Composite.php @@ -8,18 +8,23 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy /** * List of strategies to run tokens through. + * @type HTMLPurifier_Strategy[] */ protected $strategies = array(); - abstract public function __construct(); - - public function execute($tokens, $config, $context) { + /** + * @param HTMLPurifier_Token[] $tokens + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return HTMLPurifier_Token[] + */ + public function execute($tokens, $config, $context) + { foreach ($this->strategies as $strategy) { $tokens = $strategy->execute($tokens, $config, $context); } return $tokens; } - } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Strategy/Core.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/Core.php similarity index 92% rename from library/HTMLPurifier/Strategy/Core.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/Core.php index d90e158606..4414c17d6e 100644 --- a/library/HTMLPurifier/Strategy/Core.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/Core.php @@ -5,14 +5,13 @@ */ class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite { - - public function __construct() { + public function __construct() + { $this->strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements(); $this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed(); $this->strategies[] = new HTMLPurifier_Strategy_FixNesting(); $this->strategies[] = new HTMLPurifier_Strategy_ValidateAttributes(); } - } // vim: et sw=4 sts=4 diff --git a/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/FixNesting.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/FixNesting.php new file mode 100644 index 0000000000..6fa673db9a --- /dev/null +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/FixNesting.php @@ -0,0 +1,181 @@ +getHTMLDefinition(); + + $excludes_enabled = !$config->get('Core.DisableExcludes'); + + // setup the context variable 'IsInline', for chameleon processing + // is 'false' when we are not inline, 'true' when it must always + // be inline, and an integer when it is inline for a certain + // branch of the document tree + $is_inline = $definition->info_parent_def->descendants_are_inline; + $context->register('IsInline', $is_inline); + + // setup error collector + $e =& $context->get('ErrorCollector', true); + + //####################################################################// + // Loop initialization + + // stack that contains all elements that are excluded + // it is organized by parent elements, similar to $stack, + // but it is only populated when an element with exclusions is + // processed, i.e. there won't be empty exclusions. + $exclude_stack = array($definition->info_parent_def->excludes); + + // variable that contains the start token while we are processing + // nodes. This enables error reporting to do its job + $node = $top_node; + // dummy token + list($token, $d) = $node->toTokenPair(); + $context->register('CurrentNode', $node); + $context->register('CurrentToken', $token); + + //####################################################################// + // Loop + + // We need to implement a post-order traversal iteratively, to + // avoid running into stack space limits. This is pretty tricky + // to reason about, so we just manually stack-ify the recursive + // variant: + // + // function f($node) { + // foreach ($node->children as $child) { + // f($child); + // } + // validate($node); + // } + // + // Thus, we will represent a stack frame as array($node, + // $is_inline, stack of children) + // e.g. array_reverse($node->children) - already processed + // children. + + $parent_def = $definition->info_parent_def; + $stack = array( + array($top_node, + $parent_def->descendants_are_inline, + $parent_def->excludes, // exclusions + 0) + ); + + while (!empty($stack)) { + list($node, $is_inline, $excludes, $ix) = array_pop($stack); + // recursive call + $go = false; + $def = empty($stack) ? $definition->info_parent_def : $definition->info[$node->name]; + while (isset($node->children[$ix])) { + $child = $node->children[$ix++]; + if ($child instanceof HTMLPurifier_Node_Element) { + $go = true; + $stack[] = array($node, $is_inline, $excludes, $ix); + $stack[] = array($child, + // ToDo: I don't think it matters if it's def or + // child_def, but double check this... + $is_inline || $def->descendants_are_inline, + empty($def->excludes) ? $excludes + : array_merge($excludes, $def->excludes), + 0); + break; + } + }; + if ($go) continue; + list($token, $d) = $node->toTokenPair(); + // base case + if ($excludes_enabled && isset($excludes[$node->name])) { + $node->dead = true; + if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); + } else { + // XXX I suppose it would be slightly more efficient to + // avoid the allocation here and have children + // strategies handle it + $children = array(); + foreach ($node->children as $child) { + if (!$child->dead) $children[] = $child; + } + $result = $def->child->validateChildren($children, $config, $context); + if ($result === true) { + // nop + $node->children = $children; + } elseif ($result === false) { + $node->dead = true; + if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); + } else { + $node->children = $result; + if ($e) { + // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators + if (empty($result) && !empty($children)) { + $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); + } else if ($result != $children) { + $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); + } + } + } + } + } + + //####################################################################// + // Post-processing + + // remove context variables + $context->destroy('IsInline'); + $context->destroy('CurrentNode'); + $context->destroy('CurrentToken'); + + //####################################################################// + // Return + + return HTMLPurifier_Arborize::flatten($node, $config, $context); + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php similarity index 52% rename from library/HTMLPurifier/Strategy/MakeWellFormed.php rename to library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php index c736584007..e389e00116 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/ezyang/htmlpurifier/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -2,66 +2,97 @@ /** * Takes tokens makes them well-formed (balance end tags, etc.) + * + * Specification of the armor attributes this strategy uses: + * + * - MakeWellFormed_TagClosedError: This armor field is used to + * suppress tag closed errors for certain tokens [TagClosedSuppress], + * in particular, if a tag was generated automatically by HTML + * Purifier, we may rely on our infrastructure to close it for us + * and shouldn't report an error to the user [TagClosedAuto]. */ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy { /** * Array stream of tokens being processed. + * @type HTMLPurifier_Token[] */ protected $tokens; /** - * Current index in $tokens. + * Current token. + * @type HTMLPurifier_Token */ - protected $t; + protected $token; + + /** + * Zipper managing the true state. + * @type HTMLPurifier_Zipper + */ + protected $zipper; /** * Current nesting of elements. + * @type array */ protected $stack; /** * Injectors active in this stream processing. + * @type HTMLPurifier_Injector[] */ protected $injectors; /** * Current instance of HTMLPurifier_Config. + * @type HTMLPurifier_Config */ protected $config; /** * Current instance of HTMLPurifier_Context. + * @type HTMLPurifier_Context */ protected $context; - public function execute($tokens, $config, $context) { - + /** + * @param HTMLPurifier_Token[] $tokens + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return HTMLPurifier_Token[] + * @throws HTMLPurifier_Exception + */ + public function execute($tokens, $config, $context) + { $definition = $config->getHTMLDefinition(); // local variables $generator = new HTMLPurifier_Generator($config, $context); $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); + // used for autoclose early abortion + $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config); $e = $context->get('ErrorCollector', true); - $t = false; // token index $i = false; // injector index - $token = false; // the current token - $reprocess = false; // whether or not to reprocess the same token + list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens); + if ($token === NULL) { + return array(); + } + $reprocess = false; // whether or not to reprocess the same token $stack = array(); // member variables - $this->stack =& $stack; - $this->t =& $t; - $this->tokens =& $tokens; - $this->config = $config; + $this->stack =& $stack; + $this->tokens =& $tokens; + $this->token =& $token; + $this->zipper =& $zipper; + $this->config = $config; $this->context = $context; // context variables $context->register('CurrentNesting', $stack); - $context->register('InputIndex', $t); - $context->register('InputTokens', $tokens); - $context->register('CurrentToken', $token); + $context->register('InputZipper', $zipper); + $context->register('CurrentToken', $token); // -- begin INJECTOR -- @@ -73,9 +104,13 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy unset($injectors['Custom']); // special case foreach ($injectors as $injector => $b) { // XXX: Fix with a legitimate lookup table of enabled filters - if (strpos($injector, '.') !== false) continue; + if (strpos($injector, '.') !== false) { + continue; + } $injector = "HTMLPurifier_Injector_$injector"; - if (!$b) continue; + if (!$b) { + continue; + } $this->injectors[] = new $injector; } foreach ($def_injectors as $injector) { @@ -83,7 +118,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $this->injectors[] = $injector; } foreach ($custom_injectors as $injector) { - if (!$injector) continue; + if (!$injector) { + continue; + } if (is_string($injector)) { $injector = "HTMLPurifier_Injector_$injector"; $injector = new $injector; @@ -95,14 +132,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // variables for performance reasons foreach ($this->injectors as $ix => $injector) { $error = $injector->prepare($config, $context); - if (!$error) continue; + if (!$error) { + continue; + } array_splice($this->injectors, $ix, 1); // rm the injector trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING); } // -- end INJECTOR -- - // a note on punting: + // a note on reprocessing: // In order to reduce code duplication, whenever some code needs // to make HTML changes in order to make things "correct", the // new HTML gets sent through the purifier, regardless of its @@ -111,70 +150,75 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // punt ($reprocess = true; continue;) and it does that for us. // isset is in loop because $tokens size changes during loop exec - for ( - $t = 0; - $t == 0 || isset($tokens[$t - 1]); - // only increment if we don't need to reprocess - $reprocess ? $reprocess = false : $t++ - ) { + for (;; + // only increment if we don't need to reprocess + $reprocess ? $reprocess = false : $token = $zipper->next($token)) { // check for a rewind - if (is_int($i) && $i >= 0) { + if (is_int($i)) { // possibility: disable rewinding if the current token has a // rewind set on it already. This would offer protection from // infinite loop, but might hinder some advanced rewinding. - $rewind_to = $this->injectors[$i]->getRewind(); - if (is_int($rewind_to) && $rewind_to < $t) { - if ($rewind_to < 0) $rewind_to = 0; - while ($t > $rewind_to) { - $t--; - $prev = $tokens[$t]; + $rewind_offset = $this->injectors[$i]->getRewindOffset(); + if (is_int($rewind_offset)) { + for ($j = 0; $j < $rewind_offset; $j++) { + if (empty($zipper->front)) break; + $token = $zipper->prev($token); // indicate that other injectors should not process this token, // but we need to reprocess it - unset($prev->skip[$i]); - $prev->rewind = $i; - if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack); - elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start; + unset($token->skip[$i]); + $token->rewind = $i; + if ($token instanceof HTMLPurifier_Token_Start) { + array_pop($this->stack); + } elseif ($token instanceof HTMLPurifier_Token_End) { + $this->stack[] = $token->start; + } } } $i = false; } // handle case of document end - if (!isset($tokens[$t])) { + if ($token === NULL) { // kill processing if stack is empty - if (empty($this->stack)) break; + if (empty($this->stack)) { + break; + } // peek $top_nesting = array_pop($this->stack); $this->stack[] = $top_nesting; - // send error + // send error [TagClosedSuppress] if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); } // append, don't splice, since this is the end - $tokens[] = new HTMLPurifier_Token_End($top_nesting->name); + $token = new HTMLPurifier_Token_End($top_nesting->name); // punt! $reprocess = true; continue; } - $token = $tokens[$t]; - - //echo '
        '; printTokens($tokens, $t); printTokens($this->stack); + //echo '
        '; printZipper($zipper, $token);//printTokens($this->stack); //flush(); // quick-check: if it's not a tag, no need to process if (empty($token->is_tag)) { if ($token instanceof HTMLPurifier_Token_Text) { foreach ($this->injectors as $i => $injector) { - if (isset($token->skip[$i])) continue; - if ($token->rewind !== null && $token->rewind !== $i) continue; - $injector->handleText($token); - $this->processToken($token, $i); + if (isset($token->skip[$i])) { + continue; + } + if ($token->rewind !== null && $token->rewind !== $i) { + continue; + } + // XXX fuckup + $r = $token; + $injector->handleText($r); + $token = $this->processToken($r, $i); $reprocess = true; break; } @@ -193,12 +237,22 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $ok = false; if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) { // claims to be a start tag but is empty - $token = new HTMLPurifier_Token_Empty($token->name, $token->attr); + $token = new HTMLPurifier_Token_Empty( + $token->name, + $token->attr, + $token->line, + $token->col, + $token->armor + ); $ok = true; } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) { // claims to be empty but really is a start tag - $this->swap(new HTMLPurifier_Token_End($token->name)); - $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr)); + // NB: this assignment is required + $old_token = $token; + $token = new HTMLPurifier_Token_End($token->name); + $token = $this->insertBefore( + new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor) + ); // punt (since we had to modify the input stream in a non-trivial way) $reprocess = true; continue; @@ -211,56 +265,97 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // ...unless they also have to close their parent if (!empty($this->stack)) { + // Performance note: you might think that it's rather + // inefficient, recalculating the autoclose information + // for every tag that a token closes (since when we + // do an autoclose, we push a new token into the + // stream and then /process/ that, before + // re-processing this token.) But this is + // necessary, because an injector can make an + // arbitrary transformations to the autoclosing + // tokens we introduce, so things may have changed + // in the meantime. Also, doing the inefficient thing is + // "easy" to reason about (for certain perverse definitions + // of "easy") + $parent = array_pop($this->stack); $this->stack[] = $parent; + $parent_def = null; + $parent_elements = null; + $autoclose = false; if (isset($definition->info[$parent->name])) { - $elements = $definition->info[$parent->name]->child->getAllowedElements($config); - $autoclose = !isset($elements[$token->name]); - } else { - $autoclose = false; + $parent_def = $definition->info[$parent->name]; + $parent_elements = $parent_def->child->getAllowedElements($config); + $autoclose = !isset($parent_elements[$token->name]); } if ($autoclose && $definition->info[$token->name]->wrap) { - // Check if an element can be wrapped by another - // element to make it valid in a context (for + // Check if an element can be wrapped by another + // element to make it valid in a context (for // example,