mistpark 2.0 infrasturcture lands

This commit is contained in:
Mike Macgirvin 2010-09-08 20:14:17 -07:00
commit ffb1997902
360 changed files with 25001 additions and 457 deletions

View file

@ -21,8 +21,25 @@ define ( 'NOTIFY_WALL', 0x0004 );
define ( 'NOTIFY_COMMENT', 0x0008 ); define ( 'NOTIFY_COMMENT', 0x0008 );
define ( 'NOTIFY_MAIL', 0x0010 ); define ( 'NOTIFY_MAIL', 0x0010 );
define ( 'NAMESPACE_DFRN' , 'http://purl.org/macgirvin/dfrn/1.0' ); define ( 'NAMESPACE_DFRN' , 'http://purl.org/macgirvin/dfrn/1.0' );
define ( 'NAMESPACE_ACTIVITY', 'http://activitystrea.ms/schema/1.0/' ); define ( 'NAMESPACE_ACTIVITY', 'http://activitystrea.ms/spec/1.0/' );
define ( 'NAMESPACE_ACTIVITY_SCHEMA', 'http://activitystrea.ms/schema/1.0/');
define ( 'ACTIVITY_LIKE', NAMESPACE_ACTIVITY_SCHEMA . 'like' );
define ( 'ACTIVITY_FRIEND', NAMESPACE_ACTIVITY_SCHEMA . 'make-friend' );
define ( 'ACTIVITY_POST', NAMESPACE_ACTIVITY_SCHEMA . 'post' );
define ( 'ACTIVITY_UPDATE', NAMESPACE_ACTIVITY_SCHEMA . 'update' );
define ( 'ACTIVITY_OBJ_COMMENT', NAMESPACE_ACTIVITY_SCHEMA . 'comment' );
define ( 'ACTIVITY_OBJ_NOTE', NAMESPACE_ACTIVITY_SCHEMA . 'note' );
define ( 'ACTIVITY_OBJ_PERSON', NAMESPACE_ACTIVITY_SCHEMA . 'person' );
define ( 'ACTIVITY_OBJ_PHOTO', NAMESPACE_ACTIVITY_SCHEMA . 'photo' );
define ( 'ACTIVITY_OBJ_P_PHOTO', NAMESPACE_ACTIVITY_SCHEMA . 'profile-photo' );
define ( 'ACTIVITY_OBJ_ALBUM', NAMESPACE_ACTIVITY_SCHEMA . 'photo-album' );
define ( 'ACTIVITY_OBJ_HEART', NAMESPACE_DFRN . '/heart' );
if(! class_exists('App')) { if(! class_exists('App')) {
class App { class App {
@ -582,8 +599,11 @@ function get_config($family, $key, $instore = false) {
global $a; global $a;
if(! $instore) { if(! $instore) {
if(isset($a->config[$family][$key])) if(isset($a->config[$family][$key])) {
if($a->config[$family][$key] == '!<unset>!')
return false;
return $a->config[$family][$key]; return $a->config[$family][$key];
}
} }
$ret = q("SELECT `v` FROM `config` WHERE `cat` = '%s' AND `k` = '%s' LIMIT 1", $ret = q("SELECT `v` FROM `config` WHERE `cat` = '%s' AND `k` = '%s' LIMIT 1",
dbesc($family), dbesc($family),
@ -593,6 +613,9 @@ function get_config($family, $key, $instore = false) {
$a->config[$family][$key] = $ret[0]['v']; $a->config[$family][$key] = $ret[0]['v'];
return $ret[0]['v']; return $ret[0]['v'];
} }
else {
$a->config[$family][$key] = '!<unset>!';
}
return false; return false;
}} }}
@ -667,8 +690,9 @@ function convert_xml_element_to_array($xml_element, &$recursion_depth=0) {
if(! function_exists('webfinger')) { if(! function_exists('webfinger')) {
function webfinger($s) { function webfinger($s) {
if(! strstr($s,'@')) if(! strstr($s,'@')) {
return $s; return $s;
}
$host = substr($s,strpos($s,'@') + 1); $host = substr($s,strpos($s,'@') + 1);
$url = 'http://' . $host . '/.well-known/host-meta' ; $url = 'http://' . $host . '/.well-known/host-meta' ;
$xml = fetch_url($url); $xml = fetch_url($url);
@ -714,3 +738,51 @@ function webfinger($s) {
return $link['@attributes']['href']; return $link['@attributes']['href'];
return ''; return '';
}} }}
if(! function_exists('perms2str')) {
function perms2str($p) {
$ret = '';
$tmp = $p;
if(is_array($tmp)) {
array_walk($tmp,'sanitise_acl');
$ret = implode('',$tmp);
}
return $ret;
}}
if(! function_exists('item_new_uri')) {
function item_new_uri($hostname,$uid) {
do {
$dups = false;
$hash = random_string();
$uri = "urn:X-dfrn:" . $hostname . ':' . $uid . ':' . $hash;
$r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' LIMIT 1",
dbesc($uri));
if(count($r))
$dups = true;
} while($dups == true);
return $uri;
}}
if(! function_exists('get_uid')) {
function get_uid() {
return ((x($_SESSION,'uid')) ? intval($_SESSION['uid']) : 0) ;
}}
if(! function_exists('validate_url')) {
function validate_url($url) {
if(substr($url,0,4) != 'http')
$url = 'http://' . $url;
$h = parse_url($url);
if(! $h)
return false;
if(! checkdnsrr($h['host'], 'ANY'))
return false;
return true;
}}

54
include/Contact.php Normal file
View file

@ -0,0 +1,54 @@
<?php
function contact_remove($id) {
q("DELETE FROM `contact` WHERE `id` = %d LIMIT 1",
intval($id)
);
q("DELETE FROM `item` WHERE `contact-id` = %d ",
intval($id)
);
q("DELETE FROM `photo` WHERE `contact-id` = %d ",
intval($id)
);
}
// Contact has refused to recognise us as a friend. We will start a countdown.
// If they still don't recognise us in 32 days, the relationship is over,
// and we won't waste any more time trying to communicate with them.
// This provides for the possibility that their database is temporarily messed
// up or some other transient event and that there's a possibility we could recover from it.
if(! function_exists('mark_for_death')) {
function mark_for_death($contact) {
if($contact['term-date'] == '0000-00-00 00:00:00') {
q("UPDATE `contact` SET `term-date` = '%s' WHERE `id` = %d LIMIT 1",
dbesc(datetime_convert()),
intval($contact['id'])
);
}
else {
$expiry = $contact['term-date'] . ' + 32 days ';
if(datetime_convert() > datetime_convert('UTC','UTC',$expiry)) {
// relationship is really truly dead.
contact_remove($contact['id']);
}
}
}}
if(! function_exists('unmark_for_death')) {
function unmark_for_death($contact) {
// It's a miracle. Our dead contact has inexplicably come back to life.
q("UPDATE `contact` SET `term-date = '%s' WHERE `id` = %d LIMIT 1",
dbesc('0000-00-00 00:00:00'),
intval($contact['id'])
);
}}

View file

@ -7,7 +7,6 @@ if((x($_SESSION,'authenticated')) && (! ($_POST['auth-params'] == 'login'))) {
unset($_SESSION['authenticated']); unset($_SESSION['authenticated']);
unset($_SESSION['uid']); unset($_SESSION['uid']);
unset($_SESSION['visitor_id']); unset($_SESSION['visitor_id']);
unset($_SESSION['is_visitor']);
unset($_SESSION['administrator']); unset($_SESSION['administrator']);
unset($_SESSION['cid']); unset($_SESSION['cid']);
unset($_SESSION['theme']); unset($_SESSION['theme']);
@ -41,7 +40,6 @@ else {
unset($_SESSION['authenticated']); unset($_SESSION['authenticated']);
unset($_SESSION['uid']); unset($_SESSION['uid']);
unset($_SESSION['visitor_id']); unset($_SESSION['visitor_id']);
unset($_SESSION['is_visitor']);
unset($_SESSION['administrator']); unset($_SESSION['administrator']);
unset($_SESSION['cid']); unset($_SESSION['cid']);
$encrypted = hash('whirlpool',trim($_POST['password'])); $encrypted = hash('whirlpool',trim($_POST['password']));

View file

@ -76,7 +76,7 @@ function bbcode($Text) {
// Youtube extensions // Youtube extensions
$Text = preg_replace("/\[youtube\]http:\/\/www.youtube.com\/watch\?v\=(.+?)\[\/youtube\]/",'[youtube]$1[/youtube]',$Text); $Text = preg_replace("/\[youtube\]http:\/\/www.youtube.com\/watch\?v\=(.+?)\[\/youtube\]/",'[youtube]$1[/youtube]',$Text);
$Text = preg_replace("/\[youtube\](.+?)\[\/youtube\]/", '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/$1"></param><embed src="http://www.youtube.com/v/$1" type="application/x-shockwave-flash" width="425" height="350"></embed></object>', $Text); $Text = preg_replace("/\[youtube\](.+?)\[\/youtube\]/", '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/$1"></param><!--[if IE]><embed src="http://www.youtube.com/v/$1" type="application/x-shockwave-flash" width="425" height="350" /><![endif]--></object>', $Text);
return $Text; return $Text;
} }

View file

@ -55,9 +55,20 @@ function select_timezone($current = 'America/Los_Angeles') {
if(! function_exists('datetime_convert')) { if(! function_exists('datetime_convert')) {
function datetime_convert($from = 'UTC', $to = 'UTC', $s = 'now', $fmt = "Y-m-d H:i:s") { function datetime_convert($from = 'UTC', $to = 'UTC', $s = 'now', $fmt = "Y-m-d H:i:s") {
$d = new DateTime($s, new DateTimeZone($from));
$d->setTimeZone(new DateTimeZone($to)); // Slight hackish adjustment so that 'zero' datetime actually returns what is intended
return($d->format($fmt)); // otherwise we end up with -0001-11-30 ...
// add 32 days so that we at least get year 00, and then hack around the fact that
// months and days always start with 1.
if(substr($s,0,10) == '0000-00-00') {
$d = new DateTime($s . ' + 32 days', new DateTimeZone('UTC'));
return str_replace('1','0',$d->format($fmt));
}
$d = new DateTime($s, new DateTimeZone($from));
$d->setTimeZone(new DateTimeZone($to));
return($d->format($fmt));
}} }}
function dob($dob) { function dob($dob) {

50
include/html2bbcode.php Normal file
View file

@ -0,0 +1,50 @@
<?php
function html2bbcode($s) {
// Tags to Find
$htmltags = array(
'/\<b\>(.*?)\<\/b\>/is',
'/\<i\>(.*?)\<\/i\>/is',
'/\<u\>(.*?)\<\/u\>/is',
'/\<ul\>(.*?)\<\/ul\>/is',
'/\<li\>(.*?)\<\/li\>/is',
'/\<img(.*?) src=\"(.*?)\" (.*?)\>/is',
'/\<div(.*?)\>(.*?)\<\/div\>/is',
'/\<br(.*?)\>/is',
'/\<strong\>(.*?)\<\/strong\>/is',
'/\<a href=\"(.*?)\"(.*?)\>(.*?)\<\/a\>/is',
'/\<code\>(.*?)\<\/code\>/is',
'/\<font color=(.*?)\>(.*?)\<\/font\>',
'/\<font color=\"(.*?)\"\>(.*?)\<\/font\>',
'/\<blockquote\>(.*?)\<\/blockquote\>/is',
);
// Replace with
$bbtags = array(
'[b]$1[/b]',
'[i]$1[/i]',
'[u]$1[/u]',
'[list]$1[/list]',
'[*]$1',
'[img]$2[/img]',
'$2',
'\n',
'[b]$1[/b]',
'[url=$1]$3[/url]',
'[code]$1[/code],
'[color="$1"]$2[/color]',
'[color="$1"]$2[/color]',
'[quote]$1[/quote]',
);
// Replace $htmltags in $text with $bbtags
$text = preg_replace ($htmltags, $bbtags, $s);
// Strip all other HTML tags
$text = strip_tags($text);
return $text;
}

View file

@ -1,18 +1,27 @@
<?php <?php
function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) { function get_feed_for(&$a, $dfrn_id, $owner_id, $last_update) {
require_once('bbcode.php');
// default permissions - anonymous user // default permissions - anonymous user
$sql_extra = " AND `allow_cid` = '' AND `allow_gid` = '' AND `deny_cid` = '' AND `deny_gid` = '' "; $sql_extra = "
AND `allow_cid` = ''
AND `allow_gid` = ''
AND `deny_cid` = ''
AND `deny_gid` = ''
";
if(strlen($owner_id) && ! intval($owner_id)) { if(strlen($owner_id) && ! intval($owner_id)) {
$r = q("SELECT `uid` FROM `user` WHERE `nickname` = '%s' LIMIT 1", $r = q("SELECT `uid`, `nickname` FROM `user` WHERE `nickname` = '%s' LIMIT 1",
dbesc($owner_id) dbesc($owner_id)
); );
if(count($r)) if(count($r)) {
$owner_id = $r[0]['uid']; $owner_id = $r[0]['uid'];
$owner_nick = $r[0]['nickname'];
}
} }
$r = q("SELECT * FROM `contact` WHERE `self` = 1 AND `uid` = %d LIMIT 1", $r = q("SELECT * FROM `contact` WHERE `self` = 1 AND `uid` = %d LIMIT 1",
@ -42,12 +51,12 @@ function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) {
else else
$gs = '<<>>' ; // Impossible to match $gs = '<<>>' ; // Impossible to match
$sql_extra = sprintf( $sql_extra = sprintf("
" AND ( `allow_cid` = '' OR `allow_cid` REGEXP '<%d>' ) AND ( `allow_cid` = '' OR `allow_cid` REGEXP '<%d>' )
AND ( `deny_cid` = '' OR NOT `deny_cid` REGEXP '<%d>' ) AND ( `deny_cid` = '' OR NOT `deny_cid` REGEXP '<%d>' )
AND ( `allow_gid` = '' OR `allow_gid` REGEXP '%s' ) AND ( `allow_gid` = '' OR `allow_gid` REGEXP '%s' )
AND ( `deny_gid` = '' OR NOT `deny_gid` REGEXP '%s') ", AND ( `deny_gid` = '' OR NOT `deny_gid` REGEXP '%s')
",
intval($contact['id']), intval($contact['id']),
intval($contact['id']), intval($contact['id']),
dbesc($gs), dbesc($gs),
@ -88,7 +97,7 @@ function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) {
$atom .= replace_macros($feed_template, array( $atom .= replace_macros($feed_template, array(
'$feed_id' => xmlify($a->get_baseurl()), '$feed_id' => xmlify($a->get_baseurl() . '/profile/' . $owner_nick),
'$feed_title' => xmlify($owner['name']), '$feed_title' => xmlify($owner['name']),
'$feed_updated' => xmlify(datetime_convert('UTC', 'UTC', $updated . '+00:00' , 'Y-m-d\TH:i:s\Z')) , '$feed_updated' => xmlify(datetime_convert('UTC', 'UTC', $updated . '+00:00' , 'Y-m-d\TH:i:s\Z')) ,
'$name' => xmlify($owner['name']), '$name' => xmlify($owner['name']),
@ -96,12 +105,24 @@ function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) {
'$photo' => xmlify($owner['photo']), '$photo' => xmlify($owner['photo']),
'$thumb' => xmlify($owner['thumb']), '$thumb' => xmlify($owner['thumb']),
'$picdate' => xmlify(datetime_convert('UTC','UTC',$owner['avatar-date'] . '+00:00' , 'Y-m-d\TH:i:s\Z')) , '$picdate' => xmlify(datetime_convert('UTC','UTC',$owner['avatar-date'] . '+00:00' , 'Y-m-d\TH:i:s\Z')) ,
'$uridate' => xmlify(datetime_convert('UTC','UTC',$owner['uri-date'] . '+00:00' , 'Y-m-d\TH:i:s\Z')) , '$uridate' => xmlify(datetime_convert('UTC','UTC',$owner['uri-date'] . '+00:00' , 'Y-m-d\TH:i:s\Z')) ,
'$namdate' => xmlify(datetime_convert('UTC','UTC',$owner['name-date'] . '+00:00' , 'Y-m-d\TH:i:s\Z')) '$namdate' => xmlify(datetime_convert('UTC','UTC',$owner['name-date'] . '+00:00' , 'Y-m-d\TH:i:s\Z'))
)); ));
foreach($items as $item) { foreach($items as $item) {
// public feeds get html, our own nodes use bbcode
if($dfrn_id == '*') {
$item['body'] = bbcode($item['body']);
$type = 'html';
}
else {
$type = 'text';
}
if($item['deleted']) { if($item['deleted']) {
$atom .= replace_macros($tomb_template, array( $atom .= replace_macros($tomb_template, array(
'$id' => xmlify($item['uri']), '$id' => xmlify($item['uri']),
@ -109,6 +130,9 @@ function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) {
)); ));
} }
else { else {
$verb = construct_verb($item);
$actobj = construct_activity($item);
if($item['parent'] == $item['id']) { if($item['parent'] == $item['id']) {
$atom .= replace_macros($item_template, array( $atom .= replace_macros($item_template, array(
'$name' => xmlify($item['name']), '$name' => xmlify($item['name']),
@ -122,7 +146,10 @@ function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) {
'$published' => xmlify(datetime_convert('UTC', 'UTC', $item['created'] . '+00:00' , 'Y-m-d\TH:i:s\Z')), '$published' => xmlify(datetime_convert('UTC', 'UTC', $item['created'] . '+00:00' , 'Y-m-d\TH:i:s\Z')),
'$updated' => xmlify(datetime_convert('UTC', 'UTC', $item['edited'] . '+00:00' , 'Y-m-d\TH:i:s\Z')), '$updated' => xmlify(datetime_convert('UTC', 'UTC', $item['edited'] . '+00:00' , 'Y-m-d\TH:i:s\Z')),
'$location' => xmlify($item['location']), '$location' => xmlify($item['location']),
'$type' => $type,
'$content' => xmlify($item['body']), '$content' => xmlify($item['body']),
'$verb' => xmlify($verb),
'$actobj' => $actobj, // do not xmlify
'$comment_allow' => (($item['last-child'] && strlen($contact['dfrn-id'])) ? 1 : 0) '$comment_allow' => (($item['last-child'] && strlen($contact['dfrn-id'])) ? 1 : 0)
)); ));
} }
@ -135,7 +162,10 @@ function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) {
'$title' => xmlify($item['title']), '$title' => xmlify($item['title']),
'$published' => xmlify(datetime_convert('UTC', 'UTC', $item['created'] . '+00:00' , 'Y-m-d\TH:i:s\Z')), '$published' => xmlify(datetime_convert('UTC', 'UTC', $item['created'] . '+00:00' , 'Y-m-d\TH:i:s\Z')),
'$updated' => xmlify(datetime_convert('UTC', 'UTC', $item['edited'] . '+00:00' , 'Y-m-d\TH:i:s\Z')), '$updated' => xmlify(datetime_convert('UTC', 'UTC', $item['edited'] . '+00:00' , 'Y-m-d\TH:i:s\Z')),
'$type' => $type,
'$content' =>xmlify($item['body']), '$content' =>xmlify($item['body']),
'$verb' => xmlify($verb),
'$actobj' => $actobj, // do not xmlify
'$parent_id' => xmlify($item['parent-uri']), '$parent_id' => xmlify($item['parent-uri']),
'$comment_allow' => (($item['last-child']) ? 1 : 0) '$comment_allow' => (($item['last-child']) ? 1 : 0)
)); ));
@ -148,16 +178,42 @@ function get_feed_for(&$a,$dfrn_id,$owner_id,$last_update) {
} }
function construct_verb($item) {
if($item['verb'])
return $item['verb'];
return ACTIVITY_POST;
}
function construct_activity($item) {
if($item['type'] == 'activity') {
}
return '';
}
function get_atom_elements($item) { function get_atom_elements($item) {
require_once('library/HTMLPurifier.auto.php');
require_once('include/html2bbcode.php');
$res = array(); $res = array();
$raw_author = $item->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10,'author');
if($raw_author) {
if($raw_author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link'][0]['attribs']['']['rel'] == 'photo')
$res['author-avatar'] = unxmlify($raw_author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link'][0]['attribs']['']['href']);
}
$author = $item->get_author(); $author = $item->get_author();
$res['author-name'] = unxmlify($author->get_name()); $res['author-name'] = unxmlify($author->get_name());
$res['author-link'] = unxmlify($author->get_link()); $res['author-link'] = unxmlify($author->get_link());
$res['author-avatar'] = unxmlify($author->get_avatar()); if(! $res['author-avatar'])
$res['author-avatar'] = unxmlify($author->get_avatar());
$res['uri'] = unxmlify($item->get_id()); $res['uri'] = unxmlify($item->get_id());
$res['title'] = unxmlify($item->get_title()); $res['title'] = unxmlify($item->get_title());
$res['body'] = unxmlify($item->get_content()); $res['body'] = unxmlify($item->get_content());
@ -166,6 +222,36 @@ function get_atom_elements($item) {
if($maxlen && (strlen($res['body']) > $maxlen)) if($maxlen && (strlen($res['body']) > $maxlen))
$res['body'] = substr($res['body'],0, $maxlen); $res['body'] = substr($res['body'],0, $maxlen);
// It isn't certain at this point whether our content is plaintext or html and we'd be foolish to trust
// the content type. Our own network only emits text normally, though it might have been converted to
// html if we used a pubsubhubbub transport. But if we see even one html open tag in our text, we will
// have to assume it is all html and needs to be purified.
// It doesn't matter all that much security wise - because before this content is used anywhere, we are
// going to escape any tags we find regardless, but this lets us import a limited subset of html from
// the wild, by sanitising it and converting supported tags to bbcode before we rip out any remaining
// html.
if(strpos($res['body'],'<')) {
$res['body'] = preg_replace('#<object[^>]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s',
'[youtube]$1[/youtube]', $res['body']);
$config = HTMLPurifier_Config::createDefault();
$config->set('Core.DefinitionCache', null);
// we shouldn't need a whitelist, because the bbcode converter
// will strip out any unsupported tags.
// $config->set('HTML.Allowed', 'p,b,a[href],i');
$purifier = new HTMLPurifier($config);
$res['body'] = $purifier->purify($res['body']);
}
$res['body'] = html2bbcode($res['body']);
$allow = $item->get_item_tags(NAMESPACE_DFRN,'comment-allow'); $allow = $item->get_item_tags(NAMESPACE_DFRN,'comment-allow');
if($allow && $allow[0]['data'] == 1) if($allow && $allow[0]['data'] == 1)
$res['last-child'] = 1; $res['last-child'] = 1;
@ -186,18 +272,37 @@ function get_atom_elements($item) {
$res['edited'] = unxmlify($rawcreated[0]['data']); $res['edited'] = unxmlify($rawcreated[0]['data']);
$rawowner = $item->get_item_tags(NAMESPACE_DFRN, 'owner'); $rawowner = $item->get_item_tags(NAMESPACE_DFRN, 'owner');
if($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data']) if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'])
$res['owner-name'] = unxmlify($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']);
elseif($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data'])
$res['owner-name'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data']); $res['owner-name'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data']);
if($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data']) if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'])
$res['owner-link'] = unxmlify($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']);
elseif($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data'])
$res['owner-link'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data']); $res['owner-link'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data']);
if($rawowner[0]['child'][NAMESPACE_DFRN]['avatar'][0]['data'])
if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link'][0]['attribs']['']['rel'] == 'photo')
$res['owner-avatar'] = unxmlify($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link'][0]['attribs']['']['href']);
elseif($rawowner[0]['child'][NAMESPACE_DFRN]['avatar'][0]['data'])
$res['owner-avatar'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['avatar'][0]['data']); $res['owner-avatar'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['avatar'][0]['data']);
$rawverb = $item->get_item_tags(NAMESPACE_ACTIVITY, 'verb');
// select between supported verbs
if($rawverb)
$res['verb'] = unxmlify($rawverb[0]['data']);
$rawobj = $item->get_item_tags(NAMESPACE_ACTIVITY, 'object');
if($rawobj) {
$res['object-type'] = $rawobj[0]['object-type'][0]['data'];
$res['object'] = $rawobj[0];
}
return $res; return $res;
} }
function post_remote($a,$arr) { function post_remote($a,$arr) {
//print_r($arr);
if(! x($arr,'type')) if(! x($arr,'type'))
$arr['type'] = 'remote'; $arr['type'] = 'remote';
@ -218,8 +323,12 @@ function post_remote($a,$arr) {
$arr['visible'] = 1; $arr['visible'] = 1;
$arr['deleted'] = 0; $arr['deleted'] = 0;
$arr['parent-uri'] = notags(trim($arr['parent-uri'])); $arr['parent-uri'] = notags(trim($arr['parent-uri']));
$arr['verb'] = notags(trim($arr['verb']));
$arr['object-type'] = notags(trim($arr['object-type']));
$arr['object'] = trim($arr['object']);
$parent_id = 0; $parent_id = 0;
$parent_missing = false;
dbesc_array($arr); dbesc_array($arr);
@ -237,15 +346,28 @@ function post_remote($a,$arr) {
if(count($r)) if(count($r))
$parent_id = $r[0]['id']; $parent_id = $r[0]['id'];
else { else {
// if parent is missing, what do we do? $parent_missing = true;
} }
$r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", $r = q("SELECT `id` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1",
$arr['uri'], $arr['uri'], // already dbesc'd
intval($arr['uid']) intval($arr['uid'])
); );
if(count($r)) if(count($r))
$current_post = $r[0]['id']; $current_post = $r[0]['id'];
else
return 0;
if($parent_missing) {
// perhaps the parent was deleted, but in any case, this thread is dead
// and unfortunately our brand new item now has to be destroyed
q("DELETE FROM `item` WHERE `id` = %d LIMIT 1",
intval($current_post)
);
return 0;
}
$r = q("UPDATE `item` SET `parent` = %d WHERE `id` = %d LIMIT 1", $r = q("UPDATE `item` SET `parent` = %d WHERE `id` = %d LIMIT 1",
intval($parent_id), intval($parent_id),

View file

@ -1,6 +1,6 @@
<?php <?php
$debugging = false; $debugging = true;
require_once("boot.php"); require_once("boot.php");
@ -19,9 +19,6 @@
$a->set_baseurl(get_config('system','url')); $a->set_baseurl(get_config('system','url'));
$baseurl = $argv[1];
$a->set_baseurl($argv[1]);
$cmd = $argv[1]; $cmd = $argv[1];
switch($cmd) { switch($cmd) {
@ -266,10 +263,10 @@
if($rr['self']) if($rr['self'])
continue; continue;
if((! strlen($rr['dfrn-id'])) || ($rr['duplex'] && ! strlen($rr['issued-id']))) if((! strlen($rr['dfrn-id'])) && (! $rr['duplex']))
continue; continue;
$idtosend = (($rr['duplex']) ? $rr['issued-id'] : $rr['dfrn-id']); $idtosend = (($rr['dfrn-id']) ? $rr['dfrn-id'] : $rr['issued-id']);
$url = $rr['notify'] . '?dfrn_id=' . $idtosend; $url = $rr['notify'] . '?dfrn_id=' . $idtosend;
@ -291,7 +288,7 @@
$challenge = hex2bin($res->challenge); $challenge = hex2bin($res->challenge);
$final_dfrn_id = ''; $final_dfrn_id = '';
if($rr['duplex']) { if($rr['duplex'] && strlen($rr['prvkey'])) {
openssl_private_decrypt($sent_dfrn_id,$final_dfrn_id,$rr['prvkey']); openssl_private_decrypt($sent_dfrn_id,$final_dfrn_id,$rr['prvkey']);
openssl_private_decrypt($challenge,$postvars['challenge'],$rr['prvkey']); openssl_private_decrypt($challenge,$postvars['challenge'],$rr['prvkey']);
} }
@ -301,18 +298,14 @@
} }
$final_dfrn_id = substr($final_dfrn_id, 0, strpos($final_dfrn_id, '.')); $final_dfrn_id = substr($final_dfrn_id, 0, strpos($final_dfrn_id, '.'));
if(($final_dfrn_id != $rr['dfrn-id']) || (($rr['duplex']) && ($final_dfrn_id != $rr['issued-id']))) { if($final_dfrn_id != $idtosend) {
// did not decode properly - cannot trust this site // did not decode properly - cannot trust this site
continue; continue;
} }
$postvars['dfrn_id'] = (($duplex) ? $rr['issued-id'] : $rr['dfrn-id']); $postvars['dfrn_id'] = $idtosend;
if($cmd == 'mail') { if((($rr['rel'] == DIRECTION_OUT) || ($rr['rel'] == DIRECTION_BOTH)) && (! $rr['blocked']) && (! $rr['readonly'])) {
$postvars['data'] = $atom;
}
elseif(((strlen($rr['dfrn-id'])) || (($rr['duplex']) && (strlen($rr['issued-id']))))
&& (! ($rr['blocked']) || ($rr['readonly']))) {
$postvars['data'] = $atom; $postvars['data'] = $atom;
} }
else { else {

View file

@ -15,6 +15,8 @@
require_once('simplepie/simplepie.inc'); require_once('simplepie/simplepie.inc');
require_once('include/items.php'); require_once('include/items.php');
require_once('include/Contact.php');
$a->set_baseurl(get_config('system','url')); $a->set_baseurl(get_config('system','url'));
$contacts = q("SELECT * FROM `contact` $contacts = q("SELECT * FROM `contact`
@ -73,7 +75,7 @@
? datetime_convert('UTC','UTC','now - 30 days','Y-m-d\TH:i:s\Z') ? datetime_convert('UTC','UTC','now - 30 days','Y-m-d\TH:i:s\Z')
: datetime_convert('UTC','UTC',$contact['last-update'],'Y-m-d\TH:i:s\Z')); : datetime_convert('UTC','UTC',$contact['last-update'],'Y-m-d\TH:i:s\Z'));
$idtosend = (($contact['duplex']) ? $contact['issued-id'] : $contact['dfrn-id']); $idtosend = (($contact['dfrn-id']) ? $contact['dfrn-id'] : $contact['issued-id']);
$url = $contact['poll'] . '?dfrn_id=' . $idtosend . '&type=data&last_update=' . $last_update ; $url = $contact['poll'] . '?dfrn_id=' . $idtosend . '&type=data&last_update=' . $last_update ;
@ -87,9 +89,15 @@ echo "XML: " . $xml;
$res = simplexml_load_string($xml); $res = simplexml_load_string($xml);
if(intval($res->status) == 1)
mark_for_death($contact);
if((intval($res->status) != 0) || (! strlen($res->challenge)) || (! strlen($res->dfrn_id))) if((intval($res->status) != 0) || (! strlen($res->challenge)) || (! strlen($res->dfrn_id)))
continue; continue;
if($contact['term-date'] != '0000-00-00 00:00:00')
unmark_for_death($contact);
$postvars = array(); $postvars = array();
$sent_dfrn_id = hex2bin($res->dfrn_id); $sent_dfrn_id = hex2bin($res->dfrn_id);
@ -97,7 +105,7 @@ echo "XML: " . $xml;
$final_dfrn_id = ''; $final_dfrn_id = '';
if($contact['duplex']) { if(($contact['duplex']) && strlen($contact['prvkey'])) {
openssl_private_decrypt($sent_dfrn_id,$final_dfrn_id,$contact['prvkey']); openssl_private_decrypt($sent_dfrn_id,$final_dfrn_id,$contact['prvkey']);
openssl_private_decrypt($challenge,$postvars['challenge'],$contact['prvkey']); openssl_private_decrypt($challenge,$postvars['challenge'],$contact['prvkey']);
@ -108,13 +116,12 @@ echo "XML: " . $xml;
} }
$final_dfrn_id = substr($final_dfrn_id, 0, strpos($final_dfrn_id, '.')); $final_dfrn_id = substr($final_dfrn_id, 0, strpos($final_dfrn_id, '.'));
if(($final_dfrn_id != $contact['dfrn-id']) if($final_dfrn_id != $idtosend)
|| (($contact['duplex']) && ($final_dfrn_id != $contact['issued-id']))) {
// did not decode properly - cannot trust this site // did not decode properly - cannot trust this site
continue; continue;
} }
$postvars['dfrn_id'] = (($contact['duplex']) ? $contact['issued-id'] : $contact['dfrn-id']); $postvars['dfrn_id'] = $idtosend;
$xml = post_url($contact['poll'],$postvars); $xml = post_url($contact['poll'],$postvars);

View file

@ -42,23 +42,10 @@ if(strlen($a->module)) {
} }
else { else {
header($_SERVER["SERVER_PROTOCOL"] . ' 404 ' . t('Not Found')); header($_SERVER["SERVER_PROTOCOL"] . ' 404 ' . t('Not Found'));
notice( t('Page not found' ) . EOL); notice( t('Page not found.' ) . EOL);
} }
} }
// invoke module functions
// Important: Modules normally do not emit content, unless you need it for debugging.
// The module_init, module_post, and module_afterpost functions process URL parameters and POST processing.
// The module_content function returns content text to this file where it is included on the page.
// Modules emitting XML/Atom, etc. should do so idirectly and promptly exit before the HTML page can be rendered.
// "Most" HTML resides in the view directory as text templates with macro substitution.
// They look like HTML with PHP variables but only a couple pass through the PHP processor - those with .php extensions.
// The macro substitution is defined per page for the .tpl files.
// Information transfer between functions can be accomplished via the App session '$a' and its related variables.
// x() queries both a variable's existence and that it is "non-zero" or "non-empty" depending on how it is called.
// q() is the SQL query form. All string (%s) variables MUST be passed through dbesc().
// All int values MUST be cast to integer using intval();
if($a->module_loaded) { if($a->module_loaded) {
$a->page['page_title'] = $a->module; $a->page['page_title'] = $a->module;
if(function_exists($a->module . '_init')) { if(function_exists($a->module . '_init')) {
@ -85,6 +72,10 @@ if($a->module_loaded) {
} }
if(stristr($_SESSION['sysmsg'], t('Permission denied'))) {
header($_SERVER["SERVER_PROTOCOL"] . ' 403 ' . t('Permission denied.'));
}
// report anything important happening // report anything important happening
if(x($_SESSION,'sysmsg')) { if(x($_SESSION,'sysmsg')) {
@ -93,11 +84,6 @@ if(x($_SESSION,'sysmsg')) {
unset($_SESSION['sysmsg']); unset($_SESSION['sysmsg']);
} }
if(stristr($_SESSION['sysmsg'], t('Permission denied'))) {
header($_SERVER["SERVER_PROTOCOL"] . ' 403 ' . t('Permission denied.'));
}
// Feel free to comment out this line on production sites. // Feel free to comment out this line on production sites.
$a->page['content'] .= $debug_text; $a->page['content'] .= $debug_text;

View file

@ -0,0 +1,11 @@
<?php
/**
* This is a stub include that automatically configures the include path.
*/
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
require_once 'HTMLPurifier/Bootstrap.php';
require_once 'HTMLPurifier.autoload.php';
// vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
<?php
/**
* @file
* Convenience file that registers autoload handler for HTML Purifier.
*/
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
// We need unregister for our pre-registering functionality
HTMLPurifier_Bootstrap::registerAutoload();
if (function_exists('__autoload')) {
// Be polite and ensure that userland autoload gets retained
spl_autoload_register('__autoload');
}
} elseif (!function_exists('__autoload')) {
function __autoload($class) {
return HTMLPurifier_Bootstrap::autoload($class);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,23 @@
<?php
/**
* @file
* Defines a function wrapper for HTML Purifier for quick use.
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
*/
/**
* Purify HTML.
* @param $html String HTML to purify
* @param $config Configuration to use, can be any value accepted by
* HTMLPurifier_Config::create()
*/
function HTMLPurifier($html, $config = null) {
static $purifier = false;
if (!$purifier) {
$purifier = new HTMLPurifier();
}
return $purifier->purify($html, $config);
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,210 @@
<?php
/**
* @file
* This file was auto-generated by generate-includes.php and includes all of
* the core files required by HTML Purifier. Use this if performance is a
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.1.1
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
* because 'require' not 'require_once' is used.
*
* @warning
* This file requires that the include path contains the HTML Purifier
* library directory; this is not auto-set.
*/
require 'HTMLPurifier.php';
require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php';
require 'HTMLPurifier/AttrTypes.php';
require 'HTMLPurifier/AttrValidator.php';
require 'HTMLPurifier/Bootstrap.php';
require 'HTMLPurifier/Definition.php';
require 'HTMLPurifier/CSSDefinition.php';
require 'HTMLPurifier/ChildDef.php';
require 'HTMLPurifier/Config.php';
require 'HTMLPurifier/ConfigSchema.php';
require 'HTMLPurifier/ContentSets.php';
require 'HTMLPurifier/Context.php';
require 'HTMLPurifier/DefinitionCache.php';
require 'HTMLPurifier/DefinitionCacheFactory.php';
require 'HTMLPurifier/Doctype.php';
require 'HTMLPurifier/DoctypeRegistry.php';
require 'HTMLPurifier/ElementDef.php';
require 'HTMLPurifier/Encoder.php';
require 'HTMLPurifier/EntityLookup.php';
require 'HTMLPurifier/EntityParser.php';
require 'HTMLPurifier/ErrorCollector.php';
require 'HTMLPurifier/ErrorStruct.php';
require 'HTMLPurifier/Exception.php';
require 'HTMLPurifier/Filter.php';
require 'HTMLPurifier/Generator.php';
require 'HTMLPurifier/HTMLDefinition.php';
require 'HTMLPurifier/HTMLModule.php';
require 'HTMLPurifier/HTMLModuleManager.php';
require 'HTMLPurifier/IDAccumulator.php';
require 'HTMLPurifier/Injector.php';
require 'HTMLPurifier/Language.php';
require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php';
require 'HTMLPurifier/Strategy.php';
require 'HTMLPurifier/StringHash.php';
require 'HTMLPurifier/StringHashParser.php';
require 'HTMLPurifier/TagTransform.php';
require 'HTMLPurifier/Token.php';
require 'HTMLPurifier/TokenFactory.php';
require 'HTMLPurifier/URI.php';
require 'HTMLPurifier/URIDefinition.php';
require 'HTMLPurifier/URIFilter.php';
require 'HTMLPurifier/URIParser.php';
require 'HTMLPurifier/URIScheme.php';
require 'HTMLPurifier/URISchemeRegistry.php';
require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php';
require 'HTMLPurifier/AttrDef/Switch.php';
require 'HTMLPurifier/AttrDef/Text.php';
require 'HTMLPurifier/AttrDef/URI.php';
require 'HTMLPurifier/AttrDef/CSS/Number.php';
require 'HTMLPurifier/AttrDef/CSS/AlphaValue.php';
require 'HTMLPurifier/AttrDef/CSS/Background.php';
require 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require 'HTMLPurifier/AttrDef/CSS/Border.php';
require 'HTMLPurifier/AttrDef/CSS/Color.php';
require 'HTMLPurifier/AttrDef/CSS/Composite.php';
require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
require 'HTMLPurifier/AttrDef/CSS/Multiple.php';
require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require 'HTMLPurifier/AttrDef/CSS/URI.php';
require 'HTMLPurifier/AttrDef/HTML/Bool.php';
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require 'HTMLPurifier/AttrDef/HTML/Class.php';
require 'HTMLPurifier/AttrDef/HTML/Color.php';
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require 'HTMLPurifier/AttrDef/HTML/ID.php';
require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
require 'HTMLPurifier/AttrDef/HTML/Length.php';
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
require 'HTMLPurifier/AttrDef/URI/Email.php';
require 'HTMLPurifier/AttrDef/URI/Host.php';
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
require 'HTMLPurifier/AttrDef/URI/IPv6.php';
require 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
require 'HTMLPurifier/AttrTransform/Background.php';
require 'HTMLPurifier/AttrTransform/BdoDir.php';
require 'HTMLPurifier/AttrTransform/BgColor.php';
require 'HTMLPurifier/AttrTransform/BoolToCSS.php';
require 'HTMLPurifier/AttrTransform/Border.php';
require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
require 'HTMLPurifier/AttrTransform/ImgRequired.php';
require 'HTMLPurifier/AttrTransform/ImgSpace.php';
require 'HTMLPurifier/AttrTransform/Input.php';
require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php';
require 'HTMLPurifier/AttrTransform/NameSync.php';
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
require 'HTMLPurifier/ChildDef/Table.php';
require 'HTMLPurifier/DefinitionCache/Decorator.php';
require 'HTMLPurifier/DefinitionCache/Null.php';
require 'HTMLPurifier/DefinitionCache/Serializer.php';
require 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require 'HTMLPurifier/HTMLModule/Bdo.php';
require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php';
require 'HTMLPurifier/HTMLModule/Name.php';
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php';
require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
require 'HTMLPurifier/HTMLModule/SafeObject.php';
require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Tidy/Name.php';
require 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require 'HTMLPurifier/HTMLModule/Tidy/Strict.php';
require 'HTMLPurifier/HTMLModule/Tidy/Transitional.php';
require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require 'HTMLPurifier/Injector/AutoParagraph.php';
require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php';
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
require 'HTMLPurifier/Strategy/Composite.php';
require 'HTMLPurifier/Strategy/Core.php';
require 'HTMLPurifier/Strategy/FixNesting.php';
require 'HTMLPurifier/Strategy/MakeWellFormed.php';
require 'HTMLPurifier/Strategy/RemoveForeignElements.php';
require 'HTMLPurifier/Strategy/ValidateAttributes.php';
require 'HTMLPurifier/TagTransform/Font.php';
require 'HTMLPurifier/TagTransform/Simple.php';
require 'HTMLPurifier/Token/Comment.php';
require 'HTMLPurifier/Token/Tag.php';
require 'HTMLPurifier/Token/Empty.php';
require 'HTMLPurifier/Token/End.php';
require 'HTMLPurifier/Token/Start.php';
require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';
require 'HTMLPurifier/URIScheme/mailto.php';
require 'HTMLPurifier/URIScheme/news.php';
require 'HTMLPurifier/URIScheme/nntp.php';
require 'HTMLPurifier/VarParser/Flexible.php';
require 'HTMLPurifier/VarParser/Native.php';

View file

@ -0,0 +1,30 @@
<?php
/**
* @file
* Emulation layer for code that used kses(), substituting in HTML Purifier.
*/
require_once dirname(__FILE__) . '/HTMLPurifier.auto.php';
function kses($string, $allowed_html, $allowed_protocols = null) {
$config = HTMLPurifier_Config::createDefault();
$allowed_elements = array();
$allowed_attributes = array();
foreach ($allowed_html as $element => $attributes) {
$allowed_elements[$element] = true;
foreach ($attributes as $attribute => $x) {
$allowed_attributes["$element.$attribute"] = true;
}
}
$config->set('HTML.AllowedElements', $allowed_elements);
$config->set('HTML.AllowedAttributes', $allowed_attributes);
$allowed_schemes = array();
if ($allowed_protocols !== null) {
$config->set('URI.AllowedSchemes', $allowed_protocols);
}
$purifier = new HTMLPurifier($config);
return $purifier->purify($string);
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
<?php
/**
* @file
* Convenience stub file that adds HTML Purifier's library file to the path
* without any other side-effects.
*/
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
// vim: et sw=4 sts=4

237
library/HTMLPurifier.php Normal file
View file

@ -0,0 +1,237 @@
<?php
/*! @mainpage
*
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
* HTML and rigorously test, validate and filter it into a version that
* is safe for output onto webpages. It achieves this by:
*
* -# Lexing (parsing into tokens) the document,
* -# Executing various strategies on the tokens:
* -# Removing all elements not in the whitelist,
* -# Making the tokens well-formed,
* -# Fixing the nesting of the nodes, and
* -# Validating attributes of the nodes; and
* -# Generating HTML from the purified tokens.
*
* However, most users will only need to interface with the HTMLPurifier
* and HTMLPurifier_Config.
*/
/*
HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*
* @note There are several points in which configuration can be specified
* for HTML Purifier. The precedence of these (from lowest to
* highest) is as follows:
* -# Instance: new HTMLPurifier($config)
* -# Invocation: purify($html, $config)
* These configurations are entirely independent of each other and
* are *not* merged (this behavior may change in the future).
*
* @todo We need an easier way to inject strategies using the configuration
* object.
*/
class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.1.1';
/** Constant with version of HTML Purifier */
const VERSION = '4.1.1';
/** Global configuration object */
public $config;
/** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
private $filters = array();
/** Single instance of HTML Purifier */
private static $instance;
protected $strategy, $generator;
/**
* Resultant HTMLPurifier_Context of last run purification. Is an array
* of contexts if the last called method was purifyArray().
*/
public $context;
/**
* Initializes the purifier.
* @param $config Optional HTMLPurifier_Config object for all instances of
* the purifier, if omitted, a default configuration is
* supplied (which can be overridden on a per-use basis).
* The parameter can also be any type that
* HTMLPurifier_Config::create() supports.
*/
public function __construct($config = null) {
$this->config = HTMLPurifier_Config::create($config);
$this->strategy = new HTMLPurifier_Strategy_Core();
}
/**
* Adds a filter to process the output. First come first serve
* @param $filter HTMLPurifier_Filter object
*/
public function addFilter($filter) {
trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
$this->filters[] = $filter;
}
/**
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
*
* @param $html String of HTML to purify
* @param $config HTMLPurifier_Config object for this operation, if omitted,
* defaults to the config object specified during this
* object's construction. The parameter can also be any type
* that HTMLPurifier_Config::create() supports.
* @return Purified HTML
*/
public function purify($html, $config = null) {
// :TODO: make the config merge in, instead of replace
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
// implementation is partially environment dependant, partially
// configuration dependant
$lexer = HTMLPurifier_Lexer::create($config);
$context = new HTMLPurifier_Context();
// setup HTML generator
$this->generator = new HTMLPurifier_Generator($config, $context);
$context->register('Generator', $this->generator);
// set up global context variables
if ($config->get('Core.CollectErrors')) {
// may get moved out if other facilities use it
$language_factory = HTMLPurifier_LanguageFactory::instance();
$language = $language_factory->create($config, $context);
$context->register('Locale', $language);
$error_collector = new HTMLPurifier_ErrorCollector($context);
$context->register('ErrorCollector', $error_collector);
}
// setup id_accumulator context, necessary due to the fact that
// AttrValidator can be called from many places
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
// setup filters
$filter_flags = $config->getBatch('Filter');
$custom_filters = $filter_flags['Custom'];
unset($filter_flags['Custom']);
$filters = array();
foreach ($filter_flags as $filter => $flag) {
if (!$flag) continue;
if (strpos($filter, '.') !== false) continue;
$class = "HTMLPurifier_Filter_$filter";
$filters[] = new $class;
}
foreach ($custom_filters as $filter) {
// maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
$filters[] = $filter;
}
$filters = array_merge($filters, $this->filters);
// maybe prepare(), but later
for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
$html = $filters[$i]->preFilter($html, $config, $context);
}
// purified HTML
$html =
$this->generator->generateFromTokens(
// list of tokens
$this->strategy->execute(
// list of un-purified tokens
$lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
),
$config, $context
)
);
for ($i = $filter_size - 1; $i >= 0; $i--) {
$html = $filters[$i]->postFilter($html, $config, $context);
}
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
$this->context =& $context;
return $html;
}
/**
* Filters an array of HTML snippets
* @param $config Optional HTMLPurifier_Config object for this operation.
* See HTMLPurifier::purify() for more details.
* @return Array of purified HTML
*/
public function purifyArray($array_of_html, $config = null) {
$context_array = array();
foreach ($array_of_html as $key => $html) {
$array_of_html[$key] = $this->purify($html, $config);
$context_array[$key] = $this->context;
}
$this->context = $context_array;
return $array_of_html;
}
/**
* Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with, or HTMLPurifier_Config
* instance to configure the generated version with.
*/
public static function instance($prototype = null) {
if (!self::$instance || $prototype) {
if ($prototype instanceof HTMLPurifier) {
self::$instance = $prototype;
} elseif ($prototype) {
self::$instance = new HTMLPurifier($prototype);
} else {
self::$instance = new HTMLPurifier();
}
}
return self::$instance;
}
/**
* @note Backwards compatibility, see instance()
*/
public static function getInstance($prototype = null) {
return HTMLPurifier::instance($prototype);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,204 @@
<?php
/**
* @file
* This file was auto-generated by generate-includes.php and includes all of
* the core files required by HTML Purifier. This is a convenience stub that
* includes all files using dirname(__FILE__) and require_once. PLEASE DO NOT
* EDIT THIS FILE, changes will be overwritten the next time the script is run.
*
* Changes to include_path are not necessary.
*/
$__dir = dirname(__FILE__);
require_once $__dir . '/HTMLPurifier.php';
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
require_once $__dir . '/HTMLPurifier/AttrDef.php';
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
require_once $__dir . '/HTMLPurifier/AttrTypes.php';
require_once $__dir . '/HTMLPurifier/AttrValidator.php';
require_once $__dir . '/HTMLPurifier/Bootstrap.php';
require_once $__dir . '/HTMLPurifier/Definition.php';
require_once $__dir . '/HTMLPurifier/CSSDefinition.php';
require_once $__dir . '/HTMLPurifier/ChildDef.php';
require_once $__dir . '/HTMLPurifier/Config.php';
require_once $__dir . '/HTMLPurifier/ConfigSchema.php';
require_once $__dir . '/HTMLPurifier/ContentSets.php';
require_once $__dir . '/HTMLPurifier/Context.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
require_once $__dir . '/HTMLPurifier/Doctype.php';
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
require_once $__dir . '/HTMLPurifier/ElementDef.php';
require_once $__dir . '/HTMLPurifier/Encoder.php';
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
require_once $__dir . '/HTMLPurifier/EntityParser.php';
require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
require_once $__dir . '/HTMLPurifier/ErrorStruct.php';
require_once $__dir . '/HTMLPurifier/Exception.php';
require_once $__dir . '/HTMLPurifier/Filter.php';
require_once $__dir . '/HTMLPurifier/Generator.php';
require_once $__dir . '/HTMLPurifier/HTMLDefinition.php';
require_once $__dir . '/HTMLPurifier/HTMLModule.php';
require_once $__dir . '/HTMLPurifier/HTMLModuleManager.php';
require_once $__dir . '/HTMLPurifier/IDAccumulator.php';
require_once $__dir . '/HTMLPurifier/Injector.php';
require_once $__dir . '/HTMLPurifier/Language.php';
require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
require_once $__dir . '/HTMLPurifier/Length.php';
require_once $__dir . '/HTMLPurifier/Lexer.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/PropertyList.php';
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
require_once $__dir . '/HTMLPurifier/Strategy.php';
require_once $__dir . '/HTMLPurifier/StringHash.php';
require_once $__dir . '/HTMLPurifier/StringHashParser.php';
require_once $__dir . '/HTMLPurifier/TagTransform.php';
require_once $__dir . '/HTMLPurifier/Token.php';
require_once $__dir . '/HTMLPurifier/TokenFactory.php';
require_once $__dir . '/HTMLPurifier/URI.php';
require_once $__dir . '/HTMLPurifier/URIDefinition.php';
require_once $__dir . '/HTMLPurifier/URIFilter.php';
require_once $__dir . '/HTMLPurifier/URIParser.php';
require_once $__dir . '/HTMLPurifier/URIScheme.php';
require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';
require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Switch.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Text.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Number.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/AlphaValue.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Background.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Border.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Color.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Composite.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Multiple.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv6.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Background.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/BdoDir.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/BgColor.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/BoolToCSS.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Table.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Null.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Serializer.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Name.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Strict.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Transitional.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';
require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
require_once $__dir . '/HTMLPurifier/Strategy/Core.php';
require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';
require_once $__dir . '/HTMLPurifier/Strategy/MakeWellFormed.php';
require_once $__dir . '/HTMLPurifier/Strategy/RemoveForeignElements.php';
require_once $__dir . '/HTMLPurifier/Strategy/ValidateAttributes.php';
require_once $__dir . '/HTMLPurifier/TagTransform/Font.php';
require_once $__dir . '/HTMLPurifier/TagTransform/Simple.php';
require_once $__dir . '/HTMLPurifier/Token/Comment.php';
require_once $__dir . '/HTMLPurifier/Token/Tag.php';
require_once $__dir . '/HTMLPurifier/Token/Empty.php';
require_once $__dir . '/HTMLPurifier/Token/End.php';
require_once $__dir . '/HTMLPurifier/Token/Start.php';
require_once $__dir . '/HTMLPurifier/Token/Text.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';

View file

@ -0,0 +1,128 @@
<?php
/**
* Defines common attribute collections that modules reference
*/
class HTMLPurifier_AttrCollections
{
/**
* Associative array of attribute collections, indexed by name
*/
public $info = array();
/**
* Performs all expansions on internal data for use by other inclusions
* It also collects all attribute collection extensions from
* modules
* @param $attr_types HTMLPurifier_AttrTypes instance
* @param $modules Hash array of HTMLPurifier_HTMLModule members
*/
public function __construct($attr_types, $modules) {
// load extensions from the modules
foreach ($modules as $module) {
foreach ($module->attr_collections as $coll_i => $coll) {
if (!isset($this->info[$coll_i])) {
$this->info[$coll_i] = array();
}
foreach ($coll as $attr_i => $attr) {
if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
// merge in includes
$this->info[$coll_i][$attr_i] = array_merge(
$this->info[$coll_i][$attr_i], $attr);
continue;
}
$this->info[$coll_i][$attr_i] = $attr;
}
}
}
// perform internal expansions and inclusions
foreach ($this->info as $name => $attr) {
// merge attribute collections that include others
$this->performInclusions($this->info[$name]);
// replace string identifiers with actual attribute objects
$this->expandIdentifiers($this->info[$name], $attr_types);
}
}
/**
* Takes a reference to an attribute associative array and performs
* all inclusions specified by the zero index.
* @param &$attr Reference to attribute array
*/
public function performInclusions(&$attr) {
if (!isset($attr[0])) return;
$merge = $attr[0];
$seen = array(); // recursion guard
// loop through all the inclusions
for ($i = 0; isset($merge[$i]); $i++) {
if (isset($seen[$merge[$i]])) continue;
$seen[$merge[$i]] = true;
// foreach attribute of the inclusion, copy it over
if (!isset($this->info[$merge[$i]])) continue;
foreach ($this->info[$merge[$i]] as $key => $value) {
if (isset($attr[$key])) continue; // also catches more inclusions
$attr[$key] = $value;
}
if (isset($this->info[$merge[$i]][0])) {
// recursion
$merge = array_merge($merge, $this->info[$merge[$i]][0]);
}
}
unset($attr[0]);
}
/**
* Expands all string identifiers in an attribute array by replacing
* them with the appropriate values inside HTMLPurifier_AttrTypes
* @param &$attr Reference to attribute array
* @param $attr_types HTMLPurifier_AttrTypes instance
*/
public function expandIdentifiers(&$attr, $attr_types) {
// because foreach will process new elements we add, make sure we
// skip duplicates
$processed = array();
foreach ($attr as $def_i => $def) {
// skip inclusions
if ($def_i === 0) continue;
if (isset($processed[$def_i])) continue;
// determine whether or not attribute is required
if ($required = (strpos($def_i, '*') !== false)) {
// rename the definition
unset($attr[$def_i]);
$def_i = trim($def_i, '*');
$attr[$def_i] = $def;
}
$processed[$def_i] = true;
// if we've already got a literal object, move on
if (is_object($def)) {
// preserve previous required
$attr[$def_i]->required = ($required || $attr[$def_i]->required);
continue;
}
if ($def === false) {
unset($attr[$def_i]);
continue;
}
if ($t = $attr_types->get($def)) {
$attr[$def_i] = $t;
$attr[$def_i]->required = $required;
} else {
unset($attr[$def_i]);
}
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,123 @@
<?php
/**
* Base class for all validating attribute definitions.
*
* This family of classes forms the core for not only HTML attribute validation,
* but also any sort of string that needs to be validated or cleaned (which
* means CSS properties and composite definitions are defined here too).
* Besides defining (through code) what precisely makes the string valid,
* subclasses are also responsible for cleaning the code if possible.
*/
abstract class HTMLPurifier_AttrDef
{
/**
* Tells us whether or not an HTML attribute is minimized. Has no
* meaning in other contexts.
*/
public $minimized = false;
/**
* Tells us whether or not an HTML attribute is required. Has no
* meaning in other contexts
*/
public $required = false;
/**
* Validates and cleans passed string according to a definition.
*
* @param $string String to be validated and cleaned.
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_AttrContext object.
*/
abstract public function validate($string, $config, $context);
/**
* Convenience method that parses a string as if it were CDATA.
*
* This method process a string in the manner specified at
* <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
* leading and trailing whitespace, ignoring line feeds, and replacing
* carriage returns and tabs with spaces. While most useful for HTML
* attributes specified as CDATA, it can also be applied to most CSS
* values.
*
* @note This method is not entirely standards compliant, as trim() removes
* more types of whitespace than specified in the spec. In practice,
* this is rarely a problem, as those extra characters usually have
* already been removed by HTMLPurifier_Encoder.
*
* @warning This processing is inconsistent with XML's whitespace handling
* as specified by section 3.3.3 and referenced XHTML 1.0 section
* 4.7. However, note that we are NOT necessarily
* parsing XML, thus, this behavior may still be correct. We
* assume that newlines have been normalized.
*/
public function parseCDATA($string) {
$string = trim($string);
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
return $string;
}
/**
* Factory method for creating this class from a string.
* @param $string String construction info
* @return Created AttrDef object corresponding to $string
*/
public function make($string) {
// default implementation, return a flyweight of this object.
// If $string has an effect on the returned object (i.e. you
// need to overload this method), it is best
// to clone or instantiate new copies. (Instantiation is safer.)
return $this;
}
/**
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
* properly. THIS IS A HACK!
*/
protected function mungeRgb($string) {
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
/**
* Parses a possibly escaped CSS string and returns the "pure"
* version of it.
*/
protected function expandCSSEscape($string) {
// flexibly parse it
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] === '\\') {
$i++;
if ($i >= $c) {
$ret .= '\\';
break;
}
if (ctype_xdigit($string[$i])) {
$code = $string[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($string[$i])) break;
$code .= $string[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$ret .= $char;
if ($i < $c && trim($string[$i]) !== '') $i--;
continue;
}
if ($string[$i] === "\n") continue;
}
$ret .= $string[$i];
}
return $ret;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,87 @@
<?php
/**
* Validates the HTML attribute style, otherwise known as CSS.
* @note We don't implement the whole CSS specification, so it might be
* difficult to reuse this component in the context of validating
* actual stylesheet declarations.
* @note If we were really serious about validating the CSS, we would
* tokenize the styles and then parse the tokens. Obviously, we
* are not doing that. Doing that could seriously harm performance,
* but would make these components a lot more viable for a CSS
* filtering solution.
*/
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{
public function validate($css, $config, $context) {
$css = $this->parseCDATA($css);
$definition = $config->getCSSDefinition();
// we're going to break the spec and explode by semicolons.
// This is because semicolon rarely appears in escaped form
// Doing this is generally flaky but fast
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
// for details
$declarations = explode(';', $css);
$propvalues = array();
/**
* Name of the current CSS property being validated.
*/
$property = false;
$context->register('CurrentCSSProperty', $property);
foreach ($declarations as $declaration) {
if (!$declaration) continue;
if (!strpos($declaration, ':')) continue;
list($property, $value) = explode(':', $declaration, 2);
$property = trim($property);
$value = trim($value);
$ok = false;
do {
if (isset($definition->info[$property])) {
$ok = true;
break;
}
if (ctype_lower($property)) break;
$property = strtolower($property);
if (isset($definition->info[$property])) {
$ok = true;
break;
}
} while(0);
if (!$ok) continue;
// inefficient call, since the validator will do this again
if (strtolower(trim($value)) !== 'inherit') {
// inherit works for everything (but only on the base property)
$result = $definition->info[$property]->validate(
$value, $config, $context );
} else {
$result = 'inherit';
}
if ($result === false) continue;
$propvalues[$property] = $result;
}
$context->destroy('CurrentCSSProperty');
// procedure does not write the new CSS simultaneously, so it's
// slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now.
$new_declarations = '';
foreach ($propvalues as $prop => $value) {
$new_declarations .= "$prop:$value;";
}
return $new_declarations ? $new_declarations : false;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
<?php
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{
public function __construct() {
parent::__construct(false); // opacity is non-negative, but we will clamp it
}
public function validate($number, $config, $context) {
$result = parent::validate($number, $config, $context);
if ($result === false) return $result;
$float = (float) $result;
if ($float < 0.0) $result = '0';
if ($float > 1.0) $result = '1';
return $result;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,87 @@
<?php
/**
* Validates shorthand CSS property background.
* @warning Does not support url tokens that have internal spaces.
*/
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{
/**
* Local copy of component validators.
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
*/
protected $info;
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['background-color'] = $def->info['background-color'];
$this->info['background-image'] = $def->info['background-image'];
$this->info['background-repeat'] = $def->info['background-repeat'];
$this->info['background-attachment'] = $def->info['background-attachment'];
$this->info['background-position'] = $def->info['background-position'];
}
public function validate($string, $config, $context) {
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// munge rgb() decl if necessary
$string = $this->mungeRgb($string);
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process
$caught = array();
$caught['color'] = false;
$caught['image'] = false;
$caught['repeat'] = false;
$caught['attachment'] = false;
$caught['position'] = false;
$i = 0; // number of catches
$none = false;
foreach ($bits as $bit) {
if ($bit === '') continue;
foreach ($caught as $key => $status) {
if ($key != 'position') {
if ($status !== false) continue;
$r = $this->info['background-' . $key]->validate($bit, $config, $context);
} else {
$r = $bit;
}
if ($r === false) continue;
if ($key == 'position') {
if ($caught[$key] === false) $caught[$key] = '';
$caught[$key] .= $r . ' ';
} else {
$caught[$key] = $r;
}
$i++;
break;
}
}
if (!$i) return false;
if ($caught['position'] !== false) {
$caught['position'] = $this->info['background-position']->
validate($caught['position'], $config, $context);
}
$ret = array();
foreach ($caught as $value) {
if ($value === false) continue;
$ret[] = $value;
}
if (empty($ret)) return false;
return implode(' ', $ret);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,133 @@
<?php
/* W3C says:
[ // adjective and number must be in correct order, even if
// you could switch them without introducing ambiguity.
// some browsers support that syntax
[
<percentage> | <length> | left | center | right
]
[
<percentage> | <length> | top | center | bottom
]?
] |
[ // this signifies that the vertical and horizontal adjectives
// can be arbitrarily ordered, however, there can only be two,
// one of each, or none at all
[
left | center | right
] ||
[
top | center | bottom
]
]
top, left = 0%
center, (none) = 50%
bottom, right = 100%
*/
/* QuirksMode says:
keyword + length/percentage must be ordered correctly, as per W3C
Internet Explorer and Opera, however, support arbitrary ordering. We
should fix it up.
Minor issue though, not strictly necessary.
*/
// control freaks may appreciate the ability to convert these to
// percentages or something, but it's not necessary
/**
* Validates the value of background-position.
*/
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{
protected $length;
protected $percentage;
public function __construct() {
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$bits = explode(' ', $string);
$keywords = array();
$keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom
$keywords['ch'] = false; // center (first word)
$keywords['cv'] = false; // center (second word)
$measures = array();
$i = 0;
$lookup = array(
'top' => 'v',
'bottom' => 'v',
'left' => 'h',
'right' => 'h',
'center' => 'c'
);
foreach ($bits as $bit) {
if ($bit === '') continue;
// test for keyword
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) {
$status = $lookup[$lbit];
if ($status == 'c') {
if ($i == 0) {
$status = 'ch';
} else {
$status = 'cv';
}
}
$keywords[$status] = $lbit;
$i++;
}
// test for length
$r = $this->length->validate($bit, $config, $context);
if ($r !== false) {
$measures[] = $r;
$i++;
}
// test for percentage
$r = $this->percentage->validate($bit, $config, $context);
if ($r !== false) {
$measures[] = $r;
$i++;
}
}
if (!$i) return false; // no valid values were caught
$ret = array();
// first keyword
if ($keywords['h']) $ret[] = $keywords['h'];
elseif ($keywords['ch']) {
$ret[] = $keywords['ch'];
$keywords['cv'] = false; // prevent re-use: center = center center
}
elseif (count($measures)) $ret[] = array_shift($measures);
if ($keywords['v']) $ret[] = $keywords['v'];
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
elseif (count($measures)) $ret[] = array_shift($measures);
if (empty($ret)) return false;
return implode(' ', $ret);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,43 @@
<?php
/**
* Validates the border property as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{
/**
* Local copy of properties this property is shorthand for.
*/
protected $info = array();
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['border-width'] = $def->info['border-width'];
$this->info['border-style'] = $def->info['border-style'];
$this->info['border-top-color'] = $def->info['border-top-color'];
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$string = $this->mungeRgb($string);
$bits = explode(' ', $string);
$done = array(); // segments we've finished
$ret = ''; // return value
foreach ($bits as $bit) {
foreach ($this->info as $propname => $validator) {
if (isset($done[$propname])) continue;
$r = $validator->validate($bit, $config, $context);
if ($r !== false) {
$ret .= $r . ' ';
$done[$propname] = true;
break;
}
}
}
return rtrim($ret);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,78 @@
<?php
/**
* Validates Color as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
public function validate($color, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$color = trim($color);
if ($color === '') return false;
$lower = strtolower($color);
if (isset($colors[$lower])) return $colors[$lower];
if (strpos($color, 'rgb(') !== false) {
// rgb literal handling
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) return false;
$triad = substr($color, 4, $length - 4 - 1);
$parts = explode(',', $triad);
if (count($parts) !== 3) return false;
$type = false; // to ensure that they're all the same type
$new_parts = array();
foreach ($parts as $part) {
$part = trim($part);
if ($part === '') return false;
$length = strlen($part);
if ($part[$length - 1] === '%') {
// handle percents
if (!$type) {
$type = 'percentage';
} elseif ($type !== 'percentage') {
return false;
}
$num = (float) substr($part, 0, $length - 1);
if ($num < 0) $num = 0;
if ($num > 100) $num = 100;
$new_parts[] = "$num%";
} else {
// handle integers
if (!$type) {
$type = 'integer';
} elseif ($type !== 'integer') {
return false;
}
$num = (int) $part;
if ($num < 0) $num = 0;
if ($num > 255) $num = 255;
$new_parts[] = (string) $num;
}
}
$new_triad = implode(',', $new_parts);
$color = "rgb($new_triad)";
} else {
// hexadecimal handling
if ($color[0] === '#') {
$hex = substr($color, 1);
} else {
$hex = $color;
$color = '#' . $color;
}
$length = strlen($hex);
if ($length !== 3 && $length !== 6) return false;
if (!ctype_xdigit($hex)) return false;
}
return $color;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,38 @@
<?php
/**
* Allows multiple validators to attempt to validate attribute.
*
* Composite is just what it sounds like: a composite of many validators.
* This means that multiple HTMLPurifier_AttrDef objects will have a whack
* at the string. If one of them passes, that's what is returned. This is
* especially useful for CSS values, which often are a choice between
* an enumerated set of predefined values or a flexible data type.
*/
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{
/**
* List of HTMLPurifier_AttrDef objects that may process strings
* @todo Make protected
*/
public $defs;
/**
* @param $defs List of HTMLPurifier_AttrDef objects
*/
public function __construct($defs) {
$this->defs = $defs;
}
public function validate($string, $config, $context) {
foreach ($this->defs as $i => $def) {
$result = $this->defs[$i]->validate($string, $config, $context);
if ($result !== false) return $result;
}
return false;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,28 @@
<?php
/**
* Decorator which enables CSS properties to be disabled for specific elements.
*/
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
public $def, $element;
/**
* @param $def Definition to wrap
* @param $element Element to deny
*/
public function __construct($def, $element) {
$this->def = $def;
$this->element = $element;
}
/**
* Checks if CurrentToken is set and equal to $this->element
*/
public function validate($string, $config, $context) {
$token = $context->get('CurrentToken', true);
if ($token && $token->name == $this->element) return false;
return $this->def->validate($string, $config, $context);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,54 @@
<?php
/**
* Microsoft's proprietary filter: CSS property
* @note Currently supports the alpha filter. In the future, this will
* probably need an extensible framework
*/
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{
protected $intValidator;
public function __construct() {
$this->intValidator = new HTMLPurifier_AttrDef_Integer();
}
public function validate($value, $config, $context) {
$value = $this->parseCDATA($value);
if ($value === 'none') return $value;
// if we looped this we could support multiple filters
$function_length = strcspn($value, '(');
$function = trim(substr($value, 0, $function_length));
if ($function !== 'alpha' &&
$function !== 'Alpha' &&
$function !== 'progid:DXImageTransform.Microsoft.Alpha'
) return false;
$cursor = $function_length + 1;
$parameters_length = strcspn($value, ')', $cursor);
$parameters = substr($value, $cursor, $parameters_length);
$params = explode(',', $parameters);
$ret_params = array();
$lookup = array();
foreach ($params as $param) {
list($key, $value) = explode('=', $param);
$key = trim($key);
$value = trim($value);
if (isset($lookup[$key])) continue;
if ($key !== 'opacity') continue;
$value = $this->intValidator->validate($value, $config, $context);
if ($value === false) continue;
$int = (int) $value;
if ($int > 100) $value = '100';
if ($int < 0) $value = '0';
$ret_params[] = "$key=$value";
$lookup[$key] = true;
}
$ret_parameters = implode(',', $ret_params);
$ret_function = "$function($ret_parameters)";
return $ret_function;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,149 @@
<?php
/**
* Validates shorthand CSS property font.
*/
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{
/**
* Local copy of component validators.
*
* @note If we moved specific CSS property definitions to their own
* classes instead of having them be assembled at run time by
* CSSDefinition, this wouldn't be necessary. We'd instantiate
* our own copies.
*/
protected $info = array();
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['font-style'] = $def->info['font-style'];
$this->info['font-variant'] = $def->info['font-variant'];
$this->info['font-weight'] = $def->info['font-weight'];
$this->info['font-size'] = $def->info['font-size'];
$this->info['line-height'] = $def->info['line-height'];
$this->info['font-family'] = $def->info['font-family'];
}
public function validate($string, $config, $context) {
static $system_fonts = array(
'caption' => true,
'icon' => true,
'menu' => true,
'message-box' => true,
'small-caption' => true,
'status-bar' => true
);
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// check if it's one of the keywords
$lowercase_string = strtolower($string);
if (isset($system_fonts[$lowercase_string])) {
return $lowercase_string;
}
$bits = explode(' ', $string); // bits to process
$stage = 0; // this indicates what we're looking for
$caught = array(); // which stage 0 properties have we caught?
$stage_1 = array('font-style', 'font-variant', 'font-weight');
$final = ''; // output
for ($i = 0, $size = count($bits); $i < $size; $i++) {
if ($bits[$i] === '') continue;
switch ($stage) {
// attempting to catch font-style, font-variant or font-weight
case 0:
foreach ($stage_1 as $validator_name) {
if (isset($caught[$validator_name])) continue;
$r = $this->info[$validator_name]->validate(
$bits[$i], $config, $context);
if ($r !== false) {
$final .= $r . ' ';
$caught[$validator_name] = true;
break;
}
}
// all three caught, continue on
if (count($caught) >= 3) $stage = 1;
if ($r !== false) break;
// attempting to catch font-size and perhaps line-height
case 1:
$found_slash = false;
if (strpos($bits[$i], '/') !== false) {
list($font_size, $line_height) =
explode('/', $bits[$i]);
if ($line_height === '') {
// ooh, there's a space after the slash!
$line_height = false;
$found_slash = true;
}
} else {
$font_size = $bits[$i];
$line_height = false;
}
$r = $this->info['font-size']->validate(
$font_size, $config, $context);
if ($r !== false) {
$final .= $r;
// attempt to catch line-height
if ($line_height === false) {
// we need to scroll forward
for ($j = $i + 1; $j < $size; $j++) {
if ($bits[$j] === '') continue;
if ($bits[$j] === '/') {
if ($found_slash) {
return false;
} else {
$found_slash = true;
continue;
}
}
$line_height = $bits[$j];
break;
}
} else {
// slash already found
$found_slash = true;
$j = $i;
}
if ($found_slash) {
$i = $j;
$r = $this->info['line-height']->validate(
$line_height, $config, $context);
if ($r !== false) {
$final .= '/' . $r;
}
}
$final .= ' ';
$stage = 2;
break;
}
return false;
// attempting to catch font-family
case 2:
$font_family =
implode(' ', array_slice($bits, $i, $size - $i));
$r = $this->info['font-family']->validate(
$font_family, $config, $context);
if ($r !== false) {
$final .= $r . ' ';
// processing completed successfully
return rtrim($final);
}
return false;
}
}
return false;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,72 @@
<?php
/**
* Validates a font family list according to CSS spec
* @todo whitelisting allowed fonts would be nice
*/
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
static $generic_names = array(
'serif' => true,
'sans-serif' => true,
'monospace' => true,
'fantasy' => true,
'cursive' => true
);
// assume that no font names contain commas in them
$fonts = explode(',', $string);
$final = '';
foreach($fonts as $font) {
$font = trim($font);
if ($font === '') continue;
// match a generic name
if (isset($generic_names[$font])) {
$final .= $font . ', ';
continue;
}
// match a quoted name
if ($font[0] === '"' || $font[0] === "'") {
$length = strlen($font);
if ($length <= 2) continue;
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
}
$font = $this->expandCSSEscape($font);
// $font is a pure representation of the font name
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
}
// bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// These ugly transforms don't pose a security
// risk (as \\ and \" might). We could try to be clever and
// use single-quote wrapping when there is a double quote
// present, but I have choosen not to implement that.
// (warning: this code relies on the selection of quotation
// mark below)
$font = str_replace('\\', '\\5C ', $font);
$font = str_replace('"', '\\22 ', $font);
// complicated font, requires quoting
$final .= "\"$font\", "; // note that this will later get turned into &quot;
}
$final = rtrim($final, ', ');
if ($final === '') return false;
return $final;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,40 @@
<?php
/**
* Decorator which enables !important to be used in CSS values.
*/
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
public $def, $allow;
/**
* @param $def Definition to wrap
* @param $allow Whether or not to allow !important
*/
public function __construct($def, $allow = false) {
$this->def = $def;
$this->allow = $allow;
}
/**
* Intercepts and removes !important if necessary
*/
public function validate($string, $config, $context) {
// test for ! and important tokens
$string = trim($string);
$is_important = false;
// :TODO: optimization: test directly for !important and ! important
if (strlen($string) >= 9 && substr($string, -9) === 'important') {
$temp = rtrim(substr($string, 0, -9));
// use a temp, because we might want to restore important
if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
$string = rtrim(substr($temp, 0, -1));
$is_important = true;
}
}
$string = $this->def->validate($string, $config, $context);
if ($this->allow && $is_important) $string .= ' !important';
return $string;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,47 @@
<?php
/**
* Represents a Length as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{
protected $min, $max;
/**
* @param HTMLPurifier_Length $max Minimum length, or null for no bound. String is also acceptable.
* @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
*/
public function __construct($min = null, $max = null) {
$this->min = $min !== null ? HTMLPurifier_Length::make($min) : null;
$this->max = $max !== null ? HTMLPurifier_Length::make($max) : null;
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
// Optimizations
if ($string === '') return false;
if ($string === '0') return '0';
if (strlen($string) === 1) return false;
$length = HTMLPurifier_Length::make($string);
if (!$length->isValid()) return false;
if ($this->min) {
$c = $length->compareTo($this->min);
if ($c === false) return false;
if ($c < 0) return false;
}
if ($this->max) {
$c = $length->compareTo($this->max);
if ($c === false) return false;
if ($c > 0) return false;
}
return $length->toString();
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,78 @@
<?php
/**
* Validates shorthand CSS property list-style.
* @warning Does not support url tokens that have internal spaces.
*/
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{
/**
* Local copy of component validators.
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
*/
protected $info;
public function __construct($config) {
$def = $config->getCSSDefinition();
$this->info['list-style-type'] = $def->info['list-style-type'];
$this->info['list-style-position'] = $def->info['list-style-position'];
$this->info['list-style-image'] = $def->info['list-style-image'];
}
public function validate($string, $config, $context) {
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process
$caught = array();
$caught['type'] = false;
$caught['position'] = false;
$caught['image'] = false;
$i = 0; // number of catches
$none = false;
foreach ($bits as $bit) {
if ($i >= 3) return; // optimization bit
if ($bit === '') continue;
foreach ($caught as $key => $status) {
if ($status !== false) continue;
$r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
if ($r === false) continue;
if ($r === 'none') {
if ($none) continue;
else $none = true;
if ($key == 'image') continue;
}
$caught[$key] = $r;
$i++;
break;
}
}
if (!$i) return false;
$ret = array();
// construct type
if ($caught['type']) $ret[] = $caught['type'];
// construct image
if ($caught['image']) $ret[] = $caught['image'];
// construct position
if ($caught['position']) $ret[] = $caught['position'];
if (empty($ret)) return false;
return implode(' ', $ret);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,58 @@
<?php
/**
* Framework class for strings that involve multiple values.
*
* Certain CSS properties such as border-width and margin allow multiple
* lengths to be specified. This class can take a vanilla border-width
* definition and multiply it, usually into a max of four.
*
* @note Even though the CSS specification isn't clear about it, inherit
* can only be used alone: it will never manifest as part of a multi
* shorthand declaration. Thus, this class does not allow inherit.
*/
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{
/**
* Instance of component definition to defer validation to.
* @todo Make protected
*/
public $single;
/**
* Max number of values allowed.
* @todo Make protected
*/
public $max;
/**
* @param $single HTMLPurifier_AttrDef to multiply
* @param $max Max number of values allowed (usually four)
*/
public function __construct($single, $max = 4) {
$this->single = $single;
$this->max = $max;
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
if ($string === '') return false;
$parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
$length = count($parts);
$final = '';
for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) {
if (ctype_space($parts[$i])) continue;
$result = $this->single->validate($parts[$i], $config, $context);
if ($result !== false) {
$final .= $result . ' ';
$num++;
}
}
if ($final === '') return false;
return rtrim($final);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,69 @@
<?php
/**
* Validates a number as defined by the CSS spec.
*/
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{
/**
* Bool indicating whether or not only positive values allowed.
*/
protected $non_negative = false;
/**
* @param $non_negative Bool indicating whether negatives are forbidden
*/
public function __construct($non_negative = false) {
$this->non_negative = $non_negative;
}
/**
* @warning Some contexts do not pass $config, $context. These
* variables should not be used without checking HTMLPurifier_Length
*/
public function validate($number, $config, $context) {
$number = $this->parseCDATA($number);
if ($number === '') return false;
if ($number === '0') return '0';
$sign = '';
switch ($number[0]) {
case '-':
if ($this->non_negative) return false;
$sign = '-';
case '+':
$number = substr($number, 1);
}
if (ctype_digit($number)) {
$number = ltrim($number, '0');
return $number ? $sign . $number : '0';
}
// Period is the only non-numeric character allowed
if (strpos($number, '.') === false) return false;
list($left, $right) = explode('.', $number, 2);
if ($left === '' && $right === '') return false;
if ($left !== '' && !ctype_digit($left)) return false;
$left = ltrim($left, '0');
$right = rtrim($right, '0');
if ($right === '') {
return $left ? $sign . $left : '0';
} elseif (!ctype_digit($right)) {
return false;
}
return $sign . $left . '.' . $right;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,40 @@
<?php
/**
* Validates a Percentage as defined by the CSS spec.
*/
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{
/**
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
*/
protected $number_def;
/**
* @param Bool indicating whether to forbid negative values
*/
public function __construct($non_negative = false) {
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
}
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
if ($string === '') return false;
$length = strlen($string);
if ($length === 1) return false;
if ($string[$length - 1] !== '%') return false;
$number = substr($string, 0, $length - 1);
$number = $this->number_def->validate($number, $config, $context);
if ($number === false) return false;
return "$number%";
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,38 @@
<?php
/**
* Validates the value for the CSS property text-decoration
* @note This class could be generalized into a version that acts sort of
* like Enum except you can compound the allowed values.
*/
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
static $allowed_values = array(
'line-through' => true,
'overline' => true,
'underline' => true,
);
$string = strtolower($this->parseCDATA($string));
if ($string === 'none') return $string;
$parts = explode(' ', $string);
$final = '';
foreach ($parts as $part) {
if (isset($allowed_values[$part])) {
$final .= $part . ' ';
}
}
$final = rtrim($final);
if ($final === '') return false;
return $final;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,52 @@
<?php
/**
* Validates a URI in CSS syntax, which uses url('http://example.com')
* @note While theoretically speaking a URI in a CSS document could
* be non-embedded, as of CSS2 there is no such usage so we're
* generalizing it. This may need to be changed in the future.
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
* the separator, you cannot put a literal semicolon in
* in the URI. Try percent encoding it, in that case.
*/
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{
public function __construct() {
parent::__construct(true); // always embedded
}
public function validate($uri_string, $config, $context) {
// parse the URI out of the string and then pass it onto
// the parent object
$uri_string = $this->parseCDATA($uri_string);
if (strpos($uri_string, 'url(') !== 0) return false;
$uri_string = substr($uri_string, 4);
$new_length = strlen($uri_string) - 1;
if ($uri_string[$new_length] != ')') return false;
$uri = trim(substr($uri_string, 0, $new_length));
if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
$quote = $uri[0];
$new_length = strlen($uri) - 1;
if ($uri[$new_length] !== $quote) return false;
$uri = substr($uri, 1, $new_length - 1);
}
$uri = $this->expandCSSEscape($uri);
$result = parent::validate($uri, $config, $context);
if ($result === false) return false;
// extra sanity check; should have been done by URI
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
return "url(\"$result\")";
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,65 @@
<?php
// Enum = Enumerated
/**
* Validates a keyword against a list of valid values.
* @warning The case-insensitive compare of this function uses PHP's
* built-in strtolower and ctype_lower functions, which may
* cause problems with international comparisons
*/
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
{
/**
* Lookup table of valid values.
* @todo Make protected
*/
public $valid_values = array();
/**
* Bool indicating whether or not enumeration is case sensitive.
* @note In general this is always case insensitive.
*/
protected $case_sensitive = false; // values according to W3C spec
/**
* @param $valid_values List of valid values
* @param $case_sensitive Bool indicating whether or not case sensitive
*/
public function __construct(
$valid_values = array(), $case_sensitive = false
) {
$this->valid_values = array_flip($valid_values);
$this->case_sensitive = $case_sensitive;
}
public function validate($string, $config, $context) {
$string = trim($string);
if (!$this->case_sensitive) {
// we may want to do full case-insensitive libraries
$string = ctype_lower($string) ? $string : strtolower($string);
}
$result = isset($this->valid_values[$string]);
return $result ? $string : false;
}
/**
* @param $string In form of comma-delimited list of case-insensitive
* valid values. Example: "foo,bar,baz". Prepend "s:" to make
* case sensitive
*/
public function make($string) {
if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') {
$string = substr($string, 2);
$sensitive = true;
} else {
$sensitive = false;
}
$values = explode(',', $string);
return new HTMLPurifier_AttrDef_Enum($values, $sensitive);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,28 @@
<?php
/**
* Validates a boolean attribute
*/
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{
protected $name;
public $minimized = true;
public function __construct($name = false) {$this->name = $name;}
public function validate($string, $config, $context) {
if (empty($string)) return false;
return $this->name;
}
/**
* @param $string Name of attribute
*/
public function make($string) {
return new HTMLPurifier_AttrDef_HTML_Bool($string);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,34 @@
<?php
/**
* Implements special behavior for class attribute (normally NMTOKENS)
*/
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
protected function split($string, $config, $context) {
// really, this twiddle should be lazy loaded
$name = $config->getDefinition('HTML')->doctype->name;
if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
return parent::split($string, $config, $context);
} else {
return preg_split('/\s+/', $string);
}
}
protected function filter($tokens, $config, $context) {
$allowed = $config->get('Attr.AllowedClasses');
$forbidden = $config->get('Attr.ForbiddenClasses');
$ret = array();
foreach ($tokens as $token) {
if (
($allowed === null || isset($allowed[$token])) &&
!isset($forbidden[$token]) &&
// We need this O(n) check because of PHP's array
// implementation that casts -0 to 0.
!in_array($token, $ret, true)
) {
$ret[] = $token;
}
}
return $ret;
}
}

View file

@ -0,0 +1,32 @@
<?php
/**
* Validates a color according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
$string = trim($string);
if (empty($string)) return false;
if (isset($colors[$string])) return $colors[$string];
if ($string[0] === '#') $hex = substr($string, 1);
else $hex = $string;
$length = strlen($hex);
if ($length !== 3 && $length !== 6) return false;
if (!ctype_xdigit($hex)) return false;
if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];
return "#$hex";
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
<?php
/**
* Special-case enum attribute definition that lazy loads allowed frame targets
*/
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{
public $valid_values = false; // uninitialized value
protected $case_sensitive = false;
public function __construct() {}
public function validate($string, $config, $context) {
if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets');
return parent::validate($string, $config, $context);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,70 @@
<?php
/**
* Validates the HTML attribute ID.
* @warning Even though this is the id processor, it
* will ignore the directive Attr:IDBlacklist, since it will only
* go according to the ID accumulator. Since the accumulator is
* automatically generated, it will have already absorbed the
* blacklist. If you're hacking around, make sure you use load()!
*/
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{
// ref functionality disabled, since we also have to verify
// whether or not the ID it refers to exists
public function validate($id, $config, $context) {
if (!$config->get('Attr.EnableID')) return false;
$id = trim($id); // trim it first
if ($id === '') return false;
$prefix = $config->get('Attr.IDPrefix');
if ($prefix !== '') {
$prefix .= $config->get('Attr.IDPrefixLocal');
// prevent re-appending the prefix
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
} elseif ($config->get('Attr.IDPrefixLocal') !== '') {
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
'%Attr.IDPrefix is set', E_USER_WARNING);
}
//if (!$this->ref) {
$id_accumulator =& $context->get('IDAccumulator');
if (isset($id_accumulator->ids[$id])) return false;
//}
// we purposely avoid using regex, hopefully this is faster
if (ctype_alpha($id)) {
$result = true;
} else {
if (!ctype_alpha(@$id[0])) return false;
$trim = trim( // primitive style of regexps, I suppose
$id,
'A..Za..z0..9:-._'
);
$result = ($trim === '');
}
$regexp = $config->get('Attr.IDBlacklistRegexp');
if ($regexp && preg_match($regexp, $id)) {
return false;
}
if (/*!$this->ref && */$result) $id_accumulator->add($id);
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it
// valid, or return false.
return $result ? $id : false;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,41 @@
<?php
/**
* Validates the HTML type length (not to be confused with CSS's length).
*
* This accepts integer pixels or percentages as lengths for certain
* HTML attributes.
*/
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{
public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '') return false;
$parent_result = parent::validate($string, $config, $context);
if ($parent_result !== false) return $parent_result;
$length = strlen($string);
$last_char = $string[$length - 1];
if ($last_char !== '%') return false;
$points = substr($string, 0, $length - 1);
if (!is_numeric($points)) return false;
$points = (int) $points;
if ($points < 0) return '0%';
if ($points > 100) return '100%';
return ((string) $points) . '%';
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,53 @@
<?php
/**
* Validates a rel/rev link attribute against a directive of allowed values
* @note We cannot use Enum because link types allow multiple
* values.
* @note Assumes link types are ASCII text
*/
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{
/** Name config attribute to pull. */
protected $name;
public function __construct($name) {
$configLookup = array(
'rel' => 'AllowedRel',
'rev' => 'AllowedRev'
);
if (!isset($configLookup[$name])) {
trigger_error('Unrecognized attribute name for link '.
'relationship.', E_USER_ERROR);
return;
}
$this->name = $configLookup[$name];
}
public function validate($string, $config, $context) {
$allowed = $config->get('Attr.' . $this->name);
if (empty($allowed)) return false;
$string = $this->parseCDATA($string);
$parts = explode(' ', $string);
// lookup to prevent duplicates
$ret_lookup = array();
foreach ($parts as $part) {
$part = strtolower(trim($part));
if (!isset($allowed[$part])) continue;
$ret_lookup[$part] = true;
}
if (empty($ret_lookup)) return false;
$string = implode(' ', array_keys($ret_lookup));
return $string;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,41 @@
<?php
/**
* Validates a MultiLength as defined by the HTML spec.
*
* A multilength is either a integer (pixel count), a percentage, or
* a relative number.
*/
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{
public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '') return false;
$parent_result = parent::validate($string, $config, $context);
if ($parent_result !== false) return $parent_result;
$length = strlen($string);
$last_char = $string[$length - 1];
if ($last_char !== '*') return false;
$int = substr($string, 0, $length - 1);
if ($int == '') return '*';
if (!is_numeric($int)) return false;
$int = (int) $int;
if ($int < 0) return false;
if ($int == 0) return '0';
if ($int == 1) return '*';
return ((string) $int) . '*';
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,52 @@
<?php
/**
* Validates contents based on NMTOKENS attribute type.
*/
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
$string = trim($string);
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
$tokens = $this->split($string, $config, $context);
$tokens = $this->filter($tokens, $config, $context);
if (empty($tokens)) return false;
return implode(' ', $tokens);
}
/**
* Splits a space separated list of tokens into its constituent parts.
*/
protected function split($string, $config, $context) {
// OPTIMIZABLE!
// do the preg_match, capture all subpatterns for reformulation
// we don't support U+00A1 and up codepoints or
// escaping because I don't know how to do that with regexps
// and plus it would complicate optimization efforts (you never
// see that anyway).
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
'(?:(?=\s)|\z)/'; // look ahead for space or string end
preg_match_all($pattern, $string, $matches);
return $matches[1];
}
/**
* Template method for removing certain tokens based on arbitrary criteria.
* @note If we wanted to be really functional, we'd do an array_filter
* with a callback. But... we're not.
*/
protected function filter($tokens, $config, $context) {
return $tokens;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,48 @@
<?php
/**
* Validates an integer representation of pixels according to the HTML spec.
*/
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{
protected $max;
public function __construct($max = null) {
$this->max = $max;
}
public function validate($string, $config, $context) {
$string = trim($string);
if ($string === '0') return $string;
if ($string === '') return false;
$length = strlen($string);
if (substr($string, $length - 2) == 'px') {
$string = substr($string, 0, $length - 2);
}
if (!is_numeric($string)) return false;
$int = (int) $string;
if ($int < 0) return '0';
// upper-bound value, extremely high values can
// crash operating systems, see <http://ha.ckers.org/imagecrash.html>
// WARNING, above link WILL crash you if you're using Windows
if ($this->max !== null && $int > $this->max) return (string) $this->max;
return (string) $int;
}
public function make($string) {
if ($string === '') $max = null;
else $max = (int) $string;
$class = get_class($this);
return new $class($max);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,73 @@
<?php
/**
* Validates an integer.
* @note While this class was modeled off the CSS definition, no currently
* allowed CSS uses this type. The properties that do are: widows,
* orphans, z-index, counter-increment, counter-reset. Some of the
* HTML attributes, however, find use for a non-negative version of this.
*/
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{
/**
* Bool indicating whether or not negative values are allowed
*/
protected $negative = true;
/**
* Bool indicating whether or not zero is allowed
*/
protected $zero = true;
/**
* Bool indicating whether or not positive values are allowed
*/
protected $positive = true;
/**
* @param $negative Bool indicating whether or not negative values are allowed
* @param $zero Bool indicating whether or not zero is allowed
* @param $positive Bool indicating whether or not positive values are allowed
*/
public function __construct(
$negative = true, $zero = true, $positive = true
) {
$this->negative = $negative;
$this->zero = $zero;
$this->positive = $positive;
}
public function validate($integer, $config, $context) {
$integer = $this->parseCDATA($integer);
if ($integer === '') return false;
// we could possibly simply typecast it to integer, but there are
// certain fringe cases that must not return an integer.
// clip leading sign
if ( $this->negative && $integer[0] === '-' ) {
$digits = substr($integer, 1);
if ($digits === '0') $integer = '0'; // rm minus sign for zero
} elseif( $this->positive && $integer[0] === '+' ) {
$digits = $integer = substr($integer, 1); // rm unnecessary plus
} else {
$digits = $integer;
}
// test if it's numeric
if (!ctype_digit($digits)) return false;
// perform scope tests
if (!$this->zero && $integer == 0) return false;
if (!$this->positive && $integer > 0) return false;
if (!$this->negative && $integer < 0) return false;
return $integer;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,73 @@
<?php
/**
* Validates the HTML attribute lang, effectively a language code.
* @note Built according to RFC 3066, which obsoleted RFC 1766
*/
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
$string = trim($string);
if (!$string) return false;
$subtags = explode('-', $string);
$num_subtags = count($subtags);
if ($num_subtags == 0) return false; // sanity check
// process primary subtag : $subtags[0]
$length = strlen($subtags[0]);
switch ($length) {
case 0:
return false;
case 1:
if (! ($subtags[0] == 'x' || $subtags[0] == 'i') ) {
return false;
}
break;
case 2:
case 3:
if (! ctype_alpha($subtags[0]) ) {
return false;
} elseif (! ctype_lower($subtags[0]) ) {
$subtags[0] = strtolower($subtags[0]);
}
break;
default:
return false;
}
$new_string = $subtags[0];
if ($num_subtags == 1) return $new_string;
// process second subtag : $subtags[1]
$length = strlen($subtags[1]);
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
return $new_string;
}
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
$new_string .= '-' . $subtags[1];
if ($num_subtags == 2) return $new_string;
// process all other subtags, index 2 and up
for ($i = 2; $i < $num_subtags; $i++) {
$length = strlen($subtags[$i]);
if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) {
return $new_string;
}
if (!ctype_lower($subtags[$i])) {
$subtags[$i] = strtolower($subtags[$i]);
}
$new_string .= '-' . $subtags[$i];
}
return $new_string;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,34 @@
<?php
/**
* Decorator that, depending on a token, switches between two definitions.
*/
class HTMLPurifier_AttrDef_Switch
{
protected $tag;
protected $withTag, $withoutTag;
/**
* @param string $tag Tag name to switch upon
* @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
* @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
*/
public function __construct($tag, $with_tag, $without_tag) {
$this->tag = $tag;
$this->withTag = $with_tag;
$this->withoutTag = $without_tag;
}
public function validate($string, $config, $context) {
$token = $context->get('CurrentToken', true);
if (!$token || $token->name !== $this->tag) {
return $this->withoutTag->validate($string, $config, $context);
} else {
return $this->withTag->validate($string, $config, $context);
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,15 @@
<?php
/**
* Validates arbitrary text according to the HTML spec.
*/
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
return $this->parseCDATA($string);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,77 @@
<?php
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
*/
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
protected $parser;
protected $embedsResource;
/**
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
*/
public function __construct($embeds_resource = false) {
$this->parser = new HTMLPurifier_URIParser();
$this->embedsResource = (bool) $embeds_resource;
}
public function make($string) {
$embeds = (bool) $string;
return new HTMLPurifier_AttrDef_URI($embeds);
}
public function validate($uri, $config, $context) {
if ($config->get('URI.Disable')) return false;
$uri = $this->parseCDATA($uri);
// parse the URI
$uri = $this->parser->parse($uri);
if ($uri === false) return false;
// add embedded flag to context for validators
$context->register('EmbeddedURI', $this->embedsResource);
$ok = false;
do {
// generic validation
$result = $uri->validate($config, $context);
if (!$result) break;
// chained filtering
$uri_def = $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;
// scheme-specific validation
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) break;
if ($this->embedsResource && !$scheme_obj->browsable) break;
$result = $scheme_obj->validate($uri, $config, $context);
if (!$result) break;
// Post chained filtering
$result = $uri_def->postFilter($uri, $config, $context);
if (!$result) break;
// survived gauntlet
$ok = true;
} while (false);
$context->destroy('EmbeddedURI');
if (!$ok) return false;
// back to string
return $uri->toString();
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,17 @@
<?php
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{
/**
* Unpacks a mailbox into its display-name and address
*/
function unpack($string) {
// needs to be implemented
}
}
// sub-implementations
// vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
<?php
/**
* Primitive email validation class based on the regexp found at
* http://www.regular-expressions.info/email.html
*/
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
{
public function validate($string, $config, $context) {
// no support for named mailboxes i.e. "Bob <bob@example.com>"
// that needs more percent encoding to be done
if ($string == '') return false;
$string = trim($string);
$result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string);
return $result ? $string : false;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,62 @@
<?php
/**
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
*/
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
{
/**
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
*/
protected $ipv4;
/**
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
*/
protected $ipv6;
public function __construct() {
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
}
public function validate($string, $config, $context) {
$length = strlen($string);
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
//IPv6
$ip = substr($string, 1, $length - 2);
$valid = $this->ipv6->validate($ip, $config, $context);
if ($valid === false) return false;
return '['. $valid . ']';
}
// need to do checks on unusual encodings too
$ipv4 = $this->ipv4->validate($string, $config, $context);
if ($ipv4 !== false) return $ipv4;
// A regular domain name.
// This breaks I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll
// try to fix things into an international friendly form.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$and = '[a-z0-9-]'; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an($and*$an)?";
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
if (!$match) return false;
return $string;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,39 @@
<?php
/**
* Validates an IPv4 address
* @author Feyd @ forums.devnetwork.net (public domain)
*/
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
{
/**
* IPv4 regex, protected so that IPv6 can reuse it
*/
protected $ip4;
public function validate($aIP, $config, $context) {
if (!$this->ip4) $this->_loadRegex();
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
{
return $aIP;
}
return false;
}
/**
* Lazy load function to prevent regex from being stuffed in
* cache.
*/
protected function _loadRegex() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,99 @@
<?php
/**
* Validates an IPv6 address.
* @author Feyd @ forums.devnetwork.net (public domain)
* @note This function requires brackets to have been removed from address
* in URI.
*/
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
{
public function validate($aIP, $config, $context) {
if (!$this->ip4) $this->_loadRegex();
$original = $aIP;
$hex = '[0-9a-fA-F]';
$blk = '(?:' . $hex . '{1,4})';
$pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128
// prefix check
if (strpos($aIP, '/') !== false)
{
if (preg_match('#' . $pre . '$#s', $aIP, $find))
{
$aIP = substr($aIP, 0, 0-strlen($find[0]));
unset($find);
}
else
{
return false;
}
}
// IPv4-compatiblity check
if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
{
$aIP = substr($aIP, 0, 0-strlen($find[0]));
$ip = explode('.', $find[0]);
$ip = array_map('dechex', $ip);
$aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
unset($find, $ip);
}
// compression check
$aIP = explode('::', $aIP);
$c = count($aIP);
if ($c > 2)
{
return false;
}
elseif ($c == 2)
{
list($first, $second) = $aIP;
$first = explode(':', $first);
$second = explode(':', $second);
if (count($first) + count($second) > 8)
{
return false;
}
while(count($first) < 8)
{
array_push($first, '0');
}
array_splice($first, 8 - count($second), 8, $second);
$aIP = $first;
unset($first,$second);
}
else
{
$aIP = explode(':', $aIP[0]);
}
$c = count($aIP);
if ($c != 8)
{
return false;
}
// All the pieces should be 16-bit hex strings. Are they?
foreach ($aIP as $piece)
{
if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
{
return false;
}
}
return $original;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,56 @@
<?php
/**
* Processes an entire attribute array for corrections needing multiple values.
*
* Occasionally, a certain attribute will need to be removed and popped onto
* another value. Instead of creating a complex return syntax for
* HTMLPurifier_AttrDef, we just pass the whole attribute array to a
* specialized object and have that do the special work. That is the
* family of HTMLPurifier_AttrTransform.
*
* An attribute transformation can be assigned to run before or after
* HTMLPurifier_AttrDef validation. See HTMLPurifier_HTMLDefinition for
* more details.
*/
abstract class HTMLPurifier_AttrTransform
{
/**
* Abstract: makes changes to the attributes dependent on multiple values.
*
* @param $attr Assoc array of attributes, usually from
* HTMLPurifier_Token_Tag::$attr
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_Context object
* @returns Processed attribute array.
*/
abstract public function transform($attr, $config, $context);
/**
* Prepends CSS properties to the style attribute, creating the
* attribute if it doesn't exist.
* @param $attr Attribute array to process (passed by reference)
* @param $css CSS to prepend
*/
public function prependCSS(&$attr, $css) {
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
$attr['style'] = $css . $attr['style'];
}
/**
* Retrieves and removes an attribute
* @param $attr Attribute array to process (passed by reference)
* @param $key Key of attribute to confiscate
*/
public function confiscateAttr(&$attr, $key) {
if (!isset($attr[$key])) return null;
$value = $attr[$key];
unset($attr[$key]);
return $value;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,23 @@
<?php
/**
* Pre-transform that changes proprietary background attribute to CSS.
*/
class HTMLPurifier_AttrTransform_Background extends HTMLPurifier_AttrTransform {
public function transform($attr, $config, $context) {
if (!isset($attr['background'])) return $attr;
$background = $this->confiscateAttr($attr, 'background');
// some validation should happen here
$this->prependCSS($attr, "background-image:url($background);");
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,19 @@
<?php
// this MUST be placed in post, as it assumes that any value in dir is valid
/**
* Post-trasnform that ensures that bdo tags have the dir attribute set.
*/
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
if (isset($attr['dir'])) return $attr;
$attr['dir'] = $config->get('Attr.DefaultTextDir');
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,23 @@
<?php
/**
* Pre-transform that changes deprecated bgcolor attribute to CSS.
*/
class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform {
public function transform($attr, $config, $context) {
if (!isset($attr['bgcolor'])) return $attr;
$bgcolor = $this->confiscateAttr($attr, 'bgcolor');
// some validation should happen here
$this->prependCSS($attr, "background-color:$bgcolor;");
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,36 @@
<?php
/**
* Pre-transform that changes converts a boolean attribute to fixed CSS
*/
class HTMLPurifier_AttrTransform_BoolToCSS extends HTMLPurifier_AttrTransform {
/**
* Name of boolean attribute that is trigger
*/
protected $attr;
/**
* CSS declarations to add to style, needs trailing semicolon
*/
protected $css;
/**
* @param $attr string attribute name to convert from
* @param $css string CSS declarations to add to style (needs semicolon)
*/
public function __construct($attr, $css) {
$this->attr = $attr;
$this->css = $css;
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
unset($attr[$this->attr]);
$this->prependCSS($attr, $this->css);
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,18 @@
<?php
/**
* Pre-transform that changes deprecated border attribute to CSS.
*/
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
public function transform($attr, $config, $context) {
if (!isset($attr['border'])) return $attr;
$border_width = $this->confiscateAttr($attr, 'border');
// some validation should happen here
$this->prependCSS($attr, "border:{$border_width}px solid;");
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,58 @@
<?php
/**
* Generic pre-transform that converts an attribute with a fixed number of
* values (enumerated) to CSS.
*/
class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {
/**
* Name of attribute to transform from
*/
protected $attr;
/**
* Lookup array of attribute values to CSS
*/
protected $enumToCSS = array();
/**
* Case sensitivity of the matching
* @warning Currently can only be guaranteed to work with ASCII
* values.
*/
protected $caseSensitive = false;
/**
* @param $attr String attribute name to transform from
* @param $enumToCSS Lookup array of attribute values to CSS
* @param $case_sensitive Boolean case sensitivity indicator, default false
*/
public function __construct($attr, $enum_to_css, $case_sensitive = false) {
$this->attr = $attr;
$this->enumToCSS = $enum_to_css;
$this->caseSensitive = (bool) $case_sensitive;
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
$value = trim($attr[$this->attr]);
unset($attr[$this->attr]);
if (!$this->caseSensitive) $value = strtolower($value);
if (!isset($this->enumToCSS[$value])) {
return $attr;
}
$this->prependCSS($attr, $this->enumToCSS[$value]);
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,43 @@
<?php
// must be called POST validation
/**
* Transform that supplies default values for the src and alt attributes
* in img tags, as well as prevents the img tag from being removed
* because of a missing alt tag. This needs to be registered as both
* a pre and post attribute transform.
*/
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
$src = true;
if (!isset($attr['src'])) {
if ($config->get('Core.RemoveInvalidImg')) return $attr;
$attr['src'] = $config->get('Attr.DefaultInvalidImage');
$src = false;
}
if (!isset($attr['alt'])) {
if ($src) {
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
// truncate if the alt is too long
$attr['alt'] = substr(basename($attr['src']),0,40);
} else {
$attr['alt'] = $alt;
}
} else {
$attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
}
}
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,44 @@
<?php
/**
* Pre-transform that changes deprecated hspace and vspace attributes to CSS
*/
class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform {
protected $attr;
protected $css = array(
'hspace' => array('left', 'right'),
'vspace' => array('top', 'bottom')
);
public function __construct($attr) {
$this->attr = $attr;
if (!isset($this->css[$attr])) {
trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
}
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->attr])) return $attr;
$width = $this->confiscateAttr($attr, $this->attr);
// some validation could happen here
if (!isset($this->css[$this->attr])) return $attr;
$style = '';
foreach ($this->css[$this->attr] as $suffix) {
$property = "margin-$suffix";
$style .= "$property:{$width}px;";
}
$this->prependCSS($attr, $style);
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,40 @@
<?php
/**
* Performs miscellaneous cross attribute validation and filtering for
* input elements. This is meant to be a post-transform.
*/
class HTMLPurifier_AttrTransform_Input extends HTMLPurifier_AttrTransform {
protected $pixels;
public function __construct() {
$this->pixels = new HTMLPurifier_AttrDef_HTML_Pixels();
}
public function transform($attr, $config, $context) {
if (!isset($attr['type'])) $t = 'text';
else $t = strtolower($attr['type']);
if (isset($attr['checked']) && $t !== 'radio' && $t !== 'checkbox') {
unset($attr['checked']);
}
if (isset($attr['maxlength']) && $t !== 'text' && $t !== 'password') {
unset($attr['maxlength']);
}
if (isset($attr['size']) && $t !== 'text' && $t !== 'password') {
$result = $this->pixels->validate($attr['size'], $config, $context);
if ($result === false) unset($attr['size']);
else $attr['size'] = $result;
}
if (isset($attr['src']) && $t !== 'image') {
unset($attr['src']);
}
if (!isset($attr['value']) && ($t === 'radio' || $t === 'checkbox')) {
$attr['value'] = '';
}
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,28 @@
<?php
/**
* Post-transform that copies lang's value to xml:lang (and vice-versa)
* @note Theoretically speaking, this could be a pre-transform, but putting
* post is more efficient.
*/
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
$lang = isset($attr['lang']) ? $attr['lang'] : false;
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
if ($lang !== false && $xml_lang === false) {
$attr['xml:lang'] = $lang;
} elseif ($xml_lang !== false) {
$attr['lang'] = $xml_lang;
}
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,27 @@
<?php
/**
* Class for handling width/height length attribute transformations to CSS
*/
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
{
protected $name;
protected $cssName;
public function __construct($name, $css_name = null) {
$this->name = $name;
$this->cssName = $css_name ? $css_name : $name;
}
public function transform($attr, $config, $context) {
if (!isset($attr[$this->name])) return $attr;
$length = $this->confiscateAttr($attr, $this->name);
if(ctype_digit($length)) $length .= 'px';
$this->prependCSS($attr, $this->cssName . ":$length;");
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
<?php
/**
* Pre-transform that changes deprecated name attribute to ID if necessary
*/
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
// Abort early if we're using relaxed definition of name
if ($config->get('HTML.Attr.Name.UseCDATA')) return $attr;
if (!isset($attr['name'])) return $attr;
$id = $this->confiscateAttr($attr, 'name');
if ( isset($attr['id'])) return $attr;
$attr['id'] = $id;
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,27 @@
<?php
/**
* Post-transform that performs validation to the name attribute; if
* it is present with an equivalent id attribute, it is passed through;
* otherwise validation is performed.
*/
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
{
public function __construct() {
$this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
}
public function transform($attr, $config, $context) {
if (!isset($attr['name'])) return $attr;
$name = $attr['name'];
if (isset($attr['id']) && $attr['id'] === $name) return $attr;
$result = $this->idDef->validate($name, $config, $context);
if ($result === false) unset($attr['name']);
else $attr['name'] = $result;
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,15 @@
<?php
class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
{
public $name = "SafeEmbed";
public function transform($attr, $config, $context) {
$attr['allowscriptaccess'] = 'never';
$attr['allownetworking'] = 'internal';
$attr['type'] = 'application/x-shockwave-flash';
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,16 @@
<?php
/**
* Writes default type for all objects. Currently only supports flash.
*/
class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
{
public $name = "SafeObject";
function transform($attr, $config, $context) {
if (!isset($attr['type'])) $attr['type'] = 'application/x-shockwave-flash';
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,56 @@
<?php
/**
* Validates name/value pairs in param tags to be used in safe objects. This
* will only allow name values it recognizes, and pre-fill certain attributes
* with required values.
*
* @note
* This class only supports Flash. In the future, Quicktime support
* may be added.
*
* @warning
* This class expects an injector to add the necessary parameters tags.
*/
class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
{
public $name = "SafeParam";
private $uri;
public function __construct() {
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
}
public function transform($attr, $config, $context) {
// If we add support for other objects, we'll need to alter the
// transforms.
switch ($attr['name']) {
// application/x-shockwave-flash
// Keep this synchronized with Injector/SafeObject.php
case 'allowScriptAccess':
$attr['value'] = 'never';
break;
case 'allowNetworking':
$attr['value'] = 'internal';
break;
case 'wmode':
$attr['value'] = 'window';
break;
case 'movie':
case 'src':
$attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
case 'flashvars':
// we're going to allow arbitrary inputs to the SWF, on
// the reasoning that it could only hack the SWF, not us.
break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;
}
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,16 @@
<?php
/**
* Implements required attribute stipulation for <script>
*/
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
if (!isset($attr['type'])) {
$attr['type'] = 'text/javascript';
}
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,18 @@
<?php
/**
* Sets height/width defaults for <textarea>
*/
class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
// Calculated from Firefox
if (!isset($attr['cols'])) $attr['cols'] = '22';
if (!isset($attr['rows'])) $attr['rows'] = '3';
return $attr;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,77 @@
<?php
/**
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
*/
class HTMLPurifier_AttrTypes
{
/**
* Lookup array of attribute string identifiers to concrete implementations
*/
protected $info = array();
/**
* Constructs the info array, supplying default implementations for attribute
* types.
*/
public function __construct() {
// pseudo-types, must be instantiated via shorthand
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
$this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
$this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
// unimplemented aliases
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
$this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text();
$this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
$this->info['Character'] = new HTMLPurifier_AttrDef_Text();
// "proprietary" types
$this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();
// number is really a positive integer (one or more digits)
// FIXME: ^^ not always, see start and value of list items
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
}
/**
* Retrieves a type
* @param $type String type name
* @return Object AttrDef for type
*/
public function get($type) {
// determine if there is any extra info tacked on
if (strpos($type, '#') !== false) list($type, $string) = explode('#', $type, 2);
else $string = '';
if (!isset($this->info[$type])) {
trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
return;
}
return $this->info[$type]->make($string);
}
/**
* Sets a new implementation for a type
* @param $type String type name
* @param $impl Object AttrDef for type
*/
public function set($type, $impl) {
$this->info[$type] = $impl;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,162 @@
<?php
/**
* Validates the attributes of a token. Doesn't manage required attributes
* very well. The only reason we factored this out was because RemoveForeignElements
* also needed it besides ValidateAttributes.
*/
class HTMLPurifier_AttrValidator
{
/**
* Validates the attributes of a token, returning a modified token
* that has valid tokens
* @param $token Reference to token to validate. We require a reference
* because the operation this class performs on the token are
* not atomic, so the context CurrentToken to be updated
* throughout
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
*/
public function validateToken(&$token, &$config, $context) {
$definition = $config->getHTMLDefinition();
$e =& $context->get('ErrorCollector', true);
// initialize IDAccumulator if necessary
$ok =& $context->get('IDAccumulator', true);
if (!$ok) {
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
}
// initialize CurrentToken if necessary
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);
if (
!$token instanceof HTMLPurifier_Token_Start &&
!$token instanceof HTMLPurifier_Token_Empty
) return $token;
// create alias to global definition array, see also $defs
// DEFINITION CALL
$d_defs = $definition->info_global_attr;
// don't update token until the very end, to ensure an atomic update
$attr = $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
foreach ($definition->info_attr_transform_pre as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
// do local transformations only applicable to this element (pre)
// ex. <p align="right"> to <p style="text-align:right;">
foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
// create alias to this element's attribute definition array, see
// also $d_defs (global attribute definition array)
// DEFINITION CALL
$defs = $definition->info[$token->name]->attr;
$attr_key = false;
$context->register('CurrentAttr', $attr_key);
// iterate through all the attribute keypairs
// Watch out for name collisions: $key has previously been used
foreach ($attr as $attr_key => $value) {
// call the definition
if ( isset($defs[$attr_key]) ) {
// there is a local definition defined
if ($defs[$attr_key] === false) {
// We've explicitly been told not to allow this element.
// This is usually when there's a global definition
// that must be overridden.
// Theoretically speaking, we could have a
// AttrDef_DenyAll, but this is faster!
$result = false;
} else {
// validate according to the element's definition
$result = $defs[$attr_key]->validate(
$value, $config, $context
);
}
} elseif ( isset($d_defs[$attr_key]) ) {
// there is a global definition defined, validate according
// to the global definition
$result = $d_defs[$attr_key]->validate(
$value, $config, $context
);
} else {
// system never heard of the attribute? DELETE!
$result = false;
}
// put the results into effect
if ($result === false || $result === null) {
// this is a generic error message that should replaced
// with more specific ones when possible
if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
// remove the attribute
unset($attr[$attr_key]);
} elseif (is_string($result)) {
// generally, if a substitution is happening, there
// was some sort of implicit correction going on. We'll
// delegate it to the attribute classes to say exactly what.
// simple substitution
$attr[$attr_key] = $result;
} else {
// nothing happens
}
// we'd also want slightly more complicated substitution
// involving an array as the return value,
// although we're not sure how colliding attributes would
// resolve (certain ones would be completely overriden,
// others would prepend themselves).
}
$context->destroy('CurrentAttr');
// post transforms
// global (error reporting untested)
foreach ($definition->info_attr_transform_post as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
// local (error reporting untested)
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e) {
if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
}
$token->attr = $attr;
// destroy CurrentToken if we made it ourselves
if (!$current_token) $context->destroy('CurrentToken');
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,98 @@
<?php
// constants are slow, so we use as few as possible
if (!defined('HTMLPURIFIER_PREFIX')) {
define('HTMLPURIFIER_PREFIX', realpath(dirname(__FILE__) . '/..'));
}
// accomodations for versions earlier than 5.0.2
// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
if (!defined('PHP_EOL')) {
switch (strtoupper(substr(PHP_OS, 0, 3))) {
case 'WIN':
define('PHP_EOL', "\r\n");
break;
case 'DAR':
define('PHP_EOL', "\r");
break;
default:
define('PHP_EOL', "\n");
}
}
/**
* Bootstrap class that contains meta-functionality for HTML Purifier such as
* the autoload function.
*
* @note
* This class may be used without any other files from HTML Purifier.
*/
class HTMLPurifier_Bootstrap
{
/**
* Autoload function for HTML Purifier
* @param $class Class to load
*/
public static function autoload($class) {
$file = HTMLPurifier_Bootstrap::getPath($class);
if (!$file) return false;
require HTMLPURIFIER_PREFIX . '/' . $file;
return true;
}
/**
* Returns the path for a specific class.
*/
public static function getPath($class) {
if (strncmp('HTMLPurifier', $class, 12) !== 0) return false;
// Custom implementations
if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
$code = str_replace('_', '-', substr($class, 22));
$file = 'HTMLPurifier/Language/classes/' . $code . '.php';
} else {
$file = str_replace('_', '/', $class) . '.php';
}
if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false;
return $file;
}
/**
* "Pre-registers" our autoloader on the SPL stack.
*/
public static function registerAutoload() {
$autoload = array('HTMLPurifier_Bootstrap', 'autoload');
if ( ($funcs = spl_autoload_functions()) === false ) {
spl_autoload_register($autoload);
} elseif (function_exists('spl_autoload_unregister')) {
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
if (is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);
if (!$reflector->isStatic()) {
throw new Exception('
HTML Purifier autoloader registrar is not compatible
with non-static object methods due to PHP Bug #44144;
Please do not use HTMLPurifier.autoload.php (or any
file that includes this file); instead, place the code:
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
after your own autoloaders.
');
}
// Suprisingly, spl_autoload_register supports the
// Class::staticMethod callback format, although call_user_func doesn't
if ($compat) $func = implode('::', $func);
}
spl_autoload_unregister($func);
}
spl_autoload_register($autoload);
foreach ($funcs as $func) spl_autoload_register($func);
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,292 @@
<?php
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
*/
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{
public $type = 'CSS';
/**
* Assoc array of attribute name to definition object.
*/
public $info = array();
/**
* Constructs the info array. The meat of this class.
*/
protected function doSetup($config) {
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'), false);
$border_style =
$this->info['border-bottom-style'] =
$this->info['border-right-style'] =
$this->info['border-left-style'] =
$this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
'groove', 'ridge', 'inset', 'outset'), false);
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right', 'both'), false);
$this->info['float'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right'), false);
$this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'italic', 'oblique'), false);
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'small-caps'), false);
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('none')),
new HTMLPurifier_AttrDef_CSS_URI()
)
);
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
array('inside', 'outside'), false);
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
array('disc', 'circle', 'square', 'decimal', 'lower-roman',
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
$this->info['list-style-image'] = $uri_or_none;
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['background-image'] = $uri_or_none;
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
);
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
array('scroll', 'fixed')
);
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
$border_color =
$this->info['border-top-color'] =
$this->info['border-bottom-color'] =
$this->info['border-left-color'] =
$this->info['border-right-color'] =
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('transparent')),
new HTMLPurifier_AttrDef_CSS_Color()
));
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
$border_width =
$this->info['border-top-width'] =
$this->info['border-bottom-width'] =
$this->info['border-left-width'] =
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
));
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
));
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Length()
));
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
'small', 'medium', 'large', 'x-large', 'xx-large',
'larger', 'smaller')),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_CSS_Length()
));
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
$margin =
$this->info['margin-top'] =
$this->info['margin-bottom'] =
$this->info['margin-left'] =
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
// non-negative
$padding =
$this->info['padding-top'] =
$this->info['padding-bottom'] =
$this->info['padding-left'] =
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
));
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
$max = $config->get('CSS.MaxImgLength');
$this->info['width'] =
$this->info['height'] =
$max === null ?
$trusted_wh :
new HTMLPurifier_AttrDef_Switch('img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)),
// For everyone else:
$trusted_wh
);
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
// this could use specialized code
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
'400', '500', '600', '700', '800', '900'), false);
// MUST be called after other font properties, as it references
// a CSSDefinition object
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
// same here
$this->info['border'] =
$this->info['border-bottom'] =
$this->info['border-top'] =
$this->info['border-left'] =
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
'collapse', 'separate'));
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
'top', 'bottom'));
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
'auto', 'fixed'));
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
));
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
// partial support
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
if ($config->get('CSS.Proprietary')) {
$this->doSetupProprietary($config);
}
if ($config->get('CSS.AllowTricky')) {
$this->doSetupTricky($config);
}
$allow_important = $config->get('CSS.AllowImportant');
// wrap all attr-defs with decorator that handles !important
foreach ($this->info as $k => $v) {
$this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
}
$this->setupConfigStuff($config);
}
protected function doSetupProprietary($config) {
// Internet Explorer only scrollbar colors
$this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
// technically not proprietary, but CSS3, and no one supports it
$this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
$this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
$this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
// only opacity, for now
$this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
}
protected function doSetupTricky($config) {
$this->info['display'] = new HTMLPurifier_AttrDef_Enum(array(
'inline', 'block', 'list-item', 'run-in', 'compact',
'marker', 'table', 'inline-table', 'table-row-group',
'table-header-group', 'table-footer-group', 'table-row',
'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
));
$this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array(
'visible', 'hidden', 'collapse'
));
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
}
/**
* Performs extra config-based processing. Based off of
* HTMLPurifier_HTMLDefinition.
* @todo Refactor duplicate elements into common class (probably using
* composition, not inheritance).
*/
protected function setupConfigStuff($config) {
// setup allowed elements
$support = "(for information on implementing this, see the ".
"support forums) ";
$allowed_attributes = $config->get('CSS.AllowedProperties');
if ($allowed_attributes !== null) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
unset($allowed_attributes[$name]);
}
// emit errors
foreach ($allowed_attributes as $name => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$name = htmlspecialchars($name);
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
}
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,48 @@
<?php
/**
* Defines allowed child nodes and validates tokens against it.
*/
abstract class HTMLPurifier_ChildDef
{
/**
* Type of child definition, usually right-most part of class name lowercase.
* Used occasionally in terms of context.
*/
public $type;
/**
* Bool that indicates whether or not an empty array of children is okay
*
* This is necessary for redundant checking when changes affecting
* a child node may cause a parent node to now be disallowed.
*/
public $allow_empty;
/**
* Lookup array of all elements that this definition could possibly allow
*/
public $elements = array();
/**
* Get lookup of tag names that should not close this element automatically.
* All other elements will do so.
*/
public function getAllowedElements($config) {
return $this->elements;
}
/**
* Validates nodes according to definition and returns modification.
*
* @param $tokens_of_children Array of HTMLPurifier_Token
* @param $config HTMLPurifier_Config object
* @param $context HTMLPurifier_Context object
* @return bool true to leave nodes as is
* @return bool false to remove parent node
* @return array of replacement child tokens
*/
abstract public function validateChildren($tokens_of_children, $config, $context);
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,48 @@
<?php
/**
* Definition that uses different definitions depending on context.
*
* The del and ins tags are notable because they allow different types of
* elements depending on whether or not they're in a block or inline context.
* Chameleon allows this behavior to happen by using two different
* definitions depending on context. While this somewhat generalized,
* it is specifically intended for those two tags.
*/
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{
/**
* Instance of the definition object to use when inline. Usually stricter.
*/
public $inline;
/**
* Instance of the definition object to use when block.
*/
public $block;
public $type = 'chameleon';
/**
* @param $inline List of elements to allow when inline.
* @param $block List of elements to allow when block.
*/
public function __construct($inline, $block) {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
$this->block = new HTMLPurifier_ChildDef_Optional($block);
$this->elements = $this->block->elements;
}
public function validateChildren($tokens_of_children, $config, $context) {
if ($context->get('IsInline') === false) {
return $this->block->validateChildren(
$tokens_of_children, $config, $context);
} else {
return $this->inline->validateChildren(
$tokens_of_children, $config, $context);
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,90 @@
<?php
/**
* Custom validation class, accepts DTD child definitions
*
* @warning Currently this class is an all or nothing proposition, that is,
* it will only give a bool return value.
*/
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
public $type = 'custom';
public $allow_empty = false;
/**
* Allowed child pattern as defined by the DTD
*/
public $dtd_regex;
/**
* PCRE regex derived from $dtd_regex
* @private
*/
private $_pcre_regex;
/**
* @param $dtd_regex Allowed child pattern from the DTD
*/
public function __construct($dtd_regex) {
$this->dtd_regex = $dtd_regex;
$this->_compileRegex();
}
/**
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
*/
protected function _compileRegex() {
$raw = str_replace(' ', '', $this->dtd_regex);
if ($raw{0} != '(') {
$raw = "($raw)";
}
$el = '[#a-zA-Z0-9_.-]+';
$reg = $raw;
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
// DOING! Seriously: if there's problems, please report them.
// collect all elements into the $elements array
preg_match_all("/$el/", $reg, $matches);
foreach ($matches[0] as $match) {
$this->elements[$match] = true;
}
// setup all elements as parentheticals with leading commas
$reg = preg_replace("/$el/", '(,\\0)', $reg);
// remove commas when they were not solicited
$reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
// remove all non-paranthetical commas: they are handled by first regex
$reg = preg_replace("/,\(/", '(', $reg);
$this->_pcre_regex = $reg;
}
public function validateChildren($tokens_of_children, $config, $context) {
$list_of_children = '';
$nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) continue;
$is_child = ($nesting == 0); // direct
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
if ($is_child) {
$list_of_children .= $token->name . ',';
}
}
// add leading comma to deal with stray comma declarations
$list_of_children = ',' . rtrim($list_of_children, ',');
$okay =
preg_match(
'/^,?'.$this->_pcre_regex.'$/',
$list_of_children
);
return (bool) $okay;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,20 @@
<?php
/**
* Definition that disallows all elements.
* @warning validateChildren() in this class is actually never called, because
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
* before child definitions are parsed in earnest by
* HTMLPurifier_Strategy_FixNesting.
*/
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
public $allow_empty = true;
public $type = 'empty';
public function __construct() {}
public function validateChildren($tokens_of_children, $config, $context) {
return array();
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,26 @@
<?php
/**
* Definition that allows a set of elements, and allows no children.
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
* really, one shouldn't inherit from the other. Only altered behavior
* is to overload a returned false with an array. Thus, it will never
* return false.
*/
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
public $allow_empty = true;
public $type = 'optional';
public function validateChildren($tokens_of_children, $config, $context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
// we assume that $tokens_of_children is not modified
if ($result === false) {
if (empty($tokens_of_children)) return true;
elseif ($this->whitespace) return $tokens_of_children;
else return array();
}
return $result;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,117 @@
<?php
/**
* Definition that allows a set of elements, but disallows empty children.
*/
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
/**
* Lookup table of allowed elements.
* @public
*/
public $elements = array();
/**
* Whether or not the last passed node was all whitespace.
*/
protected $whitespace = false;
/**
* @param $elements List of allowed element names (lowercase).
*/
public function __construct($elements) {
if (is_string($elements)) {
$elements = str_replace(' ', '', $elements);
$elements = explode('|', $elements);
}
$keys = array_keys($elements);
if ($keys == array_keys($keys)) {
$elements = array_flip($elements);
foreach ($elements as $i => $x) {
$elements[$i] = true;
if (empty($i)) unset($elements[$i]); // remove blank
}
}
$this->elements = $elements;
}
public $allow_empty = false;
public $type = 'required';
public function validateChildren($tokens_of_children, $config, $context) {
// Flag for subclasses
$this->whitespace = false;
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// the new set of children
$result = array();
// current depth into the nest
$nesting = 0;
// whether or not we're deleting a node
$is_deleting = false;
// whether or not parsed character data is allowed
// this controls whether or not we silently drop a tag
// or generate escaped HTML from it
$pcdata_allowed = isset($this->elements['#PCDATA']);
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
// some configuration
$escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
// generator
$gen = new HTMLPurifier_Generator($config, $context);
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
$is_child = ($nesting == 0);
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
if ($is_child) {
$is_deleting = false;
if (!isset($this->elements[$token->name])) {
$is_deleting = true;
if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$gen->generateFromToken($token)
);
}
continue;
}
}
if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$gen->generateFromToken($token)
);
} else {
// drop silently
}
}
if (empty($result)) return false;
if ($all_whitespace) {
$this->whitespace = true;
return false;
}
if ($tokens_of_children == $result) return true;
return $result;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,88 @@
<?php
/**
* Takes the contents of blockquote when in strict and reformats for validation.
*/
class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
{
protected $real_elements;
protected $fake_elements;
public $allow_empty = true;
public $type = 'strictblockquote';
protected $init = false;
/**
* @note We don't want MakeWellFormed to auto-close inline elements since
* they might be allowed.
*/
public function getAllowedElements($config) {
$this->init($config);
return $this->fake_elements;
}
public function validateChildren($tokens_of_children, $config, $context) {
$this->init($config);
// trick the parent class into thinking it allows more
$this->elements = $this->fake_elements;
$result = parent::validateChildren($tokens_of_children, $config, $context);
$this->elements = $this->real_elements;
if ($result === false) return array();
if ($result === true) $result = $tokens_of_children;
$def = $config->getHTMLDefinition();
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
$is_inline = false;
$depth = 0;
$ret = array();
// assuming that there are no comment tokens
foreach ($result as $i => $token) {
$token = $result[$i];
// ifs are nested for readability
if (!$is_inline) {
if (!$depth) {
if (
($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) ||
(!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name]))
) {
$is_inline = true;
$ret[] = $block_wrap_start;
}
}
} else {
if (!$depth) {
// starting tokens have been inline text / empty
if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) {
if (isset($this->elements[$token->name])) {
// ended
$ret[] = $block_wrap_end;
$is_inline = false;
}
}
}
}
$ret[] = $token;
if ($token instanceof HTMLPurifier_Token_Start) $depth++;
if ($token instanceof HTMLPurifier_Token_End) $depth--;
}
if ($is_inline) $ret[] = $block_wrap_end;
return $ret;
}
private function init($config) {
if (!$this->init) {
$def = $config->getHTMLDefinition();
// allow all inline elements
$this->real_elements = $this->elements;
$this->fake_elements = $def->info_content_sets['Flow'];
$this->fake_elements['#PCDATA'] = true;
$this->init = true;
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,142 @@
<?php
/**
* Definition for tables
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
public $allow_empty = false;
public $type = 'table';
public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
public function __construct() {}
public function validateChildren($tokens_of_children, $config, $context) {
if (empty($tokens_of_children)) return false;
// this ensures that the loop gets run one last time before closing
// up. It's a little bit of a hack, but it works! Just make sure you
// get rid of the token later.
$tokens_of_children[] = false;
// only one of these elements is allowed in a table
$caption = false;
$thead = false;
$tfoot = false;
// as many of these as you want
$cols = array();
$content = array();
$nesting = 0; // current depth so we can determine nodes
$is_collecting = false; // are we globbing together tokens to package
// into one of the collectors?
$collection = array(); // collected nodes
$tag_index = 0; // the first node might be whitespace,
// so this tells us where the start tag is
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0);
if ($token === false) {
// terminating sequence started
} elseif ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
// handle node collection
if ($is_collecting) {
if ($is_child) {
// okay, let's stash the tokens away
// first token tells us the type of the collection
switch ($collection[$tag_index]->name) {
case 'tr':
case 'tbody':
$content[] = $collection;
break;
case 'caption':
if ($caption !== false) break;
$caption = $collection;
break;
case 'thead':
case 'tfoot':
// access the appropriate variable, $thead or $tfoot
$var = $collection[$tag_index]->name;
if ($$var === false) {
$$var = $collection;
} else {
// transmutate the first and less entries into
// tbody tags, and then put into content
$collection[$tag_index]->name = 'tbody';
$collection[count($collection)-1]->name = 'tbody';
$content[] = $collection;
}
break;
case 'colgroup':
$cols[] = $collection;
break;
}
$collection = array();
$is_collecting = false;
$tag_index = 0;
} else {
// add the node to the collection
$collection[] = $token;
}
}
// terminate
if ($token === false) break;
if ($is_child) {
// determine what we're dealing with
if ($token->name == 'col') {
// the only empty tag in the possie, we can handle it
// immediately
$cols[] = array_merge($collection, array($token));
$collection = array();
$tag_index = 0;
continue;
}
switch($token->name) {
case 'caption':
case 'colgroup':
case 'thead':
case 'tfoot':
case 'tbody':
case 'tr':
$is_collecting = true;
$collection[] = $token;
continue;
default:
if (!empty($token->is_whitespace)) {
$collection[] = $token;
$tag_index++;
}
continue;
}
}
}
if (empty($content)) return false;
$ret = array();
if ($caption !== false) $ret = array_merge($ret, $caption);
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
if ($thead !== false) $ret = array_merge($ret, $thead);
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
if (!empty($collection) && $is_collecting == false){
// grab the trailing space
$ret = array_merge($ret, $collection);
}
array_pop($tokens_of_children); // remove phantom token
return ($ret === $tokens_of_children) ? true : $ret;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,580 @@
<?php
/**
* Configuration object that triggers customizable behavior.
*
* @warning This class is strongly defined: that means that the class
* will fail if an undefined directive is retrieved or set.
*
* @note Many classes that could (although many times don't) use the
* configuration object make it a mandatory parameter. This is
* because a configuration object should always be forwarded,
* otherwise, you run the risk of missing a parameter and then
* being stumped when a configuration directive doesn't work.
*
* @todo Reconsider some of the public member variables
*/
class HTMLPurifier_Config
{
/**
* HTML Purifier's version
*/
public $version = '4.1.1';
/**
* Bool indicator whether or not to automatically finalize
* the object if a read operation is done
*/
public $autoFinalize = true;
// protected member variables
/**
* Namespace indexed array of serials for specific namespaces (see
* getSerial() for more info).
*/
protected $serials = array();
/**
* Serial for entire configuration object
*/
protected $serial;
/**
* Parser for variables
*/
protected $parser;
/**
* Reference HTMLPurifier_ConfigSchema for value checking
* @note This is public for introspective purposes. Please don't
* abuse!
*/
public $def;
/**
* Indexed array of definitions
*/
protected $definitions;
/**
* Bool indicator whether or not config is finalized
*/
protected $finalized = false;
/**
* Property list containing configuration directives.
*/
protected $plist;
/**
* Whether or not a set is taking place due to an
* alias lookup.
*/
private $aliasMode;
/**
* Set to false if you do not want line and file numbers in errors
* (useful when unit testing)
*/
public $chatty = true;
/**
* Current lock; only gets to this namespace are allowed.
*/
private $lock;
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed.
*/
public function __construct($definition, $parent = null) {
$parent = $parent ? $parent : $definition->defaultPlist;
$this->plist = new HTMLPurifier_PropertyList($parent);
$this->def = $definition; // keep a copy around for checking
$this->parser = new HTMLPurifier_VarParser_Flexible();
}
/**
* Convenience constructor that creates a config object based on a mixed var
* @param mixed $config Variable that defines the state of the config
* object. Can be: a HTMLPurifier_Config() object,
* an array of directives based on loadArray(),
* or a string filename of an ini file.
* @param HTMLPurifier_ConfigSchema Schema object
* @return Configured HTMLPurifier_Config object
*/
public static function create($config, $schema = null) {
if ($config instanceof HTMLPurifier_Config) {
// pass-through
return $config;
}
if (!$schema) {
$ret = HTMLPurifier_Config::createDefault();
} else {
$ret = new HTMLPurifier_Config($schema);
}
if (is_string($config)) $ret->loadIni($config);
elseif (is_array($config)) $ret->loadArray($config);
return $ret;
}
/**
* Creates a new config object that inherits from a previous one.
* @param HTMLPurifier_Config $config Configuration object to inherit
* from.
* @return HTMLPurifier_Config object with $config as its parent.
*/
public static function inherit(HTMLPurifier_Config $config) {
return new HTMLPurifier_Config($config->def, $config->plist);
}
/**
* Convenience constructor that creates a default configuration object.
* @return Default HTMLPurifier_Config object.
*/
public static function createDefault() {
$definition = HTMLPurifier_ConfigSchema::instance();
$config = new HTMLPurifier_Config($definition);
return $config;
}
/**
* Retreives a value from the configuration.
* @param $key String key
*/
public function get($key, $a = null) {
if ($a !== null) {
$this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
$key = "$key.$a";
}
if (!$this->finalized) $this->autoFinalize();
if (!isset($this->def->info[$key])) {
// can't add % due to SimpleTest bug
$this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
E_USER_WARNING);
return;
}
if (isset($this->def->info[$key]->isAlias)) {
$d = $this->def->info[$key];
$this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key,
E_USER_ERROR);
return;
}
if ($this->lock) {
list($ns) = explode('.', $key);
if ($ns !== $this->lock) {
$this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
return;
}
}
return $this->plist->get($key);
}
/**
* Retreives an array of directives to values from a given namespace
* @param $namespace String namespace
*/
public function getBatch($namespace) {
if (!$this->finalized) $this->autoFinalize();
$full = $this->getAll();
if (!isset($full[$namespace])) {
$this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
E_USER_WARNING);
return;
}
return $full[$namespace];
}
/**
* Returns a md5 signature of a segment of the configuration object
* that uniquely identifies that particular configuration
* @note Revision is handled specially and is removed from the batch
* before processing!
* @param $namespace Namespace to get serial for
*/
public function getBatchSerial($namespace) {
if (empty($this->serials[$namespace])) {
$batch = $this->getBatch($namespace);
unset($batch['DefinitionRev']);
$this->serials[$namespace] = md5(serialize($batch));
}
return $this->serials[$namespace];
}
/**
* Returns a md5 signature for the entire configuration object
* that uniquely identifies that particular configuration
*/
public function getSerial() {
if (empty($this->serial)) {
$this->serial = md5(serialize($this->getAll()));
}
return $this->serial;
}
/**
* Retrieves all directives, organized by namespace
* @warning This is a pretty inefficient function, avoid if you can
*/
public function getAll() {
if (!$this->finalized) $this->autoFinalize();
$ret = array();
foreach ($this->plist->squash() as $name => $value) {
list($ns, $key) = explode('.', $name, 2);
$ret[$ns][$key] = $value;
}
return $ret;
}
/**
* Sets a value to configuration.
* @param $key String key
* @param $value Mixed value
*/
public function set($key, $value, $a = null) {
if (strpos($key, '.') === false) {
$namespace = $key;
$directive = $value;
$value = $a;
$key = "$key.$directive";
$this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
} else {
list($namespace) = explode('.', $key);
}
if ($this->isFinalized('Cannot set directive after finalization')) return;
if (!isset($this->def->info[$key])) {
$this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
E_USER_WARNING);
return;
}
$def = $this->def->info[$key];
if (isset($def->isAlias)) {
if ($this->aliasMode) {
$this->triggerError('Double-aliases not allowed, please fix '.
'ConfigSchema bug with' . $key, E_USER_ERROR);
return;
}
$this->aliasMode = true;
$this->set($def->key, $value);
$this->aliasMode = false;
$this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
return;
}
// Raw type might be negative when using the fully optimized form
// of stdclass, which indicates allow_null == true
$rtype = is_int($def) ? $def : $def->type;
if ($rtype < 0) {
$type = -$rtype;
$allow_null = true;
} else {
$type = $rtype;
$allow_null = isset($def->allow_null);
}
try {
$value = $this->parser->parse($value, $type, $allow_null);
} catch (HTMLPurifier_VarParserException $e) {
$this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
return;
}
if (is_string($value) && is_object($def)) {
// resolve value alias if defined
if (isset($def->aliases[$value])) {
$value = $def->aliases[$value];
}
// check to see if the value is allowed
if (isset($def->allowed) && !isset($def->allowed[$value])) {
$this->triggerError('Value not supported, valid values are: ' .
$this->_listify($def->allowed), E_USER_WARNING);
return;
}
}
$this->plist->set($key, $value);
// reset definitions if the directives they depend on changed
// this is a very costly process, so it's discouraged
// with finalization
if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
$this->definitions[$namespace] = null;
}
$this->serials[$namespace] = false;
}
/**
* Convenience function for error reporting
*/
private function _listify($lookup) {
$list = array();
foreach ($lookup as $name => $b) $list[] = $name;
return implode(', ', $list);
}
/**
* Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
*/
public function getHTMLDefinition($raw = false) {
return $this->getDefinition('HTML', $raw);
}
/**
* Retrieves object reference to the CSS definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
*/
public function getCSSDefinition($raw = false) {
return $this->getDefinition('CSS', $raw);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
*/
public function getDefinition($type, $raw = false) {
if (!$this->finalized) $this->autoFinalize();
// temporarily suspend locks, so we can handle recursive definition calls
$lock = $this->lock;
$this->lock = null;
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create($type, $this);
$this->lock = $lock;
if (!$raw) {
// see if we can quickly supply a definition
if (!empty($this->definitions[$type])) {
if (!$this->definitions[$type]->setup) {
$this->definitions[$type]->setup($this);
$cache->set($this->definitions[$type], $this);
}
return $this->definitions[$type];
}
// memory check missed, try cache
$this->definitions[$type] = $cache->get($this);
if ($this->definitions[$type]) {
// definition in cache, return it
return $this->definitions[$type];
}
} elseif (
!empty($this->definitions[$type]) &&
!$this->definitions[$type]->setup
) {
// raw requested, raw in memory, quick return
return $this->definitions[$type];
}
// quick checks failed, let's create the object
if ($type == 'HTML') {
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
} else {
throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
// quick abort if raw
if ($raw) {
if (is_null($this->get($type . '.DefinitionID'))) {
// fatally error out if definition ID not set
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
return $this->definitions[$type];
}
// set it up
$this->lock = $type;
$this->definitions[$type]->setup($this);
$this->lock = null;
// save in cache
$cache->set($this->definitions[$type], $this);
return $this->definitions[$type];
}
/**
* Loads configuration values from an array with the following structure:
* Namespace.Directive => Value
* @param $config_array Configuration associative array
*/
public function loadArray($config_array) {
if ($this->isFinalized('Cannot load directives after finalization')) return;
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) {
$this->set($key, $value);
} else {
$namespace = $key;
$namespace_values = $value;
foreach ($namespace_values as $directive => $value) {
$this->set($namespace .'.'. $directive, $value);
}
}
}
}
/**
* Returns a list of array(namespace, directive) for all directives
* that are allowed in a web-form context as per an allowed
* namespaces/directives list.
* @param $allowed List of allowed namespaces/directives
*/
public static function getAllowedDirectivesForForm($allowed, $schema = null) {
if (!$schema) {
$schema = HTMLPurifier_ConfigSchema::instance();
}
if ($allowed !== true) {
if (is_string($allowed)) $allowed = array($allowed);
$allowed_ns = array();
$allowed_directives = array();
$blacklisted_directives = array();
foreach ($allowed as $ns_or_directive) {
if (strpos($ns_or_directive, '.') !== false) {
// directive
if ($ns_or_directive[0] == '-') {
$blacklisted_directives[substr($ns_or_directive, 1)] = true;
} else {
$allowed_directives[$ns_or_directive] = true;
}
} else {
// namespace
$allowed_ns[$ns_or_directive] = true;
}
}
}
$ret = array();
foreach ($schema->info as $key => $def) {
list($ns, $directive) = explode('.', $key, 2);
if ($allowed !== true) {
if (isset($blacklisted_directives["$ns.$directive"])) continue;
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
}
if (isset($def->isAlias)) continue;
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
$ret[] = array($ns, $directive);
}
return $ret;
}
/**
* Loads configuration values from $_GET/$_POST that were posted
* via ConfigForm
* @param $array $_GET or $_POST array to import
* @param $index Index/name that the config variables are in
* @param $allowed List of allowed namespaces/directives
* @param $mq_fix Boolean whether or not to enable magic quotes fix
* @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy
*/
public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
$config = HTMLPurifier_Config::create($ret, $schema);
return $config;
}
/**
* Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
* @note Same parameters as loadArrayFromForm
*/
public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) {
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
$this->loadArray($ret);
}
/**
* Prepares an array from a form into something usable for the more
* strict parts of HTMLPurifier_Config
*/
public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
$mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
$allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);
$ret = array();
foreach ($allowed as $key) {
list($ns, $directive) = $key;
$skey = "$ns.$directive";
if (!empty($array["Null_$skey"])) {
$ret[$ns][$directive] = null;
continue;
}
if (!isset($array[$skey])) continue;
$value = $mq ? stripslashes($array[$skey]) : $array[$skey];
$ret[$ns][$directive] = $value;
}
return $ret;
}
/**
* Loads configuration values from an ini file
* @param $filename Name of ini file
*/
public function loadIni($filename) {
if ($this->isFinalized('Cannot load directives after finalization')) return;
$array = parse_ini_file($filename, true);
$this->loadArray($array);
}
/**
* Checks whether or not the configuration object is finalized.
* @param $error String error message, or false for no error
*/
public function isFinalized($error = false) {
if ($this->finalized && $error) {
$this->triggerError($error, E_USER_ERROR);
}
return $this->finalized;
}
/**
* Finalizes configuration only if auto finalize is on and not
* already finalized
*/
public function autoFinalize() {
if ($this->autoFinalize) {
$this->finalize();
} else {
$this->plist->squash(true);
}
}
/**
* Finalizes a configuration object, prohibiting further change
*/
public function finalize() {
$this->finalized = true;
unset($this->parser);
}
/**
* Produces a nicely formatted error message by supplying the
* stack frame information from two levels up and OUTSIDE of
* HTMLPurifier_Config.
*/
protected function triggerError($msg, $no) {
// determine previous stack frame
$backtrace = debug_backtrace();
if ($this->chatty && isset($backtrace[1])) {
$frame = $backtrace[1];
$extra = " on line {$frame['line']} in file {$frame['file']}";
} else {
$extra = '';
}
trigger_error($msg . $extra, $no);
}
/**
* Returns a serialized form of the configuration object that can
* be reconstituted.
*/
public function serialize() {
$this->getDefinition('HTML');
$this->getDefinition('CSS');
$this->getDefinition('URI');
return serialize($this);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,158 @@
<?php
/**
* Configuration definition, defines directives and their defaults.
*/
class HTMLPurifier_ConfigSchema {
/**
* Defaults of the directives and namespaces.
* @note This shares the exact same structure as HTMLPurifier_Config::$conf
*/
public $defaults = array();
/**
* The default property list. Do not edit this property list.
*/
public $defaultPlist;
/**
* Definition of the directives. The structure of this is:
*
* array(
* 'Namespace' => array(
* 'Directive' => new stdclass(),
* )
* )
*
* The stdclass may have the following properties:
*
* - If isAlias isn't set:
* - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
* - allow_null: If set, this directive allows null values
* - aliases: If set, an associative array of value aliases to real values
* - allowed: If set, a lookup array of allowed (string) values
* - If isAlias is set:
* - namespace: Namespace this directive aliases to
* - name: Directive name this directive aliases to
*
* In certain degenerate cases, stdclass will actually be an integer. In
* that case, the value is equivalent to an stdclass with the type
* property set to the integer. If the integer is negative, type is
* equal to the absolute value of integer, and allow_null is true.
*
* This class is friendly with HTMLPurifier_Config. If you need introspection
* about the schema, you're better of using the ConfigSchema_Interchange,
* which uses more memory but has much richer information.
*/
public $info = array();
/**
* Application-wide singleton
*/
static protected $singleton;
public function __construct() {
$this->defaultPlist = new HTMLPurifier_PropertyList();
}
/**
* Unserializes the default ConfigSchema.
*/
public static function makeFromSerial() {
return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
}
/**
* Retrieves an instance of the application-wide configuration definition.
*/
public static function instance($prototype = null) {
if ($prototype !== null) {
HTMLPurifier_ConfigSchema::$singleton = $prototype;
} elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) {
HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
}
return HTMLPurifier_ConfigSchema::$singleton;
}
/**
* Defines a directive for configuration
* @warning Will fail of directive's namespace is defined.
* @warning This method's signature is slightly different from the legacy
* define() static method! Beware!
* @param $namespace Namespace the directive is in
* @param $name Key of directive
* @param $default Default value of directive
* @param $type Allowed type of the directive. See
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $allow_null Whether or not to allow null values
*/
public function add($key, $default, $type, $allow_null) {
$obj = new stdclass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) $obj->allow_null = true;
$this->info[$key] = $obj;
$this->defaults[$key] = $default;
$this->defaultPlist->set($key, $default);
}
/**
* Defines a directive value alias.
*
* Directive value aliases are convenient for developers because it lets
* them set a directive to several values and get the same result.
* @param $namespace Directive's namespace
* @param $name Name of Directive
* @param $aliases Hash of aliased values to the real alias
*/
public function addValueAliases($key, $aliases) {
if (!isset($this->info[$key]->aliases)) {
$this->info[$key]->aliases = array();
}
foreach ($aliases as $alias => $real) {
$this->info[$key]->aliases[$alias] = $real;
}
}
/**
* Defines a set of allowed values for a directive.
* @warning This is slightly different from the corresponding static
* method definition.
* @param $namespace Namespace of directive
* @param $name Name of directive
* @param $allowed Lookup array of allowed values
*/
public function addAllowedValues($key, $allowed) {
$this->info[$key]->allowed = $allowed;
}
/**
* Defines a directive alias for backwards compatibility
* @param $namespace
* @param $name Directive that will be aliased
* @param $new_namespace
* @param $new_name Directive that the alias will be to
*/
public function addAlias($key, $new_key) {
$obj = new stdclass;
$obj->key = $new_key;
$obj->isAlias = true;
$this->info[$key] = $obj;
}
/**
* Replaces any stdclass that only has the type property with type integer.
*/
public function postProcess() {
foreach ($this->info as $key => $v) {
if (count((array) $v) == 1) {
$this->info[$key] = $v->type;
} elseif (count((array) $v) == 2 && isset($v->allow_null)) {
$this->info[$key] = -$v->type;
}
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,44 @@
<?php
/**
* Converts HTMLPurifier_ConfigSchema_Interchange to our runtime
* representation used to perform checks on user configuration.
*/
class HTMLPurifier_ConfigSchema_Builder_ConfigSchema
{
public function build($interchange) {
$schema = new HTMLPurifier_ConfigSchema();
foreach ($interchange->directives as $d) {
$schema->add(
$d->id->key,
$d->default,
$d->type,
$d->typeAllowsNull
);
if ($d->allowed !== null) {
$schema->addAllowedValues(
$d->id->key,
$d->allowed
);
}
foreach ($d->aliases as $alias) {
$schema->addAlias(
$alias->key,
$d->id->key
);
}
if ($d->valueAliases !== null) {
$schema->addValueAliases(
$d->id->key,
$d->valueAliases
);
}
}
$schema->postProcess();
return $schema;
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,106 @@
<?php
/**
* Converts HTMLPurifier_ConfigSchema_Interchange to an XML format,
* which can be further processed to generate documentation.
*/
class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
{
protected $interchange;
private $namespace;
protected function writeHTMLDiv($html) {
$this->startElement('div');
$purifier = HTMLPurifier::getInstance();
$html = $purifier->purify($html);
$this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
$this->writeRaw($html);
$this->endElement(); // div
}
protected function export($var) {
if ($var === array()) return 'array()';
return var_export($var, true);
}
public function build($interchange) {
// global access, only use as last resort
$this->interchange = $interchange;
$this->setIndent(true);
$this->startDocument('1.0', 'UTF-8');
$this->startElement('configdoc');
$this->writeElement('title', $interchange->name);
foreach ($interchange->directives as $directive) {
$this->buildDirective($directive);
}
if ($this->namespace) $this->endElement(); // namespace
$this->endElement(); // configdoc
$this->flush();
}
public function buildDirective($directive) {
// Kludge, although I suppose having a notion of a "root namespace"
// certainly makes things look nicer when documentation is built.
// Depends on things being sorted.
if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) {
if ($this->namespace) $this->endElement(); // namespace
$this->namespace = $directive->id->getRootNamespace();
$this->startElement('namespace');
$this->writeAttribute('id', $this->namespace);
$this->writeElement('name', $this->namespace);
}
$this->startElement('directive');
$this->writeAttribute('id', $directive->id->toString());
$this->writeElement('name', $directive->id->getDirective());
$this->startElement('aliases');
foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString());
$this->endElement(); // aliases
$this->startElement('constraints');
if ($directive->version) $this->writeElement('version', $directive->version);
$this->startElement('type');
if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes');
$this->text($directive->type);
$this->endElement(); // type
if ($directive->allowed) {
$this->startElement('allowed');
foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value);
$this->endElement(); // allowed
}
$this->writeElement('default', $this->export($directive->default));
$this->writeAttribute('xml:space', 'preserve');
if ($directive->external) {
$this->startElement('external');
foreach ($directive->external as $project) $this->writeElement('project', $project);
$this->endElement();
}
$this->endElement(); // constraints
if ($directive->deprecatedVersion) {
$this->startElement('deprecated');
$this->writeElement('version', $directive->deprecatedVersion);
$this->writeElement('use', $directive->deprecatedUse->toString());
$this->endElement(); // deprecated
}
$this->startElement('description');
$this->writeHTMLDiv($directive->description);
$this->endElement(); // description
$this->endElement(); // directive
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
<?php
/**
* Exceptions related to configuration schema
*/
class HTMLPurifier_ConfigSchema_Exception extends HTMLPurifier_Exception
{
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,42 @@
<?php
/**
* Generic schema interchange format that can be converted to a runtime
* representation (HTMLPurifier_ConfigSchema) or HTML documentation. Members
* are completely validated.
*/
class HTMLPurifier_ConfigSchema_Interchange
{
/**
* Name of the application this schema is describing.
*/
public $name;
/**
* Array of Directive ID => array(directive info)
*/
public $directives = array();
/**
* Adds a directive array to $directives
*/
public function addDirective($directive) {
if (isset($this->directives[$i = $directive->id->toString()])) {
throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'");
}
$this->directives[$i] = $directive;
}
/**
* Convenience function to perform standard validation. Throws exception
* on failed validation.
*/
public function validate() {
$validator = new HTMLPurifier_ConfigSchema_Validator();
return $validator->validate($this);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,77 @@
<?php
/**
* Interchange component class describing configuration directives.
*/
class HTMLPurifier_ConfigSchema_Interchange_Directive
{
/**
* ID of directive, instance of HTMLPurifier_ConfigSchema_Interchange_Id.
*/
public $id;
/**
* String type, e.g. 'integer' or 'istring'.
*/
public $type;
/**
* Default value, e.g. 3 or 'DefaultVal'.
*/
public $default;
/**
* HTML description.
*/
public $description;
/**
* Boolean whether or not null is allowed as a value.
*/
public $typeAllowsNull = false;
/**
* Lookup table of allowed scalar values, e.g. array('allowed' => true).
* Null if all values are allowed.
*/
public $allowed;
/**
* List of aliases for the directive,
* e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))).
*/
public $aliases = array();
/**
* Hash of value aliases, e.g. array('alt' => 'real'). Null if value
* aliasing is disabled (necessary for non-scalar types).
*/
public $valueAliases;
/**
* Version of HTML Purifier the directive was introduced, e.g. '1.3.1'.
* Null if the directive has always existed.
*/
public $version;
/**
* ID of directive that supercedes this old directive, is an instance
* of HTMLPurifier_ConfigSchema_Interchange_Id. Null if not deprecated.
*/
public $deprecatedUse;
/**
* Version of HTML Purifier this directive was deprecated. Null if not
* deprecated.
*/
public $deprecatedVersion;
/**
* List of external projects this directive depends on, e.g. array('CSSTidy').
*/
public $external = array();
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,37 @@
<?php
/**
* Represents a directive ID in the interchange format.
*/
class HTMLPurifier_ConfigSchema_Interchange_Id
{
public $key;
public function __construct($key) {
$this->key = $key;
}
/**
* @warning This is NOT magic, to ensure that people don't abuse SPL and
* cause problems for PHP 5.0 support.
*/
public function toString() {
return $this->key;
}
public function getRootNamespace() {
return substr($this->key, 0, strpos($this->key, "."));
}
public function getDirective() {
return substr($this->key, strpos($this->key, ".") + 1);
}
public static function make($id) {
return new HTMLPurifier_ConfigSchema_Interchange_Id($id);
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,180 @@
<?php
class HTMLPurifier_ConfigSchema_InterchangeBuilder
{
/**
* Used for processing DEFAULT, nothing else.
*/
protected $varParser;
public function __construct($varParser = null) {
$this->varParser = $varParser ? $varParser : new HTMLPurifier_VarParser_Native();
}
public static function buildFromDirectory($dir = null) {
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
return $builder->buildDir($interchange, $dir);
}
public function buildDir($interchange, $dir = null) {
if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema';
if (file_exists($dir . '/info.ini')) {
$info = parse_ini_file($dir . '/info.ini');
$interchange->name = $info['name'];
}
$files = array();
$dh = opendir($dir);
while (false !== ($file = readdir($dh))) {
if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') {
continue;
}
$files[] = $file;
}
closedir($dh);
sort($files);
foreach ($files as $file) {
$this->buildFile($interchange, $dir . '/' . $file);
}
return $interchange;
}
public function buildFile($interchange, $file) {
$parser = new HTMLPurifier_StringHashParser();
$this->build(
$interchange,
new HTMLPurifier_StringHash( $parser->parseFile($file) )
);
}
/**
* Builds an interchange object based on a hash.
* @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build
* @param $hash HTMLPurifier_ConfigSchema_StringHash source data
*/
public function build($interchange, $hash) {
if (!$hash instanceof HTMLPurifier_StringHash) {
$hash = new HTMLPurifier_StringHash($hash);
}
if (!isset($hash['ID'])) {
throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID');
}
if (strpos($hash['ID'], '.') === false) {
if (count($hash) == 2 && isset($hash['DESCRIPTION'])) {
$hash->offsetGet('DESCRIPTION'); // prevent complaining
} else {
throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace');
}
} else {
$this->buildDirective($interchange, $hash);
}
$this->_findUnused($hash);
}
public function buildDirective($interchange, $hash) {
$directive = new HTMLPurifier_ConfigSchema_Interchange_Directive();
// These are required elements:
$directive->id = $this->id($hash->offsetGet('ID'));
$id = $directive->id->toString(); // convenience
if (isset($hash['TYPE'])) {
$type = explode('/', $hash->offsetGet('TYPE'));
if (isset($type[1])) $directive->typeAllowsNull = true;
$directive->type = $type[0];
} else {
throw new HTMLPurifier_ConfigSchema_Exception("TYPE in directive hash '$id' not defined");
}
if (isset($hash['DEFAULT'])) {
try {
$directive->default = $this->varParser->parse($hash->offsetGet('DEFAULT'), $directive->type, $directive->typeAllowsNull);
} catch (HTMLPurifier_VarParserException $e) {
throw new HTMLPurifier_ConfigSchema_Exception($e->getMessage() . " in DEFAULT in directive hash '$id'");
}
}
if (isset($hash['DESCRIPTION'])) {
$directive->description = $hash->offsetGet('DESCRIPTION');
}
if (isset($hash['ALLOWED'])) {
$directive->allowed = $this->lookup($this->evalArray($hash->offsetGet('ALLOWED')));
}
if (isset($hash['VALUE-ALIASES'])) {
$directive->valueAliases = $this->evalArray($hash->offsetGet('VALUE-ALIASES'));
}
if (isset($hash['ALIASES'])) {
$raw_aliases = trim($hash->offsetGet('ALIASES'));
$aliases = preg_split('/\s*,\s*/', $raw_aliases);
foreach ($aliases as $alias) {
$directive->aliases[] = $this->id($alias);
}
}
if (isset($hash['VERSION'])) {
$directive->version = $hash->offsetGet('VERSION');
}
if (isset($hash['DEPRECATED-USE'])) {
$directive->deprecatedUse = $this->id($hash->offsetGet('DEPRECATED-USE'));
}
if (isset($hash['DEPRECATED-VERSION'])) {
$directive->deprecatedVersion = $hash->offsetGet('DEPRECATED-VERSION');
}
if (isset($hash['EXTERNAL'])) {
$directive->external = preg_split('/\s*,\s*/', trim($hash->offsetGet('EXTERNAL')));
}
$interchange->addDirective($directive);
}
/**
* Evaluates an array PHP code string without array() wrapper
*/
protected function evalArray($contents) {
return eval('return array('. $contents .');');
}
/**
* Converts an array list into a lookup array.
*/
protected function lookup($array) {
$ret = array();
foreach ($array as $val) $ret[$val] = true;
return $ret;
}
/**
* Convenience function that creates an HTMLPurifier_ConfigSchema_Interchange_Id
* object based on a string Id.
*/
protected function id($id) {
return HTMLPurifier_ConfigSchema_Interchange_Id::make($id);
}
/**
* Triggers errors for any unused keys passed in the hash; such keys
* may indicate typos, missing values, etc.
* @param $hash Instance of ConfigSchema_StringHash to check.
*/
protected function _findUnused($hash) {
$accessed = $hash->getAccessed();
foreach ($hash as $k => $v) {
if (!isset($accessed[$k])) {
trigger_error("String hash key '$k' not used by builder", E_USER_NOTICE);
}
}
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,206 @@
<?php
/**
* Performs validations on HTMLPurifier_ConfigSchema_Interchange
*
* @note If you see '// handled by InterchangeBuilder', that means a
* design decision in that class would prevent this validation from
* ever being necessary. We have them anyway, however, for
* redundancy.
*/
class HTMLPurifier_ConfigSchema_Validator
{
/**
* Easy to access global objects.
*/
protected $interchange, $aliases;
/**
* Context-stack to provide easy to read error messages.
*/
protected $context = array();
/**
* HTMLPurifier_VarParser to test default's type.
*/
protected $parser;
public function __construct() {
$this->parser = new HTMLPurifier_VarParser();
}
/**
* Validates a fully-formed interchange object. Throws an
* HTMLPurifier_ConfigSchema_Exception if there's a problem.
*/
public function validate($interchange) {
$this->interchange = $interchange;
$this->aliases = array();
// PHP is a bit lax with integer <=> string conversions in
// arrays, so we don't use the identical !== comparison
foreach ($interchange->directives as $i => $directive) {
$id = $directive->id->toString();
if ($i != $id) $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'");
$this->validateDirective($directive);
}
return true;
}
/**
* Validates a HTMLPurifier_ConfigSchema_Interchange_Id object.
*/
public function validateId($id) {
$id_string = $id->toString();
$this->context[] = "id '$id_string'";
if (!$id instanceof HTMLPurifier_ConfigSchema_Interchange_Id) {
// handled by InterchangeBuilder
$this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id');
}
// keys are now unconstrained (we might want to narrow down to A-Za-z0-9.)
// we probably should check that it has at least one namespace
$this->with($id, 'key')
->assertNotEmpty()
->assertIsString(); // implicit assertIsString handled by InterchangeBuilder
array_pop($this->context);
}
/**
* Validates a HTMLPurifier_ConfigSchema_Interchange_Directive object.
*/
public function validateDirective($d) {
$id = $d->id->toString();
$this->context[] = "directive '$id'";
$this->validateId($d->id);
$this->with($d, 'description')
->assertNotEmpty();
// BEGIN - handled by InterchangeBuilder
$this->with($d, 'type')
->assertNotEmpty();
$this->with($d, 'typeAllowsNull')
->assertIsBool();
try {
// This also tests validity of $d->type
$this->parser->parse($d->default, $d->type, $d->typeAllowsNull);
} catch (HTMLPurifier_VarParserException $e) {
$this->error('default', 'had error: ' . $e->getMessage());
}
// END - handled by InterchangeBuilder
if (!is_null($d->allowed) || !empty($d->valueAliases)) {
// allowed and valueAliases require that we be dealing with
// strings, so check for that early.
$d_int = HTMLPurifier_VarParser::$types[$d->type];
if (!isset(HTMLPurifier_VarParser::$stringTypes[$d_int])) {
$this->error('type', 'must be a string type when used with allowed or value aliases');
}
}
$this->validateDirectiveAllowed($d);
$this->validateDirectiveValueAliases($d);
$this->validateDirectiveAliases($d);
array_pop($this->context);
}
/**
* Extra validation if $allowed member variable of
* HTMLPurifier_ConfigSchema_Interchange_Directive is defined.
*/
public function validateDirectiveAllowed($d) {
if (is_null($d->allowed)) return;
$this->with($d, 'allowed')
->assertNotEmpty()
->assertIsLookup(); // handled by InterchangeBuilder
if (is_string($d->default) && !isset($d->allowed[$d->default])) {
$this->error('default', 'must be an allowed value');
}
$this->context[] = 'allowed';
foreach ($d->allowed as $val => $x) {
if (!is_string($val)) $this->error("value $val", 'must be a string');
}
array_pop($this->context);
}
/**
* Extra validation if $valueAliases member variable of
* HTMLPurifier_ConfigSchema_Interchange_Directive is defined.
*/
public function validateDirectiveValueAliases($d) {
if (is_null($d->valueAliases)) return;
$this->with($d, 'valueAliases')
->assertIsArray(); // handled by InterchangeBuilder
$this->context[] = 'valueAliases';
foreach ($d->valueAliases as $alias => $real) {
if (!is_string($alias)) $this->error("alias $alias", 'must be a string');
if (!is_string($real)) $this->error("alias target $real from alias '$alias'", 'must be a string');
if ($alias === $real) {
$this->error("alias '$alias'", "must not be an alias to itself");
}
}
if (!is_null($d->allowed)) {
foreach ($d->valueAliases as $alias => $real) {
if (isset($d->allowed[$alias])) {
$this->error("alias '$alias'", 'must not be an allowed value');
} elseif (!isset($d->allowed[$real])) {
$this->error("alias '$alias'", 'must be an alias to an allowed value');
}
}
}
array_pop($this->context);
}
/**
* Extra validation if $aliases member variable of
* HTMLPurifier_ConfigSchema_Interchange_Directive is defined.
*/
public function validateDirectiveAliases($d) {
$this->with($d, 'aliases')
->assertIsArray(); // handled by InterchangeBuilder
$this->context[] = 'aliases';
foreach ($d->aliases as $alias) {
$this->validateId($alias);
$s = $alias->toString();
if (isset($this->interchange->directives[$s])) {
$this->error("alias '$s'", 'collides with another directive');
}
if (isset($this->aliases[$s])) {
$other_directive = $this->aliases[$s];
$this->error("alias '$s'", "collides with alias for directive '$other_directive'");
}
$this->aliases[$s] = $d->id->toString();
}
array_pop($this->context);
}
// protected helper functions
/**
* Convenience function for generating HTMLPurifier_ConfigSchema_ValidatorAtom
* for validating simple member variables of objects.
*/
protected function with($obj, $member) {
return new HTMLPurifier_ConfigSchema_ValidatorAtom($this->getFormattedContext(), $obj, $member);
}
/**
* Emits an error, providing helpful context.
*/
protected function error($target, $msg) {
if ($target !== false) $prefix = ucfirst($target) . ' in ' . $this->getFormattedContext();
else $prefix = ucfirst($this->getFormattedContext());
throw new HTMLPurifier_ConfigSchema_Exception(trim($prefix . ' ' . $msg));
}
/**
* Returns a formatted context string.
*/
protected function getFormattedContext() {
return implode(' in ', array_reverse($this->context));
}
}
// vim: et sw=4 sts=4

View file

@ -0,0 +1,66 @@
<?php
/**
* Fluent interface for validating the contents of member variables.
* This should be immutable. See HTMLPurifier_ConfigSchema_Validator for
* use-cases. We name this an 'atom' because it's ONLY for validations that
* are independent and usually scalar.
*/
class HTMLPurifier_ConfigSchema_ValidatorAtom
{
protected $context, $obj, $member, $contents;
public function __construct($context, $obj, $member) {
$this->context = $context;
$this->obj = $obj;
$this->member = $member;
$this->contents =& $obj->$member;
}
public function assertIsString() {
if (!is_string($this->contents)) $this->error('must be a string');
return $this;
}
public function assertIsBool() {
if (!is_bool($this->contents)) $this->error('must be a boolean');
return $this;
}
public function assertIsArray() {
if (!is_array($this->contents)) $this->error('must be an array');
return $this;
}
public function assertNotNull() {
if ($this->contents === null) $this->error('must not be null');
return $this;
}
public function assertAlnum() {
$this->assertIsString();
if (!ctype_alnum($this->contents)) $this->error('must be alphanumeric');
return $this;
}
public function assertNotEmpty() {
if (empty($this->contents)) $this->error('must not be empty');
return $this;
}
public function assertIsLookup() {
$this->assertIsArray();
foreach ($this->contents as $v) {
if ($v !== true) $this->error('must be a lookup array');
}
return $this;
}
protected function error($msg) {
throw new HTMLPurifier_ConfigSchema_Exception(ucfirst($this->member) . ' in ' . $this->context . ' ' . $msg);
}
}
// vim: et sw=4 sts=4

Binary file not shown.

View file

@ -0,0 +1,8 @@
Attr.AllowedClasses
TYPE: lookup/null
VERSION: 4.0.0
DEFAULT: null
--DESCRIPTION--
List of allowed class values in the class attribute. By default, this is null,
which means all classes are allowed.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
Attr.AllowedFrameTargets
TYPE: lookup
DEFAULT: array()
--DESCRIPTION--
Lookup table of all allowed link frame targets. Some commonly used link
targets include _blank, _self, _parent and _top. Values should be
lowercase, as validation will be done in a case-sensitive manner despite
W3C's recommendation. XHTML 1.0 Strict does not permit the target attribute
so this directive will have no effect in that doctype. XHTML 1.1 does not
enable the Target module by default, you will have to manually enable it
(see the module documentation for more details.)
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
Attr.AllowedRel
TYPE: lookup
VERSION: 1.6.0
DEFAULT: array()
--DESCRIPTION--
List of allowed forward document relationships in the rel attribute. Common
values may be nofollow or print. By default, this is empty, meaning that no
document relationships are allowed.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
Attr.AllowedRev
TYPE: lookup
VERSION: 1.6.0
DEFAULT: array()
--DESCRIPTION--
List of allowed reverse document relationships in the rev attribute. This
attribute is a bit of an edge-case; if you don't know what it is for, stay
away.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,19 @@
Attr.ClassUseCDATA
TYPE: bool/null
DEFAULT: null
VERSION: 4.0.0
--DESCRIPTION--
If null, class will auto-detect the doctype and, if matching XHTML 1.1 or
XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise,
it will use a relaxed CDATA definition. If true, the relaxed CDATA definition
is forced; if false, the NMTOKENS definition is forced. To get behavior
of HTML Purifier prior to 4.0.0, set this directive to false.
Some rational behind the auto-detection:
in previous versions of HTML Purifier, it was assumed that the form of
class was NMTOKENS, as specified by the XHTML Modularization (representing
XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however
specify class as CDATA. HTML 5 effectively defines it as CDATA, but
with the additional constraint that each name should be unique (this is not
explicitly outlined in previous specifications).
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
Attr.DefaultImageAlt
TYPE: string/null
DEFAULT: null
VERSION: 3.2.0
--DESCRIPTION--
This is the content of the alt tag of an image if the user had not
previously specified an alt attribute. This applies to all images without
a valid alt attribute, as opposed to %Attr.DefaultInvalidImageAlt, which
only applies to invalid images, and overrides in the case of an invalid image.
Default behavior with null is to use the basename of the src tag for the alt.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
Attr.DefaultInvalidImage
TYPE: string
DEFAULT: ''
--DESCRIPTION--
This is the default image an img tag will be pointed to if it does not have
a valid src attribute. In future versions, we may allow the image tag to
be removed completely, but due to design issues, this is not possible right
now.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,8 @@
Attr.DefaultInvalidImageAlt
TYPE: string
DEFAULT: 'Invalid image'
--DESCRIPTION--
This is the content of the alt tag of an invalid image if the user had not
previously specified an alt attribute. It has no effect when the image is
valid but there was no alt attribute present.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,10 @@
Attr.DefaultTextDir
TYPE: string
DEFAULT: 'ltr'
--DESCRIPTION--
Defines the default text direction (ltr or rtl) of the document being
parsed. This generally is the same as the value of the dir attribute in
HTML, or ltr if that is not specified.
--ALLOWED--
'ltr', 'rtl'
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,16 @@
Attr.EnableID
TYPE: bool
DEFAULT: false
VERSION: 1.2.0
--DESCRIPTION--
Allows the ID attribute in HTML. This is disabled by default due to the
fact that without proper configuration user input can easily break the
validation of a webpage by specifying an ID that is already on the
surrounding HTML. If you don't mind throwing caution to the wind, enable
this directive, but I strongly recommend you also consider blacklisting IDs
you use (%Attr.IDBlacklist) or prefixing all user supplied IDs
(%Attr.IDPrefix). When set to true HTML Purifier reverts to the behavior of
pre-1.2.0 versions.
--ALIASES--
HTML.EnableAttrID
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,8 @@
Attr.ForbiddenClasses
TYPE: lookup
VERSION: 4.0.0
DEFAULT: array()
--DESCRIPTION--
List of forbidden class values in the class attribute. By default, this is
empty, which means that no classes are forbidden. See also %Attr.AllowedClasses.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,5 @@
Attr.IDBlacklist
TYPE: list
DEFAULT: array()
DESCRIPTION: Array of IDs not allowed in the document.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
Attr.IDBlacklistRegexp
TYPE: string/null
VERSION: 1.6.0
DEFAULT: NULL
--DESCRIPTION--
PCRE regular expression to be matched against all IDs. If the expression is
matches, the ID is rejected. Use this with care: may cause significant
degradation. ID matching is done after all other validation.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
Attr.IDPrefix
TYPE: string
VERSION: 1.2.0
DEFAULT: ''
--DESCRIPTION--
String to prefix to IDs. If you have no idea what IDs your pages may use,
you may opt to simply add a prefix to all user-submitted ID attributes so
that they are still usable, but will not conflict with core page IDs.
Example: setting the directive to 'user_' will result in a user submitted
'foo' to become 'user_foo' Be sure to set %HTML.EnableAttrID to true
before using this.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
Attr.IDPrefixLocal
TYPE: string
VERSION: 1.2.0
DEFAULT: ''
--DESCRIPTION--
Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If you
need to allow multiple sets of user content on web page, you may need to
have a seperate prefix that changes with each iteration. This way,
seperately submitted user content displayed on the same page doesn't
clobber each other. Ideal values are unique identifiers for the content it
represents (i.e. the id of the row in the database). Be sure to add a
seperator (like an underscore) at the end. Warning: this directive will
not work unless %Attr.IDPrefix is set to a non-empty value!
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,31 @@
AutoFormat.AutoParagraph
TYPE: bool
VERSION: 2.0.1
DEFAULT: false
--DESCRIPTION--
<p>
This directive turns on auto-paragraphing, where double newlines are
converted in to paragraphs whenever possible. Auto-paragraphing:
</p>
<ul>
<li>Always applies to inline elements or text in the root node,</li>
<li>Applies to inline elements or text with double newlines in nodes
that allow paragraph tags,</li>
<li>Applies to double newlines in paragraph tags</li>
</ul>
<p>
<code>p</code> tags must be allowed for this directive to take effect.
We do not use <code>br</code> tags for paragraphing, as that is
semantically incorrect.
</p>
<p>
To prevent auto-paragraphing as a content-producer, refrain from using
double-newlines except to specify a new paragraph or in contexts where
it has special meaning (whitespace usually has no meaning except in
tags like <code>pre</code>, so this should not be difficult.) To prevent
the paragraphing of inline text adjacent to block elements, wrap them
in <code>div</code> tags (the behavior is slightly different outside of
the root node.)
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
AutoFormat.Custom
TYPE: list
VERSION: 2.0.1
DEFAULT: array()
--DESCRIPTION--
<p>
This directive can be used to add custom auto-format injectors.
Specify an array of injector names (class name minus the prefix)
or concrete implementations. Injector class must exist.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
AutoFormat.DisplayLinkURI
TYPE: bool
VERSION: 3.2.0
DEFAULT: false
--DESCRIPTION--
<p>
This directive turns on the in-text display of URIs in &lt;a&gt; tags, and disables
those links. For example, <a href="http://example.com">example</a> becomes
example (<a>http://example.com</a>).
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
AutoFormat.Linkify
TYPE: bool
VERSION: 2.0.1
DEFAULT: false
--DESCRIPTION--
<p>
This directive turns on linkification, auto-linking http, ftp and
https URLs. <code>a</code> tags with the <code>href</code> attribute
must be allowed.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
AutoFormat.PurifierLinkify.DocURL
TYPE: string
VERSION: 2.0.1
DEFAULT: '#%s'
ALIASES: AutoFormatParam.PurifierLinkifyDocURL
--DESCRIPTION--
<p>
Location of configuration documentation to link to, let %s substitute
into the configuration's namespace and directive names sans the percent
sign.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
AutoFormat.PurifierLinkify
TYPE: bool
VERSION: 2.0.1
DEFAULT: false
--DESCRIPTION--
<p>
Internal auto-formatter that converts configuration directives in
syntax <a>%Namespace.Directive</a> to links. <code>a</code> tags
with the <code>href</code> attribute must be allowed.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions
TYPE: lookup
VERSION: 4.0.0
DEFAULT: array('td' => true, 'th' => true)
--DESCRIPTION--
<p>
When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp
are enabled, this directive defines what HTML elements should not be
removede if they have only a non-breaking space in them.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,15 @@
AutoFormat.RemoveEmpty.RemoveNbsp
TYPE: bool
VERSION: 4.0.0
DEFAULT: false
--DESCRIPTION--
<p>
When enabled, HTML Purifier will treat any elements that contain only
non-breaking spaces as well as regular whitespace as empty, and remove
them when %AutoForamt.RemoveEmpty is enabled.
</p>
<p>
See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements
that don't have this behavior applied to them.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,46 @@
AutoFormat.RemoveEmpty
TYPE: bool
VERSION: 3.2.0
DEFAULT: false
--DESCRIPTION--
<p>
When enabled, HTML Purifier will attempt to remove empty elements that
contribute no semantic information to the document. The following types
of nodes will be removed:
</p>
<ul><li>
Tags with no attributes and no content, and that are not empty
elements (remove <code>&lt;a&gt;&lt;/a&gt;</code> but not
<code>&lt;br /&gt;</code>), and
</li>
<li>
Tags with no content, except for:<ul>
<li>The <code>colgroup</code> element, or</li>
<li>
Elements with the <code>id</code> or <code>name</code> attribute,
when those attributes are permitted on those elements.
</li>
</ul></li>
</ul>
<p>
Please be very careful when using this functionality; while it may not
seem that empty elements contain useful information, they can alter the
layout of a document given appropriate styling. This directive is most
useful when you are processing machine-generated HTML, please avoid using
it on regular user HTML.
</p>
<p>
Elements that contain only whitespace will be treated as empty. Non-breaking
spaces, however, do not count as whitespace. See
%AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.
</p>
<p>
This algorithm is not perfect; you may still notice some empty tags,
particularly if a node had elements, but those elements were later removed
because they were not permitted in that context, or tags that, after
being auto-closed by another tag, where empty. This is for safety reasons
to prevent clever code from breaking validation. The general rule of thumb:
if a tag looked empty on the way in, it will get removed; if HTML Purifier
made it empty, it will stay.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
AutoFormat.RemoveSpansWithoutAttributes
TYPE: bool
VERSION: 4.0.1
DEFAULT: false
--DESCRIPTION--
<p>
This directive causes <code>span</code> tags without any attributes
to be removed. It will also remove spans that had all attributes
removed during processing.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,8 @@
CSS.AllowImportant
TYPE: bool
DEFAULT: false
VERSION: 3.1.0
--DESCRIPTION--
This parameter determines whether or not !important cascade modifiers should
be allowed in user CSS. If false, !important will stripped.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
CSS.AllowTricky
TYPE: bool
DEFAULT: false
VERSION: 3.1.0
--DESCRIPTION--
This parameter determines whether or not to allow "tricky" CSS properties and
values. Tricky CSS properties/values can drastically modify page layout or
be used for deceptive practices but do not directly constitute a security risk.
For example, <code>display:none;</code> is considered a tricky property that
will only be allowed if this directive is set to true.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,18 @@
CSS.AllowedProperties
TYPE: lookup/null
VERSION: 3.1.0
DEFAULT: NULL
--DESCRIPTION--
<p>
If HTML Purifier's style attributes set is unsatisfactory for your needs,
you can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier
usual feature set, so you cannot add an attribute that HTML Purifier never
supported in the first place.
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
CSS.DefinitionRev
TYPE: int
VERSION: 2.0.0
DEFAULT: 1
--DESCRIPTION--
<p>
Revision identifier for your custom definition. See
%HTML.DefinitionRev for details.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,16 @@
CSS.MaxImgLength
TYPE: string/null
DEFAULT: '1200px'
VERSION: 3.1.1
--DESCRIPTION--
<p>
This parameter sets the maximum allowed length on <code>img</code> tags,
effectively the <code>width</code> and <code>height</code> properties.
Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is
in place to prevent imagecrash attacks, disable with null at your own risk.
This directive is similar to %HTML.MaxImgLength, and both should be
concurrently edited, although there are
subtle differences in the input format (the CSS max is a number with
a unit).
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,10 @@
CSS.Proprietary
TYPE: bool
VERSION: 3.0.0
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to allow safe, proprietary CSS values.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
Cache.DefinitionImpl
TYPE: string/null
VERSION: 2.0.0
DEFAULT: 'Serializer'
--DESCRIPTION--
This directive defines which method to use when caching definitions,
the complex data-type that makes HTML Purifier tick. Set to null
to disable caching (not recommended, as you will see a definite
performance degradation).
--ALIASES--
Core.DefinitionCache
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,13 @@
Cache.SerializerPath
TYPE: string/null
VERSION: 2.0.0
DEFAULT: NULL
--DESCRIPTION--
<p>
Absolute path with no trailing slash to store serialized definitions in.
Default is within the
HTML Purifier library inside DefinitionCache/Serializer. This
path must be writable by the webserver.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,18 @@
Core.AggressivelyFixLt
TYPE: bool
VERSION: 2.1.0
DEFAULT: true
--DESCRIPTION--
<p>
This directive enables aggressive pre-filter fixes HTML Purifier can
perform in order to ensure that open angled-brackets do not get killed
during parsing stage. Enabling this will result in two preg_replace_callback
calls and at least two preg_replace calls for every HTML document parsed;
if your users make very well-formed HTML, you can set this directive false.
This has no effect when DirectLex is used.
</p>
<p>
<strong>Notice:</strong> This directive's default turned from false to true
in HTML Purifier 3.2.0.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
Core.CollectErrors
TYPE: bool
VERSION: 2.0.0
DEFAULT: false
--DESCRIPTION--
Whether or not to collect errors found while filtering the document. This
is a useful way to give feedback to your users. <strong>Warning:</strong>
Currently this feature is very patchy and experimental, with lots of
possible error messages not yet implemented. It will not cause any
problems, but it may not help your users either.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,28 @@
Core.ColorKeywords
TYPE: hash
VERSION: 2.0.0
--DEFAULT--
array (
'maroon' => '#800000',
'red' => '#FF0000',
'orange' => '#FFA500',
'yellow' => '#FFFF00',
'olive' => '#808000',
'purple' => '#800080',
'fuchsia' => '#FF00FF',
'white' => '#FFFFFF',
'lime' => '#00FF00',
'green' => '#008000',
'navy' => '#000080',
'blue' => '#0000FF',
'aqua' => '#00FFFF',
'teal' => '#008080',
'black' => '#000000',
'silver' => '#C0C0C0',
'gray' => '#808080',
)
--DESCRIPTION--
Lookup array of color names to six digit hexadecimal number corresponding
to color, with preceding hash mark. Used when parsing colors.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
Core.ConvertDocumentToFragment
TYPE: bool
DEFAULT: true
--DESCRIPTION--
This parameter determines whether or not the filter should convert
input that is a full document with html and body tags to a fragment
of just the contents of a body tag. This parameter is simply something
HTML Purifier can do during an edge-case: for most inputs, this
processing is not necessary.
--ALIASES--
Core.AcceptFullDocuments
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,17 @@
Core.DirectLexLineNumberSyncInterval
TYPE: int
VERSION: 2.0.0
DEFAULT: 0
--DESCRIPTION--
<p>
Specifies the number of tokens the DirectLex line number tracking
implementations should process before attempting to resyncronize the
current line count by manually counting all previous new-lines. When
at 0, this functionality is disabled. Lower values will decrease
performance, and this is only strictly necessary if the counting
algorithm is buggy (in which case you should report it as a bug).
This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is
not being used.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,15 @@
Core.Encoding
TYPE: istring
DEFAULT: 'utf-8'
--DESCRIPTION--
If for some reason you are unable to convert all webpages to UTF-8, you can
use this directive as a stop-gap compatibility change to let HTML Purifier
deal with non UTF-8 input. This technique has notable deficiencies:
absolutely no characters outside of the selected character encoding will be
preserved, not even the ones that have been ampersand escaped (this is due
to a UTF-8 specific <em>feature</em> that automatically resolves all
entities), making it pretty useless for anything except the most I18N-blind
applications, although %Core.EscapeNonASCIICharacters offers fixes this
trouble with another tradeoff. This directive only accepts ISO-8859-1 if
iconv is not enabled.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,10 @@
Core.EscapeInvalidChildren
TYPE: bool
DEFAULT: false
--DESCRIPTION--
When true, a child is found that is not allowed in the context of the
parent element will be transformed into text as if it were ASCII. When
false, that element and all internal tags will be dropped, though text will
be preserved. There is no option for dropping the element but preserving
child nodes.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,7 @@
Core.EscapeInvalidTags
TYPE: bool
DEFAULT: false
--DESCRIPTION--
When true, invalid tags will be written back to the document as plain text.
Otherwise, they are silently dropped.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,13 @@
Core.EscapeNonASCIICharacters
TYPE: bool
VERSION: 1.4.0
DEFAULT: false
--DESCRIPTION--
This directive overcomes a deficiency in %Core.Encoding by blindly
converting all non-ASCII characters into decimal numeric entities before
converting it to its native encoding. This means that even characters that
can be expressed in the non-UTF-8 encoding will be entity-ized, which can
be a real downer for encodings like Big5. It also assumes that the ASCII
repetoire is available, although this is the case for almost all encodings.
Anyway, use UTF-8!
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,19 @@
Core.HiddenElements
TYPE: lookup
--DEFAULT--
array (
'script' => true,
'style' => true,
)
--DESCRIPTION--
<p>
This directive is a lookup array of elements which should have their
contents removed when they are not allowed by the HTML definition.
For example, the contents of a <code>script</code> tag are not
normally shown in a document, so if script tags are to be removed,
their contents should be removed to. This is opposed to a <code>b</code>
tag, which defines some presentational changes but does not hide its
contents.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,10 @@
Core.Language
TYPE: string
VERSION: 2.0.0
DEFAULT: 'en'
--DESCRIPTION--
ISO 639 language code for localizable things in HTML Purifier to use,
which is mainly error reporting. There is currently only an English (en)
translation, so this directive is currently useless.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,34 @@
Core.LexerImpl
TYPE: mixed/null
VERSION: 2.0.0
DEFAULT: NULL
--DESCRIPTION--
<p>
This parameter determines what lexer implementation can be used. The
valid values are:
</p>
<dl>
<dt><em>null</em></dt>
<dd>
Recommended, the lexer implementation will be auto-detected based on
your PHP-version and configuration.
</dd>
<dt><em>string</em> lexer identifier</dt>
<dd>
This is a slim way of manually overridding the implementation.
Currently recognized values are: DOMLex (the default PHP5
implementation)
and DirectLex (the default PHP4 implementation). Only use this if
you know what you are doing: usually, the auto-detection will
manage things for cases you aren't even aware of.
</dd>
<dt><em>object</em> lexer instance</dt>
<dd>
Super-advanced: you can specify your own, custom, implementation that
implements the interface defined by <code>HTMLPurifier_Lexer</code>.
I may remove this option simply because I don't expect anyone
to use it.
</dd>
</dl>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,16 @@
Core.MaintainLineNumbers
TYPE: bool/null
VERSION: 2.0.0
DEFAULT: NULL
--DESCRIPTION--
<p>
If true, HTML Purifier will add line number information to all tokens.
This is useful when error reporting is turned on, but can result in
significant performance degradation and should not be used when
unnecessary. This directive must be used with the DirectLex lexer,
as the DOMLex lexer does not (yet) support this functionality.
If the value is null, an appropriate value will be selected based
on other configuration.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
Core.RemoveInvalidImg
TYPE: bool
DEFAULT: true
VERSION: 1.3.0
--DESCRIPTION--
<p>
This directive enables pre-emptive URI checking in <code>img</code>
tags, as the attribute validation strategy is not authorized to
remove elements from the document. Revert to pre-1.3.0 behavior by setting to false.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
Core.RemoveScriptContents
TYPE: bool/null
DEFAULT: NULL
VERSION: 2.0.0
DEPRECATED-VERSION: 2.1.0
DEPRECATED-USE: Core.HiddenElements
--DESCRIPTION--
<p>
This directive enables HTML Purifier to remove not only script tags
but all of their contents.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
Filter.Custom
TYPE: list
VERSION: 3.1.0
DEFAULT: array()
--DESCRIPTION--
<p>
This directive can be used to add custom filters; it is nearly the
equivalent of the now deprecated <code>HTMLPurifier-&gt;addFilter()</code>
method. Specify an array of concrete implementations.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
Filter.ExtractStyleBlocks.Escaping
TYPE: bool
VERSION: 3.0.0
DEFAULT: true
ALIASES: Filter.ExtractStyleBlocksEscaping, FilterParam.ExtractStyleBlocksEscaping
--DESCRIPTION--
<p>
Whether or not to escape the dangerous characters &lt;, &gt; and &amp;
as \3C, \3E and \26, respectively. This is can be safely set to false
if the contents of StyleBlocks will be placed in an external stylesheet,
where there is no risk of it being interpreted as HTML.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,29 @@
Filter.ExtractStyleBlocks.Scope
TYPE: string/null
VERSION: 3.0.0
DEFAULT: NULL
ALIASES: Filter.ExtractStyleBlocksScope, FilterParam.ExtractStyleBlocksScope
--DESCRIPTION--
<p>
If you would like users to be able to define external stylesheets, but
only allow them to specify CSS declarations for a specific node and
prevent them from fiddling with other elements, use this directive.
It accepts any valid CSS selector, and will prepend this to any
CSS declaration extracted from the document. For example, if this
directive is set to <code>#user-content</code> and a user uses the
selector <code>a:hover</code>, the final selector will be
<code>#user-content a:hover</code>.
</p>
<p>
The comma shorthand may be used; consider the above example, with
<code>#user-content, #user-content2</code>, the final selector will
be <code>#user-content a:hover, #user-content2 a:hover</code>.
</p>
<p>
<strong>Warning:</strong> It is possible for users to bypass this measure
using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML
Purifier, and I am working to get it fixed. Until then, HTML Purifier
performs a basic check to prevent this.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,16 @@
Filter.ExtractStyleBlocks.TidyImpl
TYPE: mixed/null
VERSION: 3.1.0
DEFAULT: NULL
ALIASES: FilterParam.ExtractStyleBlocksTidyImpl
--DESCRIPTION--
<p>
If left NULL, HTML Purifier will attempt to instantiate a <code>csstidy</code>
class to use for internal cleaning. This will usually be good enough.
</p>
<p>
However, for trusted user input, you can set this to <code>false</code> to
disable cleaning. In addition, you can supply your own concrete implementation
of Tidy's interface to use, although I don't know why you'd want to do that.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,74 @@
Filter.ExtractStyleBlocks
TYPE: bool
VERSION: 3.1.0
DEFAULT: false
EXTERNAL: CSSTidy
--DESCRIPTION--
<p>
This directive turns on the style block extraction filter, which removes
<code>style</code> blocks from input HTML, cleans them up with CSSTidy,
and places them in the <code>StyleBlocks</code> context variable, for further
use by you, usually to be placed in an external stylesheet, or a
<code>style</code> block in the <code>head</code> of your document.
</p>
<p>
Sample usage:
</p>
<pre><![CDATA[
<?php
header('Content-type: text/html; charset=utf-8');
echo '<?xml version="1.0" encoding="UTF-8"?>';
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>Filter.ExtractStyleBlocks</title>
<?php
require_once '/path/to/library/HTMLPurifier.auto.php';
require_once '/path/to/csstidy.class.php';
$dirty = '<style>body {color:#F00;}</style> Some text';
$config = HTMLPurifier_Config::createDefault();
$config->set('Filter', 'ExtractStyleBlocks', true);
$purifier = new HTMLPurifier($config);
$html = $purifier->purify($dirty);
// This implementation writes the stylesheets to the styles/ directory.
// You can also echo the styles inside the document, but it's a bit
// more difficult to make sure they get interpreted properly by
// browsers; try the usual CSS armoring techniques.
$styles = $purifier->context->get('StyleBlocks');
$dir = 'styles/';
if (!is_dir($dir)) mkdir($dir);
$hash = sha1($_GET['html']);
foreach ($styles as $i => $style) {
file_put_contents($name = $dir . $hash . "_$i");
echo '<link rel="stylesheet" type="text/css" href="'.$name.'" />';
}
?>
</head>
<body>
<div>
<?php echo $html; ?>
</div>
</b]]><![CDATA[ody>
</html>
]]></pre>
<p>
<strong>Warning:</strong> It is possible for a user to mount an
imagecrash attack using this CSS. Counter-measures are difficult;
it is not simply enough to limit the range of CSS lengths (using
relative lengths with many nesting levels allows for large values
to be attained without actually specifying them in the stylesheet),
and the flexible nature of selectors makes it difficult to selectively
disable lengths on image tags (HTML Purifier, however, does disable
CSS width and height in inline styling). There are probably two effective
counter measures: an explicit width and height set to auto in all
images in your document (unlikely) or the disabling of width and
height (somewhat reasonable). Whether or not these measures should be
used is left to the reader.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
Filter.YouTube
TYPE: bool
VERSION: 3.1.0
DEFAULT: false
--DESCRIPTION--
<p>
This directive enables YouTube video embedding in HTML Purifier. Check
<a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document
on embedding videos</a> for more information on what this filter does.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,22 @@
HTML.Allowed
TYPE: itext/null
VERSION: 2.0.0
DEFAULT: NULL
--DESCRIPTION--
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>. You can also use
newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:
All of the constraints on the component directives are still enforced.
The syntax is a <em>subset</em> of TinyMCE's <code>valid_elements</code>
whitelist: directly copy-pasting it here will probably result in
broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
are set, this directive has no effect.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,19 @@
HTML.AllowedAttributes
TYPE: lookup/null
VERSION: 1.3.0
DEFAULT: NULL
--DESCRIPTION--
<p>
If HTML Purifier's attribute set is unsatisfactory, overload it!
The syntax is "tag.attr" or "*.attr" for the global attributes
(style, id, class, dir, lang, xml:lang).
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override. For
example, %HTML.EnableAttrID will take precedence over *.id in this
directive. You must set that directive to true before you can use
IDs at all.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,18 @@
HTML.AllowedElements
TYPE: lookup/null
VERSION: 1.3.0
DEFAULT: NULL
--DESCRIPTION--
<p>
If HTML Purifier's tag set is unsatisfactory for your needs, you
can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier
usual feature set, so you cannot add a tag that HTML Purifier never
supported in the first place (like embed, form or head). If you
change this, you probably also want to change %HTML.AllowedAttributes.
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,20 @@
HTML.AllowedModules
TYPE: lookup/null
VERSION: 2.0.0
DEFAULT: NULL
--DESCRIPTION--
<p>
A doctype comes with a set of usual modules to use. Without having
to mucking about with the doctypes, you can quickly activate or
disable these modules by specifying which modules you wish to allow
with this directive. This is most useful for unit testing specific
modules, although end users may find it useful for their own ends.
</p>
<p>
If you specify a module that does not exist, the manager will silently
fail to use it, so be careful! User-defined modules are not affected
by this directive. Modules defined in %HTML.CoreModules are not
affected by this directive.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
HTML.Attr.Name.UseCDATA
TYPE: bool
DEFAULT: false
VERSION: 4.0.0
--DESCRIPTION--
The W3C specification DTD defines the name attribute to be CDATA, not ID, due
to limitations of DTD. In certain documents, this relaxed behavior is desired,
whether it is to specify duplicate names, or to specify names that would be
illegal IDs (for example, names that begin with a digit.) Set this configuration
directive to true to use the relaxed parsing rules.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,18 @@
HTML.BlockWrapper
TYPE: string
VERSION: 1.3.0
DEFAULT: 'p'
--DESCRIPTION--
<p>
String name of element to wrap inline elements that are inside a block
context. This only occurs in the children of blockquote in strict mode.
</p>
<p>
Example: by default value,
<code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> would become
<code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>.
The <code>&lt;p&gt;</code> tags can be replaced with whatever you desire,
as long as it is a block level element.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,23 @@
HTML.CoreModules
TYPE: lookup
VERSION: 2.0.0
--DEFAULT--
array (
'Structure' => true,
'Text' => true,
'Hypertext' => true,
'List' => true,
'NonXMLCommonAttributes' => true,
'XMLCommonAttributes' => true,
'CommonAttributes' => true,
)
--DESCRIPTION--
<p>
Certain modularized doctypes (XHTML, namely), have certain modules
that must be included for the doctype to be an conforming document
type: put those modules here. By default, XHTML's core modules
are used. You can set this to a blank array to disable core module
protection, but this is not recommended.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
HTML.CustomDoctype
TYPE: string/null
VERSION: 2.0.1
DEFAULT: NULL
--DESCRIPTION--
A custom doctype for power-users who defined there own document
type. This directive only applies when %HTML.Doctype is blank.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,33 @@
HTML.DefinitionID
TYPE: string/null
DEFAULT: NULL
VERSION: 2.0.0
--DESCRIPTION--
<p>
Unique identifier for a custom-built HTML definition. If you edit
the raw version of the HTMLDefinition, introducing changes that the
configuration object does not reflect, you must specify this variable.
If you change your custom edits, you should change this directive, or
clear your cache. Example:
</p>
<pre>
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', '1');
$def = $config->getHTMLDefinition();
$def->addAttribute('a', 'tabindex', 'Number');
</pre>
<p>
In the above example, the configuration is still at the defaults, but
using the advanced API, an extra attribute has been added. The
configuration object normally has no way of knowing that this change
has taken place, so it needs an extra directive: %HTML.DefinitionID.
If someone else attempts to use the default configuration, these two
pieces of code will not clobber each other in the cache, since one has
an extra directive attached to it.
</p>
<p>
You <em>must</em> specify a value to this directive to use the
advanced API features.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,16 @@
HTML.DefinitionRev
TYPE: int
VERSION: 2.0.0
DEFAULT: 1
--DESCRIPTION--
<p>
Revision identifier for your custom definition specified in
%HTML.DefinitionID. This serves the same purpose: uniquely identifying
your custom definition, but this one does so in a chronological
context: revision 3 is more up-to-date then revision 2. Thus, when
this gets incremented, the cache handling is smart enough to clean
up any older revisions of your definition as well as flush the
cache.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
HTML.Doctype
TYPE: string/null
DEFAULT: NULL
--DESCRIPTION--
Doctype to use during filtering. Technically speaking this is not actually
a doctype (as it does not identify a corresponding DTD), but we are using
this name for sake of simplicity. When non-blank, this will override any
older directives like %HTML.XHTML or %HTML.Strict.
--ALLOWED--
'HTML 4.01 Transitional', 'HTML 4.01 Strict', 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict', 'XHTML 1.1'
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,21 @@
HTML.ForbiddenAttributes
TYPE: lookup
VERSION: 3.1.0
DEFAULT: array()
--DESCRIPTION--
<p>
While this directive is similar to %HTML.AllowedAttributes, for
forwards-compatibility with XML, this attribute has a different syntax. Instead of
<code>tag.attr</code>, use <code>tag@attr</code>. To disallow <code>href</code>
attributes in <code>a</code> tags, set this directive to
<code>a@href</code>. You can also disallow an attribute globally with
<code>attr</code> or <code>*@attr</code> (either syntax is fine; the latter
is provided for consistency with %HTML.AllowedAttributes).
</p>
<p>
<strong>Warning:</strong> This directive complements %HTML.ForbiddenElements,
accordingly, check
out that directive for a discussion of why you
should think twice before using this directive.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,20 @@
HTML.ForbiddenElements
TYPE: lookup
VERSION: 3.1.0
DEFAULT: array()
--DESCRIPTION--
<p>
This was, perhaps, the most requested feature ever in HTML
Purifier. Please don't abuse it! This is the logical inverse of
%HTML.AllowedElements, and it will override that directive, or any
other directive.
</p>
<p>
If possible, %HTML.Allowed is recommended over this directive, because it
can sometimes be difficult to tell whether or not you've forbidden all of
the behavior you would like to disallow. If you forbid <code>img</code>
with the expectation of preventing images on your site, you'll be in for
a nasty surprise when people start using the <code>background-image</code>
CSS property.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
HTML.MaxImgLength
TYPE: int/null
DEFAULT: 1200
VERSION: 3.1.1
--DESCRIPTION--
<p>
This directive controls the maximum number of pixels in the width and
height attributes in <code>img</code> tags. This is
in place to prevent imagecrash attacks, disable with null at your own risk.
This directive is similar to %CSS.MaxImgLength, and both should be
concurrently edited, although there are
subtle differences in the input format (the HTML max is an integer).
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
HTML.Parent
TYPE: string
VERSION: 1.3.0
DEFAULT: 'div'
--DESCRIPTION--
<p>
String name of element that HTML fragment passed to library will be
inserted in. An interesting variation would be using span as the
parent element, meaning that only inline tags would be allowed.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
HTML.Proprietary
TYPE: bool
VERSION: 3.1.0
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to allow proprietary elements and attributes in your
documents, as per <code>HTMLPurifier_HTMLModule_Proprietary</code>.
<strong>Warning:</strong> This can cause your documents to stop
validating!
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,13 @@
HTML.SafeEmbed
TYPE: bool
VERSION: 3.1.1
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit embed tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to embed tags. Embed is a proprietary
element and will cause your website to stop validating; you should
see if you can use %Output.FlashCompat with %HTML.SafeObject instead
first.</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,13 @@
HTML.SafeObject
TYPE: bool
VERSION: 3.1.1
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit object tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to object tags. You should also enable
%Output.FlashCompat in order to generate Internet Explorer
compatibility code for your object tags.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
HTML.Strict
TYPE: bool
VERSION: 1.3.0
DEFAULT: false
DEPRECATED-VERSION: 1.7.0
DEPRECATED-USE: HTML.Doctype
--DESCRIPTION--
Determines whether or not to use Transitional (loose) or Strict rulesets.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,8 @@
HTML.TidyAdd
TYPE: lookup
VERSION: 2.0.0
DEFAULT: array()
--DESCRIPTION--
Fixes to add to the default set of Tidy fixes as per your level.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,24 @@
HTML.TidyLevel
TYPE: string
VERSION: 2.0.0
DEFAULT: 'medium'
--DESCRIPTION--
<p>General level of cleanliness the Tidy module should enforce.
There are four allowed values:</p>
<dl>
<dt>none</dt>
<dd>No extra tidying should be done</dd>
<dt>light</dt>
<dd>Only fix elements that would be discarded otherwise due to
lack of support in doctype</dd>
<dt>medium</dt>
<dd>Enforce best practices</dd>
<dt>heavy</dt>
<dd>Transform all deprecated elements and attributes to standards
compliant equivalents</dd>
</dl>
--ALLOWED--
'none', 'light', 'medium', 'heavy'
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,8 @@
HTML.TidyRemove
TYPE: lookup
VERSION: 2.0.0
DEFAULT: array()
--DESCRIPTION--
Fixes to remove from the default set of Tidy fixes as per your level.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,8 @@
HTML.Trusted
TYPE: bool
VERSION: 2.0.0
DEFAULT: false
--DESCRIPTION--
Indicates whether or not the user input is trusted or not. If the input is
trusted, a more expansive set of allowed tags and attributes will be used.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
HTML.XHTML
TYPE: bool
DEFAULT: true
VERSION: 1.1.0
DEPRECATED-VERSION: 1.7.0
DEPRECATED-USE: HTML.Doctype
--DESCRIPTION--
Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor.
--ALIASES--
Core.XHTML
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,10 @@
Output.CommentScriptContents
TYPE: bool
VERSION: 2.0.0
DEFAULT: true
--DESCRIPTION--
Determines whether or not HTML Purifier should attempt to fix up the
contents of script tags for legacy browsers with comments.
--ALIASES--
Core.CommentScriptContents
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
Output.FlashCompat
TYPE: bool
VERSION: 4.1.0
DEFAULT: false
--DESCRIPTION--
<p>
If true, HTML Purifier will generate Internet Explorer compatibility
code for all object code. This is highly recommended if you enable
%HTML.SafeObject.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,13 @@
Output.Newline
TYPE: string/null
VERSION: 2.0.1
DEFAULT: NULL
--DESCRIPTION--
<p>
Newline string to format final output with. If left null, HTML Purifier
will auto-detect the default newline type of the system and use that;
you can manually override it here. Remember, \r\n is Windows, \r
is Mac, and \n is Unix.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
Output.SortAttr
TYPE: bool
VERSION: 3.2.0
DEFAULT: false
--DESCRIPTION--
<p>
If true, HTML Purifier will sort attributes by name before writing them back
to the document, converting a tag like: <code>&lt;el b="" a="" c="" /&gt;</code>
to <code>&lt;el a="" b="" c="" /&gt;</code>. This is a workaround for
a bug in FCKeditor which causes it to swap attributes order, adding noise
to text diffs. If you're not seeing this bug, chances are, you don't need
this directive.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,25 @@
Output.TidyFormat
TYPE: bool
VERSION: 1.1.1
DEFAULT: false
--DESCRIPTION--
<p>
Determines whether or not to run Tidy on the final output for pretty
formatting reasons, such as indentation and wrap.
</p>
<p>
This can greatly improve readability for editors who are hand-editing
the HTML, but is by no means necessary as HTML Purifier has already
fixed all major errors the HTML may have had. Tidy is a non-default
extension, and this directive will silently fail if Tidy is not
available.
</p>
<p>
If you are looking to make the overall look of your page's source
better, I recommend running Tidy on the entire page rather than just
user-content (after all, the indentation relative to the containing
blocks will be incorrect).
</p>
--ALIASES--
Core.TidyFormat
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,7 @@
Test.ForceNoIconv
TYPE: bool
DEFAULT: false
--DESCRIPTION--
When set to true, HTMLPurifier_Encoder will act as if iconv does not exist
and use only pure PHP implementations.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,17 @@
URI.AllowedSchemes
TYPE: lookup
--DEFAULT--
array (
'http' => true,
'https' => true,
'mailto' => true,
'ftp' => true,
'nntp' => true,
'news' => true,
)
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
There is also support for the <code>data</code> URI scheme, but it is not
enabled by default.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,17 @@
URI.Base
TYPE: string/null
VERSION: 2.1.0
DEFAULT: NULL
--DESCRIPTION--
<p>
The base URI is the URI of the document this purified HTML will be
inserted into. This information is important if HTML Purifier needs
to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute
is on. You may use a non-absolute URI for this value, but behavior
may vary (%URI.MakeAbsolute deals nicely with both absolute and
relative paths, but forwards-compatibility is not guaranteed).
<strong>Warning:</strong> If set, the scheme on this URI
overrides the one specified by %URI.DefaultScheme.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,10 @@
URI.DefaultScheme
TYPE: string
DEFAULT: 'http'
--DESCRIPTION--
<p>
Defines through what scheme the output will be served, in order to
select the proper object validator when no scheme information is present.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
URI.DefinitionID
TYPE: string/null
VERSION: 2.1.0
DEFAULT: NULL
--DESCRIPTION--
<p>
Unique identifier for a custom-built URI definition. If you want
to add custom URIFilters, you must specify this value.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
URI.DefinitionRev
TYPE: int
VERSION: 2.1.0
DEFAULT: 1
--DESCRIPTION--
<p>
Revision identifier for your custom definition. See
%HTML.DefinitionRev for details.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,14 @@
URI.Disable
TYPE: bool
VERSION: 1.3.0
DEFAULT: false
--DESCRIPTION--
<p>
Disables all URIs in all forms. Not sure why you'd want to do that
(after all, the Internet's founded on the notion of a hyperlink).
</p>
--ALIASES--
Attr.DisableURI
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,11 @@
URI.DisableExternal
TYPE: bool
VERSION: 1.2.0
DEFAULT: false
--DESCRIPTION--
Disables links to external websites. This is a highly effective anti-spam
and anti-pagerank-leech measure, but comes at a hefty price: nolinks or
images outside of your domain will be allowed. Non-linkified URIs will
still be preserved. If you want to be able to link to subdomains or use
absolute URIs, specify %URI.Host for your website.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,13 @@
URI.DisableExternalResources
TYPE: bool
VERSION: 1.3.0
DEFAULT: false
--DESCRIPTION--
Disables the embedding of external resources, preventing users from
embedding things like images from other hosts. This prevents access
tracking (good for email viewers), bandwidth leeching, cross-site request
forging, goatse.cx posting, and other nasties, but also results in a loss
of end-user functionality (they can't directly post a pic they posted from
Flickr anymore). Use it if you don't have a robust user-content moderation
team.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,12 @@
URI.DisableResources
TYPE: bool
VERSION: 1.3.0
DEFAULT: false
--DESCRIPTION--
<p>
Disables embedding resources, essentially meaning no pictures. You can
still link to them though. See %URI.DisableExternalResources for why
this might be a good idea.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,19 @@
URI.Host
TYPE: string/null
VERSION: 1.2.0
DEFAULT: NULL
--DESCRIPTION--
<p>
Defines the domain name of the server, so we can determine whether or
an absolute URI is from your website or not. Not strictly necessary,
as users should be using relative URIs to reference resources on your
website. It will, however, let you use absolute URIs to link to
subdomains of the domain you post here: i.e. example.com will allow
sub.example.com. However, higher up domains will still be excluded:
if you set %URI.Host to sub.example.com, example.com will be blocked.
<strong>Note:</strong> This directive overrides %URI.Base because
a given page may be on a sub-domain, but you wish HTML Purifier to be
more relaxed and allow some of the parent domains too.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,9 @@
URI.HostBlacklist
TYPE: list
VERSION: 1.3.0
DEFAULT: array()
--DESCRIPTION--
List of strings that are forbidden in the host of any URI. Use it to kill
domain names of spam, etc. Note that it will catch anything in the domain,
so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>.
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,13 @@
URI.MakeAbsolute
TYPE: bool
VERSION: 2.1.0
DEFAULT: false
--DESCRIPTION--
<p>
Converts all URIs into absolute forms. This is useful when the HTML
being filtered assumes a specific base path, but will actually be
viewed in a different context (and setting an alternate base URI is
not possible). %URI.Base must be set for this directive to work.
</p>
--# vim: et sw=4 sts=4

View file

@ -0,0 +1,83 @@
URI.Munge
TYPE: string/null
VERSION: 1.3.0
DEFAULT: NULL
--DESCRIPTION--
<p>
Munges all browsable (usually http, https and ftp)
absolute URIs into another URI, usually a URI redirection service.
This directive accepts a URI, formatted with a <code>%s</code> where
the url-encoded original URI should be inserted (sample:
<code>http://www.google.com/url?q=%s</code>).
</p>
<p>
Uses for this directive:
</p>
<ul>
<li>
Prevent PageRank leaks, while being fairly transparent
to users (you may also want to add some client side JavaScript to
override the text in the statusbar). <strong>Notice</strong>:
Many security experts believe that this form of protection does not deter spam-bots.
</li>
<li>
Redirect users to a splash page telling them they are leaving your
website. While this is poor usability practice, it is often mandated
in corporate environments.
</li>
</ul>
<p>
Prior to HTML Purifier 3.1.1, this directive also enabled the munging
of browsable external resources, which could break things if your redirection
script was a splash page or used <code>meta</code> tags. To revert to
previous behavior, please use %URI.MungeResources.
</p>
<p>
You may want to also use %URI.MungeSecretKey along with this directive
in order to enforce what URIs your redirector script allows. Open
redirector scripts can be a security risk and negatively affect the
reputation of your domain name.
</p>
<p>
Starting with HTML Purifier 3.1.1, there is also these substitutions:
</p>
<table>
<thead>
<tr>
<th>Key</th>
<th>Description</th>
<th>Example <code>&lt;a href=""&gt;</code></th>
</tr>
</thead>
<tbody>
<tr>
<td>%r</td>
<td>1 - The URI embeds a resource<br />(blank) - The URI is merely a link</td>
<td></td>
</tr>
<tr>
<td>%n</td>
<td>The name of the tag this URI came from</td>
<td>a</td>
</tr>
<tr>
<td>%m</td>
<td>The name of the attribute this URI came from</td>
<td>href</td>
</tr>
<tr>
<td>%p</td>
<td>The name of the CSS property this URI came from, or blank if irrelevant</td>
<td></td>
</tr>
</tbody>
</table>
<p>
Admittedly, these letters are somewhat arbitrary; the only stipulation
was that they couldn't be a through f. r is for resource (I would have preferred
e, but you take what you can get), n is for name, m
was picked because it came after n (and I couldn't use a), p is for
property.
</p>
--# vim: et sw=4 sts=4

Some files were not shown because too many files have changed in this diff Show more