<?php

/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * This strategy iterates through all the tokens and removes unrecognized
 * tokens. If a token is not recognized but a TagTransform is defined for
 * that element, the element will be transformed accordingly.
 */

class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list, keeping only tokens whose element is present in
     * the HTML definition (after any tag transform), plus text tokens and,
     * depending on configuration, comments.
     *
     * Configuration directives read: Core.EscapeInvalidTags,
     * Core.RemoveInvalidImg, HTML.Trusted, Core.RemoveScriptContents,
     * Core.HiddenElements and Core.CollectErrors.
     *
     * While running, the token being processed is registered in the context
     * under 'CurrentToken'; the entry is destroyed before returning.
     *
     * @param array  $tokens  Tokens to filter, processed in order.
     * @param object $config  Configuration object; queried through get() and
     *                        getHTMLDefinition().
     * @param object $context Context object; used for 'CurrentToken' and, when
     *                        error collection is enabled, 'ErrorCollector'.
     * @return array The tokens that survived filtering, in original order.
     */
    public function execute($tokens, $config, $context) {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $result = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img  = $config->get('Core.RemoveInvalidImg');

        // currently only used to determine if comments should be kept
        $trusted = $config->get('HTML.Trusted');

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden_elements     = $config->get('Core.HiddenElements');

        // remove script contents compatibility: an explicit true/false for
        // Core.RemoveScriptContents overrides the 'script' entry of
        // Core.HiddenElements; any other value leaves the list untouched
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
            unset($hidden_elements['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // removes tokens until it reaches a closing tag with its value
        $remove_until = false;

        // converts comments into text tokens when this is equal to a tag name
        $textify_comments = false;

        $token = false;
        $context->register('CurrentToken', $token);

        // error collector, or false when Core.CollectErrors is off
        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            if ($remove_until) {
                // inside a removed hidden element: skip everything except
                // tags carrying the name we are waiting for
                if (empty($token->is_tag) || $token->name !== $remove_until) {
                    continue;
                }
            }
            if (!empty($token->is_tag)) {
                // DEFINITION CALL

                // before any processing, try to transform the element
                if (isset($definition->info_tag_transform[$token->name])) {
                    $original_name = $token->name;
                    // there is a transformation for this tag
                    // DEFINITION CALL
                    $token = $definition->
                                info_tag_transform[$token->name]->
                                    transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($definition->info[$token->name])) {

                    // mostly everything's good, but
                    // we need to make sure required attributes are in order
                    if (
                        ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
                        $definition->info[$token->name]->required_attr &&
                        ($token->name !== 'img' || $remove_invalid_img) // ensure config option still works
                    ) {
                        $attr_validator->validateToken($token, $config, $context);
                        $ok = true;
                        foreach ($definition->info[$token->name]->required_attr as $name) {
                            if (!isset($token->attr[$name])) {
                                // $name is left pointing at the first missing
                                // attribute for the error report below
                                $ok = false;
                                break;
                            }
                        }
                        if (!$ok) {
                            if ($e) {
                                $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
                            }
                            continue;
                        }
                        // attributes were validated above; flag the token so
                        // that fact is recorded on it
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track whether we are inside an allowed hidden element,
                    // so comments found there are converted to text
                    if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_comments = $token->name;
                    } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                        $textify_comments = false;
                    }

                } elseif ($escape_invalid_tags) {
                    // invalid tag: generate its HTML representation and
                    // insert it as a text token instead
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // check if we need to destroy all of the tag's children
                    // CAN BE GENERICIZED
                    if (isset($hidden_elements[$token->name])) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $remove_until = $token->name;
                        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                            // do nothing: we're still looking
                        } else {
                            $remove_until = false;
                        }
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                        }
                    } else {
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_comments !== false) {
                    // inside an allowed hidden element (e.g. script): keep
                    // the comment's contents as a plain text token
                    $data = $token->data;
                    $token = new HTMLPurifier_Token_Text($data);
                } elseif ($trusted) {
                    // keep, but perform comment cleaning
                    if ($e) {
                        // perform check whether or not there's a trailing hyphen
                        if (substr($token->data, -1) === '-') {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                        }
                    }
                    $token->data = rtrim($token->data, '-');
                    $found_double_hyphen = false;
                    // repeat until no '--' is left: a single pass turns
                    // '----' into '--', which still needs collapsing
                    while (strpos($token->data, '--') !== false) {
                        if ($e && !$found_double_hyphen) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                        $found_double_hyphen = true; // prevent double-erroring
                        $token->data = str_replace('--', '-', $token->data);
                    }
                } else {
                    // strip comments
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Text) {
                // text tokens are kept as-is (intentionally empty branch)
            } else {
                // any other kind of token is dropped
                continue;
            }
            $result[] = $token;
        }
        if ($remove_until && $e) {
            // we removed tokens until the end, throw error
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
        }

        $context->destroy('CurrentToken');

        return $result;
    }

}

// vim: et sw=4 sts=4