237 lines
		
	
	
	
		
			8.6 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			237 lines
		
	
	
	
		
			8.6 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| /*! @mainpage
 | |
|  *
 | |
|  * HTML Purifier is an HTML filter that will take an arbitrary snippet of
 | |
|  * HTML and rigorously test, validate and filter it into a version that
 | |
|  * is safe for output onto webpages. It achieves this by:
 | |
|  *
 | |
|  *  -# Lexing (parsing into tokens) the document,
 | |
|  *  -# Executing various strategies on the tokens:
 | |
|  *      -# Removing all elements not in the whitelist,
 | |
|  *      -# Making the tokens well-formed,
 | |
|  *      -# Fixing the nesting of the nodes, and
 | |
|  *      -# Validating attributes of the nodes; and
 | |
|  *  -# Generating HTML from the purified tokens.
 | |
|  *
 | |
|  * However, most users will only need to interface with the HTMLPurifier
 | |
|  * and HTMLPurifier_Config classes.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|     HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
 | |
|     Copyright (C) 2006-2008 Edward Z. Yang
 | |
| 
 | |
|     This library is free software; you can redistribute it and/or
 | |
|     modify it under the terms of the GNU Lesser General Public
 | |
|     License as published by the Free Software Foundation; either
 | |
|     version 2.1 of the License, or (at your option) any later version.
 | |
| 
 | |
|     This library is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|     Lesser General Public License for more details.
 | |
| 
 | |
|     You should have received a copy of the GNU Lesser General Public
 | |
|     License along with this library; if not, write to the Free Software
 | |
|     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 | |
|  */
 | |
| 
 | |
| /**
 | |
|  * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
 | |
|  *
 | |
|  * @note There are several points in which configuration can be specified
 | |
|  *       for HTML Purifier.  The precedence of these (from lowest to
 | |
|  *       highest) is as follows:
 | |
|  *          -# Instance: new HTMLPurifier($config)
 | |
|  *          -# Invocation: purify($html, $config)
 | |
|  *       These configurations are entirely independent of each other and
 | |
|  *       are *not* merged (this behavior may change in the future).
 | |
|  *
 | |
|  * @todo We need an easier way to inject strategies using the configuration
 | |
|  *       object.
 | |
|  */
 | |
class HTMLPurifier
{

    /** Constant with version of HTML Purifier */
    const VERSION = '4.1.1';

    /**
     * Version of HTML Purifier. Instance-level mirror of self::VERSION,
     * defined from the constant so the two cannot drift apart.
     */
    public $version = self::VERSION;

    /** Global configuration object, built once in the constructor */
    public $config;

    /**
     * Array of extra HTMLPurifier_Filter objects to run on HTML, for
     * backwards compatibility. Only populated through addFilter().
     */
    private $filters = array();

    /** Single instance of HTML Purifier, managed by instance() */
    private static $instance;

    /**
     * $strategy is created once in the constructor; $generator is replaced
     * on every call to purify().
     */
    protected $strategy, $generator;

    /**
     * Resultant HTMLPurifier_Context of last run purification. Is an array
     * of contexts if the last called method was purifyArray(); that array
     * mirrors the (possibly nested) structure of the input array.
     */
    public $context;

    /**
     * Initializes the purifier.
     * @param $config Optional HTMLPurifier_Config object for all instances of
     *                the purifier, if omitted, a default configuration is
     *                supplied (which can be overridden on a per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    public function __construct($config = null) {
        $this->config   = HTMLPurifier_Config::create($config);
        $this->strategy = new HTMLPurifier_Strategy_Core();
    }

    /**
     * Adds a filter to process the output. First come first serve.
     * Deprecated: every call raises an E_USER_WARNING before the filter is
     * appended to the list.
     * @param $filter HTMLPurifier_Filter object
     */
    public function addFilter($filter) {
        trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
        $this->filters[] = $filter;
    }

    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     *
     * @param $html String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation, if omitted,
     *                defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     * @return Purified HTML
     */
    public function purify($html, $config = null) {

        // :TODO: make the config merge in, instead of replace
        if ($config) {
            $config = HTMLPurifier_Config::create($config);
        } else {
            $config = $this->config;
        }

        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);

        $context = new HTMLPurifier_Context();

        // setup HTML generator; it is also published through the context
        $this->generator = new HTMLPurifier_Generator($config, $context);
        $context->register('Generator', $this->generator);

        // set up global context variables
        if ($config->get('Core.CollectErrors')) {
            // may get moved out if other facilities use it
            $language = HTMLPurifier_LanguageFactory::instance()->create($config, $context);
            $context->register('Locale', $language);

            $context->register('ErrorCollector', new HTMLPurifier_ErrorCollector($context));
        }

        // setup id_accumulator context, necessary due to the fact that
        // AttrValidator can be called from many places
        $context->register(
            'IDAccumulator',
            HTMLPurifier_IDAccumulator::build($config, $context)
        );

        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

        // setup filters: built-in ones enabled by flag first, then
        // Filter.Custom entries, then anything added through addFilter()
        $filter_flags   = $config->getBatch('Filter');
        $custom_filters = $filter_flags['Custom'];
        unset($filter_flags['Custom']);

        $filters = array();
        foreach ($filter_flags as $name => $enabled) {
            // skip disabled flags and dotted keys; only plain keys are
            // turned into HTMLPurifier_Filter_* class names
            if (!$enabled || strpos($name, '.') !== false) {
                continue;
            }
            $class = "HTMLPurifier_Filter_$name";
            $filters[] = new $class;
        }
        foreach ($custom_filters as $custom) {
            // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
            $filters[] = $custom;
        }
        $filters = array_merge($filters, $this->filters);
        // maybe prepare(), but later

        // pre-filters run in registration order
        foreach ($filters as $active) {
            $html = $active->preFilter($html, $config, $context);
        }

        // un-purified HTML -> list of un-purified tokens
        $tokens = $lexer->tokenizeHTML($html, $config, $context);
        // -> list of tokens produced by the core strategy
        $tokens = $this->strategy->execute($tokens, $config, $context);
        // -> purified HTML
        $html = $this->generator->generateFromTokens($tokens);

        // post-filters run in reverse registration order
        foreach (array_reverse($filters) as $active) {
            $html = $active->postFilter($html, $config, $context);
        }

        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

        // reference binding kept exactly as before so existing callers see
        // no difference in how the property is (re)bound
        $this->context =& $context;
        return $html;
    }

    /**
     * Filters an array of HTML snippets. Elements that are themselves
     * arrays are purified recursively, so nested arrays are supported;
     * keys are preserved at every level.
     * @param $array_of_html Array of HTML snippets, optionally nested
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML with the same keys/nesting as the input
     */
    public function purifyArray($array_of_html, $config = null) {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            if (is_array($html)) {
                // recursion leaves an array of contexts in $this->context
                $array_of_html[$key] = $this->purifyArray($html, $config);
            } else {
                $array_of_html[$key] = $this->purify($html, $config);
            }
            // capture whatever the call above left behind for this key
            $context_array[$key] = $this->context;
        }
        $this->context = $context_array;
        return $array_of_html;
    }

    /**
     * Singleton for enforcing just one HTML Purifier in your system
     * @param $prototype Optional prototype HTMLPurifier instance to
     *                   overload singleton with, or HTMLPurifier_Config
     *                   instance to configure the generated version with.
     * @return The shared HTMLPurifier instance
     */
    public static function instance($prototype = null) {
        if ($prototype instanceof HTMLPurifier) {
            // a ready-made purifier replaces the stored instance
            self::$instance = $prototype;
        } elseif ($prototype) {
            // any other truthy prototype is used as constructor argument
            self::$instance = new HTMLPurifier($prototype);
        } elseif (!self::$instance) {
            // first use without a prototype: default configuration
            self::$instance = new HTMLPurifier();
        }
        return self::$instance;
    }

    /**
     * @note Backwards compatibility, see instance()
     */
    public static function getInstance($prototype = null) {
        return self::instance($prototype);
    }

}
 | |
| 
 | |
| // vim: et sw=4 sts=4
 |