get('Core.LexerImpl');
        }
        $needs_tracking =
            $config->get('Core.MaintainLineNumbers') ||
            $config->get('Core.CollectErrors');
        $inst = null;
        if (is_object($lexer)) {
            $inst = $lexer;
        } else {
            if (is_null($lexer)) { do {
                // auto-detection algorithm
                if ($needs_tracking) {
                    $lexer = 'DirectLex';
                    break;
                }
                if (
                    class_exists('DOMDocument') &&
                    method_exists('DOMDocument', 'loadHTML') &&
                    !extension_loaded('domxml')
                ) {
                    // check for DOM support, because while it's part of the
                    // core, it can be disabled compile time. Also, the PECL
                    // domxml extension overrides the default DOM, and is evil
                    // and nasty and we shan't bother to support it
                    $lexer = 'DOMLex';
                } else {
                    $lexer = 'DirectLex';
                }
            } while(0); } // do..while so we can break
            // instantiate recognized string names
            switch ($lexer) {
                case 'DOMLex':
                    $inst = new HTMLPurifier_Lexer_DOMLex();
                    break;
                case 'DirectLex':
                    $inst = new HTMLPurifier_Lexer_DirectLex();
                    break;
                case 'PH5P':
                    $inst = new HTMLPurifier_Lexer_PH5P();
                    break;
                default:
                    throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer));
            }
        }
        if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated');
        // once PHP DOM implements native line numbers, or we
        // hack out something using XSLT, remove this stipulation
        if ($needs_tracking && !$inst->tracksLineNumbers) {
            throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)');
        }
        return $inst;
    }
    // -- CONVENIENCE MEMBERS ---------------------------------------------
    /**
     * Creates the entity parser that parseData() falls back on when its
     * lookup table is not enough.
     */
    public function __construct() {
        $entity_parser = new HTMLPurifier_EntityParser();
        $this->_entity_parser = $entity_parser;
    }
    /**
     * Most common entity to raw value conversion table for special entities.
     */
    // Keys are the escaped spellings as they appear in markup; values are
    // the raw characters. The apostrophe is listed in its decimal,
    // zero-padded decimal and hexadecimal numeric forms.
    protected $_special_entity2str =
            array(
                    '&quot;' => '"',
                    '&amp;'  => '&',
                    '&lt;'   => '<',
                    '&gt;'   => '>',
                    '&#39;'  => "'",
                    '&#039;' => "'",
                    '&#x27;' => "'"
            );
    /**
     * Parses special entities into the proper characters.
     *
     * This method translates escaped versions of the special characters
     * back into the characters themselves.
     *
     * @warning
     * You should be able to treat the output of this function as
     * completely parsed, but that's only because all other entities should
     * have been handled previously in substituteNonSpecialEntities()
     *
     * @param $string String character data to be parsed.
     * @return Parsed character data.
     */
    public function parseData($string) {
        // following functions require at least one character
        if ($string === '') return '';

        // Count the ampersands that could start an entity: one followed by
        // a space, or one that is the very last character, cannot.
        $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string)-1] === '&' ? 1 : 0);

        if (!$num_amp) return $string; // abort if no entities

        // Every '&amp;' becomes a bare '&' in the table lookup below.
        // Remember how many there were so those ampersands are not taken
        // for entities that still need parsing.
        $num_esc_amp = substr_count($string, '&amp;');
        $string = strtr($string, $this->_special_entity2str);

        // code duplication for sake of optimization, see above
        $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
            ($string[strlen($string)-1] === '&' ? 1 : 0);

        // If the remaining candidates can all be accounted for by unescaped
        // '&amp;' sequences, treat the string as fully parsed.
        // NOTE(review): this compares counts only, so a string that mixes
        // '&amp;' with an entity missing from the table (e.g. '&amp; &#34;')
        // also returns here with that entity untouched -- confirm whether
        // that is acceptable to callers.
        if ($num_amp_2 <= $num_esc_amp) return $string;

        // hmm... now we have some uncommon entities. Use the callback.
        $string = $this->_entity_parser->substituteSpecialEntities($string);
        return $string;
    }
    /**
     * Lexes an HTML string into tokens.
     *
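     * @param $string String HTML.
     * @param $config Configuration object (presumably HTMLPurifier_Config; confirm).
     * @param $context Context object (presumably HTMLPurifier_Context; confirm).
     * @return HTMLPurifier_Token array representation of HTML.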
     */
    public function tokenizeHTML($string, $config, $context) {
        // This implementation does no lexing; it only raises a user-level
        // error and ignores all three arguments.
        $message = 'Call to abstract class';
        trigger_error($message, E_USER_ERROR);
    }
    /**
     * Translates CDATA sections into regular sections (through escaping).
     *
     * @param $string HTML string to process.
     * @return HTML with CDATA sections escaped.
     */
    protected static function escapeCDATA($string) {
        // Match each <![CDATA[ ... ]]> section. The body is captured
        // non-greedily so that neighbouring sections are matched one at a
        // time, and the /s modifier lets a body span several lines.
        // The capture is handed to CDATACallback (defined outside this
        // excerpt; presumably it performs the actual escaping -- confirm).
        $cdata_section = '/<!\[CDATA\[(.+?)\]\]>/s';
        return preg_replace_callback(
            $cdata_section,
            array('HTMLPurifier_Lexer', 'CDATACallback'),
            $string
        );
    }
    /**
     * Special CDATA case that is especially convoluted for