Move HTML to Markdown library to Composer
This commit is contained in:
parent
1ab28bbe03
commit
c9dafe3b4e
43 changed files with 2380 additions and 776 deletions
60
vendor/league/html-to-markdown/src/Configuration.php
vendored
Normal file
60
vendor/league/html-to-markdown/src/Configuration.php
vendored
Normal file
|
@ -0,0 +1,60 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown;
|
||||
|
||||
class Configuration
|
||||
{
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @param array $config
|
||||
*/
|
||||
public function __construct(array $config = array())
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $config
|
||||
*/
|
||||
public function merge(array $config = array())
|
||||
{
|
||||
$this->config = array_replace_recursive($this->config, $config);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $config
|
||||
*/
|
||||
public function replace(array $config = array())
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $key
|
||||
* @param mixed $value
|
||||
*/
|
||||
public function setOption($key, $value)
|
||||
{
|
||||
$this->config[$key] = $value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string|null $key
|
||||
* @param mixed|null $default
|
||||
*
|
||||
* @return mixed|null
|
||||
*/
|
||||
public function getOption($key = null, $default = null)
|
||||
{
|
||||
if ($key === null) {
|
||||
return $this->config;
|
||||
}
|
||||
|
||||
if (!isset($this->config[$key])) {
|
||||
return $default;
|
||||
}
|
||||
|
||||
return $this->config[$key];
|
||||
}
|
||||
}
|
11
vendor/league/html-to-markdown/src/ConfigurationAwareInterface.php
vendored
Normal file
11
vendor/league/html-to-markdown/src/ConfigurationAwareInterface.php
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown;
|
||||
|
||||
interface ConfigurationAwareInterface
|
||||
{
|
||||
/**
|
||||
* @param Configuration $config
|
||||
*/
|
||||
public function setConfig(Configuration $config);
|
||||
}
|
44
vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php
vendored
Normal file
44
vendor/league/html-to-markdown/src/Converter/BlockquoteConverter.php
vendored
Normal file
|
@ -0,0 +1,44 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class BlockquoteConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
// Contents should have already been converted to Markdown by this point,
|
||||
// so we just need to add '>' symbols to each line.
|
||||
|
||||
$markdown = '';
|
||||
|
||||
$quote_content = trim($element->getValue());
|
||||
|
||||
$lines = preg_split('/\r\n|\r|\n/', $quote_content);
|
||||
|
||||
$total_lines = count($lines);
|
||||
|
||||
foreach ($lines as $i => $line) {
|
||||
$markdown .= '> ' . $line . "\n";
|
||||
if ($i + 1 === $total_lines) {
|
||||
$markdown .= "\n";
|
||||
}
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('blockquote');
|
||||
}
|
||||
}
|
62
vendor/league/html-to-markdown/src/Converter/CodeConverter.php
vendored
Normal file
62
vendor/league/html-to-markdown/src/Converter/CodeConverter.php
vendored
Normal file
|
@ -0,0 +1,62 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class CodeConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$language = null;
|
||||
|
||||
// Checking for language class on the code block
|
||||
$classes = $element->getAttribute('class');
|
||||
|
||||
if ($classes) {
|
||||
// Since tags can have more than one class, we need to find the one that starts with 'language-'
|
||||
$classes = explode(' ', $classes);
|
||||
foreach ($classes as $class) {
|
||||
if (strpos($class, 'language-') !== false) {
|
||||
// Found one, save it as the selected language and stop looping over the classes.
|
||||
// The space after the language avoids gluing the actual code with the language tag
|
||||
$language = str_replace('language-', '', $class) . ' ';
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$markdown = '';
|
||||
$code = html_entity_decode($element->getChildrenAsString());
|
||||
|
||||
// In order to remove the code tags we need to search for them and, in the case of the opening tag
|
||||
// use a regular expression to find the tag and the other attributes it might have
|
||||
$code = preg_replace('/<code\b[^>]*>/', '', $code);
|
||||
$code = str_replace('</code>', '', $code);
|
||||
|
||||
// Checking if the code has multiple lines
|
||||
$lines = preg_split('/\r\n|\r|\n/', $code);
|
||||
if (count($lines) > 1) {
|
||||
// Multiple lines detected, adding three backticks and newlines
|
||||
$markdown .= '```' . $language . "\n" . $code . "\n" . '```';
|
||||
} else {
|
||||
// One line of code, wrapping it on one backtick.
|
||||
$markdown .= '`' . $language . $code . '`';
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('code');
|
||||
}
|
||||
}
|
26
vendor/league/html-to-markdown/src/Converter/CommentConverter.php
vendored
Normal file
26
vendor/league/html-to-markdown/src/Converter/CommentConverter.php
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class CommentConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('#comment');
|
||||
}
|
||||
}
|
20
vendor/league/html-to-markdown/src/Converter/ConverterInterface.php
vendored
Normal file
20
vendor/league/html-to-markdown/src/Converter/ConverterInterface.php
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
interface ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element);
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags();
|
||||
}
|
50
vendor/league/html-to-markdown/src/Converter/DefaultConverter.php
vendored
Normal file
50
vendor/league/html-to-markdown/src/Converter/DefaultConverter.php
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\Configuration;
|
||||
use League\HTMLToMarkdown\ConfigurationAwareInterface;
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class DefaultConverter implements ConverterInterface, ConfigurationAwareInterface
|
||||
{
|
||||
const DEFAULT_CONVERTER = '_default';
|
||||
|
||||
/**
|
||||
* @var Configuration
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @param Configuration $config
|
||||
*/
|
||||
public function setConfig(Configuration $config)
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
// If strip_tags is false (the default), preserve tags that don't have Markdown equivalents,
|
||||
// such as <span> nodes on their own. C14N() canonicalizes the node to a string.
|
||||
// See: http://www.php.net/manual/en/domnode.c14n.php
|
||||
if ($this->config->getOption('strip_tags', false)) {
|
||||
return $element->getValue();
|
||||
}
|
||||
|
||||
return html_entity_decode($element->getChildrenAsString());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array(self::DEFAULT_CONVERTER);
|
||||
}
|
||||
}
|
45
vendor/league/html-to-markdown/src/Converter/DivConverter.php
vendored
Normal file
45
vendor/league/html-to-markdown/src/Converter/DivConverter.php
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\Configuration;
|
||||
use League\HTMLToMarkdown\ConfigurationAwareInterface;
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class DivConverter implements ConverterInterface, ConfigurationAwareInterface
|
||||
{
|
||||
/**
|
||||
* @var Configuration
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @param Configuration $config
|
||||
*/
|
||||
public function setConfig(Configuration $config)
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
if ($this->config->getOption('strip_tags', false)) {
|
||||
return $element->getValue() . "\n\n";
|
||||
}
|
||||
|
||||
return html_entity_decode($element->getChildrenAsString());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('div');
|
||||
}
|
||||
}
|
57
vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php
vendored
Normal file
57
vendor/league/html-to-markdown/src/Converter/EmphasisConverter.php
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\Configuration;
|
||||
use League\HTMLToMarkdown\ConfigurationAwareInterface;
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class EmphasisConverter implements ConverterInterface, ConfigurationAwareInterface
|
||||
{
|
||||
/**
|
||||
* @var Configuration
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @param Configuration $config
|
||||
*/
|
||||
public function setConfig(Configuration $config)
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$tag = $element->getTagName();
|
||||
$value = $element->getValue();
|
||||
|
||||
if (!trim($value)) {
|
||||
return '';
|
||||
}
|
||||
|
||||
if ($tag === 'i' || $tag === 'em') {
|
||||
$style = $this->config->getOption('italic_style');
|
||||
} else {
|
||||
$style = $this->config->getOption('bold_style');
|
||||
}
|
||||
|
||||
$prefix = ltrim($value) !== $value ? ' ' : '';
|
||||
$suffix = rtrim($value) !== $value ? ' ' : '';
|
||||
|
||||
return $prefix . $style . trim($value) . $style . $suffix;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('em', 'i', 'strong', 'b');
|
||||
}
|
||||
}
|
41
vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php
vendored
Normal file
41
vendor/league/html-to-markdown/src/Converter/HardBreakConverter.php
vendored
Normal file
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\Configuration;
|
||||
use League\HTMLToMarkdown\ConfigurationAwareInterface;
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class HardBreakConverter implements ConverterInterface, ConfigurationAwareInterface
|
||||
{
|
||||
/**
|
||||
* @var Configuration
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @param Configuration $config
|
||||
*/
|
||||
public function setConfig(Configuration $config)
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
return $this->config->getOption('hard_break') ? "\n" : " \n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('br');
|
||||
}
|
||||
}
|
78
vendor/league/html-to-markdown/src/Converter/HeaderConverter.php
vendored
Normal file
78
vendor/league/html-to-markdown/src/Converter/HeaderConverter.php
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\Configuration;
|
||||
use League\HTMLToMarkdown\ConfigurationAwareInterface;
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class HeaderConverter implements ConverterInterface, ConfigurationAwareInterface
|
||||
{
|
||||
const STYLE_ATX = 'atx';
|
||||
const STYLE_SETEXT = 'setext';
|
||||
|
||||
/**
|
||||
* @var Configuration
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @param Configuration $config
|
||||
*/
|
||||
public function setConfig(Configuration $config)
|
||||
{
|
||||
$this->config = $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$level = (int) substr($element->getTagName(), 1, 1);
|
||||
$style = $this->config->getOption('header_style', self::STYLE_SETEXT);
|
||||
|
||||
if (($level === 1 || $level === 2) && !$element->isDescendantOf('blockquote') && $style === self::STYLE_SETEXT) {
|
||||
return $this->createSetextHeader($level, $element->getValue());
|
||||
}
|
||||
|
||||
return $this->createAtxHeader($level, $element->getValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int $level
|
||||
* @param string $content
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function createSetextHeader($level, $content)
|
||||
{
|
||||
$length = function_exists('mb_strlen') ? mb_strlen($content, 'utf-8') : strlen($content);
|
||||
$underline = ($level === 1) ? '=' : '-';
|
||||
|
||||
return $content . "\n" . str_repeat($underline, $length) . "\n\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @param int $level
|
||||
* @param string $content
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function createAtxHeader($level, $content)
|
||||
{
|
||||
$prefix = str_repeat('#', $level) . ' ';
|
||||
|
||||
return $prefix . $content . "\n\n";
|
||||
}
|
||||
}
|
26
vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php
vendored
Normal file
26
vendor/league/html-to-markdown/src/Converter/HorizontalRuleConverter.php
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class HorizontalRuleConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
return "- - - - - -\n\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('hr');
|
||||
}
|
||||
}
|
35
vendor/league/html-to-markdown/src/Converter/ImageConverter.php
vendored
Normal file
35
vendor/league/html-to-markdown/src/Converter/ImageConverter.php
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class ImageConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$src = $element->getAttribute('src');
|
||||
$alt = $element->getAttribute('alt');
|
||||
$title = $element->getAttribute('title');
|
||||
|
||||
if ($title !== '') {
|
||||
// No newlines added. <img> should be in a block-level element.
|
||||
return '';
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('img');
|
||||
}
|
||||
}
|
52
vendor/league/html-to-markdown/src/Converter/LinkConverter.php
vendored
Normal file
52
vendor/league/html-to-markdown/src/Converter/LinkConverter.php
vendored
Normal file
|
@ -0,0 +1,52 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class LinkConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$href = $element->getAttribute('href');
|
||||
$title = $element->getAttribute('title');
|
||||
$text = trim($element->getValue());
|
||||
|
||||
if ($title !== '') {
|
||||
$markdown = '[' . $text . '](' . $href . ' "' . $title . '")';
|
||||
} elseif ($href === $text && $this->isValidAutolink($href)) {
|
||||
$markdown = '<' . $href . '>';
|
||||
} else {
|
||||
$markdown = '[' . $text . '](' . $href . ')';
|
||||
}
|
||||
|
||||
if (!$href) {
|
||||
$markdown = html_entity_decode($element->getChildrenAsString());
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('a');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $href
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
private function isValidAutolink($href)
|
||||
{
|
||||
return preg_match('/^[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*/i', $href) === 1;
|
||||
}
|
||||
}
|
26
vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php
vendored
Normal file
26
vendor/league/html-to-markdown/src/Converter/ListBlockConverter.php
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class ListBlockConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
return $element->getValue() . "\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('ol', 'ul');
|
||||
}
|
||||
}
|
47
vendor/league/html-to-markdown/src/Converter/ListItemConverter.php
vendored
Normal file
47
vendor/league/html-to-markdown/src/Converter/ListItemConverter.php
vendored
Normal file
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class ListItemConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
// If parent is an ol, use numbers, otherwise, use dashes
|
||||
$list_type = $element->getParent()->getTagName();
|
||||
|
||||
// Add spaces to start for nested list items
|
||||
$level = $element->getListItemLevel($element);
|
||||
|
||||
$prefixForParagraph = str_repeat(' ', $level + 1);
|
||||
$value = trim(implode("\n" . $prefixForParagraph, explode("\n", trim($element->getValue()))));
|
||||
|
||||
// If list item is the first in a nested list, add a newline before it
|
||||
$prefix = '';
|
||||
if ($level > 0 && $element->getSiblingPosition() === 1) {
|
||||
$prefix = "\n";
|
||||
}
|
||||
|
||||
if ($list_type === 'ul') {
|
||||
return $prefix . '- ' . $value . "\n";
|
||||
}
|
||||
|
||||
$number = $element->getSiblingPosition();
|
||||
|
||||
return $prefix . $number . '. ' . $value . "\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('li');
|
||||
}
|
||||
}
|
124
vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php
vendored
Normal file
124
vendor/league/html-to-markdown/src/Converter/ParagraphConverter.php
vendored
Normal file
|
@ -0,0 +1,124 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class ParagraphConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$value = $element->getValue();
|
||||
|
||||
$markdown = '';
|
||||
|
||||
$lines = preg_split('/\r\n|\r|\n/', $value);
|
||||
foreach ($lines as $line) {
|
||||
/*
|
||||
* Some special characters need to be escaped based on the position that they appear
|
||||
* The following function will deal with those special cases.
|
||||
*/
|
||||
$markdown .= $this->escapeSpecialCharacters($line);
|
||||
$markdown .= "\n";
|
||||
}
|
||||
|
||||
return trim($markdown) !== '' ? rtrim($markdown) . "\n\n" : '';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('p');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $line
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function escapeSpecialCharacters($line)
|
||||
{
|
||||
$line = $this->escapeFirstCharacters($line);
|
||||
$line = $this->escapeOtherCharacters($line);
|
||||
$line = $this->escapeOtherCharactersRegex($line);
|
||||
|
||||
return $line;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $line
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function escapeFirstCharacters($line)
|
||||
{
|
||||
$escapable = array(
|
||||
'>',
|
||||
'- ',
|
||||
'+ ',
|
||||
'--',
|
||||
'~~~',
|
||||
'---',
|
||||
'- - -'
|
||||
);
|
||||
|
||||
foreach ($escapable as $i) {
|
||||
if (strpos(ltrim($line), $i) === 0) {
|
||||
// Found a character that must be escaped, adding a backslash before
|
||||
return '\\' . ltrim($line);
|
||||
}
|
||||
}
|
||||
|
||||
return $line;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $line
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function escapeOtherCharacters($line)
|
||||
{
|
||||
$escapable = array(
|
||||
'<!--'
|
||||
);
|
||||
|
||||
foreach ($escapable as $i) {
|
||||
if (strpos($line, $i) !== false) {
|
||||
// Found an escapable character, escaping it
|
||||
$line = substr_replace($line, '\\', strpos($line, $i), 0);
|
||||
}
|
||||
}
|
||||
|
||||
return $line;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $line
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
private function escapeOtherCharactersRegex($line)
|
||||
{
|
||||
$regExs = array(
|
||||
// Match numbers ending on ')' or '.' that are at the beginning of the line.
|
||||
'/^[0-9]+(?=\)|\.)/'
|
||||
);
|
||||
|
||||
foreach ($regExs as $i) {
|
||||
if (preg_match($i, $line, $match)) {
|
||||
// Matched an escapable character, adding a backslash on the string before the offending character
|
||||
$line = substr_replace($line, '\\', strlen($match[0]), 0);
|
||||
}
|
||||
}
|
||||
|
||||
return $line;
|
||||
}
|
||||
}
|
59
vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php
vendored
Normal file
59
vendor/league/html-to-markdown/src/Converter/PreformattedConverter.php
vendored
Normal file
|
@ -0,0 +1,59 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class PreformattedConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$markdown = '';
|
||||
|
||||
$pre_content = html_entity_decode($element->getChildrenAsString());
|
||||
$pre_content = str_replace(array('<pre>', '</pre>'), '', $pre_content);
|
||||
|
||||
/*
|
||||
* Checking for the code tag.
|
||||
* Usually pre tags are used along with code tags. This conditional will check for already converted code tags,
|
||||
* which use backticks, and if those backticks are at the beginning and at the end of the string it means
|
||||
* there's no more information to convert.
|
||||
*/
|
||||
|
||||
$firstBacktick = strpos(trim($pre_content), '`');
|
||||
$lastBacktick = strrpos(trim($pre_content), '`');
|
||||
if ($firstBacktick === 0 && $lastBacktick === strlen(trim($pre_content)) - 1) {
|
||||
return $pre_content;
|
||||
}
|
||||
|
||||
// If the execution reaches this point it means it's just a pre tag, with no code tag nested
|
||||
|
||||
// Normalizing new lines
|
||||
$pre_content = preg_replace('/\r\n|\r|\n/', PHP_EOL, $pre_content);
|
||||
|
||||
// Checking if the string has multiple lines
|
||||
$lines = preg_split('/\r\n|\r|\n/', $pre_content);
|
||||
if (count($lines) > 1) {
|
||||
// Multiple lines detected, adding three backticks and newlines
|
||||
$markdown .= '```' . "\n" . $pre_content . "\n" . '```';
|
||||
} else {
|
||||
// One line of code, wrapping it on one backtick.
|
||||
$markdown .= '`' . $pre_content . '`';
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('pre');
|
||||
}
|
||||
}
|
46
vendor/league/html-to-markdown/src/Converter/TextConverter.php
vendored
Normal file
46
vendor/league/html-to-markdown/src/Converter/TextConverter.php
vendored
Normal file
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown\Converter;
|
||||
|
||||
use League\HTMLToMarkdown\ElementInterface;
|
||||
|
||||
class TextConverter implements ConverterInterface
|
||||
{
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function convert(ElementInterface $element)
|
||||
{
|
||||
$markdown = $element->getValue();
|
||||
|
||||
// Remove leftover \n at the beginning of the line
|
||||
$markdown = ltrim($markdown, "\n");
|
||||
|
||||
// Replace sequences of invisible characters with spaces
|
||||
$markdown = preg_replace('~\s+~u', ' ', $markdown);
|
||||
|
||||
// Escape the following characters: '*', '_', '[', ']' and '\'
|
||||
$markdown = preg_replace('~([*_\\[\\]\\\\])~u', '\\\\$1', $markdown);
|
||||
|
||||
$markdown = preg_replace('~^#~u', '\\\\#', $markdown);
|
||||
|
||||
if ($markdown === ' ') {
|
||||
$next = $element->getNext();
|
||||
if (!$next || $next->isBlock()) {
|
||||
$markdown = '';
|
||||
}
|
||||
}
|
||||
|
||||
return $markdown;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string[]
|
||||
*/
|
||||
public function getSupportedTags()
|
||||
{
|
||||
return array('#text');
|
||||
}
|
||||
}
|
257
vendor/league/html-to-markdown/src/Element.php
vendored
Normal file
257
vendor/league/html-to-markdown/src/Element.php
vendored
Normal file
|
@ -0,0 +1,257 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown;
|
||||
|
||||
class Element implements ElementInterface
|
||||
{
|
||||
/**
|
||||
* @var \DOMNode
|
||||
*/
|
||||
protected $node;
|
||||
|
||||
/**
|
||||
* @var ElementInterface|null
|
||||
*/
|
||||
private $nextCached;
|
||||
|
||||
public function __construct(\DOMNode $node)
|
||||
{
|
||||
$this->node = $node;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function isBlock()
|
||||
{
|
||||
switch ($this->getTagName()) {
|
||||
case 'blockquote':
|
||||
case 'body':
|
||||
case 'code':
|
||||
case 'div':
|
||||
case 'h1':
|
||||
case 'h2':
|
||||
case 'h3':
|
||||
case 'h4':
|
||||
case 'h5':
|
||||
case 'h6':
|
||||
case 'hr':
|
||||
case 'html':
|
||||
case 'li':
|
||||
case 'p':
|
||||
case 'ol':
|
||||
case 'ul':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function isText()
|
||||
{
|
||||
return $this->getTagName() === '#text';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function isWhitespace()
|
||||
{
|
||||
return $this->getTagName() === '#text' && trim($this->getValue()) === '';
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string
|
||||
*/
|
||||
public function getTagName()
|
||||
{
|
||||
return $this->node->nodeName;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string
|
||||
*/
|
||||
public function getValue()
|
||||
{
|
||||
return $this->node->nodeValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return ElementInterface|null
|
||||
*/
|
||||
public function getParent()
|
||||
{
|
||||
return new static($this->node->parentNode) ?: null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function hasChildren()
|
||||
{
|
||||
return $this->node->hasChildNodes();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return ElementInterface[]
|
||||
*/
|
||||
public function getChildren()
|
||||
{
|
||||
$ret = array();
|
||||
/** @var \DOMNode $node */
|
||||
foreach ($this->node->childNodes as $node) {
|
||||
$ret[] = new static($node);
|
||||
}
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return ElementInterface|null
|
||||
*/
|
||||
public function getNext()
|
||||
{
|
||||
if ($this->nextCached === null) {
|
||||
$nextNode = $this->getNextNode($this->node);
|
||||
if ($nextNode !== null) {
|
||||
$this->nextCached = new static($nextNode);
|
||||
}
|
||||
}
|
||||
|
||||
return $this->nextCached;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param \DomNode $node
|
||||
* @param bool $checkChildren
|
||||
*
|
||||
* @return \DomNode|null
|
||||
*/
|
||||
private function getNextNode($node, $checkChildren = true)
|
||||
{
|
||||
if ($checkChildren && $node->firstChild) {
|
||||
return $node->firstChild;
|
||||
}
|
||||
|
||||
if ($node->nextSibling) {
|
||||
return $node->nextSibling;
|
||||
}
|
||||
|
||||
if ($node->parentNode) {
|
||||
return $this->getNextNode($node->parentNode, false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string[]|string $tagNames
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isDescendantOf($tagNames)
|
||||
{
|
||||
if (!is_array($tagNames)) {
|
||||
$tagNames = array($tagNames);
|
||||
}
|
||||
|
||||
for ($p = $this->node->parentNode; $p !== false; $p = $p->parentNode) {
|
||||
if (is_null($p)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (in_array($p->nodeName, $tagNames)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $markdown
|
||||
*/
|
||||
public function setFinalMarkdown($markdown)
|
||||
{
|
||||
$markdown_node = $this->node->ownerDocument->createTextNode($markdown);
|
||||
$this->node->parentNode->replaceChild($markdown_node, $this->node);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return string
|
||||
*/
|
||||
public function getChildrenAsString()
|
||||
{
|
||||
return $this->node->C14N();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return int
|
||||
*/
|
||||
public function getSiblingPosition()
|
||||
{
|
||||
$position = 0;
|
||||
|
||||
// Loop through all nodes and find the given $node
|
||||
foreach ($this->getParent()->getChildren() as $current_node) {
|
||||
if (!$current_node->isWhitespace()) {
|
||||
$position++;
|
||||
}
|
||||
|
||||
// TODO: Need a less-buggy way of comparing these
|
||||
// Perhaps we can somehow ensure that we always have the exact same object and use === instead?
|
||||
if ($this->equals($current_node)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return $position;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return int
|
||||
*/
|
||||
public function getListItemLevel()
|
||||
{
|
||||
$level = 0;
|
||||
$parent = $this->getParent();
|
||||
|
||||
while ($parent !== null && $parent->node->parentNode) {
|
||||
if ($parent->getTagName() === 'li') {
|
||||
$level++;
|
||||
}
|
||||
$parent = $parent->getParent();
|
||||
}
|
||||
|
||||
return $level;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $name
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function getAttribute($name)
|
||||
{
|
||||
if ($this->node instanceof \DOMElement) {
|
||||
return $this->node->getAttribute($name);
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function equals(ElementInterface $element)
|
||||
{
|
||||
if ($element instanceof self) {
|
||||
return $element->node === $this->node;
|
||||
}
|
||||
|
||||
return $element === $this;
|
||||
}
|
||||
}
|
80
vendor/league/html-to-markdown/src/ElementInterface.php
vendored
Normal file
80
vendor/league/html-to-markdown/src/ElementInterface.php
vendored
Normal file
|
@ -0,0 +1,80 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown;
|
||||
|
||||
interface ElementInterface
|
||||
{
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function isBlock();
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function isText();
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function isWhitespace();
|
||||
|
||||
/**
|
||||
* @return string
|
||||
*/
|
||||
public function getTagName();
|
||||
|
||||
/**
|
||||
* @return string
|
||||
*/
|
||||
public function getValue();
|
||||
|
||||
/**
|
||||
* @return ElementInterface|null
|
||||
*/
|
||||
public function getParent();
|
||||
|
||||
/**
|
||||
* @param string|string[] $tagNames
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isDescendantOf($tagNames);
|
||||
|
||||
/**
|
||||
* @return bool
|
||||
*/
|
||||
public function hasChildren();
|
||||
|
||||
/**
|
||||
* @return ElementInterface[]
|
||||
*/
|
||||
public function getChildren();
|
||||
|
||||
/**
|
||||
* @return ElementInterface|null
|
||||
*/
|
||||
public function getNext();
|
||||
|
||||
/**
|
||||
* @return int
|
||||
*/
|
||||
public function getSiblingPosition();
|
||||
|
||||
/**
|
||||
* @return string
|
||||
*/
|
||||
public function getChildrenAsString();
|
||||
|
||||
/**
|
||||
* @param string $markdown
|
||||
*/
|
||||
public function setFinalMarkdown($markdown);
|
||||
|
||||
/**
|
||||
* @param string $name
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function getAttribute($name);
|
||||
}
|
104
vendor/league/html-to-markdown/src/Environment.php
vendored
Normal file
104
vendor/league/html-to-markdown/src/Environment.php
vendored
Normal file
|
@ -0,0 +1,104 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown;
|
||||
|
||||
use League\HTMLToMarkdown\Converter\BlockquoteConverter;
|
||||
use League\HTMLToMarkdown\Converter\CodeConverter;
|
||||
use League\HTMLToMarkdown\Converter\CommentConverter;
|
||||
use League\HTMLToMarkdown\Converter\ConverterInterface;
|
||||
use League\HTMLToMarkdown\Converter\DefaultConverter;
|
||||
use League\HTMLToMarkdown\Converter\DivConverter;
|
||||
use League\HTMLToMarkdown\Converter\EmphasisConverter;
|
||||
use League\HTMLToMarkdown\Converter\HardBreakConverter;
|
||||
use League\HTMLToMarkdown\Converter\HeaderConverter;
|
||||
use League\HTMLToMarkdown\Converter\HorizontalRuleConverter;
|
||||
use League\HTMLToMarkdown\Converter\ImageConverter;
|
||||
use League\HTMLToMarkdown\Converter\LinkConverter;
|
||||
use League\HTMLToMarkdown\Converter\ListBlockConverter;
|
||||
use League\HTMLToMarkdown\Converter\ListItemConverter;
|
||||
use League\HTMLToMarkdown\Converter\ParagraphConverter;
|
||||
use League\HTMLToMarkdown\Converter\PreformattedConverter;
|
||||
use League\HTMLToMarkdown\Converter\TextConverter;
|
||||
|
||||
final class Environment
|
||||
{
|
||||
/**
|
||||
* @var Configuration
|
||||
*/
|
||||
protected $config;
|
||||
|
||||
/**
|
||||
* @var ConverterInterface[]
|
||||
*/
|
||||
protected $converters = array();
|
||||
|
||||
public function __construct(array $config = array())
|
||||
{
|
||||
$this->config = new Configuration($config);
|
||||
$this->addConverter(new DefaultConverter());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Configuration
|
||||
*/
|
||||
public function getConfig()
|
||||
{
|
||||
return $this->config;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ConverterInterface $converter
|
||||
*/
|
||||
public function addConverter(ConverterInterface $converter)
|
||||
{
|
||||
if ($converter instanceof ConfigurationAwareInterface) {
|
||||
$converter->setConfig($this->config);
|
||||
}
|
||||
|
||||
foreach ($converter->getSupportedTags() as $tag) {
|
||||
$this->converters[$tag] = $converter;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $tag
|
||||
*
|
||||
* @return ConverterInterface
|
||||
*/
|
||||
public function getConverterByTag($tag)
|
||||
{
|
||||
if (isset($this->converters[$tag])) {
|
||||
return $this->converters[$tag];
|
||||
}
|
||||
|
||||
return $this->converters[DefaultConverter::DEFAULT_CONVERTER];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array $config
|
||||
*
|
||||
* @return Environment
|
||||
*/
|
||||
public static function createDefaultEnvironment(array $config = array())
|
||||
{
|
||||
$environment = new static($config);
|
||||
|
||||
$environment->addConverter(new BlockquoteConverter());
|
||||
$environment->addConverter(new CodeConverter());
|
||||
$environment->addConverter(new CommentConverter());
|
||||
$environment->addConverter(new DivConverter());
|
||||
$environment->addConverter(new EmphasisConverter());
|
||||
$environment->addConverter(new HardBreakConverter());
|
||||
$environment->addConverter(new HeaderConverter());
|
||||
$environment->addConverter(new HorizontalRuleConverter());
|
||||
$environment->addConverter(new ImageConverter());
|
||||
$environment->addConverter(new LinkConverter());
|
||||
$environment->addConverter(new ListBlockConverter());
|
||||
$environment->addConverter(new ListItemConverter());
|
||||
$environment->addConverter(new ParagraphConverter());
|
||||
$environment->addConverter(new PreformattedConverter());
|
||||
$environment->addConverter(new TextConverter());
|
||||
|
||||
return $environment;
|
||||
}
|
||||
}
|
231
vendor/league/html-to-markdown/src/HtmlConverter.php
vendored
Normal file
231
vendor/league/html-to-markdown/src/HtmlConverter.php
vendored
Normal file
|
@ -0,0 +1,231 @@
|
|||
<?php
|
||||
|
||||
namespace League\HTMLToMarkdown;
|
||||
|
||||
/**
|
||||
* Class HtmlConverter
|
||||
*
|
||||
* A helper class to convert HTML to Markdown.
|
||||
*
|
||||
* @author Colin O'Dell <colinodell@gmail.com>
|
||||
* @author Nick Cernis <nick@cern.is>
|
||||
*
|
||||
* @link https://github.com/thephpleague/html-to-markdown/ Latest version on GitHub.
|
||||
*
|
||||
* @license http://www.opensource.org/licenses/mit-license.php MIT
|
||||
*/
|
||||
class HtmlConverter
|
||||
{
|
||||
/**
|
||||
* @var Environment
|
||||
*/
|
||||
protected $environment;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param Environment|array $options Environment object or configuration options
|
||||
*/
|
||||
public function __construct($options = array())
|
||||
{
|
||||
if ($options instanceof Environment) {
|
||||
$this->environment = $options;
|
||||
} elseif (is_array($options)) {
|
||||
$defaults = array(
|
||||
'header_style' => 'setext', // Set to 'atx' to output H1 and H2 headers as # Header1 and ## Header2
|
||||
'suppress_errors' => true, // Set to false to show warnings when loading malformed HTML
|
||||
'strip_tags' => false, // Set to true to strip tags that don't have markdown equivalents. N.B. Strips tags, not their content. Useful to clean MS Word HTML output.
|
||||
'bold_style' => '**', // Set to '__' if you prefer the underlined style
|
||||
'italic_style' => '_', // Set to '*' if you prefer the asterisk style
|
||||
'remove_nodes' => '', // space-separated list of dom nodes that should be removed. example: 'meta style script'
|
||||
'hard_break' => false,// Set to true to turn <br> into `\n` instead of ` \n`
|
||||
);
|
||||
|
||||
$this->environment = Environment::createDefaultEnvironment($defaults);
|
||||
|
||||
$this->environment->getConfig()->merge($options);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Environment
|
||||
*/
|
||||
public function getEnvironment()
|
||||
{
|
||||
return $this->environment;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Configuration
|
||||
*/
|
||||
public function getConfig()
|
||||
{
|
||||
return $this->environment->getConfig();
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert
|
||||
*
|
||||
* @see HtmlConverter::convert
|
||||
*
|
||||
* @param string $html
|
||||
*
|
||||
* @return string The Markdown version of the html
|
||||
*/
|
||||
public function __invoke($html)
|
||||
{
|
||||
return $this->convert($html);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert
|
||||
*
|
||||
* Loads HTML and passes to getMarkdown()
|
||||
*
|
||||
* @param string $html
|
||||
*
|
||||
* @throws \InvalidArgumentException
|
||||
*
|
||||
* @return string The Markdown version of the html
|
||||
*/
|
||||
public function convert($html)
|
||||
{
|
||||
if (trim($html) === '') {
|
||||
return '';
|
||||
}
|
||||
|
||||
$document = $this->createDOMDocument($html);
|
||||
|
||||
// Work on the entire DOM tree (including head and body)
|
||||
if (!($root = $document->getElementsByTagName('html')->item(0))) {
|
||||
throw new \InvalidArgumentException('Invalid HTML was provided');
|
||||
}
|
||||
|
||||
$rootElement = new Element($root);
|
||||
$this->convertChildren($rootElement);
|
||||
|
||||
// Store the now-modified DOMDocument as a string
|
||||
$markdown = $document->saveHTML();
|
||||
|
||||
return $this->sanitize($markdown);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $html
|
||||
*
|
||||
* @return \DOMDocument
|
||||
*/
|
||||
private function createDOMDocument($html)
|
||||
{
|
||||
$document = new \DOMDocument();
|
||||
|
||||
if ($this->getConfig()->getOption('suppress_errors')) {
|
||||
// Suppress conversion errors (from http://bit.ly/pCCRSX)
|
||||
libxml_use_internal_errors(true);
|
||||
}
|
||||
|
||||
// Hack to load utf-8 HTML (from http://bit.ly/pVDyCt)
|
||||
$document->loadHTML('<?xml encoding="UTF-8">' . $html);
|
||||
$document->encoding = 'UTF-8';
|
||||
|
||||
if ($this->getConfig()->getOption('suppress_errors')) {
|
||||
libxml_clear_errors();
|
||||
}
|
||||
|
||||
return $document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert Children
|
||||
*
|
||||
* Recursive function to drill into the DOM and convert each node into Markdown from the inside out.
|
||||
*
|
||||
* Finds children of each node and convert those to #text nodes containing their Markdown equivalent,
|
||||
* starting with the innermost element and working up to the outermost element.
|
||||
*
|
||||
* @param ElementInterface $element
|
||||
*/
|
||||
private function convertChildren(ElementInterface $element)
|
||||
{
|
||||
// Don't convert HTML code inside <code> and <pre> blocks to Markdown - that should stay as HTML
|
||||
// except if the current node is a code tag, which needs to be converted by the CodeConverter.
|
||||
if ($element->isDescendantOf(array('pre', 'code')) && $element->getTagName() !== 'code') {
|
||||
return;
|
||||
}
|
||||
|
||||
// If the node has children, convert those to Markdown first
|
||||
if ($element->hasChildren()) {
|
||||
foreach ($element->getChildren() as $child) {
|
||||
$this->convertChildren($child);
|
||||
}
|
||||
}
|
||||
|
||||
// Now that child nodes have been converted, convert the original node
|
||||
$markdown = $this->convertToMarkdown($element);
|
||||
|
||||
// Create a DOM text node containing the Markdown equivalent of the original node
|
||||
|
||||
// Replace the old $node e.g. '<h3>Title</h3>' with the new $markdown_node e.g. '### Title'
|
||||
$element->setFinalMarkdown($markdown);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert to Markdown
|
||||
*
|
||||
* Converts an individual node into a #text node containing a string of its Markdown equivalent.
|
||||
*
|
||||
* Example: An <h3> node with text content of 'Title' becomes a text node with content of '### Title'
|
||||
*
|
||||
* @param ElementInterface $element
|
||||
*
|
||||
* @return string The converted HTML as Markdown
|
||||
*/
|
||||
protected function convertToMarkdown(ElementInterface $element)
|
||||
{
|
||||
$tag = $element->getTagName();
|
||||
|
||||
// Strip nodes named in remove_nodes
|
||||
$tags_to_remove = explode(' ', $this->getConfig()->getOption('remove_nodes'));
|
||||
if (in_array($tag, $tags_to_remove)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$converter = $this->environment->getConverterByTag($tag);
|
||||
|
||||
return $converter->convert($element);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $markdown
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
protected function sanitize($markdown)
|
||||
{
|
||||
$markdown = html_entity_decode($markdown, ENT_QUOTES, 'UTF-8');
|
||||
$markdown = preg_replace('/<!DOCTYPE [^>]+>/', '', $markdown); // Strip doctype declaration
|
||||
$markdown = trim($markdown); // Remove blank spaces at the beggining of the html
|
||||
|
||||
/*
|
||||
* Removing unwanted tags. Tags should be added to the array in the order they are expected.
|
||||
* XML, html and body opening tags should be in that order. Same case with closing tags
|
||||
*/
|
||||
$unwanted = array('<?xml encoding="UTF-8">', '<html>', '</html>', '<body>', '</body>', '<head>', '</head>', '
');
|
||||
|
||||
foreach ($unwanted as $tag) {
|
||||
if (strpos($tag, '/') === false) {
|
||||
// Opening tags
|
||||
if (strpos($markdown, $tag) === 0) {
|
||||
$markdown = substr($markdown, strlen($tag));
|
||||
}
|
||||
} else {
|
||||
// Closing tags
|
||||
if (strpos($markdown, $tag) === strlen($markdown) - strlen($tag)) {
|
||||
$markdown = substr($markdown, 0, -strlen($tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return trim($markdown, "\n\r\0\x0B");
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue