123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382 |
- <?php
- /**
- * @copyright Copyright (c) 2014 Carsten Brandt
- * @license https://github.com/cebe/markdown/blob/master/LICENSE
- * @link https://github.com/cebe/markdown#readme
- */
- namespace cebe\markdown\latex;
- use cebe\markdown\block\CodeTrait;
- use cebe\markdown\block\HeadlineTrait;
- use cebe\markdown\block\ListTrait;
- use cebe\markdown\block\QuoteTrait;
- use cebe\markdown\block\RuleTrait;
- use cebe\markdown\inline\CodeTrait as InlineCodeTrait;
- use cebe\markdown\inline\EmphStrongTrait;
- use cebe\markdown\inline\LinkTrait;
- use MikeVanRiel\TextToLatex;
- /**
- * Markdown parser for the [initial markdown spec](http://daringfireball.net/projects/markdown/syntax) that renders LaTeX output.
- *
- * @author Carsten Brandt <mail@cebe.cc>
- */
- class Markdown extends \cebe\markdown\Parser
- {
- // include block element parsing using traits
- use CodeTrait;
- use HeadlineTrait;
- use ListTrait {
- // Check Ul List before headline
- identifyUl as protected identifyBUl;
- consumeUl as protected consumeBUl;
- }
- use QuoteTrait;
- use RuleTrait {
- // Check Hr before checking lists
- identifyHr as protected identifyAHr;
- consumeHr as protected consumeAHr;
- }
- // include inline element parsing using traits
- use InlineCodeTrait;
- use EmphStrongTrait;
- use LinkTrait;
/**
 * @var string prefix that is prepended to every automatically generated label.
 * Set it to keep labels unique when several markdown files are combined into one document.
 */
public $labelPrefix = '';

// Values accepted by $linkStyle.
const LINK_STYLE_FOOTNOTE = 'footnote';
const LINK_STYLE_HREF = 'href';

/**
 * @var string selects how links are written to LaTeX. Two options exist:
 *
 * - `footnote` (default) - the link text is followed by a footnote that holds the full URL. Good for printed PDFs.
 * - `href` - the link becomes a hyperref, similar to HTML; the link target itself is not visible.
 */
public $linkStyle = self::LINK_STYLE_FOOTNOTE;

/**
 * @var array the "escapeable" characters. When one of them is written with a leading
 * backslash, the character is output without the backslash and is not interpreted
 * as markdown.
 */
protected $escapeCharacters = [
    // backslash, backtick, asterisk, underscore
    '\\', '`', '*', '_',
    // curly braces, square brackets, parentheses
    '{', '}',
    '[', ']',
    '(', ')',
    // hash mark, plus sign, minus sign (hyphen), dot, exclamation mark
    '#', '+', '-', '.', '!',
    // angle brackets
    '<', '>',
];
/**
 * @inheritDoc
 *
 * Empties the list of link references held in `$this->references`.
 */
protected function prepare()
{
    $this->references = [];
}
/**
 * Consume lines for a paragraph.
 *
 * Lines are collected from `$current` onwards until one of these is met:
 * an empty or whitespace-only line, a line starting with a tab or four spaces
 * (possible code block), or a line that `identifyHeadline()` accepts.
 * This allows headlines and code to break paragraphs.
 *
 * @param string[] $lines the lines being parsed.
 * @param int $current index of the first line of the paragraph.
 * @return array two elements: the `paragraph` block and the index of the last consumed line.
 */
protected function consumeParagraph($lines, $current)
{
    $content = [];
    for ($i = $current, $count = count($lines); $i < $count; $i++) {
        $line = $lines[$i];

        // Compare with '' explicitly. empty() also reports true for the line "0",
        // which ended the paragraph on ordinary text (and consumed nothing at all
        // when "0" was the first line).
        if ($line === '' || ltrim($line) === '') {
            break;
        }

        // A leading tab or four leading spaces may start a code block.
        // The literal has to contain four spaces for strncmp(..., 4) to match.
        if ($line[0] === "\t" || strncmp($line, '    ', 4) === 0) {
            break;
        }

        if ($this->identifyHeadline($line, $lines, $i)) {
            break;
        }

        $content[] = $line;
    }

    $block = [
        'paragraph',
        'content' => $this->parseInline(implode("\n", $content)),
    ];

    // $i points at the first line that was NOT consumed, step back by one.
    return [$block, --$i];
}
- // rendering adjusted for LaTeX output
/**
 * @inheritdoc
 *
 * Renders the paragraph content followed by a blank line.
 */
protected function renderParagraph($block)
{
    $rendered = $this->renderAbsy($block['content']);

    return $rendered . "\n\n";
}
/**
 * @inheritdoc
 *
 * Wraps the rendered quote content in a LaTeX `quote` environment.
 */
protected function renderQuote($block)
{
    $inner = $this->renderAbsy($block['content']);

    return '\\begin{quote}' . $inner . '\\end{quote}' . "\n";
}
/**
 * @inheritdoc
 *
 * Renders a code block as an `lstlisting` environment, preceded by an `\lstset`
 * that selects `$block['language']` when it is set and resets the language otherwise.
 */
protected function renderCode($block)
{
    if (isset($block['language'])) {
        $language = '\\lstset{language=' . $block['language'] . '}';
    } else {
        $language = '\\lstset{language={}}';
    }

    $content = $block['content'];

    // replace No-Break Space characters in code block, which do not render in LaTeX
    $cleaned = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $content);
    // preg_replace() returns null on error (e.g. malformed UTF-8 with the `u` modifier).
    // Keep the untouched content then instead of emitting an empty listing.
    if ($cleaned !== null) {
        $content = $cleaned;
    }

    return $language . '\\begin{lstlisting}' . "\n" . $content . "\n" . '\\end{lstlisting}' . "\n";
}
/**
 * @inheritdoc
 *
 * Renders an `enumerate` environment when `$block['list']` is `ol` and an `itemize`
 * environment otherwise, with one `\item` per entry of `$block['items']`.
 */
protected function renderList($block)
{
    $environment = $block['list'] === 'ol' ? 'enumerate' : 'itemize';

    $items = '';
    foreach ($block['items'] as $itemBlocks) {
        $items .= '\\item ' . $this->renderAbsy($itemBlocks) . "\n";
    }

    return '\\begin{' . $environment . "}\n" . $items . '\\end{' . $environment . "}\n";
}
/**
 * @inheritdoc
 *
 * Maps headline levels 1, 2 and 3 to `\section`, `\subsection` and `\subsubsection`.
 * Every other level is rendered as `\paragraph`.
 */
protected function renderHeadline($block)
{
    $content = $this->renderAbsy($block['content']);
    $level = $block['level'];

    // Loose comparison on purpose, it mirrors the semantics of a switch statement.
    if ($level == 1) {
        $command = 'section';
    } elseif ($level == 2) {
        $command = 'subsection';
    } elseif ($level == 3) {
        $command = 'subsubsection';
    } else {
        $command = 'paragraph';
    }

    return '\\' . $command . '{' . $content . "}\n";
}
/**
 * @inheritdoc
 *
 * Renders a horizontal rule that spans the text width; `$block` is not used.
 */
protected function renderHr($block)
{
    $rule = '\\noindent\\rule{\\textwidth}{0.4pt}';

    return "\n" . $rule . "\n";
}
/**
 * @inheritdoc
 *
 * Reference style links are resolved through `lookupReference()` first; when the
 * reference is unknown the original markdown text (`$block['orig']`) is returned.
 *
 * URLs without `://` are treated as links inside the document and rendered with
 * `\hyperref`. All other URLs are rendered according to `$linkStyle`.
 */
protected function renderLink($block)
{
    if (isset($block['refkey'])) {
        $reference = $this->lookupReference($block['refkey']);
        if ($reference === false) {
            return $block['orig'];
        }
        $block = array_merge($block, $reference);
    }

    $target = $block['url'];
    $label = $this->renderAbsy($block['text']);

    if (strpos($target, '://') !== false) {
        // absolute URL
        if ($this->linkStyle === self::LINK_STYLE_HREF) {
            return '\\href{' . $this->escapeUrl($target) . '}{' . $label . '}';
        }

        $titlePrefix = '';
        if (!empty($block['title'])) {
            $titlePrefix = $this->escapeLatex($block['title']) . ': ';
        }

        return $label . '\\footnote{' . $titlePrefix . '\\url{' . $this->escapeUrl($target) . '}}';
    }

    // Every non absolute link is considered relative to the document.
    // The title is ignored in this case.
    if (isset($target[0]) && $target[0] === '#') {
        $target = $this->labelPrefix . $target;
    }

    return '\\hyperref[' . str_replace('#', '::', $target) . ']{' . $label . '}';
}
/**
 * @inheritdoc
 *
 * Renders the image with `\includegraphics` scaled to the text width.
 * Reference style images are resolved through `lookupReference()` first; when the
 * reference is unknown the original markdown text (`$block['orig']`) is returned.
 */
protected function renderImage($block)
{
    if (isset($block['refkey'])) {
        if (($ref = $this->lookupReference($block['refkey'])) !== false) {
            $block = array_merge($block, $ref);
        } else {
            return $block['orig'];
        }
    }

    // TODO create figure with caption with title
    $replaces = [
        '%' => '\\%',
        '{' => '\\%7B',
        '}' => '\\%7D',
        '\\' => '\\\\',
        '#' => '\\#',
        '$' => '\\%24',
    ];

    // strtr() replaces all keys in a single pass over the input. str_replace() with
    // arrays works pair by pair on the intermediate result, so the backslashes that
    // were just inserted for '%', '{' and '}' got doubled again by the '\' rule.
    $url = strtr($block['url'], $replaces);

    return '\\noindent\\includegraphics[width=\\textwidth]{' . $url . '}';
}
/**
 * Parses <a name="..."></a> tags as reference labels.
 *
 * An `a` or `span` tag carrying a `name` or `id` attribute at the start of `$text`
 * becomes a `label` element whose name is `$labelPrefix` plus the attribute value,
 * with `#` replaced by `::`. Anything else is returned as a literal `<`.
 *
 * NOTE(review): nothing visible in this file calls this private method - confirm
 * whether it is still in use.
 *
 * @param string $text the text to parse, expected to start with `<`.
 * @return array the parsed element and the number of consumed characters.
 */
private function parseInlineHtml($text)
{
    $literal = [['text', '<'], 1];

    if (strpos($text, '>') === false) {
        return $literal;
    }

    // convert a name markers to \labels
    if (!preg_match('~^<((a|span)) (name|id)="(.*?)">.*?</\1>~i', $text, $matches)) {
        return $literal;
    }

    $labelName = str_replace('#', '::', $this->labelPrefix . $matches[4]);

    return [
        ['label', 'name' => $labelName],
        strlen($matches[0]),
    ];
}
/**
 * Renders a reference label as `\label{...}` using `$block['name']`.
 */
protected function renderLabel($block)
{
    return '\\label{' . $block['name'] . '}';
}
/**
 * @inheritdoc
 *
 * Renders the address in `$block[1]` as a `mailto:` hyperlink that shows the address itself.
 */
protected function renderEmail($block)
{
    $address = $this->escapeUrl($block[1]);

    return '\\href{mailto:' . $address . '}{' . $address . '}';
}
/**
 * @inheritdoc
 *
 * Renders the URL in `$block[1]` with the `\url` command.
 */
protected function renderUrl($block)
{
    $escaped = $this->escapeUrl($block[1]);

    return sprintf('\\url{%s}', $escaped);
}
/**
 * @inheritdoc
 *
 * Renders inline code with `\lstinline`, using a delimiter character that does not
 * occur in the code. `|` is preferred, then a backtick, which keeps the output
 * unchanged for code that was rendered correctly before. Line breaks inside the
 * code are replaced by spaces.
 */
protected function renderInlineCode($block)
{
    // replace No-Break Space characters in code block, which do not render in LaTeX
    $content = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $block[1]);
    // preg_replace() returns null on error (e.g. malformed UTF-8), keep the raw code then.
    if ($content === null) {
        $content = $block[1];
    }
    $content = str_replace("\n", ' ', $content);

    // Fall back to the backtick when every candidate occurs in the code.
    $delimiter = '`';
    foreach (['|', '`', '!', '+', '/', '@', '='] as $candidate) {
        if (strpos($content, $candidate) === false) {
            $delimiter = $candidate;
            break;
        }
    }

    return '\\lstinline' . $delimiter . $content . $delimiter;
}
/**
 * @inheritdoc
 *
 * Renders strong emphasis as bold text (`\textbf`).
 */
protected function renderStrong($block)
{
    $inner = $this->renderAbsy($block[1]);

    return sprintf('\\textbf{%s}', $inner);
}
/**
 * @inheritdoc
 *
 * Renders emphasis as italic text (`\textit`).
 */
protected function renderEmph($block)
{
    $inner = $this->renderAbsy($block[1]);

    return sprintf('\\textit{%s}', $inner);
}
/**
 * Parses escaped special characters.
 * An escaped backslash yields a `backslash` element, which allows a backslash
 * to be interpreted as LaTeX. Any other escapable character is returned as text
 * without the backslash; a backslash before anything else stays as it is.
 *
 * NOTE(review): the marker tag below is presumably evaluated by the parent parser
 * to bind this method to the backslash character - keep it unchanged.
 * @marker \
 */
protected function parseEscape($text)
{
    $isEscapable = isset($text[1]) && in_array($text[1], $this->escapeCharacters);
    if (!$isEscapable) {
        return [['text', $text[0]], 1];
    }

    $escaped = $text[1];
    if ($escaped === '\\') {
        return [['backslash'], 2];
    }

    return [['text', $escaped], 2];
}
/**
 * Renders the `backslash` element created by parseEscape() as a single backslash character.
 */
protected function renderBackslash()
{
    return "\\";
}
/**
 * @var TextToLatex|null escaper instance, created on first use by escapeLatex().
 */
private $_escaper;

/**
 * Escape special characters in URLs.
 *
 * The string is passed through escapeLatex() and every `%` in the result is
 * prefixed with a backslash afterwards.
 */
protected function escapeUrl($string)
{
    $escaped = $this->escapeLatex($string);

    return str_replace('%', '\\%', $escaped);
}
/**
 * Escape special LaTeX characters.
 *
 * Delegates to a `TextToLatex` converter that is created on the first call and reused afterwards.
 */
protected function escapeLatex($string)
{
    if (null === $this->_escaper) {
        $this->_escaper = new TextToLatex();
    }
    $escaper = $this->_escaper;

    return $escaper->convert($string);
}
/**
 * @inheritdoc
 *
 * Escapes the text in `$text[1]` for LaTeX and renders a newline indicated by
 * two spaces on the end of a markdown line as a LaTeX line break (`\\`).
 * No-Break Space characters are converted to their LaTeX counterparts.
 */
protected function renderText($text)
{
    // A hard line break is marked by TWO spaces before the newline. Matching a
    // single space would turn every line with one trailing space into a break.
    $output = str_replace("  \n", "\\\\\n", $this->escapeLatex($text[1]));

    // support No-Break Space in LaTeX
    $output = preg_replace("/\x{00a0}/u", '~', $output);

    // support Narrow No-Break Space spaces in LaTeX
    // http://unicode-table.com/en/202F/
    // http://tex.stackexchange.com/questions/76132/how-to-typeset-a-small-non-breaking-space
    $output = preg_replace("/\x{202f}/u", '\nobreak\hspace{.16667em plus .08333em}', $output);

    return $output;
}
- }
|