<?php
/**
 * @copyright Copyright (c) 2014 Carsten Brandt
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
 * @link https://github.com/cebe/markdown#readme
 */

namespace cebe\markdown;

use ReflectionMethod;

/**
 * A generic parser for markdown-like languages.
 *
 * The parser works in two phases: the input is first converted into an abstract
 * syntax tree ("absy", a list of block arrays whose element `0` is the block type),
 * which is then rendered by dispatching each block to its `render<Type>()` method.
 *
 * Block types are discovered from protected `identify<Type>()` methods, inline
 * elements from protected `parse<Something>()` methods carrying an `@marker`
 * annotation in their PHPDoc.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 */
abstract class Parser
{
	/**
	 * @var integer the maximum nesting level for language elements.
	 */
	public $maximumNestingLevel = 32;

	/**
	 * @var array the stack of contexts the parser is currently in. The innermost
	 * context is the first element; entries are pushed and popped while rendering
	 * blocks and while running inline parser methods.
	 * TODO remove in favor of absy
	 */
	protected $context = [];
	/**
	 * @var array these are "escapeable" characters. When using one of these prefixed with a
	 * backslash, the character will be outputted without the backslash and is not interpreted
	 * as markdown.
	 */
	protected $escapeCharacters = [
		'\\', // backslash
	];

	/**
	 * @var integer the current nesting depth of block and inline parsing, compared
	 * against [[maximumNestingLevel]].
	 */
	private $_depth = 0;


	/**
	 * Parses the given text considering the full language.
	 *
	 * This includes parsing block elements as well as inline elements.
	 *
	 * Empty input yields an empty string; in that case `cleanup()` is not called.
	 * The string `"0"` is not considered empty and is parsed like any other text.
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parse($text)
	{
		$this->prepare();

		if ($this->isEmptyInput($text)) {
			return '';
		}

		// normalize all line endings to "\n"
		$text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

		$this->prepareMarkers($text);

		$absy = $this->parseBlocks(explode("\n", $text));
		$markup = $this->renderAbsy($absy);

		$this->cleanup();
		return $markup;
	}

	/**
	 * Parses a paragraph without block elements (block elements are ignored).
	 *
	 * Empty input yields an empty string; in that case `cleanup()` is not called.
	 * The string `"0"` is not considered empty and is parsed like any other text.
	 *
	 * @param string $text the text to parse
	 * @return string parsed markup
	 */
	public function parseParagraph($text)
	{
		$this->prepare();

		if ($this->isEmptyInput($text)) {
			return '';
		}

		// normalize all line endings to "\n"
		$text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

		$this->prepareMarkers($text);

		$absy = $this->parseInline($text);
		$markup = $this->renderAbsy($absy);

		$this->cleanup();
		return $markup;
	}

	/**
	 * Checks whether the input contains nothing to parse.
	 *
	 * `empty()` is deliberately not used here: it treats the string `"0"` as empty,
	 * which would silently drop valid input.
	 *
	 * @param mixed $text the input passed to `parse()` or `parseParagraph()`
	 * @return boolean whether the input is an empty string, `null`, `false` or an empty array
	 */
	private function isEmptyInput($text)
	{
		return $text === '' || $text === null || $text === false || $text === [];
	}

	/**
	 * This method will be called before `parse()` and `parseParagraph()`.
	 * You can override it to do some initialization work.
	 */
	protected function prepare()
	{
	}

	/**
	 * This method will be called after `parse()` and `parseParagraph()`.
	 * You can override it to do cleanup.
	 */
	protected function cleanup()
	{
	}


	// block parsing

	/**
	 * @var array|null cached list of block type names, `null` until first requested.
	 */
	private $_blockTypes;

	/**
	 * Returns the block types this parser knows about.
	 *
	 * The list is detected once from the protected methods whose name starts with
	 * `identify`; the remainder of the method name, in lower case, is the block type.
	 * The result is sorted and cached.
	 *
	 * @return array a list of block element types available.
	 */
	protected function blockTypes()
	{
		if ($this->_blockTypes === null) {
			// detect block types via "identify" functions
			$reflection = new \ReflectionClass($this);
			$this->_blockTypes = array_filter(array_map(function($method) {
				$name = $method->getName();
				return strncmp($name, 'identify', 8) === 0 ? strtolower(substr($name, 8)) : false;
			}, $reflection->getMethods(ReflectionMethod::IS_PROTECTED)));

			sort($this->_blockTypes);
		}
		return $this->_blockTypes;
	}

	/**
	 * Given a set of lines and an index of a current line it uses the registered block types to
	 * detect the type of this line.
	 *
	 * The `identify<Type>()` methods are asked in the order returned by `blockTypes()`;
	 * the first one returning a truthy value wins. If none matches, the line is
	 * considered a paragraph.
	 *
	 * @param array $lines
	 * @param integer $current
	 * @return string name of the block type in lower case
	 */
	protected function detectLineType($lines, $current)
	{
		$line = $lines[$current];
		foreach ($this->blockTypes() as $blockType) {
			if ($this->{'identify' . $blockType}($line, $lines, $current)) {
				return $blockType;
			}
		}
		return 'paragraph';
	}

	/**
	 * Parse block elements by calling `detectLineType()` to identify them
	 * and call the matching `consume<Type>()` method afterwards.
	 * The blocks are then rendered by the corresponding rendering methods.
	 *
	 * When the maximum nesting level is reached the lines are not parsed but
	 * returned as a single text element.
	 *
	 * @param array $lines the lines to parse
	 * @return array a list of blocks (abstract syntax tree)
	 */
	protected function parseBlocks($lines)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', implode("\n", $lines)]];
		}
		$this->_depth++;

		$blocks = [];

		// convert lines to blocks
		for ($i = 0, $count = count($lines); $i < $count; $i++) {
			// skip empty lines; empty() must not be used here as it would also skip a line "0"
			if (rtrim($lines[$i]) === '') {
				continue;
			}

			// identify a blocks beginning, falls back to 'paragraph'
			$blockType = $this->detectLineType($lines, $i);
			// call consume method for the detected block type to consume further lines,
			// it returns the block and the index of the last line it consumed
			list($block, $i) = $this->{'consume' . $blockType}($lines, $i);
			if ($block !== false) {
				$blocks[] = $block;
			}
		}

		$this->_depth--;

		return $blocks;
	}

	/**
	 * Renders an abstract syntax tree.
	 *
	 * Each block is passed to the method named `render` followed by the block type
	 * stored in element `0` of the block. While a block is rendered, its type is the
	 * first element of [[context]].
	 *
	 * @param array $blocks the list of blocks to render
	 * @return string the concatenated output of all blocks
	 */
	protected function renderAbsy($blocks)
	{
		$output = '';
		foreach ($blocks as $block) {
			array_unshift($this->context, $block[0]);
			$output .= $this->{'render' . $block[0]}($block);
			array_shift($this->context);
		}
		return $output;
	}

	/**
	 * Consume lines for a paragraph
	 *
	 * Lines are collected starting at `$current` until a line that is empty or
	 * contains only whitespace, or the end of input, is reached.
	 *
	 * @param $lines
	 * @param $current
	 * @return array the paragraph block and the index of the last consumed line
	 */
	protected function consumeParagraph($lines, $current)
	{
		// consume until newline
		$content = [];
		for ($i = $current, $count = count($lines); $i < $count; $i++) {
			if (ltrim($lines[$i]) !== '') {
				$content[] = $lines[$i];
			} else {
				break;
			}
		}
		$block = [
			'paragraph',
			'content' => $this->parseInline(implode("\n", $content)),
		];
		// $i points one past the last consumed line, so step back by one
		return [$block, --$i];
	}

	/**
	 * Render a paragraph block
	 *
	 * @param $block
	 * @return string
	 */
	protected function renderParagraph($block)
	{
		return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
	}


	// inline parsing


	/**
	 * @var array the set of inline markers to use in different contexts.
	 * Indexed by the first character of the marker, each entry maps the markers
	 * starting with that character to the name of their parser method.
	 */
	private $_inlineMarkers = [];

	/**
	 * Returns a map of inline markers to the corresponding parser methods.
	 *
	 * This array defines handler methods for inline markdown markers.
	 * When a marker is found in the text, the handler method is called with the text
	 * starting at the position of the marker.
	 *
	 * Note that markers starting with whitespace may slow down the parser,
	 * you may want to use [[renderText]] to deal with them.
	 *
	 * You may override this method to define a set of markers and parsing methods.
	 * The default implementation looks for protected methods starting with `parse` that
	 * also have an `@marker` annotation in PHPDoc.
	 *
	 * @return array a map of markers to parser methods
	 */
	protected function inlineMarkers()
	{
		$markers = [];
		// detect "parse" functions
		$reflection = new \ReflectionClass($this);
		foreach ($reflection->getMethods(ReflectionMethod::IS_PROTECTED) as $method) {
			$methodName = $method->getName();
			if (strncmp($methodName, 'parse', 5) === 0) {
				preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
				foreach ($matches[1] as $match) {
					$markers[$match] = $methodName;
				}
			}
		}
		return $markers;
	}

	/**
	 * Prepare markers that are used in the text to parse
	 *
	 * Add all markers that are present in markdown.
	 * Check is done to avoid iterations in parseInline(), good for huge markdown files
	 * @param string $text
	 */
	private function prepareMarkers($text)
	{
		$this->_inlineMarkers = [];
		foreach ($this->inlineMarkers() as $marker => $method) {
			if (strpos($text, $marker) !== false) {
				$m = $marker[0];
				// put the longest marker first
				if (isset($this->_inlineMarkers[$m])) {
					reset($this->_inlineMarkers[$m]);
					if (strlen($marker) > strlen(key($this->_inlineMarkers[$m]))) {
						$this->_inlineMarkers[$m] = array_merge([$marker => $method], $this->_inlineMarkers[$m]);
						continue;
					}
				}
				$this->_inlineMarkers[$m][$marker] = $method;
			}
		}
	}

	/**
	 * Parses inline elements of the language.
	 *
	 * The text is scanned for the first characters of the prepared inline markers.
	 * Text in front of such a character becomes a text element. At the character
	 * itself the registered parser methods are tried; the first one whose marker
	 * matches is called and returns an element and the number of bytes it consumed.
	 * If no marker matches, the single character is added as text.
	 *
	 * When the maximum nesting level is reached the text is not parsed but
	 * returned as a single text element.
	 *
	 * @param string $text the inline text to parse.
	 * @return array a list of inline elements (abstract syntax tree)
	 */
	protected function parseInline($text)
	{
		if ($this->_depth >= $this->maximumNestingLevel) {
			// maximum depth is reached, do not parse input
			return [['text', $text]];
		}
		$this->_depth++;

		$markers = implode('', array_keys($this->_inlineMarkers));

		$paragraph = [];
		// the explicit comparison keeps a marker list of just "0" working, which empty() would reject
		while ($markers !== '' && ($found = strpbrk($text, $markers)) !== false) {

			// strpbrk() returns the remainder of $text beginning at the first marker
			// character, so the position follows from the lengths without scanning again
			$pos = strlen($text) - strlen($found);

			// add the text up to next marker to the paragraph
			if ($pos !== 0) {
				$paragraph[] = ['text', substr($text, 0, $pos)];
			}
			$text = $found;

			$parsed = false;
			foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
				if (strncmp($text, $marker, strlen($marker)) === 0) {
					// parse the marker
					array_unshift($this->context, $method);
					list($output, $offset) = $this->$method($text);
					array_shift($this->context);

					$paragraph[] = $output;
					// cast because substr() returns false at the end of the string before PHP 8
					$text = (string) substr($text, $offset);
					$parsed = true;
					break;
				}
			}
			if (!$parsed) {
				// no marker matched, treat the character as normal text
				$paragraph[] = ['text', substr($text, 0, 1)];
				$text = (string) substr($text, 1);
			}
		}

		// the remaining text contains no marker characters
		$paragraph[] = ['text', $text];

		$this->_depth--;

		return $paragraph;
	}

	/**
	 * Parses escaped special characters.
	 *
	 * A backslash followed by one of the [[escapeCharacters]] results in that
	 * character only, consuming both. Otherwise only the backslash is consumed
	 * and added as text.
	 * @marker \
	 */
	protected function parseEscape($text)
	{
		if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
			return [['text', $text[1]], 2];
		}
		return [['text', $text[0]], 1];
	}

	/**
	 * This function renders plain text sections in the markdown text.
	 * It can be used to work on normal text sections for example to highlight keywords or
	 * do special escaping.
	 *
	 * @param array $block the text element, the text is stored in element `1`
	 * @return string the text as is
	 */
	protected function renderText($block)
	{
		return $block[1];
	}
}