<?php
/**
 * Block Serialization Parser
 *
 * @package WordPress
 */

/**
 * Class WP_Block_Parser_Block
 *
 * Holds the block structure in memory
 *
 * @since 3.8.0
 */
class WP_Block_Parser_Block {
	/**
	 * Name of block
	 *
	 * @example "core/paragraph"
	 *
	 * @since 3.8.0
	 * @var string
	 */
	public $blockName;

	/**
	 * Optional set of attributes from block comment delimiters
	 *
	 * @example null
	 * @example array( 'columns' => 3 )
	 *
	 * @since 3.8.0
	 * @var array|null
	 */
	public $attrs;

	/**
	 * List of inner blocks.
	 *
	 * The parser appends inner blocks as arrays (each block object is cast
	 * with `(array)` before being stored).
	 *
	 * @since 3.8.0
	 * @var array[]
	 */
	public $innerBlocks;

	/**
	 * Resultant HTML from inside block comment delimiters
	 * after removing inner blocks
	 *
	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
	 *
	 * @since 3.8.0
	 * @var string
	 */
	public $innerHTML;

	/**
	 * List of string fragments and null markers where inner blocks were found
	 *
	 * @example array(
	 *   'innerHTML'    => 'BeforeInnerAfter',
	 *   'innerBlocks'  => array( block, block ),
	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
	 * )
	 *
	 * @since 4.2.0
	 * @var array
	 */
	public $innerContent;

	/**
	 * Constructor.
	 *
	 * Will populate object properties from the provided arguments.
	 *
	 * @since 3.8.0
	 *
	 * @param string $name         Name of block.
	 * @param array  $attrs        Optional set of attributes from block comment delimiters.
	 * @param array  $innerBlocks  List of inner blocks.
	 * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
	 * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
	 */
	public function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
		$this->blockName    = $name;
		$this->attrs        = $attrs;
		$this->innerBlocks  = $innerBlocks;
		$this->innerHTML    = $innerHTML;
		$this->innerContent = $innerContent;
	}
}

/**
 * Class WP_Block_Parser_Frame
 *
 * Holds partial blocks in memory while parsing
 *
 * @internal
 * @since 3.8.0
 */
class WP_Block_Parser_Frame {
	/**
	 * Full or partial block
	 *
	 * @since 3.8.0
	 * @var WP_Block_Parser_Block
	 */
	public $block;

	/**
	 * Byte offset into document for start of parse token
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $token_start;

	/**
	 * Byte length of entire parse token string
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $token_length;

	/**
	 * Byte offset into document for after parse token ends
	 * (used during reconstruction of stack into parse production)
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $prev_offset;

	/**
	 * Byte offset into document where leading HTML before token starts
	 *
	 * @since 3.8.0
	 * @var int|null
	 */
	public $leading_html_start;

	/**
	 * Constructor
	 *
	 * Will populate object properties from the provided arguments.
	 *
	 * @since 3.8.0
	 *
	 * @param WP_Block_Parser_Block $block              Full or partial block.
	 * @param int                   $token_start        Byte offset into document for start of parse token.
	 * @param int                   $token_length       Byte length of entire parse token string.
	 * @param int|null              $prev_offset        Byte offset into document for after parse token ends.
	 *                                                  Defaults to `$token_start + $token_length` when omitted.
	 * @param int|null              $leading_html_start Byte offset into document where leading HTML before token starts.
	 */
	public function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
		$this->block              = $block;
		$this->token_start        = $token_start;
		$this->token_length       = $token_length;
		$this->prev_offset        = isset( $prev_offset ) ? $prev_offset : $token_start + $token_length;
		$this->leading_html_start = $leading_html_start;
	}
}

/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block objects
 *
 * @since 3.8.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 3.8.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks, each stored as an array (block objects are
	 * cast with `(array)` before being appended).
	 *
	 * @since 3.8.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 3.8.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Empty associative array, here due to PHP quirks
	 *
	 * @since 4.4.0
	 * @var array empty associative array
	 */
	public $empty_attrs;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * @since 3.8.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[] List of parsed blocks, each in array form.
	 */
	public function parse( $document ) {
		$this->document    = $document;
		$this->offset      = 0;
		$this->output      = array();
		$this->stack       = array();
		$this->empty_attrs = json_decode( '{}', true );

		// Each call to proceed() consumes one token; it returns false once parsing is done.
		do {
			// twiddle our thumbs.
		} while ( $this->proceed() );

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 3.8.0
	 * @return bool True when more tokens may follow, false when parsing is finished.
	 */
	public function proceed() {
		$next_token = $this->next_token();
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
		$stack_depth = count( $this->stack );

		// We may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// If not in a block then flush the rest of the document as freeform output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem: one or more closers are missing.
				 * This is an error.
				 *
				 * We have options:
				 * - treat it all as freeform text
				 * - assume implicit closers
				 *
				 * We assume implicit closers and collapse the whole stack
				 * piecewise; a stack of depth one is simply the single-pass
				 * case of this loop.
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * Easy case is if we stumbled upon a void block
				 * in the top-level of the document.
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// Otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// Track all newly-opened blocks on the stack.
				$this->stack[] = new WP_Block_Parser_Frame(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length,
					$start_offset + $token_length,
					$leading_html_start
				);
				$this->offset  = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * If we're missing an opener we're in trouble.
				 * This is an error.
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * We have options:
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 *
					 * We give up and emit the rest of the document as freeform.
					 */
					$this->add_freeform();
					return false;
				}

				// If we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * Otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent.
				 */
				$stack_top = array_pop( $this->stack );
				$html      = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );

				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error: unknown token type, flush the rest as freeform.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * @internal
	 * @since 3.8.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array Tuple of ( token type, block name, attributes, byte offset of token, byte length of token ).
	 *               All entries but the type are null for 'no-more-tokens'.
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		// If we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
		if ( false === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		// We have no more tokens.
		if ( 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		// With PREG_OFFSET_CAPTURE an unmatched group is either absent or reported at offset -1.
		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: $this->empty_attrs;

		/*
		 * A closer that is also void or carries attributes isn't allowed.
		 * This is an error, but we can ignore the extras since they don't
		 * hurt anything: the token is classified below as usual.
		 */

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * The block has a null name, empty attributes, no inner blocks, and the
	 * given HTML as both its innerHTML and its only innerContent entry.
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $innerHTML HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $innerHTML ) {
		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * Nothing is pushed when the resulting length is zero.
	 *
	 * @internal
	 * @since 3.8.0
	 * @param int|null $length how many bytes of document text to output;
	 *                         when omitted, everything from the current offset
	 *                         to the end of the document.
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the innerBlocks of the frame on top of the stack.
	 *
	 * Any HTML between the parent's previous offset and the start of the
	 * inner block is appended to the parent first, followed by a null
	 * marker in innerContent standing in for the inner block.
	 *
	 * @internal
	 * @since 3.8.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing from earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string instead of using empty():
		 * empty( '0' ) is true, which would silently drop HTML that is
		 * exactly "0". The is_string() guard skips a non-string substr() result.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Trailing HTML inside the block (from its previous offset up to
	 * `$end_offset`, or to the end of the document) is appended to the block
	 * first. If the frame recorded leading HTML, that HTML is emitted as a
	 * freeform block ahead of the block itself.
	 *
	 * @internal
	 * @since 3.8.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string instead of using empty():
		 * empty( '0' ) is true, which would silently drop HTML that is
		 * exactly "0". The is_string() guard skips a non-string substr() result.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}