123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386 |
- <?php
- /**
- * WPSEO plugin file.
- *
- * @package WPSEO\XML_Sitemaps
- */
- /**
- * Renders XML output for sitemaps.
- */
class WPSEO_Sitemaps_Renderer {

	/**
	 * XML stylesheet processing instruction that is prepended to the sitemap
	 * so web browsers can render it. An empty string disables the stylesheet.
	 *
	 * @var string
	 */
	protected $stylesheet = '';

	/**
	 * Holds the get_bloginfo( 'charset' ) value to reuse for performance.
	 *
	 * @var string
	 */
	protected $charset = 'UTF-8';

	/**
	 * Charset that is declared in the XML prolog and used when escaping output.
	 * Differs from $charset when the data gets converted to UTF-8.
	 *
	 * @var string
	 */
	protected $output_charset = 'UTF-8';

	/**
	 * Whether image titles and captions must be converted from $charset to $output_charset.
	 *
	 * @var bool
	 */
	protected $needs_conversion = false;

	/**
	 * Timezone helper used to format the `lastmod` dates.
	 *
	 * @var WPSEO_Sitemap_Timezone
	 */
	protected $timezone;

	/**
	 * Set up object properties.
	 */
	public function __construct() {
		// Drop a leading "http:" / "https:" so the stylesheet is referenced scheme-relative.
		$stylesheet_url   = preg_replace( '/(^http[s]?:)/', '', $this->get_xsl_url() );
		$this->stylesheet = '<?xml-stylesheet type="text/xsl" href="' . esc_url( $stylesheet_url ) . '"?>';

		$this->charset        = get_bloginfo( 'charset' );
		$this->output_charset = $this->charset;
		$this->timezone       = new WPSEO_Sitemap_Timezone();

		// Output UTF-8 whenever mbstring is available and knows how to read the blog charset.
		if (
			'UTF-8' !== $this->charset
			&& function_exists( 'mb_list_encodings' )
			&& in_array( $this->charset, mb_list_encodings(), true )
		) {
			$this->output_charset = 'UTF-8';
		}

		$this->needs_conversion = ( $this->output_charset !== $this->charset );
	}

	/**
	 * Builds the sitemap index.
	 *
	 * @param array $links Set of sitemaps index links.
	 *
	 * @return string The `<sitemapindex>` element.
	 */
	public function get_index( $links ) {
		$xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

		foreach ( $links as $link ) {
			$xml .= $this->sitemap_index_url( $link );
		}

		/**
		 * Filter to append sitemaps to the index.
		 *
		 * @param string $index String to append to sitemaps index, defaults to empty.
		 */
		$xml .= apply_filters( 'wpseo_sitemap_index', '' );
		$xml .= '</sitemapindex>';

		return $xml;
	}

	/**
	 * Builds the sitemap.
	 *
	 * @param array  $links        Set of sitemap links.
	 * @param string $type         Sitemap type.
	 * @param int    $current_page Current sitemap page number.
	 *
	 * @return string The `<urlset>` element.
	 */
	public function get_sitemap( $links, $type, $current_page ) {
		$urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" '
			. 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd '
			. 'http://www.google.com/schemas/sitemap-image/1.1 http://www.google.com/schemas/sitemap-image/1.1/sitemap-image.xsd" '
			. 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

		/**
		 * Filters the `urlset` for a sitemap by type.
		 *
		 * @api string $urlset The output for the sitemap's `urlset`.
		 */
		$xml = apply_filters( "wpseo_sitemap_{$type}_urlset", $urlset );

		foreach ( $links as $url ) {
			$xml .= $this->sitemap_url( $url );
		}

		/**
		 * Filter to add extra URLs to the XML sitemap by type.
		 *
		 * Only runs for the first page, not on all.
		 *
		 * @param string $content String content to add, defaults to empty.
		 */
		if ( $current_page === 1 ) {
			$xml .= apply_filters( "wpseo_sitemap_{$type}_content", '' );
		}

		$xml .= '</urlset>';

		return $xml;
	}

	/**
	 * Produce final XML output with debug information.
	 *
	 * @param string $sitemap   Sitemap XML.
	 * @param bool   $transient Transient cache flag.
	 *
	 * @return string The XML prolog, optional stylesheet, sitemap and trailing comments.
	 */
	public function get_output( $sitemap, $transient ) {
		$output = '<?xml version="1.0" encoding="' . esc_attr( $this->output_charset ) . '"?>';

		if ( $this->stylesheet ) {
			/**
			 * Filter the stylesheet URL for the XML sitemap.
			 *
			 * @param string $stylesheet Stylesheet URL.
			 */
			$output .= apply_filters( 'wpseo_stylesheet_url', $this->stylesheet ) . "\n";
		}

		$output .= $sitemap;
		$output .= "\n<!-- XML Sitemap generated by Yoast SEO -->";
		$output .= $this->get_debug( $transient );

		return $output;
	}

	/**
	 * Get charset for the output.
	 *
	 * @return string
	 */
	public function get_output_charset() {
		return $this->output_charset;
	}

	/**
	 * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet.
	 *
	 * @param string $stylesheet Full XML-stylesheet declaration.
	 */
	public function set_stylesheet( $stylesheet ) {
		$this->stylesheet = $stylesheet;
	}

	/**
	 * Build the `<sitemap>` tag for a given URL.
	 *
	 * @param array $url Array of parts that make up this entry; reads `loc` and the optional `lastmod`.
	 *
	 * @return string
	 */
	protected function sitemap_index_url( $url ) {
		$date = null;

		if ( ! empty( $url['lastmod'] ) ) {
			$date = $this->timezone->format_date( $url['lastmod'] );
		}

		$url['loc'] = htmlspecialchars( $url['loc'], ENT_COMPAT, $this->output_charset, false );

		$output  = "\t<sitemap>\n";
		$output .= "\t\t<loc>" . $url['loc'] . "</loc>\n";
		$output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date, ENT_COMPAT, $this->output_charset, false ) . "</lastmod>\n";
		$output .= "\t</sitemap>\n";

		return $output;
	}

	/**
	 * Build the `<url>` tag for a given URL.
	 *
	 * Public access for backwards compatibility reasons.
	 *
	 * @param array $url Array of parts that make up this entry; reads `loc` and the
	 *                   optional `mod` and `images` (each image with `src`, `title`, `alt`).
	 *
	 * @return string
	 */
	public function sitemap_url( $url ) {
		$date = null;

		if ( ! empty( $url['mod'] ) ) {
			// Create a DateTime object date in the correct timezone.
			$date = $this->timezone->format_date( $url['mod'] );
		}

		/*
		 * Encode the raw location first and escape it afterwards, the same order that is
		 * used for the image locations below. Escaping first would turn the `&` separators
		 * into `&amp;`, which the encoder then re-parses as part of the parameter names.
		 */
		$loc = htmlspecialchars( $this->encode_url_rfc3986( $url['loc'] ), ENT_COMPAT, $this->output_charset, false );

		// The `wpseo_sitemap_url` filter keeps receiving the escaped, not re-encoded, location.
		$url['loc'] = htmlspecialchars( $url['loc'], ENT_COMPAT, $this->output_charset, false );

		$output  = "\t<url>\n";
		$output .= "\t\t<loc>" . $loc . "</loc>\n";
		$output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date, ENT_COMPAT, $this->output_charset, false ) . "</lastmod>\n";

		if ( empty( $url['images'] ) ) {
			$url['images'] = [];
		}

		foreach ( $url['images'] as $img ) {
			// An image without a source cannot be listed.
			if ( empty( $img['src'] ) ) {
				continue;
			}

			$output .= "\t\t<image:image>\n";
			$output .= "\t\t\t<image:loc>" . esc_html( $this->encode_url_rfc3986( $img['src'] ) ) . "</image:loc>\n";

			if ( ! empty( $img['title'] ) ) {
				$title   = $this->prepare_image_text( $img['title'] );
				$output .= "\t\t\t<image:title><![CDATA[{$title}]]></image:title>\n";
			}

			if ( ! empty( $img['alt'] ) ) {
				$alt     = $this->prepare_image_text( $img['alt'] );
				$output .= "\t\t\t<image:caption><![CDATA[{$alt}]]></image:caption>\n";
			}

			$output .= "\t\t</image:image>\n";
		}

		$output .= "\t</url>\n";

		/**
		 * Filters the output for the sitemap URL tag.
		 *
		 * @api string $output The output for the sitemap url tag.
		 *
		 * @param array $url The sitemap URL array on which the output is based.
		 */
		return apply_filters( 'wpseo_sitemap_url', $output, $url );
	}

	/**
	 * Converts an image title or caption to the output charset and escapes it.
	 *
	 * @param string $text Raw image title or alt text.
	 *
	 * @return string Text with existing entities decoded and special characters escaped again.
	 */
	private function prepare_image_text( $text ) {
		if ( $this->needs_conversion ) {
			$text = mb_convert_encoding( $text, $this->output_charset, $this->charset );
		}

		return _wp_specialchars( html_entity_decode( $text, ENT_QUOTES, $this->output_charset ) );
	}

	/**
	 * Apply some best effort conversion to comply with RFC3986.
	 *
	 * URLs that already pass FILTER_VALIDATE_URL are returned untouched.
	 *
	 * @param string $url URL to encode.
	 *
	 * @return string
	 */
	protected function encode_url_rfc3986( $url ) {
		if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
			return $url;
		}

		$path = wp_parse_url( $url, PHP_URL_PATH );

		if ( ! empty( $path ) && '/' !== $path ) {
			$segments = explode( '/', $path );

			// First decode the path, to prevent double encoding.
			$segments = array_map( 'rawurldecode', $segments );
			$segments = array_map( 'rawurlencode', $segments );

			// NOTE(review): str_replace() swaps every occurrence of the path text, not only the path component.
			$url = str_replace( $path, implode( '/', $segments ), $url );
		}

		$query = wp_parse_url( $url, PHP_URL_QUERY );

		if ( ! empty( $query ) ) {
			parse_str( $query, $parsed_query );

			// An empty string instead of null: passing null as the numeric prefix is deprecated as of PHP 8.1.
			$encoded_query = http_build_query( $parsed_query, '', '&', PHP_QUERY_RFC3986 );

			$url = str_replace( $query, $encoded_query, $url );
		}

		return $url;
	}

	/**
	 * Retrieves the XSL URL that should be used in the current environment
	 *
	 * When home_url and site_url are not the same, the home_url should be used.
	 * This is because the XSL needs to be served from the same domain, protocol and port
	 * as the XML file that is loading it.
	 *
	 * @return string The XSL URL that needs to be used.
	 */
	protected function get_xsl_url() {
		if ( home_url() !== site_url() ) {
			return home_url( 'main-sitemap.xsl' );
		}

		/*
		 * Fallback to circumvent a cross-domain security problem when the XSL file is
		 * loaded from a different (sub)domain.
		 */
		if ( strpos( plugins_url(), home_url() ) !== 0 ) {
			return home_url( 'main-sitemap.xsl' );
		}

		return plugin_dir_url( WPSEO_FILE ) . 'css/main-sitemap.xsl';
	}

	/**
	 * Adds debugging information to the output.
	 *
	 * Only produces output when the YOAST_SEO_DEBUG_SITEMAPS constant is exactly true.
	 *
	 * @param bool $transient Transient cache was used or not.
	 *
	 * @return string Information about the functionality used to build the sitemap.
	 */
	protected function get_debug( $transient ) {
		$debug = defined( 'YOAST_SEO_DEBUG_SITEMAPS' ) && YOAST_SEO_DEBUG_SITEMAPS === true;

		if ( ! $debug ) {
			return '';
		}

		$memory_used = number_format( ( memory_get_peak_usage() / 1048576 ), 2 );
		$queries_run = ( $transient ) ? 'Served from transient cache' : 'Queries executed ' . absint( $GLOBALS['wpdb']->num_queries );

		$output = "\n<!-- {$memory_used}MB | {$queries_run} -->";

		if ( defined( 'SAVEQUERIES' ) && SAVEQUERIES ) {
			$queries = print_r( $GLOBALS['wpdb']->queries, true );

			// "--" is not allowed inside an XML comment, so separate consecutive hyphens.
			$queries = preg_replace( '/-(?=-)/', '- ', $queries );

			$output .= "\n<!-- {$queries} -->";
		}

		return $output;
	}
}
|