class-sitemaps-renderer.php 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. <?php
  2. /**
  3. * WPSEO plugin file.
  4. *
  5. * @package WPSEO\XML_Sitemaps
  6. */
  /**
   * Renders XML output for sitemaps.
   *
   * Turns arrays of link data into `<sitemapindex>` / `<urlset>` XML and wraps the result
   * in the XML declaration, the optional XSL stylesheet instruction and debug comments.
   */
  class WPSEO_Sitemaps_Renderer {

      /**
       * XSL stylesheet for styling a sitemap for web browsers.
       *
       * Holds the complete `xml-stylesheet` processing instruction, not just the URL.
       *
       * @var string
       */
      protected $stylesheet = '';

      /**
       * Holds the get_bloginfo( 'charset' ) value to reuse for performance.
       *
       * @var string
       */
      protected $charset = 'UTF-8';

      /**
       * Holds charset of output, might be converted.
       *
       * @var string
       */
      protected $output_charset = 'UTF-8';

      /**
       * If data encoding needs to be converted for output.
       *
       * @var bool
       */
      protected $needs_conversion = false;

      /**
       * Timezone.
       *
       * @var WPSEO_Sitemap_Timezone
       */
      protected $timezone;

      /**
       * Set up object properties.
       *
       * Builds the stylesheet processing instruction, reads the blog charset and decides
       * whether image titles/captions have to be converted to UTF-8 on output.
       */
      public function __construct() {
          // Drop the scheme so the stylesheet reference becomes protocol-relative.
          $stylesheet_url   = preg_replace( '/(^http[s]?:)/', '', $this->get_xsl_url() );
          $this->stylesheet = '<?xml-stylesheet type="text/xsl" href="' . esc_url( $stylesheet_url ) . '"?>';

          $this->charset        = get_bloginfo( 'charset' );
          $this->output_charset = $this->charset;
          $this->timezone       = new WPSEO_Sitemap_Timezone();

          // Output UTF-8 only when mbstring is available and knows the blog charset.
          if (
              'UTF-8' !== $this->charset
              && function_exists( 'mb_list_encodings' )
              && in_array( $this->charset, mb_list_encodings(), true )
          ) {
              $this->output_charset = 'UTF-8';
          }

          $this->needs_conversion = $this->output_charset !== $this->charset;
      }

      /**
       * Builds the sitemap index.
       *
       * @param array $links Set of sitemaps index links.
       *
       * @return string The `<sitemapindex>` element, without XML declaration.
       */
      public function get_index( $links ) {
          $xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

          foreach ( $links as $link ) {
              $xml .= $this->sitemap_index_url( $link );
          }

          /**
           * Filter to append sitemaps to the index.
           *
           * @param string $index String to append to sitemaps index, defaults to empty.
           */
          $xml .= apply_filters( 'wpseo_sitemap_index', '' );
          $xml .= '</sitemapindex>';

          return $xml;
      }

      /**
       * Builds the sitemap.
       *
       * @param array  $links        Set of sitemap links.
       * @param string $type         Sitemap type.
       * @param int    $current_page Current sitemap page number.
       *
       * @return string The `<urlset>` element, without XML declaration.
       */
      public function get_sitemap( $links, $type, $current_page ) {
          $urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" '
              . 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd '
              . 'http://www.google.com/schemas/sitemap-image/1.1 http://www.google.com/schemas/sitemap-image/1.1/sitemap-image.xsd" '
              . 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

          /**
           * Filters the `urlset` for a sitemap by type.
           *
           * @api string $urlset The output for the sitemap's `urlset`.
           */
          $xml = apply_filters( "wpseo_sitemap_{$type}_urlset", $urlset );

          foreach ( $links as $url ) {
              $xml .= $this->sitemap_url( $url );
          }

          // Accept numeric strings as well, so a page number taken from a query var still matches.
          if ( is_numeric( $current_page ) && (int) $current_page === 1 ) {
              /**
               * Filter to add extra URLs to the XML sitemap by type.
               *
               * Only runs for the first page, not on all.
               *
               * @param string $content String content to add, defaults to empty.
               */
              $xml .= apply_filters( "wpseo_sitemap_{$type}_content", '' );
          }

          $xml .= '</urlset>';

          return $xml;
      }

      /**
       * Produce final XML output with debug information.
       *
       * @param string  $sitemap   Sitemap XML.
       * @param boolean $transient Transient cache flag.
       *
       * @return string
       */
      public function get_output( $sitemap, $transient ) {
          $output = '<?xml version="1.0" encoding="' . esc_attr( $this->output_charset ) . '"?>';

          if ( $this->stylesheet ) {
              /**
               * Filter the stylesheet URL for the XML sitemap.
               *
               * @param string $stylesheet Stylesheet URL.
               */
              $output .= apply_filters( 'wpseo_stylesheet_url', $this->stylesheet ) . "\n";
          }

          $output .= $sitemap;
          $output .= "\n<!-- XML Sitemap generated by Yoast SEO -->";
          $output .= $this->get_debug( $transient );

          return $output;
      }

      /**
       * Get charset for the output.
       *
       * @return string
       */
      public function get_output_charset() {
          return $this->output_charset;
      }

      /**
       * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet.
       *
       * @param string $stylesheet Full XML-stylesheet declaration.
       */
      public function set_stylesheet( $stylesheet ) {
          $this->stylesheet = $stylesheet;
      }

      /**
       * Build the `<sitemap>` tag for a given URL.
       *
       * @param array $url Array of parts that make up this entry; reads `loc` and optional `lastmod`.
       *
       * @return string
       */
      protected function sitemap_index_url( $url ) {
          $date = null;

          if ( ! empty( $url['lastmod'] ) ) {
              $date = $this->timezone->format_date( $url['lastmod'] );
          }

          $url['loc'] = htmlspecialchars( $url['loc'], ENT_COMPAT, $this->output_charset, false );

          $output  = "\t<sitemap>\n";
          $output .= "\t\t<loc>" . $url['loc'] . "</loc>\n";
          $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date, ENT_COMPAT, $this->output_charset, false ) . "</lastmod>\n";
          $output .= "\t</sitemap>\n";

          return $output;
      }

      /**
       * Build the `<url>` tag for a given URL.
       *
       * Public access for backwards compatibility reasons.
       *
       * @param array $url Array of parts that make up this entry; reads `loc` and the
       *                   optional `mod` and `images` (each image: `src`, `title`, `alt`).
       *
       * @return string
       */
      public function sitemap_url( $url ) {
          $date = null;

          if ( ! empty( $url['mod'] ) ) {
              // Create a DateTime object date in the correct timezone.
              $date = $this->timezone->format_date( $url['mod'] );
          }

          /*
           * Encode first, escape second (same order as used for image URLs below).
           * Escaping first turns `&` into `&amp;`, which the query re-encoding would
           * then misread as an argument named `amp;...`.
           */
          $loc = htmlspecialchars( $this->encode_url_rfc3986( $url['loc'] ), ENT_COMPAT, $this->output_charset, false );

          // Filter callbacks keep receiving the escaped location, as before.
          $url['loc'] = htmlspecialchars( $url['loc'], ENT_COMPAT, $this->output_charset, false );

          $output  = "\t<url>\n";
          $output .= "\t\t<loc>" . $loc . "</loc>\n";
          $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date, ENT_COMPAT, $this->output_charset, false ) . "</lastmod>\n";

          if ( empty( $url['images'] ) ) {
              $url['images'] = [];
          }

          foreach ( $url['images'] as $img ) {
              // An image without a source cannot be listed.
              if ( empty( $img['src'] ) ) {
                  continue;
              }

              $output .= "\t\t<image:image>\n";
              $output .= "\t\t\t<image:loc>" . esc_html( $this->encode_url_rfc3986( $img['src'] ) ) . "</image:loc>\n";

              if ( ! empty( $img['title'] ) ) {
                  $title   = $this->prepare_image_text( $img['title'] );
                  $output .= "\t\t\t<image:title><![CDATA[{$title}]]></image:title>\n";
              }

              if ( ! empty( $img['alt'] ) ) {
                  $alt     = $this->prepare_image_text( $img['alt'] );
                  $output .= "\t\t\t<image:caption><![CDATA[{$alt}]]></image:caption>\n";
              }

              $output .= "\t\t</image:image>\n";
          }

          $output .= "\t</url>\n";

          /**
           * Filters the output for the sitemap URL tag.
           *
           * @api string $output The output for the sitemap url tag.
           *
           * @param array $url The sitemap URL array on which the output is based.
           */
          return apply_filters( 'wpseo_sitemap_url', $output, $url );
      }

      /**
       * Converts an image title/caption to the output charset and escapes it.
       *
       * @param string $text Raw image title or alt text.
       *
       * @return string Text with entities decoded once and special characters escaped.
       */
      private function prepare_image_text( $text ) {
          if ( $this->needs_conversion ) {
              $text = mb_convert_encoding( $text, $this->output_charset, $this->charset );
          }

          // Decode before escaping so existing entities are not escaped a second time.
          return _wp_specialchars( html_entity_decode( $text, ENT_QUOTES, $this->output_charset ) );
      }

      /**
       * Apply some best effort conversion to comply with RFC3986.
       *
       * URLs accepted by FILTER_VALIDATE_URL are returned untouched. Otherwise every
       * path segment is percent-encoded and the query string is rebuilt with
       * `&amp;` as argument separator.
       *
       * @param string $url URL to encode.
       *
       * @return string
       */
      protected function encode_url_rfc3986( $url ) {
          if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
              return $url;
          }

          $path = wp_parse_url( $url, PHP_URL_PATH );

          if ( ! empty( $path ) && '/' !== $path ) {
              $segments = explode( '/', $path );

              // First decode the path, to prevent double encoding.
              $segments     = array_map( 'rawurldecode', $segments );
              $segments     = array_map( 'rawurlencode', $segments );
              $encoded_path = implode( '/', $segments );

              $url = str_replace( $path, $encoded_path, $url );
          }

          $query = wp_parse_url( $url, PHP_URL_QUERY );

          if ( ! empty( $query ) ) {
              // A pre-escaped separator must not be parsed as an argument named `amp;...`.
              parse_str( str_replace( '&amp;', '&', $query ), $parsed_query );

              // The numeric prefix must be a string: passing null is deprecated as of PHP 8.1.
              $encoded_query = http_build_query( $parsed_query, '', '&amp;', PHP_QUERY_RFC3986 );

              $url = str_replace( $query, $encoded_query, $url );
          }

          return $url;
      }

      /**
       * Retrieves the XSL URL that should be used in the current environment
       *
       * When home_url and site_url are not the same, the home_url should be used.
       * This is because the XSL needs to be served from the same domain, protocol and port
       * as the XML file that is loading it.
       *
       * @return string The XSL URL that needs to be used.
       */
      protected function get_xsl_url() {
          /*
           * The second condition is a fallback to circumvent a cross-domain security
           * problem when the XSL file is loaded from a different (sub)domain.
           */
          if ( home_url() !== site_url() || strpos( plugins_url(), home_url() ) !== 0 ) {
              return home_url( 'main-sitemap.xsl' );
          }

          return plugin_dir_url( WPSEO_FILE ) . 'css/main-sitemap.xsl';
      }

      /**
       * Adds debugging information to the output.
       *
       * Only produces output when YOAST_SEO_DEBUG_SITEMAPS is defined as true; the
       * query log is added when SAVEQUERIES is enabled as well.
       *
       * @param bool $transient Transient cache was used or not.
       *
       * @return string Information about the functionality used to build the sitemap.
       */
      protected function get_debug( $transient ) {
          $debug = defined( 'YOAST_SEO_DEBUG_SITEMAPS' ) && YOAST_SEO_DEBUG_SITEMAPS === true;

          if ( ! $debug ) {
              return '';
          }

          $memory_used = number_format( ( memory_get_peak_usage() / 1048576 ), 2 );
          $queries_run = ( $transient ) ? 'Served from transient cache' : 'Queries executed ' . absint( $GLOBALS['wpdb']->num_queries );

          $output = "\n<!-- {$memory_used}MB | {$queries_run} -->";

          if ( defined( 'SAVEQUERIES' ) && SAVEQUERIES ) {
              $queries = print_r( $GLOBALS['wpdb']->queries, true );

              // XML comments may not contain a double hyphen; break them up to keep the document well-formed.
              $queries = preg_replace( '/-(?=-)/', '- ', $queries );

              $output .= "\n<!-- {$queries} -->";
          }

          return $output;
      }
  }