class-sitemaps.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646
<?php
/**
 * WPSEO plugin file.
 *
 * @package WPSEO\XML_Sitemaps
 */
  7. /**
  8. * Class WPSEO_Sitemaps.
  9. *
  10. * @todo This class could use a general description with some explanation on sitemaps. OR.
  11. */
  12. class WPSEO_Sitemaps {
  13. /**
  14. * Sitemap index identifier.
  15. *
  16. * @var string
  17. */
  18. const SITEMAP_INDEX_TYPE = '1';
  19. /**
  20. * Content of the sitemap to output.
  21. *
  22. * @var string
  23. */
  24. protected $sitemap = '';
  25. /**
  26. * Flag to indicate if this is an invalid or empty sitemap.
  27. *
  28. * @var bool
  29. */
  30. public $bad_sitemap = false;
  31. /**
  32. * Whether or not the XML sitemap was served from a transient or not.
  33. *
  34. * @var bool
  35. */
  36. private $transient = false;
  37. /**
  38. * HTTP protocol to use in headers.
  39. *
  40. * @since 3.2
  41. *
  42. * @var string
  43. */
  44. protected $http_protocol = 'HTTP/1.1';
  45. /**
  46. * Holds the n variable.
  47. *
  48. * @var int
  49. */
  50. private $current_page = 1;
  51. /**
  52. * The timezone.
  53. *
  54. * @var WPSEO_Sitemap_Timezone
  55. */
  56. private $timezone;
  57. /**
  58. * The sitemaps router.
  59. *
  60. * @since 3.2
  61. *
  62. * @var WPSEO_Sitemaps_Router
  63. */
  64. public $router;
  65. /**
  66. * The sitemap renderer.
  67. *
  68. * @since 3.2
  69. *
  70. * @var WPSEO_Sitemaps_Renderer
  71. */
  72. public $renderer;
  73. /**
  74. * The sitemap cache.
  75. *
  76. * @since 3.2
  77. *
  78. * @var WPSEO_Sitemaps_Cache
  79. */
  80. public $cache;
  81. /**
  82. * The sitemap providers.
  83. *
  84. * @since 3.2
  85. *
  86. * @var WPSEO_Sitemap_Provider[]
  87. */
  88. public $providers;
  89. /**
  90. * Class constructor.
  91. */
  92. public function __construct() {
  93. add_action( 'after_setup_theme', [ $this, 'init_sitemaps_providers' ] );
  94. add_action( 'after_setup_theme', [ $this, 'reduce_query_load' ], 99 );
  95. add_action( 'pre_get_posts', [ $this, 'redirect' ], 1 );
  96. add_action( 'wpseo_hit_sitemap_index', [ $this, 'hit_sitemap_index' ] );
  97. add_action( 'wpseo_ping_search_engines', [ __CLASS__, 'ping_search_engines' ] );
  98. $this->timezone = new WPSEO_Sitemap_Timezone();
  99. $this->router = new WPSEO_Sitemaps_Router();
  100. $this->renderer = new WPSEO_Sitemaps_Renderer();
  101. $this->cache = new WPSEO_Sitemaps_Cache();
  102. if ( ! empty( $_SERVER['SERVER_PROTOCOL'] ) ) {
  103. $this->http_protocol = sanitize_text_field( wp_unslash( $_SERVER['SERVER_PROTOCOL'] ) );
  104. }
  105. }
  106. /**
  107. * Initialize sitemap providers classes.
  108. *
  109. * @since 5.3
  110. */
  111. public function init_sitemaps_providers() {
  112. $this->providers = [
  113. new WPSEO_Post_Type_Sitemap_Provider(),
  114. new WPSEO_Taxonomy_Sitemap_Provider(),
  115. new WPSEO_Author_Sitemap_Provider(),
  116. ];
  117. $external_providers = apply_filters( 'wpseo_sitemaps_providers', [] );
  118. foreach ( $external_providers as $provider ) {
  119. if ( is_object( $provider ) && $provider instanceof WPSEO_Sitemap_Provider ) {
  120. $this->providers[] = $provider;
  121. }
  122. }
  123. }
  124. /**
  125. * Check the current request URI, if we can determine it's probably an XML sitemap, kill loading the widgets.
  126. */
  127. public function reduce_query_load() {
  128. if ( ! isset( $_SERVER['REQUEST_URI'] ) ) {
  129. return;
  130. }
  131. $request_uri = sanitize_text_field( wp_unslash( $_SERVER['REQUEST_URI'] ) );
  132. $extension = substr( $request_uri, -4 );
  133. if ( false !== stripos( $request_uri, 'sitemap' ) && in_array( $extension, [ '.xml', '.xsl' ], true ) ) {
  134. remove_all_actions( 'widgets_init' );
  135. }
  136. }
  137. /**
  138. * Register your own sitemap. Call this during 'init'.
  139. *
  140. * @param string $name The name of the sitemap.
  141. * @param callback $function Function to build your sitemap.
  142. * @param string $rewrite Optional. Regular expression to match your sitemap with.
  143. */
  144. public function register_sitemap( $name, $function, $rewrite = '' ) {
  145. add_action( 'wpseo_do_sitemap_' . $name, $function );
  146. if ( ! empty( $rewrite ) ) {
  147. add_rewrite_rule( $rewrite, 'index.php?sitemap=' . $name, 'top' );
  148. }
  149. }
  150. /**
  151. * Register your own XSL file. Call this during 'init'.
  152. *
  153. * @since 1.4.23
  154. *
  155. * @param string $name The name of the XSL file.
  156. * @param callback $function Function to build your XSL file.
  157. * @param string $rewrite Optional. Regular expression to match your sitemap with.
  158. */
  159. public function register_xsl( $name, $function, $rewrite = '' ) {
  160. add_action( 'wpseo_xsl_' . $name, $function );
  161. if ( ! empty( $rewrite ) ) {
  162. add_rewrite_rule( $rewrite, 'index.php?yoast-sitemap-xsl=' . $name, 'top' );
  163. }
  164. }
  165. /**
  166. * Set the sitemap current page to allow creating partial sitemaps with WP-CLI
  167. * in a one-off process.
  168. *
  169. * @param integer $current_page The part that should be generated.
  170. */
  171. public function set_n( $current_page ) {
  172. if ( is_scalar( $current_page ) && intval( $current_page ) > 0 ) {
  173. $this->current_page = intval( $current_page );
  174. }
  175. }
  176. /**
  177. * Set the sitemap content to display after you have generated it.
  178. *
  179. * @param string $sitemap The generated sitemap to output.
  180. */
  181. public function set_sitemap( $sitemap ) {
  182. $this->sitemap = $sitemap;
  183. }
  184. /**
  185. * Set as true to make the request 404. Used stop the display of empty sitemaps or invalid requests.
  186. *
  187. * @param bool $bool Is this a bad request. True or false.
  188. */
  189. public function set_bad_sitemap( $bool ) {
  190. $this->bad_sitemap = (bool) $bool;
  191. }
  192. /**
  193. * Prevent stupid plugins from running shutdown scripts when we're obviously not outputting HTML.
  194. *
  195. * @since 1.4.16
  196. */
  197. public function sitemap_close() {
  198. remove_all_actions( 'wp_footer' );
  199. die();
  200. }
  201. /**
  202. * Hijack requests for potential sitemaps and XSL files.
  203. *
  204. * @param \WP_Query $query Main query instance.
  205. */
  206. public function redirect( $query ) {
  207. if ( ! $query->is_main_query() ) {
  208. return;
  209. }
  210. $yoast_sitemap_xsl = get_query_var( 'yoast-sitemap-xsl' );
  211. if ( ! empty( $yoast_sitemap_xsl ) ) {
  212. /*
  213. * This is a method to provide the XSL via the home_url.
  214. * Needed when the site_url and home_url are not the same.
  215. * Loading the XSL needs to come from the same domain, protocol and port as the XML.
  216. *
  217. * Whenever home_url and site_url are the same, the file can be loaded directly.
  218. */
  219. $this->xsl_output( $yoast_sitemap_xsl );
  220. $this->sitemap_close();
  221. return;
  222. }
  223. $type = get_query_var( 'sitemap' );
  224. if ( empty( $type ) ) {
  225. return;
  226. }
  227. $this->set_n( get_query_var( 'sitemap_n' ) );
  228. if ( ! $this->get_sitemap_from_cache( $type, $this->current_page ) ) {
  229. $this->build_sitemap( $type );
  230. }
  231. if ( $this->bad_sitemap ) {
  232. $query->set_404();
  233. status_header( 404 );
  234. return;
  235. }
  236. $this->output();
  237. $this->sitemap_close();
  238. }
  239. /**
  240. * Try to get the sitemap from cache.
  241. *
  242. * @param string $type Sitemap type.
  243. * @param int $page_number The page number to retrieve.
  244. *
  245. * @return bool If the sitemap has been retrieved from cache.
  246. */
  247. private function get_sitemap_from_cache( $type, $page_number ) {
  248. $this->transient = false;
  249. if ( true !== $this->cache->is_enabled() ) {
  250. return false;
  251. }
  252. /**
  253. * Fires before the attempt to retrieve XML sitemap from the transient cache.
  254. *
  255. * @param WPSEO_Sitemaps $sitemaps Sitemaps object.
  256. */
  257. do_action( 'wpseo_sitemap_stylesheet_cache_' . $type, $this );
  258. $sitemap_cache_data = $this->cache->get_sitemap_data( $type, $page_number );
  259. // No cache was found, refresh it because cache is enabled.
  260. if ( empty( $sitemap_cache_data ) ) {
  261. return $this->refresh_sitemap_cache( $type, $page_number );
  262. }
  263. // Cache object was found, parse information.
  264. $this->transient = true;
  265. $this->sitemap = $sitemap_cache_data->get_sitemap();
  266. $this->bad_sitemap = ! $sitemap_cache_data->is_usable();
  267. return true;
  268. }
  269. /**
  270. * Build and save sitemap to cache.
  271. *
  272. * @param string $type Sitemap type.
  273. * @param int $page_number The page number to save to.
  274. *
  275. * @return bool
  276. */
  277. private function refresh_sitemap_cache( $type, $page_number ) {
  278. $this->set_n( $page_number );
  279. $this->build_sitemap( $type );
  280. return $this->cache->store_sitemap( $type, $page_number, $this->sitemap, ! $this->bad_sitemap );
  281. }
  282. /**
  283. * Attempts to build the requested sitemap.
  284. *
  285. * Sets $bad_sitemap if this isn't for the root sitemap, a post type or taxonomy.
  286. *
  287. * @param string $type The requested sitemap's identifier.
  288. */
  289. public function build_sitemap( $type ) {
  290. /**
  291. * Filter the type of sitemap to build.
  292. *
  293. * @param string $type Sitemap type, determined by the request.
  294. */
  295. $type = apply_filters( 'wpseo_build_sitemap_post_type', $type );
  296. if ( $type === '1' ) {
  297. $this->build_root_map();
  298. return;
  299. }
  300. $entries_per_page = $this->get_entries_per_page();
  301. foreach ( $this->providers as $provider ) {
  302. if ( ! $provider->handles_type( $type ) ) {
  303. continue;
  304. }
  305. try {
  306. $links = $provider->get_sitemap_links( $type, $entries_per_page, $this->current_page );
  307. } catch ( OutOfBoundsException $exception ) {
  308. $this->bad_sitemap = true;
  309. return;
  310. }
  311. $this->sitemap = $this->renderer->get_sitemap( $links, $type, $this->current_page );
  312. return;
  313. }
  314. if ( has_action( 'wpseo_do_sitemap_' . $type ) ) {
  315. /**
  316. * Fires custom handler, if hooked to generate sitemap for the type.
  317. */
  318. do_action( 'wpseo_do_sitemap_' . $type );
  319. return;
  320. }
  321. $this->bad_sitemap = true;
  322. }
  323. /**
  324. * Build the root sitemap (example.com/sitemap_index.xml) which lists sub-sitemaps for other content types.
  325. */
  326. public function build_root_map() {
  327. $links = [];
  328. $entries_per_page = $this->get_entries_per_page();
  329. foreach ( $this->providers as $provider ) {
  330. $links = array_merge( $links, $provider->get_index_links( $entries_per_page ) );
  331. }
  332. if ( empty( $links ) ) {
  333. $this->bad_sitemap = true;
  334. $this->sitemap = '';
  335. return;
  336. }
  337. $this->sitemap = $this->renderer->get_index( $links );
  338. }
  339. /**
  340. * Spits out the XSL for the XML sitemap.
  341. *
  342. * @param string $type Type to output.
  343. *
  344. * @since 1.4.13
  345. */
  346. public function xsl_output( $type ) {
  347. if ( $type !== 'main' ) {
  348. /**
  349. * Fires for the output of XSL for XML sitemaps, other than type "main".
  350. */
  351. do_action( 'wpseo_xsl_' . $type );
  352. return;
  353. }
  354. header( $this->http_protocol . ' 200 OK', true, 200 );
  355. // Prevent the search engines from indexing the XML Sitemap.
  356. header( 'X-Robots-Tag: noindex, follow', true );
  357. header( 'Content-Type: text/xml' );
  358. // Make the browser cache this file properly.
  359. $expires = YEAR_IN_SECONDS;
  360. header( 'Pragma: public' );
  361. header( 'Cache-Control: maxage=' . $expires );
  362. header( 'Expires: ' . gmdate( 'D, d M Y H:i:s', ( time() + $expires ) ) . ' GMT' );
  363. readfile( WPSEO_PATH . 'css/main-sitemap.xsl' );
  364. }
  365. /**
  366. * Spit out the generated sitemap.
  367. */
  368. public function output() {
  369. $this->send_headers();
  370. echo $this->renderer->get_output( $this->sitemap, $this->transient );
  371. }
  372. /**
  373. * Makes a request to the sitemap index to cache it before the arrival of the search engines.
  374. *
  375. * @return void
  376. */
  377. public function hit_sitemap_index() {
  378. if ( ! $this->cache->is_enabled() ) {
  379. return;
  380. }
  381. wp_remote_get( WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ) );
  382. }
  383. /**
  384. * Get the GMT modification date for the last modified post in the post type.
  385. *
  386. * @since 3.2
  387. *
  388. * @param string|array $post_types Post type or array of types.
  389. * @param bool $return_all Flag to return array of values.
  390. *
  391. * @return string|array|false
  392. */
  393. public static function get_last_modified_gmt( $post_types, $return_all = false ) {
  394. global $wpdb;
  395. static $post_type_dates = null;
  396. if ( ! is_array( $post_types ) ) {
  397. $post_types = [ $post_types ];
  398. }
  399. foreach ( $post_types as $post_type ) {
  400. if ( ! isset( $post_type_dates[ $post_type ] ) ) { // If we hadn't seen post type before. R.
  401. $post_type_dates = null;
  402. break;
  403. }
  404. }
  405. if ( is_null( $post_type_dates ) ) {
  406. $post_type_dates = [];
  407. $post_type_names = WPSEO_Post_Type::get_accessible_post_types();
  408. if ( ! empty( $post_type_names ) ) {
  409. $post_statuses = array_map( 'esc_sql', self::get_post_statuses() );
  410. $sql = "
  411. SELECT post_type, MAX(post_modified_gmt) AS date
  412. FROM $wpdb->posts
  413. WHERE post_status IN ('" . implode( "','", $post_statuses ) . "')
  414. AND post_type IN ('" . implode( "','", $post_type_names ) . "')
  415. GROUP BY post_type
  416. ORDER BY post_modified_gmt DESC
  417. ";
  418. foreach ( $wpdb->get_results( $sql ) as $obj ) {
  419. $post_type_dates[ $obj->post_type ] = $obj->date;
  420. }
  421. }
  422. }
  423. $dates = array_intersect_key( $post_type_dates, array_flip( $post_types ) );
  424. if ( count( $dates ) > 0 ) {
  425. if ( $return_all ) {
  426. return $dates;
  427. }
  428. return max( $dates );
  429. }
  430. return false;
  431. }
  432. /**
  433. * Get the modification date for the last modified post in the post type.
  434. *
  435. * @param array $post_types Post types to get the last modification date for.
  436. *
  437. * @return string
  438. */
  439. public function get_last_modified( $post_types ) {
  440. return $this->timezone->format_date( self::get_last_modified_gmt( $post_types ) );
  441. }
  442. /**
  443. * Notify search engines of the updated sitemap.
  444. *
  445. * @param string|null $url Optional URL to make the ping for.
  446. */
  447. public static function ping_search_engines( $url = null ) {
  448. /**
  449. * Filter: 'wpseo_allow_xml_sitemap_ping' - Check if pinging is not allowed (allowed by default)
  450. *
  451. * @api boolean $allow_ping The boolean that is set to true by default.
  452. */
  453. if ( apply_filters( 'wpseo_allow_xml_sitemap_ping', true ) === false ) {
  454. return;
  455. }
  456. if ( '0' === get_option( 'blog_public' ) ) { // Don't ping if blog is not public.
  457. return;
  458. }
  459. if ( empty( $url ) ) {
  460. $url = urlencode( WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ) );
  461. }
  462. // Ping Google and Bing.
  463. wp_remote_get( 'https://www.google.com/ping?sitemap=' . $url, [ 'blocking' => false ] );
  464. wp_remote_get( 'https://www.bing.com/ping?sitemap=' . $url, [ 'blocking' => false ] );
  465. }
  466. /**
  467. * Get the maximum number of entries per XML sitemap.
  468. *
  469. * @return int The maximum number of entries.
  470. */
  471. protected function get_entries_per_page() {
  472. /**
  473. * Filter the maximum number of entries per XML sitemap.
  474. *
  475. * After changing the output of the filter, make sure that you disable and enable the
  476. * sitemaps to make sure the value is picked up for the sitemap cache.
  477. *
  478. * @param int $entries The maximum number of entries per XML sitemap.
  479. */
  480. $entries = (int) apply_filters( 'wpseo_sitemap_entries_per_page', 1000 );
  481. return $entries;
  482. }
  483. /**
  484. * Get post statuses for post_type or the root sitemap.
  485. *
  486. * @param string $type Provide a type for a post_type sitemap, SITEMAP_INDEX_TYPE for the root sitemap.
  487. *
  488. * @since 10.2
  489. *
  490. * @return array List of post statuses.
  491. */
  492. public static function get_post_statuses( $type = self::SITEMAP_INDEX_TYPE ) {
  493. /**
  494. * Filter post status list for sitemap query for the post type.
  495. *
  496. * @param array $post_statuses Post status list, defaults to array( 'publish' ).
  497. * @param string $type Post type or SITEMAP_INDEX_TYPE.
  498. */
  499. $post_statuses = apply_filters( 'wpseo_sitemap_post_statuses', [ 'publish' ], $type );
  500. if ( ! is_array( $post_statuses ) || empty( $post_statuses ) ) {
  501. $post_statuses = [ 'publish' ];
  502. }
  503. if ( ( $type === self::SITEMAP_INDEX_TYPE || $type === 'attachment' )
  504. && ! in_array( 'inherit', $post_statuses, true )
  505. ) {
  506. $post_statuses[] = 'inherit';
  507. }
  508. return $post_statuses;
  509. }
  510. /**
  511. * Sends all the required HTTP Headers.
  512. */
  513. private function send_headers() {
  514. if ( headers_sent() ) {
  515. return;
  516. }
  517. $headers = [
  518. $this->http_protocol . ' 200 OK' => 200,
  519. // Prevent the search engines from indexing the XML Sitemap.
  520. 'X-Robots-Tag: noindex, follow' => '',
  521. 'Content-Type: text/xml; charset=' . esc_attr( $this->renderer->get_output_charset() ) => '',
  522. ];
  523. /**
  524. * Filter the HTTP headers we send before an XML sitemap.
  525. *
  526. * @param array $headers The HTTP headers we're going to send out.
  527. */
  528. $headers = apply_filters( 'wpseo_sitemap_http_headers', $headers );
  529. foreach ( $headers as $header => $status ) {
  530. if ( is_numeric( $status ) ) {
  531. header( $header, true, $status );
  532. continue;
  533. }
  534. header( $header, true );
  535. }
  536. }
  537. }