summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/jetpack/modules/sitemaps/sitemap-builder.php')
-rw-r--r--plugins/jetpack/modules/sitemaps/sitemap-builder.php1468
1 files changed, 1468 insertions, 0 deletions
diff --git a/plugins/jetpack/modules/sitemaps/sitemap-builder.php b/plugins/jetpack/modules/sitemaps/sitemap-builder.php
new file mode 100644
index 00000000..e04f58d5
--- /dev/null
+++ b/plugins/jetpack/modules/sitemaps/sitemap-builder.php
@@ -0,0 +1,1468 @@
+<?php
+/**
+ * Build the sitemap tree.
+ *
+ * @package Jetpack
+ * @since 4.8.0
+ * @author Automattic
+ */
+
+/* Include sitemap subclasses, if not already, and include proper buffer based on phpxml's availability. */
+require_once dirname( __FILE__ ) . '/sitemap-constants.php';
+require_once dirname( __FILE__ ) . '/sitemap-buffer.php';
+
+if ( ! class_exists( 'DOMDocument' ) ) {
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-fallback.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-image-fallback.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-master-fallback.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-news-fallback.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-page-fallback.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-video-fallback.php';
+} else {
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-image.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-master.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-news.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-page.php';
+ require_once dirname( __FILE__ ) . '/sitemap-buffer-video.php';
+}
+
+require_once dirname( __FILE__ ) . '/sitemap-librarian.php';
+require_once dirname( __FILE__ ) . '/sitemap-finder.php';
+require_once dirname( __FILE__ ) . '/sitemap-state.php';
+
+if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
+ require_once dirname( __FILE__ ) . '/sitemap-logger.php';
+}
+
+/**
+ * Simple class for rendering an empty sitemap with a short TTL
+ */
+class Jetpack_Sitemap_Buffer_Empty extends Jetpack_Sitemap_Buffer {
+
+ public function __construct() {
+ parent::__construct( JP_SITEMAP_MAX_ITEMS, JP_SITEMAP_MAX_BYTES, '1970-01-01 00:00:00' );
+
+ $this->doc->appendChild(
+ $this->doc->createComment( "generator='jetpack-" . JETPACK__VERSION . "'" )
+ );
+
+ $this->doc->appendChild(
+ $this->doc->createProcessingInstruction(
+ 'xml-stylesheet',
+ 'type="text/xsl" href="' . $this->finder->construct_sitemap_url( 'sitemap-index.xsl' ) . '"'
+ )
+ );
+ }
+
+ protected function get_root_element() {
+ if ( ! isset( $this->root ) ) {
+ $this->root = $this->doc->createElement( 'sitemapindex' );
+ $this->root->setAttribute( 'xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9' );
+ $this->doc->appendChild( $this->root );
+ $this->byte_capacity -= strlen( $this->doc->saveXML( $this->root ) );
+ }
+
+ return $this->root;
+ }
+}
+
+/**
+ * The Jetpack_Sitemap_Builder object handles the construction of
+ * all sitemap files (except the XSL files, which are handled by
+ * Jetpack_Sitemap_Stylist.) Other than the constructor, there are
+ * only two public functions: build_all_sitemaps and news_sitemap_xml.
+ *
+ * @since 4.8.0
+ */
+class Jetpack_Sitemap_Builder {
+
+ /**
+ * Librarian object for storing and retrieving sitemap data.
+ *
+ * @access private
+ * @since 4.8.0
+ * @var $librarian Jetpack_Sitemap_Librarian
+ */
+ private $librarian;
+
+ /**
+ * Logger object for reporting debug messages.
+ *
+ * @access private
+ * @since 4.8.0
+ * @var $logger Jetpack_Sitemap_Logger
+ */
+ private $logger = false;
+
+ /**
+ * Finder object for dealing with sitemap URIs.
+ *
+ * @access private
+ * @since 4.8.0
+ * @var $finder Jetpack_Sitemap_Finder
+ */
+ private $finder;
+
+ /**
+ * Construct a new Jetpack_Sitemap_Builder object.
+ *
+ * @access public
+ * @since 4.8.0
+ */
+ public function __construct() {
+ $this->librarian = new Jetpack_Sitemap_Librarian();
+ $this->finder = new Jetpack_Sitemap_Finder();
+
+ if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
+ $this->logger = new Jetpack_Sitemap_Logger();
+ }
+
+ update_option(
+ 'jetpack_sitemap_post_types',
+ /**
+ * The array of post types to be included in the sitemap.
+ *
+ * Add your custom post type name to the array to have posts of
+ * that type included in the sitemap. The default array includes
+ * 'page' and 'post'.
+ *
+ * The result of this filter is cached in an option, 'jetpack_sitemap_post_types',
+ * so this filter only has to be applied once per generation.
+ *
+ * @since 4.8.0
+ */
+ apply_filters(
+ 'jetpack_sitemap_post_types',
+ array( 'post', 'page' )
+ )
+ );
+ }
+
+ /**
+ * Update the sitemap.
+ *
+ * All we do here is call build_next_sitemap_file a bunch of times.
+ *
+ * @since 4.8.0
+ */
+ public function update_sitemap() {
+ if ( $this->logger ) {
+ $this->logger->report( '-- Updating...' );
+ if ( ! class_exists( 'DOMDocument' ) ) {
+ $this->logger->report(
+ __(
+ 'Jetpack can not load necessary XML manipulation libraries. Please ask your hosting provider to refer to our server requirements at https://jetpack.com/support/server-requirements/ .',
+ 'jetpack'
+ ),
+ true
+ );
+ }
+ }
+
+ for ( $i = 1; $i <= JP_SITEMAP_UPDATE_SIZE; $i++ ) {
+ if ( true === $this->build_next_sitemap_file() ) {
+ break; // All finished!
+ }
+ }
+
+ if ( $this->logger ) {
+ $this->logger->report( '-- ...done for now.' );
+ $this->logger->time();
+ }
+ }
+
+ /**
+ * Generate the next sitemap file.
+ *
+ * Reads the most recent state of the sitemap generation phase,
+ * constructs the next file, and updates the state.
+ *
+ * @since 4.8.0
+ *
+ * @return bool True when finished.
+ */
+ private function build_next_sitemap_file() {
+ $finished = false; // Initialize finished flag.
+
+ // Get the most recent state, and lock the state.
+ $state = Jetpack_Sitemap_State::check_out();
+
+ // Do nothing if the state was locked.
+ if ( false === $state ) {
+ return false;
+ }
+
+ // Otherwise, branch on the sitemap-type key of $state.
+ switch ( $state['sitemap-type'] ) {
+ case JP_PAGE_SITEMAP_TYPE:
+ $this->build_next_sitemap_of_type(
+ JP_PAGE_SITEMAP_TYPE,
+ array( $this, 'build_one_page_sitemap' ),
+ $state
+ );
+ break;
+
+ case JP_PAGE_SITEMAP_INDEX_TYPE:
+ $this->build_next_sitemap_index_of_type(
+ JP_PAGE_SITEMAP_INDEX_TYPE,
+ JP_IMAGE_SITEMAP_TYPE,
+ $state
+ );
+ break;
+
+ case JP_IMAGE_SITEMAP_TYPE:
+ $this->build_next_sitemap_of_type(
+ JP_IMAGE_SITEMAP_TYPE,
+ array( $this, 'build_one_image_sitemap' ),
+ $state
+ );
+ break;
+
+ case JP_IMAGE_SITEMAP_INDEX_TYPE:
+ $this->build_next_sitemap_index_of_type(
+ JP_IMAGE_SITEMAP_INDEX_TYPE,
+ JP_VIDEO_SITEMAP_TYPE,
+ $state
+ );
+ break;
+
+ case JP_VIDEO_SITEMAP_TYPE:
+ $this->build_next_sitemap_of_type(
+ JP_VIDEO_SITEMAP_TYPE,
+ array( $this, 'build_one_video_sitemap' ),
+ $state
+ );
+ break;
+
+ case JP_VIDEO_SITEMAP_INDEX_TYPE:
+ $this->build_next_sitemap_index_of_type(
+ JP_VIDEO_SITEMAP_INDEX_TYPE,
+ JP_MASTER_SITEMAP_TYPE,
+ $state
+ );
+ break;
+
+ case JP_MASTER_SITEMAP_TYPE:
+ $this->build_master_sitemap( $state['max'] );
+
+ // Reset the state and quit.
+ Jetpack_Sitemap_State::reset(
+ JP_PAGE_SITEMAP_TYPE
+ );
+
+ if ( $this->logger ) {
+ $this->logger->report( '-- Finished.' );
+ $this->logger->time();
+ }
+ $finished = true;
+
+ break;
+
+ default:
+ Jetpack_Sitemap_State::reset(
+ JP_PAGE_SITEMAP_TYPE
+ );
+ $finished = true;
+
+ break;
+ } // End switch.
+
+ // Unlock the state.
+ Jetpack_Sitemap_State::unlock();
+
+ return $finished;
+ }
+
+ /**
+ * Build the next sitemap of a given type and update the sitemap state.
+ *
+ * @since 4.8.0
+ *
+ * @param string $sitemap_type The type of the sitemap being generated.
+ * @param callback $build_one A callback which builds a single sitemap file.
+ * @param array $state A sitemap state.
+ */
+ private function build_next_sitemap_of_type( $sitemap_type, $build_one, $state ) {
+ $index_type = jp_sitemap_index_type_of( $sitemap_type );
+
+ // Try to build a sitemap.
+ $result = call_user_func_array(
+ $build_one,
+ array(
+ $state['number'] + 1,
+ $state['last-added'],
+ )
+ );
+
+ if ( false === $result ) {
+ // If no sitemap was generated, advance to the next type.
+ Jetpack_Sitemap_State::check_in(
+ array(
+ 'sitemap-type' => $index_type,
+ 'last-added' => 0,
+ 'number' => 0,
+ 'last-modified' => '1970-01-01 00:00:00',
+ )
+ );
+
+ if ( $this->logger ) {
+ $this->logger->report( "-- Cleaning Up $sitemap_type" );
+ }
+
+ // Clean up old files.
+ $this->librarian->delete_numbered_sitemap_rows_after(
+ $state['number'],
+ $sitemap_type
+ );
+
+ return;
+ }
+
+ // Otherwise, update the state.
+ Jetpack_Sitemap_State::check_in(
+ array(
+ 'sitemap-type' => $state['sitemap-type'],
+ 'last-added' => $result['last_id'],
+ 'number' => $state['number'] + 1,
+ 'last-modified' => $result['last_modified'],
+ )
+ );
+
+ if ( true === $result['any_left'] ) {
+ // If there's more work to be done with this type, return.
+ return;
+ }
+
+ // Otherwise, advance state to the next sitemap type.
+ Jetpack_Sitemap_State::check_in(
+ array(
+ 'sitemap-type' => $index_type,
+ 'last-added' => 0,
+ 'number' => 0,
+ 'last-modified' => '1970-01-01 00:00:00',
+ )
+ );
+
+ if ( $this->logger ) {
+ $this->logger->report( "-- Cleaning Up $sitemap_type" );
+ }
+
+ // Clean up old files.
+ $this->librarian->delete_numbered_sitemap_rows_after(
+ $state['number'] + 1,
+ $sitemap_type
+ );
+ }
+
+ /**
+ * Build the next sitemap index of a given type and update the state.
+ *
+ * @since 4.8.0
+ *
+ * @param string $index_type The type of index being generated.
+ * @param string $next_type The next type to generate after this one.
+ * @param array $state A sitemap state.
+ */
+ private function build_next_sitemap_index_of_type( $index_type, $next_type, $state ) {
+ $sitemap_type = jp_sitemap_child_type_of( $index_type );
+
+ // If only 0 or 1 sitemaps were built, advance to the next type and return.
+ if ( 1 >= $state['max'][ $sitemap_type ]['number'] ) {
+ Jetpack_Sitemap_State::check_in(
+ array(
+ 'sitemap-type' => $next_type,
+ 'last-added' => 0,
+ 'number' => 0,
+ 'last-modified' => '1970-01-01 00:00:00',
+ )
+ );
+
+ if ( $this->logger ) {
+ $this->logger->report( "-- Cleaning Up $index_type" );
+ }
+
+ // There are no indices of this type.
+ $this->librarian->delete_numbered_sitemap_rows_after(
+ 0,
+ $index_type
+ );
+
+ return;
+ }
+
+ // Otherwise, try to build a sitemap index.
+ $result = $this->build_one_sitemap_index(
+ $state['number'] + 1,
+ $state['last-added'],
+ $state['last-modified'],
+ $index_type
+ );
+
+ // If no index was built, advance to the next type and return.
+ if ( false === $result ) {
+ Jetpack_Sitemap_State::check_in(
+ array(
+ 'sitemap-type' => $next_type,
+ 'last-added' => 0,
+ 'number' => 0,
+ 'last-modified' => '1970-01-01 00:00:00',
+ )
+ );
+
+ if ( $this->logger ) {
+ $this->logger->report( "-- Cleaning Up $index_type" );
+ }
+
+ // Clean up old files.
+ $this->librarian->delete_numbered_sitemap_rows_after(
+ $state['number'],
+ $index_type
+ );
+
+ return;
+ }
+
+ // Otherwise, check in the state.
+ Jetpack_Sitemap_State::check_in(
+ array(
+ 'sitemap-type' => $index_type,
+ 'last-added' => $result['last_id'],
+ 'number' => $state['number'] + 1,
+ 'last-modified' => $result['last_modified'],
+ )
+ );
+
+ // If there are still sitemaps left to index, return.
+ if ( true === $result['any_left'] ) {
+ return;
+ }
+
+ // Otherwise, advance to the next type.
+ Jetpack_Sitemap_State::check_in(
+ array(
+ 'sitemap-type' => $next_type,
+ 'last-added' => 0,
+ 'number' => 0,
+ 'last-modified' => '1970-01-01 00:00:00',
+ )
+ );
+
+ if ( $this->logger ) {
+ $this->logger->report( "-- Cleaning Up $index_type" );
+ }
+
+ // We're done generating indices of this type.
+ $this->librarian->delete_numbered_sitemap_rows_after(
+ $state['number'] + 1,
+ $index_type
+ );
+ }
+
+ /**
+ * Builds the master sitemap index.
+ *
+ * @param array $max Array of sitemap types with max index and datetime.
+ *
+ * @since 4.8.0
+ */
+ private function build_master_sitemap( $max ) {
+ $page = array();
+ $image = array();
+ $video = array();
+ if ( $this->logger ) {
+ $this->logger->report( '-- Building Master Sitemap.' );
+ }
+
+ $buffer = new Jetpack_Sitemap_Buffer_Master(
+ JP_SITEMAP_MAX_ITEMS,
+ JP_SITEMAP_MAX_BYTES
+ );
+
+ if ( 0 < $max[ JP_PAGE_SITEMAP_TYPE ]['number'] ) {
+ if ( 1 === $max[ JP_PAGE_SITEMAP_TYPE ]['number'] ) {
+ $page['filename'] = jp_sitemap_filename( JP_PAGE_SITEMAP_TYPE, 1 );
+ $page['last_modified'] = jp_sitemap_datetime( $max[ JP_PAGE_SITEMAP_TYPE ]['lastmod'] );
+ } else {
+ $page['filename'] = jp_sitemap_filename(
+ JP_PAGE_SITEMAP_INDEX_TYPE,
+ $max[ JP_PAGE_SITEMAP_INDEX_TYPE ]['number']
+ );
+ $page['last_modified'] = jp_sitemap_datetime( $max[ JP_PAGE_SITEMAP_INDEX_TYPE ]['lastmod'] );
+ }
+
+ $buffer->append(
+ array(
+ 'sitemap' => array(
+ 'loc' => $this->finder->construct_sitemap_url( $page['filename'] ),
+ 'lastmod' => $page['last_modified'],
+ ),
+ )
+ );
+ }
+
+ if ( 0 < $max[ JP_IMAGE_SITEMAP_TYPE ]['number'] ) {
+ if ( 1 === $max[ JP_IMAGE_SITEMAP_TYPE ]['number'] ) {
+ $image['filename'] = jp_sitemap_filename( JP_IMAGE_SITEMAP_TYPE, 1 );
+ $image['last_modified'] = jp_sitemap_datetime( $max[ JP_IMAGE_SITEMAP_TYPE ]['lastmod'] );
+ } else {
+ $image['filename'] = jp_sitemap_filename(
+ JP_IMAGE_SITEMAP_INDEX_TYPE,
+ $max[ JP_IMAGE_SITEMAP_INDEX_TYPE ]['number']
+ );
+ $image['last_modified'] = jp_sitemap_datetime( $max[ JP_IMAGE_SITEMAP_INDEX_TYPE ]['lastmod'] );
+ }
+
+ $buffer->append(
+ array(
+ 'sitemap' => array(
+ 'loc' => $this->finder->construct_sitemap_url( $image['filename'] ),
+ 'lastmod' => $image['last_modified'],
+ ),
+ )
+ );
+ }
+
+ if ( 0 < $max[ JP_VIDEO_SITEMAP_TYPE ]['number'] ) {
+ if ( 1 === $max[ JP_VIDEO_SITEMAP_TYPE ]['number'] ) {
+ $video['filename'] = jp_sitemap_filename( JP_VIDEO_SITEMAP_TYPE, 1 );
+ $video['last_modified'] = jp_sitemap_datetime( $max[ JP_VIDEO_SITEMAP_TYPE ]['lastmod'] );
+ } else {
+ $video['filename'] = jp_sitemap_filename(
+ JP_VIDEO_SITEMAP_INDEX_TYPE,
+ $max[ JP_VIDEO_SITEMAP_INDEX_TYPE ]['number']
+ );
+ $video['last_modified'] = jp_sitemap_datetime( $max[ JP_VIDEO_SITEMAP_INDEX_TYPE ]['lastmod'] );
+ }
+
+ $buffer->append(
+ array(
+ 'sitemap' => array(
+ 'loc' => $this->finder->construct_sitemap_url( $video['filename'] ),
+ 'lastmod' => $video['last_modified'],
+ ),
+ )
+ );
+ }
+
+ $this->librarian->store_sitemap_data(
+ 0,
+ JP_MASTER_SITEMAP_TYPE,
+ $buffer->contents(),
+ ''
+ );
+ }
+
+ /**
+ * Build and store a single page sitemap. Returns false if no sitemap is built.
+ *
+ * Side effect: Create/update a sitemap row.
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param int $number The number of the current sitemap.
+ * @param int $from_id The greatest lower bound of the IDs of the posts to be included.
+ *
+ * @return bool|array @args {
+ * @type int $last_id The ID of the last item to be successfully added to the buffer.
+ * @type bool $any_left 'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
+ * @type string $last_modified The most recent timestamp to appear on the sitemap.
+ * }
+ */
+ public function build_one_page_sitemap( $number, $from_id ) {
+ $last_post_id = $from_id;
+ $any_posts_left = true;
+
+ if ( $this->logger ) {
+ $debug_name = jp_sitemap_filename( JP_PAGE_SITEMAP_TYPE, $number );
+ $this->logger->report( "-- Building $debug_name" );
+ }
+
+ $buffer = new Jetpack_Sitemap_Buffer_Page(
+ JP_SITEMAP_MAX_ITEMS,
+ JP_SITEMAP_MAX_BYTES
+ );
+
+ // Add entry for the main page (only if we're at the first one) and it isn't already going to be included as a page.
+ if ( 1 === $number && 'page' !== get_option( 'show_on_front' ) ) {
+ $item_array = array(
+ 'url' => array(
+ 'loc' => home_url(),
+ ),
+ );
+
+ /**
+ * Filter associative array with data to build <url> node
+ * and its descendants for site home.
+ *
+ * @module sitemaps
+ *
+ * @since 3.9.0
+ *
+ * @param array $blog_home Data to build parent and children nodes for site home.
+ */
+ $item_array = apply_filters( 'jetpack_sitemap_url_home', $item_array );
+
+ $buffer->append( $item_array );
+ }
+
+ // Add as many items to the buffer as possible.
+ while ( $last_post_id >= 0 && false === $buffer->is_full() ) {
+ $posts = $this->librarian->query_posts_after_id(
+ $last_post_id,
+ JP_SITEMAP_BATCH_SIZE
+ );
+
+ if ( null == $posts ) { // WPCS: loose comparison ok.
+ $any_posts_left = false;
+ break;
+ }
+
+ foreach ( $posts as $post ) {
+ $current_item = $this->post_to_sitemap_item( $post );
+
+ if ( true === $buffer->append( $current_item['xml'] ) ) {
+ $last_post_id = $post->ID;
+ $buffer->view_time( $current_item['last_modified'] );
+ } else {
+ break;
+ }
+ }
+ }
+
+ // Handle other page sitemap URLs.
+ if ( false === $any_posts_left || $last_post_id < 0 ) {
+ // Negative IDs are used to track URL indexes.
+ $last_post_id = min( 0, $last_post_id );
+ $any_posts_left = true; // Reinitialize.
+
+ /**
+ * Filter other page sitemap URLs.
+ *
+ * @module sitemaps
+ *
+ * @since 6.1.0
+ *
+ * @param array $urls An array of other URLs.
+ */
+ $other_urls = apply_filters( 'jetpack_page_sitemap_other_urls', array() );
+
+ if ( $other_urls ) { // Start with index [1].
+ $other_urls = array_values( $other_urls );
+ array_unshift( $other_urls, $other_urls[0] );
+ unset( $other_urls[0] );
+ }
+
+ // Add as many items to the buffer as possible.
+ while ( false === $buffer->is_full() ) {
+ $last_post_id_index = abs( $last_post_id );
+ $start_from_post_id_index = $last_post_id_index ? $last_post_id_index + 1 : 0;
+ $urls = array_slice(
+ $other_urls,
+ $start_from_post_id_index,
+ JP_SITEMAP_BATCH_SIZE,
+ true
+ );
+
+ if ( ! $urls ) {
+ $any_posts_left = false;
+ break;
+ }
+
+ foreach ( $urls as $index => $url ) {
+ if ( ! is_array( $url ) ) {
+ $url = array( 'loc' => $url );
+ }
+ $item = array( 'xml' => compact( 'url' ) );
+
+ if ( true === $buffer->append( $item['xml'] ) ) {
+ $last_post_id = -$index;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+
+ // If no items were added, return false.
+ if ( true === $buffer->is_empty() ) {
+ return false;
+ }
+
+ /**
+ * Filter sitemap before rendering it as XML.
+ *
+ * @module sitemaps
+ *
+ * @since 3.9.0
+ * @since 5.3.0 returns an element of DOMDocument type instead of SimpleXMLElement
+ *
+ * @param DOMDocument $doc Data tree for sitemap.
+ * @param string $last_modified Date of last modification.
+ */
+ $tree = apply_filters(
+ 'jetpack_print_sitemap',
+ $buffer->get_document(),
+ $buffer->last_modified()
+ );
+
+ // Store the buffer as the content of a sitemap row.
+ $this->librarian->store_sitemap_data(
+ $number,
+ JP_PAGE_SITEMAP_TYPE,
+ $buffer->contents(),
+ $buffer->last_modified()
+ );
+
+ /*
+ * Now report back with the ID of the last post ID to be
+ * successfully added and whether there are any posts left.
+ */
+ return array(
+ 'last_id' => $last_post_id,
+ 'any_left' => $any_posts_left,
+ 'last_modified' => $buffer->last_modified(),
+ );
+ }
+
+ /**
+ * Build and store a single image sitemap. Returns false if no sitemap is built.
+ *
+ * Side effect: Create/update an image sitemap row.
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param int $number The number of the current sitemap.
+ * @param int $from_id The greatest lower bound of the IDs of the posts to be included.
+ *
+ * @return bool|array @args {
+ * @type int $last_id The ID of the last item to be successfully added to the buffer.
+ * @type bool $any_left 'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
+ * @type string $last_modified The most recent timestamp to appear on the sitemap.
+ * }
+ */
+ public function build_one_image_sitemap( $number, $from_id ) {
+ $last_post_id = $from_id;
+ $any_posts_left = true;
+
+ if ( $this->logger ) {
+ $debug_name = jp_sitemap_filename( JP_IMAGE_SITEMAP_TYPE, $number );
+ $this->logger->report( "-- Building $debug_name" );
+ }
+
+ $buffer = new Jetpack_Sitemap_Buffer_Image(
+ JP_SITEMAP_MAX_ITEMS,
+ JP_SITEMAP_MAX_BYTES
+ );
+
+ // Add as many items to the buffer as possible.
+ while ( false === $buffer->is_full() ) {
+ $posts = $this->librarian->query_images_after_id(
+ $last_post_id,
+ JP_SITEMAP_BATCH_SIZE
+ );
+
+ if ( null == $posts ) { // WPCS: loose comparison ok.
+ $any_posts_left = false;
+ break;
+ }
+
+ foreach ( $posts as $post ) {
+ $current_item = $this->image_post_to_sitemap_item( $post );
+
+ if ( true === $buffer->append( $current_item['xml'] ) ) {
+ $last_post_id = $post->ID;
+ $buffer->view_time( $current_item['last_modified'] );
+ } else {
+ break;
+ }
+ }
+ }
+
+ // If no items were added, return false.
+ if ( true === $buffer->is_empty() ) {
+ return false;
+ }
+
+ // Store the buffer as the content of a jp_sitemap post.
+ $this->librarian->store_sitemap_data(
+ $number,
+ JP_IMAGE_SITEMAP_TYPE,
+ $buffer->contents(),
+ $buffer->last_modified()
+ );
+
+ /*
+ * Now report back with the ID of the last post to be
+ * successfully added and whether there are any posts left.
+ */
+ return array(
+ 'last_id' => $last_post_id,
+ 'any_left' => $any_posts_left,
+ 'last_modified' => $buffer->last_modified(),
+ );
+ }
+
+ /**
+ * Build and store a single video sitemap. Returns false if no sitemap is built.
+ *
+ * Side effect: Create/update an video sitemap row.
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param int $number The number of the current sitemap.
+ * @param int $from_id The greatest lower bound of the IDs of the posts to be included.
+ *
+ * @return bool|array @args {
+ * @type int $last_id The ID of the last item to be successfully added to the buffer.
+ * @type bool $any_left 'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
+ * @type string $last_modified The most recent timestamp to appear on the sitemap.
+ * }
+ */
+ public function build_one_video_sitemap( $number, $from_id ) {
+ $last_post_id = $from_id;
+ $any_posts_left = true;
+
+ if ( $this->logger ) {
+ $debug_name = jp_sitemap_filename( JP_VIDEO_SITEMAP_TYPE, $number );
+ $this->logger->report( "-- Building $debug_name" );
+ }
+
+ $buffer = new Jetpack_Sitemap_Buffer_Video(
+ JP_SITEMAP_MAX_ITEMS,
+ JP_SITEMAP_MAX_BYTES
+ );
+
+ // Add as many items to the buffer as possible.
+ while ( false === $buffer->is_full() ) {
+ $posts = $this->librarian->query_videos_after_id(
+ $last_post_id,
+ JP_SITEMAP_BATCH_SIZE
+ );
+
+ if ( null == $posts ) { // WPCS: loose comparison ok.
+ $any_posts_left = false;
+ break;
+ }
+
+ foreach ( $posts as $post ) {
+ $current_item = $this->video_post_to_sitemap_item( $post );
+
+ if ( true === $buffer->append( $current_item['xml'] ) ) {
+ $last_post_id = $post->ID;
+ $buffer->view_time( $current_item['last_modified'] );
+ } else {
+ break;
+ }
+ }
+ }
+
+ // If no items were added, return false.
+ if ( true === $buffer->is_empty() ) {
+ return false;
+ }
+
+ if ( false === $buffer->is_empty() ) {
+ $this->librarian->store_sitemap_data(
+ $number,
+ JP_VIDEO_SITEMAP_TYPE,
+ $buffer->contents(),
+ $buffer->last_modified()
+ );
+ }
+
+ /*
+ * Now report back with the ID of the last post to be
+ * successfully added and whether there are any posts left.
+ */
+ return array(
+ 'last_id' => $last_post_id,
+ 'any_left' => $any_posts_left,
+ 'last_modified' => $buffer->last_modified(),
+ );
+ }
+
+ /**
+ * Build and store a single page sitemap index. Return false if no index is built.
+ *
+ * Side effect: Create/update a sitemap index row.
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param int $number The number of the current sitemap index.
+ * @param int $from_id The greatest lower bound of the IDs of the sitemaps to be included.
+ * @param string $datetime Datetime of previous sitemap in 'YYYY-MM-DD hh:mm:ss' format.
+ * @param string $index_type Sitemap index type.
+ *
+ * @return bool|array @args {
+ * @type int $last_id The ID of the last item to be successfully added to the buffer.
+ * @type bool $any_left 'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
+ * @type string $last_modified The most recent timestamp to appear on the sitemap.
+ * }
+ */
+ private function build_one_sitemap_index( $number, $from_id, $datetime, $index_type ) {
+ $last_sitemap_id = $from_id;
+ $any_sitemaps_left = true;
+
+ // Check the datetime format.
+ $datetime = jp_sitemap_datetime( $datetime );
+
+ $sitemap_type = jp_sitemap_child_type_of( $index_type );
+
+ if ( $this->logger ) {
+ $index_debug_name = jp_sitemap_filename( $index_type, $number );
+ $this->logger->report( "-- Building $index_debug_name" );
+ }
+
+ $buffer = new Jetpack_Sitemap_Buffer_Master(
+ JP_SITEMAP_MAX_ITEMS,
+ JP_SITEMAP_MAX_BYTES,
+ $datetime
+ );
+
+ // Add pointer to the previous sitemap index (unless we're at the first one).
+ if ( 1 !== $number ) {
+ $i = $number - 1;
+ $prev_index_url = $this->finder->construct_sitemap_url(
+ jp_sitemap_filename( $index_type, $i )
+ );
+
+ $item_array = array(
+ 'sitemap' => array(
+ 'loc' => $prev_index_url,
+ 'lastmod' => $datetime,
+ ),
+ );
+
+ $buffer->append( $item_array );
+ }
+
+ // Add as many items to the buffer as possible.
+ while ( false === $buffer->is_full() ) {
+ // Retrieve a batch of posts (in order).
+ $posts = $this->librarian->query_sitemaps_after_id(
+ $sitemap_type,
+ $last_sitemap_id,
+ JP_SITEMAP_BATCH_SIZE
+ );
+
+ // If there were no posts to get, make a note.
+ if ( null == $posts ) { // WPCS: loose comparison ok.
+ $any_sitemaps_left = false;
+ break;
+ }
+
+ // Otherwise, loop through each post in the batch.
+ foreach ( $posts as $post ) {
+ // Generate the sitemap XML for the post.
+ $current_item = $this->sitemap_row_to_index_item( (array) $post );
+
+ // Try adding this item to the buffer.
+ if ( true === $buffer->append( $current_item['xml'] ) ) {
+ $last_sitemap_id = $post['ID'];
+ $buffer->view_time( $current_item['last_modified'] );
+ } else {
+ // Otherwise stop looping through posts.
+ break;
+ }
+ }
+ }
+
+ // If no items were added, return false.
+ if ( true === $buffer->is_empty() ) {
+ return false;
+ }
+
+ $this->librarian->store_sitemap_data(
+ $number,
+ $index_type,
+ $buffer->contents(),
+ $buffer->last_modified()
+ );
+
+ /*
+ * Now report back with the ID of the last sitemap post ID to
+ * be successfully added, whether there are any sitemap posts
+ * left, and the most recent modification time seen.
+ */
+ return array(
+ 'last_id' => $last_sitemap_id,
+ 'any_left' => $any_sitemaps_left,
+ 'last_modified' => $buffer->last_modified(),
+ );
+ }
+
+ /**
+ * Construct the sitemap index url entry for a sitemap row.
+ *
+ * @link http://www.sitemaps.org/protocol.html#sitemapIndex_sitemap
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param array $row The sitemap data to be processed.
+ *
+ * @return string An XML fragment representing the post URL.
+ */
+ private function sitemap_row_to_index_item( $row ) {
+ $url = $this->finder->construct_sitemap_url( $row['post_title'] );
+
+ $item_array = array(
+ 'sitemap' => array(
+ 'loc' => $url,
+ 'lastmod' => jp_sitemap_datetime( $row['post_date'] ),
+ ),
+ );
+
+ return array(
+ 'xml' => $item_array,
+ 'last_modified' => $row['post_date'],
+ );
+ }
+
+
+ /**
+ * This is served instead of a 404 when the master sitemap is requested
+ * but not yet generated.
+ *
+ * @access public
+ * @since 6.7.0
+ *
+ * @return string The empty sitemap xml.
+ */
+ public function empty_sitemap_xml() {
+ $empty_sitemap = new Jetpack_Sitemap_Buffer_Empty();
+ return $empty_sitemap->contents();
+ }
+
+ /**
+ * Build and return the news sitemap xml. Note that the result of this
+ * function is cached in the transient 'jetpack_news_sitemap_xml'.
+ *
+ * @access public
+ * @since 4.8.0
+ *
+ * @return string The news sitemap xml.
+ */
+ public function news_sitemap_xml() {
+ $the_stored_news_sitemap = get_transient( 'jetpack_news_sitemap_xml' );
+
+ if ( false === $the_stored_news_sitemap ) {
+
+ if ( $this->logger ) {
+ $this->logger->report( 'Beginning news sitemap generation.' );
+ }
+
+ /**
+ * Filter limit of entries to include in news sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 3.9.0
+ *
+ * @param int $count Number of entries to include in news sitemap.
+ */
+ $item_limit = apply_filters(
+ 'jetpack_sitemap_news_sitemap_count',
+ JP_NEWS_SITEMAP_MAX_ITEMS
+ );
+
+ $buffer = new Jetpack_Sitemap_Buffer_News(
+ min( $item_limit, JP_NEWS_SITEMAP_MAX_ITEMS ),
+ JP_SITEMAP_MAX_BYTES
+ );
+
+ $posts = $this->librarian->query_most_recent_posts( JP_NEWS_SITEMAP_MAX_ITEMS );
+
+ foreach ( $posts as $post ) {
+ $current_item = $this->post_to_news_sitemap_item( $post );
+
+ if ( false === $buffer->append( $current_item['xml'] ) ) {
+ break;
+ }
+ }
+
+ if ( $this->logger ) {
+ $this->logger->time( 'End news sitemap generation.' );
+ }
+
+ $the_stored_news_sitemap = $buffer->contents();
+
+ set_transient(
+ 'jetpack_news_sitemap_xml',
+ $the_stored_news_sitemap,
+ JP_NEWS_SITEMAP_INTERVAL
+ );
+ } // End if.
+
+ return $the_stored_news_sitemap;
+ }
+
+ /**
+ * Construct the sitemap url entry for a WP_Post.
+ *
+ * @link http://www.sitemaps.org/protocol.html#urldef
+ * @access private
+ * @since 4.8.0
+ *
+ * @param WP_Post $post The post to be processed.
+ *
+ * @return array
+ * @type array $xml An XML fragment representing the post URL.
+ * @type string $last_modified Date post was last modified.
+ */
+ private function post_to_sitemap_item( $post ) {
+
+ /**
+ * Filter condition to allow skipping specific posts in sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 3.9.0
+ *
+ * @param bool $skip Current boolean. False by default, so no post is skipped.
+ * @param object $post Current post in the form of a $wpdb result object. Not WP_Post.
+ */
+ if ( true === apply_filters( 'jetpack_sitemap_skip_post', false, $post ) ) {
+ return array(
+ 'xml' => null,
+ 'last_modified' => null,
+ );
+ }
+
+ $url = esc_url( get_permalink( $post ) );
+
+ /*
+ * Spec requires the URL to be <=2048 bytes.
+ * In practice this constraint is unlikely to be violated.
+ */
+ if ( 2048 < strlen( $url ) ) {
+ $url = home_url() . '/?p=' . $post->ID;
+ }
+
+ $last_modified = $post->post_modified_gmt;
+
+ // Check for more recent comments.
+ // Note that 'Y-m-d h:i:s' strings sort lexicographically.
+ if ( 0 < $post->comment_count ) {
+ $last_modified = max(
+ $last_modified,
+ $this->librarian->query_latest_approved_comment_time_on_post( $post->ID )
+ );
+ }
+
+ $item_array = array(
+ 'url' => array(
+ 'loc' => $url,
+ 'lastmod' => jp_sitemap_datetime( $last_modified ),
+ ),
+ );
+
+ /**
+ * Filter sitemap URL item before rendering it as XML.
+ *
+ * @module sitemaps
+ *
+ * @since 3.9.0
+ *
+ * @param array $tree Associative array representing sitemap URL element.
+ * @param int $post_id ID of the post being processed.
+ */
+ $item_array = apply_filters( 'jetpack_sitemap_url', $item_array, $post->ID );
+
+ return array(
+ 'xml' => $item_array,
+ 'last_modified' => $last_modified,
+ );
+ }
+
+ /**
+ * Construct the image sitemap url entry for a WP_Post of image type.
+ *
+ * @link http://www.sitemaps.org/protocol.html#urldef
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param WP_Post $post The image post to be processed.
+ *
+ * @return array
+ * @type array $xml An XML fragment representing the post URL.
+ * @type string $last_modified Date post was last modified.
+ */
+ private function image_post_to_sitemap_item( $post ) {
+
+ /**
+ * Filter condition to allow skipping specific image posts in the sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 4.8.0
+ *
+ * @param bool $skip Current boolean. False by default, so no post is skipped.
+ * @param WP_POST $post Current post object.
+ */
+ if ( apply_filters( 'jetpack_sitemap_image_skip_post', false, $post ) ) {
+ return array(
+ 'xml' => null,
+ 'last_modified' => null,
+ );
+ }
+
+ $url = wp_get_attachment_url( $post->ID );
+
+ // Do not include the image if the attached parent is not published.
+ // Unattached will be published. Otherwise, will inherit parent status.
+ if ( 'publish' !== get_post_status( $post ) ) {
+ return array(
+ 'xml' => null,
+ 'last_modified' => null,
+ );
+ }
+
+ $parent_url = get_permalink( get_post( $post->post_parent ) );
+ if ( '' == $parent_url ) { // WPCS: loose comparison ok.
+ $parent_url = get_permalink( $post );
+ }
+
+ $item_array = array(
+ 'url' => array(
+ 'loc' => $parent_url,
+ 'lastmod' => jp_sitemap_datetime( $post->post_modified_gmt ),
+ 'image:image' => array(
+ 'image:loc' => $url,
+ ),
+ ),
+ );
+
+ $item_array['url']['image:image']['image:title'] = $post->post_title;
+ $item_array['url']['image:image']['image:caption'] = $post->post_excerpt;
+
+ /**
+ * Filter associative array with data to build <url> node
+ * and its descendants for current post in image sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 4.8.0
+ *
+ * @param array $item_array Data to build parent and children nodes for current post.
+ * @param int $post_id Current image post ID.
+ */
+ $item_array = apply_filters(
+ 'jetpack_sitemap_image_sitemap_item',
+ $item_array,
+ $post->ID
+ );
+
+ return array(
+ 'xml' => $item_array,
+ 'last_modified' => $post->post_modified_gmt,
+ );
+ }
+
+ /**
+ * Construct the video sitemap url entry for a WP_Post of video type.
+ *
+ * @link http://www.sitemaps.org/protocol.html#urldef
+ * @link https://developers.google.com/webmasters/videosearch/sitemaps
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param WP_Post $post The video post to be processed.
+ *
+ * @return array
+ * @type array $xml An XML fragment representing the post URL.
+ * @type string $last_modified Date post was last modified.
+ */
+ private function video_post_to_sitemap_item( $post ) {
+
+ /**
+ * Filter condition to allow skipping specific image posts in the sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 4.8.0
+ *
+ * @param bool $skip Current boolean. False by default, so no post is skipped.
+ * @param WP_POST $post Current post object.
+ */
+ if ( apply_filters( 'jetpack_sitemap_video_skip_post', false, $post ) ) {
+ return array(
+ 'xml' => null,
+ 'last_modified' => null,
+ );
+ }
+
+ // Do not include the video if the attached parent is not published.
+ // Unattached will be published. Otherwise, will inherit parent status.
+ if ( 'publish' !== get_post_status( $post ) ) {
+ return array(
+ 'xml' => null,
+ 'last_modified' => null,
+ );
+ }
+
+ $parent_url = esc_url( get_permalink( get_post( $post->post_parent ) ) );
+ if ( '' == $parent_url ) { // WPCS: loose comparison ok.
+ $parent_url = esc_url( get_permalink( $post ) );
+ }
+
+ // Prepare the content like get_the_content_feed().
+ $content = $post->post_content;
+ /** This filter is already documented in core/wp-includes/post-template.php */
+ $content = apply_filters( 'the_content', $content );
+
+ /** This filter is already documented in core/wp-includes/feed.php */
+ $content = apply_filters( 'the_content_feed', $content, 'rss2' );
+
+ // Include thumbnails for VideoPress videos, use blank image for others
+ if ( 'complete' === get_post_meta( $post->ID, 'videopress_status', true ) && has_post_thumbnail( $post ) ) {
+ $video_thumbnail_url = get_the_post_thumbnail_url( $post );
+ } else {
+ /**
+ * Filter the thumbnail image used in the video sitemap for non-VideoPress videos.
+ *
+ * @since 7.2.0
+ *
+ * @param string $str Image URL.
+ */
+ $video_thumbnail_url = apply_filters( 'jetpack_video_sitemap_default_thumbnail', 'https://s0.wp.com/i/blank.jpg' );
+ }
+
+ $item_array = array(
+ 'url' => array(
+ 'loc' => $parent_url,
+ 'lastmod' => jp_sitemap_datetime( $post->post_modified_gmt ),
+ 'video:video' => array(
+ /** This filter is already documented in core/wp-includes/feed.php */
+ 'video:title' => apply_filters( 'the_title_rss', $post->post_title ),
+ 'video:thumbnail_loc' => esc_url( $video_thumbnail_url ),
+ 'video:description' => $content,
+ 'video:content_loc' => esc_url( wp_get_attachment_url( $post->ID ) ),
+ ),
+ ),
+ );
+
+ // TODO: Integrate with VideoPress here.
+ // cf. video:player_loc tag in video sitemap spec.
+
+ /**
+ * Filter associative array with data to build <url> node
+ * and its descendants for current post in video sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 4.8.0
+ *
+ * @param array $item_array Data to build parent and children nodes for current post.
+ * @param int $post_id Current video post ID.
+ */
+ $item_array = apply_filters(
+ 'jetpack_sitemap_video_sitemap_item',
+ $item_array,
+ $post->ID
+ );
+
+ return array(
+ 'xml' => $item_array,
+ 'last_modified' => $post->post_modified_gmt,
+ );
+ }
+
+ /**
+ * Construct the news sitemap url entry for a WP_Post.
+ *
+ * @link http://www.sitemaps.org/protocol.html#urldef
+ *
+ * @access private
+ * @since 4.8.0
+ *
+ * @param WP_Post $post The post to be processed.
+ *
+ * @return string An XML fragment representing the post URL.
+ */
+ private function post_to_news_sitemap_item( $post ) {
+
+ /**
+ * Filter condition to allow skipping specific posts in news sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 3.9.0
+ *
+ * @param bool $skip Current boolean. False by default, so no post is skipped.
+ * @param WP_POST $post Current post object.
+ */
+ if ( apply_filters( 'jetpack_sitemap_news_skip_post', false, $post ) ) {
+ return array(
+ 'xml' => null,
+ );
+ }
+
+ $url = get_permalink( $post );
+
+ /*
+ * Spec requires the URL to be <=2048 bytes.
+ * In practice this constraint is unlikely to be violated.
+ */
+ if ( 2048 < strlen( $url ) ) {
+ $url = home_url() . '/?p=' . $post->ID;
+ }
+
+ /*
+ * Trim the locale to an ISO 639 language code as required by Google.
+ * Special cases are zh-cn (Simplified Chinese) and zh-tw (Traditional Chinese).
+ * @link http://www.loc.gov/standards/iso639-2/php/code_list.php
+ */
+ $language = strtolower( get_locale() );
+
+ if ( in_array( $language, array( 'zh_tw', 'zh_cn' ), true ) ) {
+ $language = str_replace( '_', '-', $language );
+ } else {
+ $language = preg_replace( '/(_.*)$/i', '', $language );
+ }
+
+ $item_array = array(
+ 'url' => array(
+ 'loc' => $url,
+ 'lastmod' => jp_sitemap_datetime( $post->post_modified_gmt ),
+ 'news:news' => array(
+ 'news:publication' => array(
+ 'news:name' => html_entity_decode( get_bloginfo( 'name' ) ),
+ 'news:language' => $language,
+ ),
+ /** This filter is already documented in core/wp-includes/feed.php */
+ 'news:title' => apply_filters( 'the_title_rss', $post->post_title ),
+ 'news:publication_date' => jp_sitemap_datetime( $post->post_date_gmt ),
+ 'news:genres' => 'Blog',
+ ),
+ ),
+ );
+
+ /**
+ * Filter associative array with data to build <url> node
+ * and its descendants for current post in news sitemap.
+ *
+ * @module sitemaps
+ *
+ * @since 3.9.0
+ *
+ * @param array $item_array Data to build parent and children nodes for current post.
+ * @param int $post_id Current post ID.
+ */
+ $item_array = apply_filters(
+ 'jetpack_sitemap_news_sitemap_item',
+ $item_array,
+ $post->ID
+ );
+
+ return array(
+ 'xml' => $item_array,
+ );
+ }
+}