Completed
Push — remove/grunt ( 82bad1...654b22 )
by
unknown
14:04
created

modules/sitemaps/sitemap-builder.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Build the sitemap tree.
4
 *
5
 * @package Jetpack
6
 * @since 4.8.0
7
 * @author Automattic
8
 */
9
10
require_once dirname( __FILE__ ) . '/sitemap-constants.php';
11
require_once dirname( __FILE__ ) . '/sitemap-buffer.php';
12
13
if ( ! class_exists( 'DOMDocument' ) ) {
14
	require_once dirname( __FILE__ ) . '/sitemap-buffer-fallback.php';
15
	require_once dirname( __FILE__ ) . '/sitemap-buffer-image-fallback.php';
16
	require_once dirname( __FILE__ ) . '/sitemap-buffer-master-fallback.php';
17
	require_once dirname( __FILE__ ) . '/sitemap-buffer-news-fallback.php';
18
	require_once dirname( __FILE__ ) . '/sitemap-buffer-page-fallback.php';
19
	require_once dirname( __FILE__ ) . '/sitemap-buffer-video-fallback.php';
20
} else {
21
	require_once dirname( __FILE__ ) . '/sitemap-buffer-image.php';
22
	require_once dirname( __FILE__ ) . '/sitemap-buffer-master.php';
23
	require_once dirname( __FILE__ ) . '/sitemap-buffer-news.php';
24
	require_once dirname( __FILE__ ) . '/sitemap-buffer-page.php';
25
	require_once dirname( __FILE__ ) . '/sitemap-buffer-video.php';
26
}
27
28
require_once dirname( __FILE__ ) . '/sitemap-librarian.php';
29
require_once dirname( __FILE__ ) . '/sitemap-finder.php';
30
require_once dirname( __FILE__ ) . '/sitemap-state.php';
31
32
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
33
	require_once dirname( __FILE__ ) . '/sitemap-logger.php';
34
}
35
36
/**
37
 * The Jetpack_Sitemap_Builder object handles the construction of
38
 * all sitemap files (except the XSL files, which are handled by
39
 * Jetpack_Sitemap_Stylist.) Other than the constructor, the main
40
 * public entry points are update_sitemap and news_sitemap_xml.
41
 *
42
 * @since 4.8.0
43
 */
44
class Jetpack_Sitemap_Builder {
45
46
	/**
47
	 * Librarian object for storing and retrieving sitemap data.
48
	 *
49
	 * @access private
50
	 * @since 4.8.0
51
	 * @var Jetpack_Sitemap_Librarian $librarian
52
	 */
53
	private $librarian;
54
55
	/**
56
	 * Logger object for reporting debug messages.
57
	 *
58
	 * @access private
59
	 * @since 4.8.0
60
	 * @var Jetpack_Sitemap_Logger|false $logger
61
	 */
62
	private $logger = false;
63
64
	/**
65
	 * Finder object for dealing with sitemap URIs.
66
	 *
67
	 * @access private
68
	 * @since 4.8.0
69
	 * @var Jetpack_Sitemap_Finder $finder
70
	 */
71
	private $finder;
72
73
	/**
	 * Set up the builder's collaborators and cache the sitemap post types.
	 *
	 * Creates the librarian and the finder, attaches a logger when WP_DEBUG
	 * is enabled, and stores the filtered list of post types in the
	 * 'jetpack_sitemap_post_types' option.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function __construct() {
		$this->librarian = new Jetpack_Sitemap_Librarian();
		$this->finder    = new Jetpack_Sitemap_Finder();

		// The logger stays false unless debugging is switched on.
		$debugging = defined( 'WP_DEBUG' ) && WP_DEBUG;
		if ( $debugging ) {
			$this->logger = new Jetpack_Sitemap_Logger();
		}

		/**
		 * The array of post types to be included in the sitemap.
		 *
		 * Add your custom post type name to the array to have posts of
		 * that type included in the sitemap. The default array includes
		 * 'page' and 'post'.
		 *
		 * The result of this filter is cached in an option, 'jetpack_sitemap_post_types',
		 * so this filter only has to be applied once per generation.
		 *
		 * @since 4.8.0
		 */
		$post_types = apply_filters( 'jetpack_sitemap_post_types', array( 'post', 'page' ) );

		update_option( 'jetpack_sitemap_post_types', $post_types );
	}
107
108
	/**
	 * Update the sitemap.
	 *
	 * Builds up to JP_SITEMAP_UPDATE_SIZE sitemap files in a row by calling
	 * build_next_sitemap_file repeatedly, stopping early as soon as that
	 * method reports that generation is finished.
	 *
	 * @since 4.8.0
	 */
	public function update_sitemap() {
		if ( $this->logger ) {
			$this->logger->report( '-- Updating...' );
		}

		// Without DOMDocument the fallback buffers are in use; say so in the debug log.
		if ( $this->logger && ! class_exists( 'DOMDocument' ) ) {
			$warning = __(
				'-- WARNING: Jetpack can not load necessary XML manipulation libraries. '
				. 'This can happen if XML support in PHP is not enabled on your server. '
				. 'XML support is highly recommended for WordPress and Jetpack, please enable '
				. 'it or contact your hosting provider about it.',
				'jetpack'
			);

			$this->logger->report( $warning, true );
		}

		// Count down the per-run budget of files.
		for ( $remaining = JP_SITEMAP_UPDATE_SIZE; $remaining >= 1; $remaining-- ) {
			$all_done = $this->build_next_sitemap_file();

			if ( true === $all_done ) {
				break; // All finished!
			}
		}

		if ( $this->logger ) {
			$this->logger->report( '-- ...done for now.' );
			$this->logger->time();
		}
	}
143
144
	/**
	 * Build exactly one sitemap file and advance the generation state.
	 *
	 * Checks out (and thereby locks) the most recent state, dispatches on its
	 * 'sitemap-type' key to the matching builder, and unlocks the state again
	 * afterwards. Nothing is built when the state could not be checked out.
	 *
	 * @since 4.8.0
	 *
	 * @return bool True once the master sitemap has been built or the state had
	 *              to be reset; false otherwise (including when the state was locked).
	 */
	private function build_next_sitemap_file() {
		$state = Jetpack_Sitemap_State::check_out();

		// A false state means it was locked; leave it alone.
		if ( false === $state ) {
			return false;
		}

		$is_done      = false;
		$current_type = $state['sitemap-type'];

		switch ( $current_type ) {
			case JP_PAGE_SITEMAP_TYPE:
				$page_builder = array( $this, 'build_one_page_sitemap' );
				$this->build_next_sitemap_of_type( JP_PAGE_SITEMAP_TYPE, $page_builder, $state );
				break;

			case JP_PAGE_SITEMAP_INDEX_TYPE:
				// Page indices are followed by image sitemaps.
				$this->build_next_sitemap_index_of_type( JP_PAGE_SITEMAP_INDEX_TYPE, JP_IMAGE_SITEMAP_TYPE, $state );
				break;

			case JP_IMAGE_SITEMAP_TYPE:
				$image_builder = array( $this, 'build_one_image_sitemap' );
				$this->build_next_sitemap_of_type( JP_IMAGE_SITEMAP_TYPE, $image_builder, $state );
				break;

			case JP_IMAGE_SITEMAP_INDEX_TYPE:
				// Image indices are followed by video sitemaps.
				$this->build_next_sitemap_index_of_type( JP_IMAGE_SITEMAP_INDEX_TYPE, JP_VIDEO_SITEMAP_TYPE, $state );
				break;

			case JP_VIDEO_SITEMAP_TYPE:
				$video_builder = array( $this, 'build_one_video_sitemap' );
				$this->build_next_sitemap_of_type( JP_VIDEO_SITEMAP_TYPE, $video_builder, $state );
				break;

			case JP_VIDEO_SITEMAP_INDEX_TYPE:
				// Video indices are followed by the master sitemap.
				$this->build_next_sitemap_index_of_type( JP_VIDEO_SITEMAP_INDEX_TYPE, JP_MASTER_SITEMAP_TYPE, $state );
				break;

			case JP_MASTER_SITEMAP_TYPE:
				$this->build_master_sitemap( $state['max'] );

				// The cycle is complete: start over from page sitemaps next time.
				Jetpack_Sitemap_State::reset( JP_PAGE_SITEMAP_TYPE );

				if ( $this->logger ) {
					$this->logger->report( '-- Finished.' );
					$this->logger->time();
				}

				$is_done = true;
				break;

			default:
				// Unrecognized type: reset the state and stop.
				Jetpack_Sitemap_State::reset( JP_PAGE_SITEMAP_TYPE );
				$is_done = true;
				break;
		} // End switch().

		// Release the lock taken by check_out().
		Jetpack_Sitemap_State::unlock();

		return $is_done;
	}
245
246
	/**
	 * Build the next sitemap of a given type and update the sitemap state.
	 *
	 * Calls $build_one with the next file number and the last added ID. When
	 * nothing was built, or when the builder reports that no items are left,
	 * the state moves on to the index type of $sitemap_type and stale rows
	 * of $sitemap_type are deleted.
	 *
	 * @since 4.8.0
	 *
	 * @param string   $sitemap_type The type of the sitemap being generated.
	 * @param callback $build_one    A callback which builds a single sitemap file.
	 * @param array    $state        A sitemap state.
	 */
	private function build_next_sitemap_of_type( $sitemap_type, $build_one, $state ) {
		$index_type  = jp_sitemap_index_type_of( $sitemap_type );
		$next_number = $state['number'] + 1;

		// Try to build a sitemap.
		$result = call_user_func_array(
			$build_one,
			array(
				$next_number,
				$state['last-added'],
			)
		);

		if ( false === $result ) {
			// No sitemap was generated, so the previous number is the last valid file.
			$this->advance_past_sitemap_type( $sitemap_type, $index_type, $state['number'] );

			return;
		}

		// Otherwise, record the progress made by this file.
		Jetpack_Sitemap_State::check_in( array(
			'sitemap-type'  => $state['sitemap-type'],
			'last-added'    => $result['last_id'],
			'number'        => $next_number,
			'last-modified' => $result['last_modified'],
		) );

		if ( true === $result['any_left'] ) {
			// There's more work to be done with this type.
			return;
		}

		// The file just built was the last one of this type.
		$this->advance_past_sitemap_type( $sitemap_type, $index_type, $next_number );
	}

	/**
	 * Move the state on to an index type and delete stale sitemap rows.
	 *
	 * @param string $sitemap_type The sitemap type that has just been completed.
	 * @param string $index_type   The index type to generate next.
	 * @param int    $last_number  Number of the last valid sitemap of $sitemap_type;
	 *                             rows numbered after it are deleted.
	 */
	private function advance_past_sitemap_type( $sitemap_type, $index_type, $last_number ) {
		Jetpack_Sitemap_State::check_in( array(
			'sitemap-type'  => $index_type,
			'last-added'    => 0,
			'number'        => 0,
			'last-modified' => '1970-01-01 00:00:00',
		) );

		if ( $this->logger ) {
			$this->logger->report( "-- Cleaning Up $sitemap_type" );
		}

		// Clean up old files.
		$this->librarian->delete_numbered_sitemap_rows_after(
			$last_number, $sitemap_type
		);
	}
318
319
	/**
	 * Build the next sitemap index of a given type and update the state.
	 *
	 * No index is built when at most one sitemap of the child type exists.
	 * Whenever this index type is finished (nothing to index, nothing built,
	 * or nothing left), the state moves on to $next_type and stale index rows
	 * are deleted.
	 *
	 * @since 4.8.0
	 *
	 * @param string $index_type The type of index being generated.
	 * @param string $next_type  The next type to generate after this one.
	 * @param array  $state      A sitemap state.
	 */
	private function build_next_sitemap_index_of_type( $index_type, $next_type, $state ) {
		$sitemap_type = jp_sitemap_child_type_of( $index_type );

		// If only 0 or 1 sitemaps were built, there are no indices of this type.
		if ( 1 >= $state['max'][ $sitemap_type ]['number'] ) {
			$this->advance_past_sitemap_index_type( $index_type, $next_type, 0 );

			return;
		}

		$next_number = $state['number'] + 1;

		// Otherwise, try to build a sitemap index.
		$result = $this->build_one_sitemap_index(
			$next_number,
			$state['last-added'],
			$state['last-modified'],
			$index_type
		);

		// If no index was built, the previous number is the last valid index.
		if ( false === $result ) {
			$this->advance_past_sitemap_index_type( $index_type, $next_type, $state['number'] );

			return;
		}

		// Otherwise, check in the state.
		Jetpack_Sitemap_State::check_in( array(
			'sitemap-type'  => $index_type,
			'last-added'    => $result['last_id'],
			'number'        => $next_number,
			'last-modified' => $result['last_modified'],
		) );

		// If there are still sitemaps left to index, return.
		if ( true === $result['any_left'] ) {
			return;
		}

		// We're done generating indices of this type.
		$this->advance_past_sitemap_index_type( $index_type, $next_type, $next_number );
	}

	/**
	 * Move the state on to the next type and delete stale index rows.
	 *
	 * @param string $index_type  The index type that has just been completed.
	 * @param string $next_type   The type to generate next.
	 * @param int    $last_number Number of the last valid index of $index_type;
	 *                            rows numbered after it are deleted.
	 */
	private function advance_past_sitemap_index_type( $index_type, $next_type, $last_number ) {
		Jetpack_Sitemap_State::check_in( array(
			'sitemap-type'  => $next_type,
			'last-added'    => 0,
			'number'        => 0,
			'last-modified' => '1970-01-01 00:00:00',
		) );

		if ( $this->logger ) {
			$this->logger->report( "-- Cleaning Up $index_type" );
		}

		// Clean up old files.
		$this->librarian->delete_numbered_sitemap_rows_after(
			$last_number, $index_type
		);
	}
413
414
	/**
	 * Builds the master sitemap index.
	 *
	 * For each of the page, image and video types that has at least one
	 * sitemap, one entry is added: the sitemap itself when there is exactly
	 * one, otherwise the highest-numbered index of that type. The result is
	 * stored as row 0 of type JP_MASTER_SITEMAP_TYPE.
	 *
	 * @param array $max Array of sitemap types with max index and datetime.
	 *
	 * @since 4.8.0
	 */
	private function build_master_sitemap( $max ) {
		if ( $this->logger ) {
			$this->logger->report( '-- Building Master Sitemap.' );
		}

		$buffer = new Jetpack_Sitemap_Buffer_Master(
			JP_SITEMAP_MAX_ITEMS,
			JP_SITEMAP_MAX_BYTES
		);

		// Each pair is ( sitemap type, matching index type ), in output order.
		$type_pairs = array(
			array( JP_PAGE_SITEMAP_TYPE, JP_PAGE_SITEMAP_INDEX_TYPE ),
			array( JP_IMAGE_SITEMAP_TYPE, JP_IMAGE_SITEMAP_INDEX_TYPE ),
			array( JP_VIDEO_SITEMAP_TYPE, JP_VIDEO_SITEMAP_INDEX_TYPE ),
		);

		foreach ( $type_pairs as $type_pair ) {
			$sitemap_type = $type_pair[0];
			$index_type   = $type_pair[1];

			if ( 0 < $max[ $sitemap_type ]['number'] ) {
				if ( 1 === $max[ $sitemap_type ]['number'] ) {
					// A single sitemap is linked directly.
					$filename      = jp_sitemap_filename( $sitemap_type, 1 );
					$last_modified = jp_sitemap_datetime( $max[ $sitemap_type ]['lastmod'] );
				} else {
					// Several sitemaps: link to the latest index of this type.
					$filename      = jp_sitemap_filename(
						$index_type,
						$max[ $index_type ]['number']
					);
					$last_modified = jp_sitemap_datetime( $max[ $index_type ]['lastmod'] );
				}

				$buffer->append(
					array(
						'sitemap' => array(
							'loc'     => $this->finder->construct_sitemap_url( $filename ),
							'lastmod' => $last_modified,
						),
					)
				);
			}
		}

		$this->librarian->store_sitemap_data(
			0,
			JP_MASTER_SITEMAP_TYPE,
			$buffer->contents(),
			''
		);
	}
504
505
	/**
	 * Build and store a single page sitemap. Returns false if no sitemap is built.
	 *
	 * Posts are added first. Once they run out, URLs supplied through the
	 * 'jetpack_page_sitemap_other_urls' filter are appended. Progress through
	 * those extra URLs is tracked with negative IDs: a last ID of -k means the
	 * k-th extra URL (counting from 1) was the last item added.
	 *
	 * Side effect: Create/update a sitemap row.
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param int $number  The number of the current sitemap.
	 * @param int $from_id The greatest lower bound of the IDs of the posts to be included.
	 *
	 * @return bool|array @args {
	 *   @type int    $last_id       The ID of the last item to be successfully added to the buffer.
	 *   @type bool   $any_left      'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
	 *   @type string $last_modified The most recent timestamp to appear on the sitemap.
	 * }
	 */
	public function build_one_page_sitemap( $number, $from_id ) {
		$last_post_id   = $from_id;
		$any_posts_left = true;

		if ( $this->logger ) {
			$debug_name = jp_sitemap_filename( JP_PAGE_SITEMAP_TYPE, $number );
			$this->logger->report( "-- Building $debug_name" );
		}

		$buffer = new Jetpack_Sitemap_Buffer_Page(
			JP_SITEMAP_MAX_ITEMS,
			JP_SITEMAP_MAX_BYTES
		);

		// Add entry for the main page (only if we're at the first one).
		if ( 1 === $number ) {
			$item_array = array(
				'url' => array(
					'loc' => home_url(),
				),
			);

			/**
			 * Filter associative array with data to build <url> node
			 * and its descendants for site home.
			 *
			 * @module sitemaps
			 *
			 * @since 3.9.0
			 *
			 * @param array $blog_home Data to build parent and children nodes for site home.
			 */
			$item_array = apply_filters( 'jetpack_sitemap_url_home', $item_array );

			$buffer->append( $item_array );
		}

		// Add as many posts to the buffer as possible. A negative ID means
		// posts were exhausted by an earlier file, so this loop is skipped.
		while ( $last_post_id >= 0 && false === $buffer->is_full() ) {
			$posts = $this->librarian->query_posts_after_id(
				$last_post_id, JP_SITEMAP_BATCH_SIZE
			);

			if ( null == $posts ) { // WPCS: loose comparison ok.
				$any_posts_left = false;
				break;
			}

			foreach ( $posts as $post ) {
				$current_item = $this->post_to_sitemap_item( $post );

				if ( true === $buffer->append( $current_item['xml'] ) ) {
					$last_post_id = $post->ID;
					$buffer->view_time( $current_item['last_modified'] );
				} else {
					break;
				}
			}
		}

		// Handle other page sitemap URLs.
		if ( false === $any_posts_left || $last_post_id < 0 ) {
			// Negative IDs are used to track URL indexes.
			$last_post_id   = min( 0, $last_post_id );
			$any_posts_left = true; // Reinitialize.

			/**
			 * Filter other page sitemap URLs.
			 *
			 * @module sitemaps
			 *
			 * @since 6.1.0
			 *
			 * @param array $urls An array of other URLs.
			 */
			$other_urls = apply_filters( 'jetpack_page_sitemap_other_urls', array() );

			// A filter callback may return anything; only arrays can be sliced.
			if ( ! is_array( $other_urls ) ) {
				$other_urls = array();
			}

			if ( ! empty( $other_urls ) ) {
				// Re-key the list to start at [1] so that the negated key of
				// the first URL is distinguishable from the initial ID 0.
				$other_urls = array_values( $other_urls );
				array_unshift( $other_urls, $other_urls[0] );
				unset( $other_urls[0] );
			}

			// Add as many items to the buffer as possible.
			while ( false === $buffer->is_full() ) {
				/*
				 * array_slice() offsets are positions, not keys. The key k
				 * sits at position k - 1, so after key k was added the next
				 * URL (key k + 1) is found at position k.
				 */
				$next_position = abs( $last_post_id );
				$urls          = array_slice(
					$other_urls,
					$next_position,
					JP_SITEMAP_BATCH_SIZE,
					true
				);

				// empty() makes the "no URLs left" check on the array explicit.
				if ( empty( $urls ) ) {
					$any_posts_left = false;
					break;
				}

				foreach ( $urls as $index => $url ) {
					if ( ! is_array( $url ) ) {
						$url = array( 'loc' => $url );
					}

					if ( true === $buffer->append( array( 'url' => $url ) ) ) {
						$last_post_id = -$index;
					} else {
						break;
					}
				}
			}
		}

		// If no items were added, return false.
		if ( true === $buffer->is_empty() ) {
			return false;
		}

		/**
		 * Filter sitemap before rendering it as XML.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 * @since 5.3.0 returns an element of DOMDocument type instead of SimpleXMLElement
		 *
		 * @param DOMDocument      $doc Data tree for sitemap.
		 * @param string           $last_modified Date of last modification.
		 */
		// The filter's return value is not used here; it is still applied so hooked callbacks run.
		apply_filters(
			'jetpack_print_sitemap',
			$buffer->get_document(),
			$buffer->last_modified()
		);

		// Store the buffer as the content of a sitemap row.
		$this->librarian->store_sitemap_data(
			$number,
			JP_PAGE_SITEMAP_TYPE,
			$buffer->contents(),
			$buffer->last_modified()
		);

		/*
		 * Now report back with the ID of the last post ID to be
		 * successfully added and whether there are any posts left.
		 */
		return array(
			'last_id'       => $last_post_id,
			'any_left'      => $any_posts_left,
			'last_modified' => $buffer->last_modified(),
		);
	}
676
677
	/**
	 * Build and store one image sitemap file.
	 *
	 * Image posts are fetched in batches of JP_SITEMAP_BATCH_SIZE, starting
	 * after $from_id, and appended until the buffer is full or the librarian
	 * has no more images. Nothing is stored, and false is returned, when the
	 * buffer ends up empty.
	 *
	 * Side effect: Create/update an image sitemap row.
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param int $number The number of the current sitemap.
	 * @param int $from_id The greatest lower bound of the IDs of the posts to be included.
	 *
	 * @return bool|array @args {
	 *   @type int    $last_id       The ID of the last item to be successfully added to the buffer.
	 *   @type bool   $any_left      'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
	 *   @type string $last_modified The most recent timestamp to appear on the sitemap.
	 * }
	 */
	public function build_one_image_sitemap( $number, $from_id ) {
		if ( $this->logger ) {
			$debug_name = jp_sitemap_filename( JP_IMAGE_SITEMAP_TYPE, $number );
			$this->logger->report( "-- Building $debug_name" );
		}

		$buffer = new Jetpack_Sitemap_Buffer_Image( JP_SITEMAP_MAX_ITEMS, JP_SITEMAP_MAX_BYTES );

		$cursor_id      = $from_id; // ID of the last image that made it into the buffer.
		$more_remaining = true;

		// Keep pulling batches until the buffer fills up or the images run out.
		while ( false === $buffer->is_full() ) {
			$batch = $this->librarian->query_images_after_id( $cursor_id, JP_SITEMAP_BATCH_SIZE );

			if ( null == $batch ) { // WPCS: loose comparison ok.
				$more_remaining = false;
				break;
			}

			foreach ( $batch as $image_post ) {
				$entry = $this->image_post_to_sitemap_item( $image_post );

				// Stop with this batch as soon as the buffer rejects an item.
				if ( true !== $buffer->append( $entry['xml'] ) ) {
					break;
				}

				$cursor_id = $image_post->ID;
				$buffer->view_time( $entry['last_modified'] );
			}
		}

		// An empty buffer means there is no sitemap to store.
		if ( true === $buffer->is_empty() ) {
			return false;
		}

		// Persist the buffer as the content of a sitemap row.
		$this->librarian->store_sitemap_data(
			$number,
			JP_IMAGE_SITEMAP_TYPE,
			$buffer->contents(),
			$buffer->last_modified()
		);

		// Tell the caller where we stopped and whether anything remains.
		return array(
			'last_id'       => $cursor_id,
			'any_left'      => $more_remaining,
			'last_modified' => $buffer->last_modified(),
		);
	}
754
755
	/**
	 * Build and store a single video sitemap. Returns false if no sitemap is built.
	 *
	 * Video posts are fetched in batches of JP_SITEMAP_BATCH_SIZE, starting
	 * after $from_id, and appended until the buffer is full or the librarian
	 * has no more videos.
	 *
	 * Side effect: Create/update a video sitemap row.
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param int $number The number of the current sitemap.
	 * @param int $from_id The greatest lower bound of the IDs of the posts to be included.
	 *
	 * @return bool|array @args {
	 *   @type int    $last_id       The ID of the last item to be successfully added to the buffer.
	 *   @type bool   $any_left      'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
	 *   @type string $last_modified The most recent timestamp to appear on the sitemap.
	 * }
	 */
	public function build_one_video_sitemap( $number, $from_id ) {
		$last_post_id   = $from_id;
		$any_posts_left = true;

		if ( $this->logger ) {
			$debug_name = jp_sitemap_filename( JP_VIDEO_SITEMAP_TYPE, $number );
			$this->logger->report( "-- Building $debug_name" );
		}

		$buffer = new Jetpack_Sitemap_Buffer_Video(
			JP_SITEMAP_MAX_ITEMS,
			JP_SITEMAP_MAX_BYTES
		);

		// Add as many items to the buffer as possible.
		while ( false === $buffer->is_full() ) {
			$posts = $this->librarian->query_videos_after_id(
				$last_post_id, JP_SITEMAP_BATCH_SIZE
			);

			if ( null == $posts ) { // WPCS: loose comparison ok.
				$any_posts_left = false;
				break;
			}

			foreach ( $posts as $post ) {
				$current_item = $this->video_post_to_sitemap_item( $post );

				if ( true === $buffer->append( $current_item['xml'] ) ) {
					$last_post_id = $post->ID;
					$buffer->view_time( $current_item['last_modified'] );
				} else {
					break;
				}
			}
		}

		// If no items were added, return false.
		if ( true === $buffer->is_empty() ) {
			return false;
		}

		// The buffer is known to be non-empty here, so store it unconditionally.
		$this->librarian->store_sitemap_data(
			$number,
			JP_VIDEO_SITEMAP_TYPE,
			$buffer->contents(),
			$buffer->last_modified()
		);

		/*
		 * Now report back with the ID of the last post to be
		 * successfully added and whether there are any posts left.
		 */
		return array(
			'last_id'       => $last_post_id,
			'any_left'      => $any_posts_left,
			'last_modified' => $buffer->last_modified(),
		);
	}
833
834
	/**
	 * Build and store a single sitemap index of the given type. Return false if no index is built.
	 *
	 * Every index except the first starts with a pointer to the previous
	 * index of the same type, followed by as many sitemaps of the child type
	 * as fit into the buffer.
	 *
	 * Side effect: Create/update a sitemap index row.
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param int    $number     The number of the current sitemap index.
	 * @param int    $from_id    The greatest lower bound of the IDs of the sitemaps to be included.
	 * @param string $datetime   Datetime of previous sitemap in 'YYYY-MM-DD hh:mm:ss' format.
	 * @param string $index_type Sitemap index type.
	 *
	 * @return bool|array @args {
	 *   @type int    $last_id       The ID of the last item to be successfully added to the buffer.
	 *   @type bool   $any_left      'true' if there are items which haven't been saved to a sitemap, 'false' otherwise.
	 *   @type string $last_modified The most recent timestamp to appear on the sitemap.
	 * }
	 */
	private function build_one_sitemap_index( $number, $from_id, $datetime, $index_type ) {
		$last_sitemap_id   = $from_id;
		$any_sitemaps_left = true;

		// Check the datetime format.
		$datetime = jp_sitemap_datetime( $datetime );

		$sitemap_type = jp_sitemap_child_type_of( $index_type );

		if ( $this->logger ) {
			$index_debug_name = jp_sitemap_filename( $index_type, $number );
			$this->logger->report( "-- Building $index_debug_name" );
		}

		$buffer = new Jetpack_Sitemap_Buffer_Master(
			JP_SITEMAP_MAX_ITEMS,
			JP_SITEMAP_MAX_BYTES,
			$datetime
		);

		// Add pointer to the previous sitemap index (unless we're at the first one).
		if ( 1 !== $number ) {
			$prev_index_url = $this->finder->construct_sitemap_url(
				jp_sitemap_filename( $index_type, $number - 1 )
			);

			$buffer->append(
				array(
					'sitemap' => array(
						'loc'     => $prev_index_url,
						'lastmod' => $datetime,
					),
				)
			);
		}

		// Add as many items to the buffer as possible.
		while ( false === $buffer->is_full() ) {
			// Retrieve a batch of sitemap rows (in order).
			$posts = $this->librarian->query_sitemaps_after_id(
				$sitemap_type, $last_sitemap_id, JP_SITEMAP_BATCH_SIZE
			);

			// If there were no rows to get, make a note.
			if ( null == $posts ) { // WPCS: loose comparison ok.
				$any_sitemaps_left = false;
				break;
			}

			// Otherwise, loop through each row in the batch.
			foreach ( $posts as $post ) {
				// Normalize once so the item and the ID are read from the same
				// array, whether the librarian returned an array or an object.
				$row = (array) $post;

				$current_item = $this->sitemap_row_to_index_item( $row );

				// Try adding this item to the buffer.
				if ( true === $buffer->append( $current_item['xml'] ) ) {
					$last_sitemap_id = $row['ID'];
					$buffer->view_time( $current_item['last_modified'] );
				} else {
					// Otherwise stop looping through this batch.
					break;
				}
			}
		}

		// If no items were added, return false.
		if ( true === $buffer->is_empty() ) {
			return false;
		}

		$this->librarian->store_sitemap_data(
			$number,
			$index_type,
			$buffer->contents(),
			$buffer->last_modified()
		);

		/*
		 * Now report back with the ID of the last sitemap post ID to
		 * be successfully added, whether there are any sitemap posts
		 * left, and the most recent modification time seen.
		 */
		return array(
			'last_id'       => $last_sitemap_id,
			'any_left'      => $any_sitemaps_left,
			'last_modified' => $buffer->last_modified(),
		);
	}
942
943
	/**
	 * Turn a sitemap row into an entry for a sitemap index.
	 *
	 * The row's 'post_title' is used to construct the sitemap URL and its
	 * 'post_date' supplies the modification time.
	 *
	 * @link http://www.sitemaps.org/protocol.html#sitemapIndex_sitemap
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param array $row The sitemap data to be processed.
	 *
	 * @return array @args {
	 *   @type array  $xml           The 'sitemap' entry with 'loc' and 'lastmod' keys.
	 *   @type string $last_modified The row's 'post_date' value.
	 * }
	 */
	private function sitemap_row_to_index_item( $row ) {
		$sitemap_entry = array(
			'loc'     => $this->finder->construct_sitemap_url( $row['post_title'] ),
			'lastmod' => jp_sitemap_datetime( $row['post_date'] ),
		);

		return array(
			'xml'           => array( 'sitemap' => $sitemap_entry ),
			'last_modified' => $row['post_date'],
		);
	}
970
971
	/**
	 * Return the news sitemap XML, generating it when it is not cached.
	 *
	 * The XML is kept in the transient 'jetpack_news_sitemap_xml' for
	 * JP_NEWS_SITEMAP_INTERVAL; a cached copy is returned as-is.
	 *
	 * @access public
	 * @since 4.8.0
	 *
	 * @return string The news sitemap xml.
	 */
	public function news_sitemap_xml() {
		$cached_xml = get_transient( 'jetpack_news_sitemap_xml' );

		// Serve the cached copy whenever there is one.
		if ( false !== $cached_xml ) {
			return $cached_xml;
		}

		if ( $this->logger ) {
			$this->logger->report( 'Beginning news sitemap generation.' );
		}

		/**
		 * Filter limit of entries to include in news sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param int $count Number of entries to include in news sitemap.
		 */
		$item_limit = apply_filters( 'jetpack_sitemap_news_sitemap_count', JP_NEWS_SITEMAP_MAX_ITEMS );

		// The filter may lower the limit but never raise it above the maximum.
		$capped_limit = min( $item_limit, JP_NEWS_SITEMAP_MAX_ITEMS );
		$buffer       = new Jetpack_Sitemap_Buffer_News( $capped_limit, JP_SITEMAP_MAX_BYTES );

		$recent_posts = $this->librarian->query_most_recent_posts( JP_NEWS_SITEMAP_MAX_ITEMS );

		foreach ( $recent_posts as $recent_post ) {
			$news_item = $this->post_to_news_sitemap_item( $recent_post );
			$appended  = $buffer->append( $news_item['xml'] );

			// Stop as soon as the buffer refuses an item.
			if ( false === $appended ) {
				break;
			}
		}

		if ( $this->logger ) {
			$this->logger->time( 'End news sitemap generation.' );
		}

		$fresh_xml = $buffer->contents();

		set_transient( 'jetpack_news_sitemap_xml', $fresh_xml, JP_NEWS_SITEMAP_INTERVAL );

		return $fresh_xml;
	}
1033
1034
	/**
	 * Build the page sitemap url entry for a WP_Post.
	 *
	 * @link http://www.sitemaps.org/protocol.html#urldef
	 * @access private
	 * @since 4.8.0
	 *
	 * @param WP_Post $post The post to be processed.
	 *
	 * @return array Array with an 'xml' key (nested array describing the
	 *               url node) and a 'last_modified' key. Both values are
	 *               null when the post is skipped by filter.
	 */
	private function post_to_sitemap_item( $post ) {

		/**
		 * Filter condition to allow skipping specific posts in sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param bool    $skip Current boolean. False by default, so no post is skipped.
		 * @param WP_Post $post Current post object.
		 */
		$skip = apply_filters( 'jetpack_sitemap_skip_post', false, $post );

		// Only a strict boolean true skips the post.
		if ( true === $skip ) {
			return array(
				'xml'           => null,
				'last_modified' => null,
			);
		}

		$permalink = esc_url( get_permalink( $post ) );

		/*
		 * The spec caps URLs at 2048 bytes; fall back to the short
		 * ?p=ID form in the unlikely case the permalink is longer.
		 */
		if ( strlen( $permalink ) > 2048 ) {
			$permalink = home_url() . '/?p=' . $post->ID;
		}

		$modified_gmt = $post->post_modified_gmt;

		// An approved comment may be more recent than the post itself.
		// Note that 'Y-m-d H:i:s' strings sort lexicographically.
		if ( $post->comment_count > 0 ) {
			$latest_comment = $this->librarian->query_latest_approved_comment_time_on_post( $post->ID );
			$modified_gmt   = max( $modified_gmt, $latest_comment );
		}

		$url_node = array(
			'loc'     => $permalink,
			'lastmod' => jp_sitemap_datetime( $modified_gmt ),
		);

		/**
		 * Filter sitemap URL item before rendering it as XML.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param array $tree Associative array representing sitemap URL element.
		 * @param int   $post_id ID of the post being processed.
		 */
		$filtered_item = apply_filters(
			'jetpack_sitemap_url',
			array(
				'url' => $url_node,
			),
			$post->ID
		);

		return array(
			'xml'           => $filtered_item,
			'last_modified' => $modified_gmt,
		);
	}
1109
1110
	/**
	 * Build the image sitemap url entry for a WP_Post of image type.
	 *
	 * @link http://www.sitemaps.org/protocol.html#urldef
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param WP_Post $post The image post to be processed.
	 *
	 * @return array Array with an 'xml' key (nested array describing the
	 *               url node and its image:image child) and a
	 *               'last_modified' key. Both values are null when the
	 *               post is skipped by filter.
	 */
	private function image_post_to_sitemap_item( $post ) {

		/**
		 * Filter condition to allow skipping specific image posts in the sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 4.8.0
		 *
		 * @param bool    $skip Current boolean. False by default, so no post is skipped.
		 * @param WP_Post $post Current post object.
		 */
		$skip = apply_filters( 'jetpack_sitemap_image_skip_post', false, $post );

		// Any truthy filter result skips the attachment.
		if ( $skip ) {
			return array(
				'xml'           => null,
				'last_modified' => null,
			);
		}

		$attachment_url = wp_get_attachment_url( $post->ID );

		// The entry is listed under the parent post; when that yields no
		// permalink, the attachment's own permalink is used instead.
		$page_url = get_permalink( get_post( $post->post_parent ) );
		if ( '' == $page_url ) { // WPCS: loose comparison ok.
			$page_url = get_permalink( $post );
		}

		$modified_gmt = $post->post_modified_gmt;

		$image_node = array(
			'image:loc'     => $attachment_url,
			'image:title'   => $post->post_title,
			'image:caption' => $post->post_excerpt,
		);

		$url_node = array(
			'loc'         => $page_url,
			'lastmod'     => jp_sitemap_datetime( $modified_gmt ),
			'image:image' => $image_node,
		);

		/**
		 * Filter associative array with data to build <url> node
		 * and its descendants for current post in image sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 4.8.0
		 *
		 * @param array $item_array Data to build parent and children nodes for current post.
		 * @param int   $post_id Current image post ID.
		 */
		$filtered_item = apply_filters(
			'jetpack_sitemap_image_sitemap_item',
			array(
				'url' => $url_node,
			),
			$post->ID
		);

		return array(
			'xml'           => $filtered_item,
			'last_modified' => $post->post_modified_gmt,
		);
	}
1183
1184
	/**
	 * Build the video sitemap url entry for a WP_Post of video type.
	 *
	 * @link http://www.sitemaps.org/protocol.html#urldef
	 * @link https://developers.google.com/webmasters/videosearch/sitemaps
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param WP_Post $post The video post to be processed.
	 *
	 * @return array Array with an 'xml' key (nested array describing the
	 *               url node and its video:video child) and a
	 *               'last_modified' key. Both values are null when the
	 *               post is skipped by filter.
	 */
	private function video_post_to_sitemap_item( $post ) {

		/**
		 * Filter condition to allow skipping specific video posts in the sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 4.8.0
		 *
		 * @param bool    $skip Current boolean. False by default, so no post is skipped.
		 * @param WP_Post $post Current post object.
		 */
		$skip = apply_filters( 'jetpack_sitemap_video_skip_post', false, $post );

		// Any truthy filter result skips the attachment.
		if ( $skip ) {
			return array(
				'xml'           => null,
				'last_modified' => null,
			);
		}

		// The entry is listed under the parent post; when that yields an
		// empty URL, the attachment's own permalink is used instead.
		$page_url = esc_url( get_permalink( get_post( $post->post_parent ) ) );
		if ( '' == $page_url ) { // WPCS: loose comparison ok.
			$page_url = esc_url( get_permalink( $post ) );
		}

		// Prepare the description like get_the_content_feed().
		/** This filter is already documented in core/wp-includes/post-template.php */
		$description = apply_filters( 'the_content', $post->post_content );

		/** This filter is already documented in core/wp-includes/feed.php */
		$description = apply_filters( 'the_content_feed', $description, 'rss2' );

		$lastmod = jp_sitemap_datetime( $post->post_modified_gmt );

		/** This filter is already documented in core/wp-includes/feed.php */
		$video_title = apply_filters( 'the_title_rss', $post->post_title );

		$video_node = array(
			'video:title'         => $video_title,
			'video:thumbnail_loc' => '',
			'video:description'   => $description,
			'video:content_loc'   => esc_url( wp_get_attachment_url( $post->ID ) ),
		);

		$url_node = array(
			'loc'         => $page_url,
			'lastmod'     => $lastmod,
			'video:video' => $video_node,
		);

		// TODO: Integrate with VideoPress here.
		// cf. video:player_loc tag in video sitemap spec.

		/**
		 * Filter associative array with data to build <url> node
		 * and its descendants for current post in video sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 4.8.0
		 *
		 * @param array $item_array Data to build parent and children nodes for current post.
		 * @param int   $post_id Current video post ID.
		 */
		$filtered_item = apply_filters(
			'jetpack_sitemap_video_sitemap_item',
			array(
				'url' => $url_node,
			),
			$post->ID
		);

		return array(
			'xml'           => $filtered_item,
			'last_modified' => $post->post_modified_gmt,
		);
	}
1268
1269
	/**
	 * Build the news sitemap url entry for a WP_Post.
	 *
	 * @link http://www.sitemaps.org/protocol.html#urldef
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param WP_Post $post The post to be processed.
	 *
	 * @return array Array with a single 'xml' key holding the nested array
	 *               that describes the url node, or null when the post is
	 *               skipped by filter.
	 */
	private function post_to_news_sitemap_item( $post ) {

		/**
		 * Filter condition to allow skipping specific posts in news sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param bool    $skip Current boolean. False by default, so no post is skipped.
		 * @param WP_Post $post Current post object.
		 */
		$skip = apply_filters( 'jetpack_sitemap_news_skip_post', false, $post );

		// Any truthy filter result skips the post.
		if ( $skip ) {
			return array(
				'xml' => null,
			);
		}

		$permalink = get_permalink( $post );

		/*
		 * The spec caps URLs at 2048 bytes; fall back to the short
		 * ?p=ID form in the unlikely case the permalink is longer.
		 */
		if ( strlen( $permalink ) > 2048 ) {
			$permalink = home_url() . '/?p=' . $post->ID;
		}

		/*
		 * Trim the locale to an ISO 639 language code as required by Google.
		 * Special cases are zh-cn (Simplified Chinese) and zh-tw (Traditional Chinese),
		 * which keep their region with a hyphen instead of an underscore.
		 * @link http://www.loc.gov/standards/iso639-2/php/code_list.php
		 */
		$locale             = strtolower( get_locale() );
		$is_chinese_variant = in_array( $locale, array( 'zh_tw', 'zh_cn' ), true );

		$language = $is_chinese_variant
			? str_replace( '_', '-', $locale )
			: preg_replace( '/(_.*)$/i', '', $locale );

		$lastmod = jp_sitemap_datetime( $post->post_modified_gmt );

		$publication = array(
			'news:name'     => get_bloginfo( 'name' ),
			'news:language' => $language,
		);

		/** This filter is already documented in core/wp-includes/feed.php */
		$news_title = apply_filters( 'the_title_rss', $post->post_title );

		$news_node = array(
			'news:publication'      => $publication,
			'news:title'            => $news_title,
			'news:publication_date' => jp_sitemap_datetime( $post->post_date_gmt ),
			'news:genres'           => 'Blog',
		);

		$url_node = array(
			'loc'       => $permalink,
			'lastmod'   => $lastmod,
			'news:news' => $news_node,
		);

		/**
		 * Filter associative array with data to build <url> node
		 * and its descendants for current post in news sitemap.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param array $item_array Data to build parent and children nodes for current post.
		 * @param int   $post_id Current post ID.
		 */
		$filtered_item = apply_filters(
			'jetpack_sitemap_news_sitemap_item',
			array(
				'url' => $url_node,
			),
			$post->ID
		);

		return array(
			'xml' => $filtered_item,
		);
	}
1360
}
1361