Completed
Push — add/comments-bulk-endpoint ( 5043c1...e046d5 )
by
unknown
09:06
created

callback_sitemap_cron_hook()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 0
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * Generate sitemap files in base XML as well as some namespace extensions.
4
 *
5
 * This module generates two different base sitemaps.
6
 *
7
 * 1. sitemap.xml
8
 *    The basic sitemap is updated regularly by wp-cron. It is stored in the
9
 *    database and retrieved when requested. This sitemap aims to include canonical
10
 *    URLs for all published content and abide by the sitemap spec. This is the root
11
 *    of a tree of sitemap and sitemap index xml files, depending on the number of URLs.
12
 *
13
 *    By default the sitemap contains published posts of type 'post' and 'page', as
14
 *    well as the home url. To include other post types use the 'jetpack_sitemap_post_types'
15
 *    filter.
16
 *
17
 * @link http://sitemaps.org/protocol.php Base sitemaps protocol.
18
 * @link https://support.google.com/webmasters/answer/178636 Image sitemap extension.
19
 * @link https://developers.google.com/webmasters/videosearch/sitemaps Video sitemap extension.
20
 *
21
 * 2. news-sitemap.xml
22
 *    The news sitemap is generated on the fly when requested. It does not aim for
23
 *    completeness, instead including at most 1000 of the most recent published posts
24
 *    from the previous 2 days, per the news-sitemap spec.
25
 *
26
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=74288 News sitemap extension.
27
 *
28
 * @package Jetpack
29
 * @since 3.9.0
30
 * @since 4.8.0 Remove 1000 post limit.
31
 * @author Automattic
32
 */
33
34
require_once dirname( __FILE__ ) . '/sitemap-constants.php';
35
require_once dirname( __FILE__ ) . '/sitemap-buffer.php';
36
require_once dirname( __FILE__ ) . '/sitemap-stylist.php';
37
require_once dirname( __FILE__ ) . '/sitemap-librarian.php';
38
require_once dirname( __FILE__ ) . '/sitemap-finder.php';
39
require_once dirname( __FILE__ ) . '/sitemap-builder.php';
40
41
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
42
	require_once dirname( __FILE__ ) . '/sitemap-logger.php';
43
}
44
45
/**
46
 * Governs the generation, storage, and serving of sitemaps.
47
 *
48
 * @since 4.8.0
49
 */
50
class Jetpack_Sitemap_Manager {
51
52
	/**
	 * Librarian object for storing and retrieving sitemap data.
	 *
	 * @see Jetpack_Sitemap_Librarian
	 * @since 4.8.0
	 * @var Jetpack_Sitemap_Librarian
	 */
	private $librarian;

	/**
	 * Logger object for reporting debug messages.
	 *
	 * Only assigned by the constructor when WP_DEBUG is defined and strictly true.
	 *
	 * @see Jetpack_Sitemap_Logger
	 * @since 4.8.0
	 * @var Jetpack_Sitemap_Logger
	 */
	private $logger;

	/**
	 * Finder object for dealing with sitemap URIs.
	 *
	 * @see Jetpack_Sitemap_Finder
	 * @since 4.8.0
	 * @var Jetpack_Sitemap_Finder
	 */
	private $finder;
72
73
	/**
	 * Set up a new Jetpack_Sitemap_Manager.
	 *
	 * Creates the librarian and finder (and, in debug mode, the logger), then
	 * registers every hook the module relies on: URL interception, cron
	 * scheduling, robots.txt output, news cache flushing, data purging, and
	 * storage of the sitemap location option.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function __construct() {
		$this->librarian = new Jetpack_Sitemap_Librarian();
		$this->finder    = new Jetpack_Sitemap_Finder();

		// The logger is only instantiated when WP_DEBUG is strictly true.
		if ( defined( 'WP_DEBUG' ) && true === WP_DEBUG ) {
			$this->logger = new Jetpack_Sitemap_Logger();
		}

		// Add callback for sitemap URL handler; it runs at priority 100 when IS_WPCOM is set, 10 otherwise.
		$catch_priority = ( defined( 'IS_WPCOM' ) && IS_WPCOM ) ? 100 : 10;
		add_action( 'init', array( $this, 'callback_action_catch_sitemap_urls' ), $catch_priority );

		// Add generator to wp_cron task list.
		$this->schedule_sitemap_generation();

		// Add sitemap to robots.txt.
		add_action( 'do_robotstxt', array( $this, 'callback_action_do_robotstxt' ), 20 );

		// The news sitemap is cached; flush the cached copy whenever a post is published.
		add_action( 'publish_post', array( $this, 'callback_action_flush_news_sitemap_cache' ), 10 );

		// Allow all stored sitemap data to be purged on demand.
		add_action( 'jetpack_sitemaps_purge_data', array( $this, 'callback_action_purge_data' ) );

		/*
		 * Module parameters are stored as options in the database.
		 * This allows us to avoid having to process all of init
		 * before serving the sitemap data. The action below processes
		 * the location filter late on init (priority 999) and stores
		 * the resulting string.
		 */
		add_action( 'init', array( $this, 'callback_action_filter_sitemap_location' ), 999 );
	}
134
135
	/**
	 * Echo a raw string with the given content type, then end the request.
	 *
	 * Before any output, the main query is flagged as a feed and the 'feed'
	 * query var is set to 'sitemap'. When there is nothing to serve, the
	 * request is ended through wp_die() with a 404 response code.
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param string $the_content_type The content type to be served.
	 * @param string $the_content The string to be echoed.
	 */
	private function serve_raw_and_die( $the_content_type, $the_content ) {
		header( 'Content-Type: ' . $the_content_type . '; charset=UTF-8' );

		global $wp_query;
		$wp_query->is_feed = true;
		set_query_var( 'feed', 'sitemap' );

		// Treat any non-string value (for example null or false from a failed
		// lookup) like an empty string, so a missing sitemap produces a 404
		// instead of an empty response with a success status.
		if ( ! is_string( $the_content ) || '' === $the_content ) {
			wp_die(
				esc_html__( "No sitemap found. Maybe it's being generated. Please try again later.", 'jetpack' ),
				esc_html__( 'Sitemaps', 'jetpack' ),
				array(
					'response' => 404,
				)
			);
		}

		// The content is a complete, pre-built document and is emitted as-is.
		echo $the_content;

		die();
	}
165
166
	/**
	 * Callback to intercept sitemap url requests and serve sitemap files.
	 *
	 * The requested URI is passed to the finder. When it names a sitemap file,
	 * the matching XML document or XSL stylesheet is served and the request
	 * ends; every other request falls through untouched.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_catch_sitemap_urls() {
		// The raw path(+query) of the requested URI.
		$raw_uri = '';
		if ( isset( $_SERVER['REQUEST_URI'] ) ) { // WPCS: Input var okay.
			$raw_uri = sanitize_text_field( wp_unslash( $_SERVER['REQUEST_URI'] ) ); // WPCS: Input var okay.
		}

		$request = $this->finder->recognize_sitemap_uri( $raw_uri );

		// The request does not name a sitemap file; nothing to do.
		if ( ! isset( $request['sitemap_name'] ) ) {
			return;
		}

		$sitemap_name = $request['sitemap_name'];

		/**
		 * Filter the content type used to serve the sitemap XML files.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param string $xml_content_type By default, it's 'text/xml'.
		 */
		$xml_content_type = apply_filters( 'jetpack_sitemap_content_type', 'text/xml' );

		// Catch master sitemap xml; it is stored under a generated filename.
		if ( preg_match( '/^sitemap\.xml$/', $sitemap_name ) ) {
			$this->serve_raw_and_die(
				$xml_content_type,
				$this->librarian->get_sitemap_text(
					jp_sitemap_filename( JP_MASTER_SITEMAP_TYPE, 0 ),
					JP_MASTER_SITEMAP_TYPE
				)
			);
		}

		// Stored sitemaps and sitemap indices: URL pattern => librarian sitemap type.
		// The patterns are anchored and mutually exclusive, so at most one matches.
		$stored_sitemaps = array(
			'/^sitemap-[1-9][0-9]*\.xml$/'             => JP_PAGE_SITEMAP_TYPE,
			'/^sitemap-index-[1-9][0-9]*\.xml$/'       => JP_PAGE_SITEMAP_INDEX_TYPE,
			'/^image-sitemap-[1-9][0-9]*\.xml$/'       => JP_IMAGE_SITEMAP_TYPE,
			'/^image-sitemap-index-[1-9][0-9]*\.xml$/' => JP_IMAGE_SITEMAP_INDEX_TYPE,
			'/^video-sitemap-[1-9][0-9]*\.xml$/'       => JP_VIDEO_SITEMAP_TYPE,
			'/^video-sitemap-index-[1-9][0-9]*\.xml$/' => JP_VIDEO_SITEMAP_INDEX_TYPE,
		);

		foreach ( $stored_sitemaps as $pattern => $sitemap_type ) {
			if ( preg_match( $pattern, $sitemap_name ) ) {
				$this->serve_raw_and_die(
					$xml_content_type,
					$this->librarian->get_sitemap_text( $sitemap_name, $sitemap_type )
				);
			}
		}

		// XSL stylesheets: URL pattern => static Jetpack_Sitemap_Stylist method.
		// The method is only invoked for the pattern that matches.
		$stylesheets = array(
			'/^sitemap\.xsl$/'       => 'sitemap_xsl',
			'/^sitemap-index\.xsl$/' => 'sitemap_index_xsl',
			'/^image-sitemap\.xsl$/' => 'image_sitemap_xsl',
			'/^video-sitemap\.xsl$/' => 'video_sitemap_xsl',
			'/^news-sitemap\.xsl$/'  => 'news_sitemap_xsl',
		);

		foreach ( $stylesheets as $pattern => $stylist_method ) {
			if ( preg_match( $pattern, $sitemap_name ) ) {
				$this->serve_raw_and_die(
					'application/xml',
					call_user_func( array( 'Jetpack_Sitemap_Stylist', $stylist_method ) )
				);
			}
		}

		// Catch news sitemap xml; it is produced by the builder at request time.
		if ( preg_match( '/^news-sitemap\.xml$/', $sitemap_name ) ) {
			$news_builder = new Jetpack_Sitemap_Builder();
			$this->serve_raw_and_die(
				$xml_content_type,
				$news_builder->news_sitemap_xml()
			);
		}

		// The name did not match any known sitemap pattern.
	}
344
345
	/**
	 * Callback for adding sitemap-interval to the list of schedules.
	 *
	 * Registers a 'sitemap-interval' entry whose interval is JP_SITEMAP_INTERVAL.
	 *
	 * @access public
	 * @since 4.8.0
	 *
	 * @param array $schedules The array of WP_Cron schedules.
	 *
	 * @return array The updated array of WP_Cron schedules.
	 */
	public function callback_add_sitemap_schedule( $schedules ) {
		$sitemap_schedule = array(
			'interval' => JP_SITEMAP_INTERVAL,
			'display'  => __( 'Sitemap Interval', 'jetpack' ),
		);

		$schedules['sitemap-interval'] = $sitemap_schedule;

		return $schedules;
	}
362
363
	/**
	 * Add actions to schedule sitemap generation.
	 * Should only be called once, in the constructor.
	 *
	 * Registers the 'sitemap-interval' cron schedule, attaches a builder's
	 * update_sitemap() method to the cron hook, and schedules the recurring
	 * event if it is not already scheduled.
	 *
	 * @access private
	 * @since 4.8.0
	 */
	private function schedule_sitemap_generation() {
		$cron_hook = 'jp_sitemap_cron_hook';

		// Add cron schedule.
		add_filter( 'cron_schedules', array( $this, 'callback_add_sitemap_schedule' ) );

		// The builder does the actual work whenever the cron hook fires.
		$builder = new Jetpack_Sitemap_Builder();
		add_action( $cron_hook, array( $builder, 'update_sitemap' ) );

		// An event is already queued; do not schedule a second one.
		if ( wp_next_scheduled( $cron_hook ) ) {
			return;
		}

		wp_schedule_event( time(), 'sitemap-interval', $cron_hook );
	}
391
392
	/**
	 * Callback to add sitemap to robots.txt.
	 *
	 * Prints one "Sitemap:" line for the main sitemap and one for the news
	 * sitemap, each only when its filter returns exactly true.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_do_robotstxt() {

		/**
		 * Filter whether to make the default sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_sitemap Make default sitemap discoverable to robots.
		 */
		$discover_sitemap = apply_filters( 'jetpack_sitemap_generate', true );

		if ( true === $discover_sitemap ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'sitemap.xml' ) ) . "\n";
		}

		/**
		 * Filter whether to make the news sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
		 */
		$discover_news_sitemap = apply_filters( 'jetpack_news_sitemap_generate', true );

		if ( true === $discover_news_sitemap ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'news-sitemap.xml' ) ) . "\n";
		}
	}
432
433
	/**
	 * Callback to delete the news sitemap cache.
	 *
	 * Removes the 'jetpack_news_sitemap_xml' transient.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_flush_news_sitemap_cache() {
		$news_cache_key = 'jetpack_news_sitemap_xml';

		delete_transient( $news_cache_key );
	}
442
443
	/**
	 * Callback for resetting stored sitemap data.
	 *
	 * Flushes the cached news sitemap, then asks the librarian to delete all
	 * stored sitemap data.
	 *
	 * @access public
	 * @since 5.3.0
	 */
	public function callback_action_purge_data() {
		// Cached news sitemap first, then the librarian's stored data.
		$this->callback_action_flush_news_sitemap_cache();

		$this->librarian->delete_all_stored_sitemap_data();
	}
453
454
	/**
	 * Callback to set the sitemap location.
	 *
	 * Runs the 'jetpack_sitemap_location' filter and stores the result in the
	 * option of the same name.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_filter_sitemap_location() {
		/**
		 * Additional path for sitemap URIs. Default value is empty.
		 *
		 * This string is any additional path fragment you want included between
		 * the home URL and the sitemap filenames. Exactly how this fragment is
		 * interpreted depends on your permalink settings. For example:
		 *
		 *   Pretty permalinks:
		 *     home_url() . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 *   Plain ("ugly") permalinks:
		 *     home_url() . jetpack_sitemap_location . '/?jetpack-sitemap=sitemap.xml'
		 *
		 *   PATHINFO permalinks:
		 *     home_url() . '/index.php' . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 * where 'sitemap.xml' is the name of a specific sitemap file.
		 * The value of this filter must be a valid path fragment per RFC 3986;
		 * in particular it must either be empty or begin with a '/'.
		 * Also take care that any restrictions on sitemap location imposed by
		 * the sitemap protocol are satisfied.
		 *
		 * The result of this filter is stored in an option, 'jetpack_sitemap_location';
		 * that option is what gets read when the sitemap location is needed.
		 * This way we don't have to wait for init to finish before building sitemaps.
		 *
		 * @link https://tools.ietf.org/html/rfc3986#section-3.3 RFC 3986
		 * @link http://www.sitemaps.org/ The sitemap protocol
		 *
		 * @since 4.8.0
		 */
		$sitemap_location = apply_filters( 'jetpack_sitemap_location', '' );

		update_option( 'jetpack_sitemap_location', $sitemap_location );
	}
502
503
} // End Jetpack_Sitemap_Manager class.
504
505
new Jetpack_Sitemap_Manager();
506
507
/**
 * Absolute URL of the current blog's sitemap.
 *
 * The stored 'jetpack_sitemap_location' option is combined with the file name
 * according to the active permalink structure (PATHINFO, pretty, or plain).
 *
 * @module sitemaps
 *
 * @since  3.9.0
 * @since  4.8.1 Code uses method found in Jetpack_Sitemap_Finder::construct_sitemap_url in 4.8.0.
 *                It has been moved here to avoid fatal errors with other plugins that were expecting to find this function.
 *
 * @param string $filename Sitemap file name. Defaults to 'sitemap.xml', the initial sitemaps page.
 *
 * @return string Sitemap URL.
 */
function jetpack_sitemap_uri( $filename = 'sitemap.xml' ) {
	global $wp_rewrite;

	$location = Jetpack_Options::get_option_and_ensure_autoload( 'jetpack_sitemap_location', '' );

	// Work out the path relative to the home URL for the active permalink mode.
	if ( $wp_rewrite->using_index_permalinks() ) {
		// PATHINFO permalinks.
		$relative_path = '/index.php' . $location . '/' . $filename;
	} elseif ( $wp_rewrite->using_permalinks() ) {
		// Pretty permalinks.
		$relative_path = $location . '/' . $filename;
	} else {
		// Plain permalinks: the file name travels in the query string.
		$relative_path = $location . '/?jetpack-sitemap=' . $filename;
	}

	$sitemap_url = home_url( $relative_path );

	// NOTE(review): this hook shares its name with the filter that
	// Jetpack_Sitemap_Manager::callback_action_filter_sitemap_location() applies
	// to the location path fragment, so one callback sees both a fragment and a
	// full URL. Kept as-is for backward compatibility; confirm this is intended.
	/**
	 * Filter sitemap URL relative to home URL.
	 *
	 * @module sitemaps
	 *
	 * @since 3.9.0
	 *
	 * @param string $sitemap_url Sitemap URL.
	 */
	return apply_filters( 'jetpack_sitemap_location', $sitemap_url );
}
544