Completed
Push — add/wordads-ads-txt ( b6a14d...7c89f2 )
by
unknown
46:03 queued 34:13
created

schedule_sitemap_generation()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 21
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 12
nc 2
nop 0
dl 0
loc 21
rs 9.3142
c 0
b 0
f 0
1
<?php
2
/**
3
 * Generate sitemap files in base XML as well as some namespace extensions.
4
 *
5
 * This module generates two different base sitemaps.
6
 *
7
 * 1. sitemap.xml
8
 *    The basic sitemap is updated regularly by wp-cron. It is stored in the
9
 *    database and retrieved when requested. This sitemap aims to include canonical
10
 *    URLs for all published content and abide by the sitemap spec. This is the root
11
 *    of a tree of sitemap and sitemap index xml files, depending on the number of URLs.
12
 *
13
 *    By default the sitemap contains published posts of type 'post' and 'page', as
14
 *    well as the home url. To include other post types use the 'jetpack_sitemap_post_types'
15
 *    filter.
16
 *
17
 * @link http://sitemaps.org/protocol.php Base sitemaps protocol.
18
 * @link https://support.google.com/webmasters/answer/178636 Image sitemap extension.
19
 * @link https://developers.google.com/webmasters/videosearch/sitemaps Video sitemap extension.
20
 *
21
 * 2. news-sitemap.xml
22
 *    The news sitemap is generated on the fly when requested. It does not aim for
23
 *    completeness, instead including at most 1000 of the most recent published posts
24
 *    from the previous 2 days, per the news-sitemap spec.
25
 *
26
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=74288 News sitemap extension.
27
 *
28
 * @package Jetpack
29
 * @since 3.9.0
30
 * @since 4.8.0 Remove 1000 post limit.
31
 * @author Automattic
32
 */
33
34
require_once dirname( __FILE__ ) . '/sitemap-constants.php';
35
require_once dirname( __FILE__ ) . '/sitemap-buffer.php';
36
require_once dirname( __FILE__ ) . '/sitemap-stylist.php';
37
require_once dirname( __FILE__ ) . '/sitemap-librarian.php';
38
require_once dirname( __FILE__ ) . '/sitemap-finder.php';
39
require_once dirname( __FILE__ ) . '/sitemap-builder.php';
40
41
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
42
	require_once dirname( __FILE__ ) . '/sitemap-logger.php';
43
}
44
45
/**
46
 * Governs the generation, storage, and serving of sitemaps.
47
 *
48
 * @since 4.8.0
49
 */
50
class Jetpack_Sitemap_Manager {
51
52
	/**
53
	 * @see Jetpack_Sitemap_Librarian
54
	 * @since 4.8.0
55
	 * @var Jetpack_Sitemap_Librarian $librarian Librarian object for storing and retrieving sitemap data.
56
	 */
57
	private $librarian;
58
59
	/**
60
	 * @see Jetpack_Sitemap_Logger
61
	 * @since 4.8.0
62
	 * @var Jetpack_Sitemap_Logger $logger Logger object for reporting debug messages.
63
	 */
64
	private $logger;
65
66
	/**
67
	 * @see Jetpack_Sitemap_Finder
68
	 * @since 4.8.0
69
	 * @var Jetpack_Sitemap_Finder $finder Finder object for dealing with sitemap URIs.
70
	 */
71
	private $finder;
72
73
	/**
74
	 * Construct a new Jetpack_Sitemap_Manager.
75
	 *
76
	 * @access public
77
	 * @since 4.8.0
78
	 */
79
	public function __construct() {
		// Helper objects for stored sitemap data and sitemap URI handling.
		$this->librarian = new Jetpack_Sitemap_Librarian();
		$this->finder = new Jetpack_Sitemap_Finder();

		// The logger is only instantiated when WP_DEBUG is strictly true.
		if ( defined( 'WP_DEBUG' ) && true === WP_DEBUG ) {
			$this->logger = new Jetpack_Sitemap_Logger();
		}

		// Intercept requests for sitemap URLs during init.
		add_action( 'init', array( $this, 'callback_action_catch_sitemap_urls' ) );

		// Register the cron schedule, the generation hook and the recurring event.
		$this->schedule_sitemap_generation();

		// Advertise the sitemaps in robots.txt.
		add_action( 'do_robotstxt', array( $this, 'callback_action_do_robotstxt' ), 20 );

		// The news sitemap is cached; flush that cache whenever a post is published.
		add_action( 'publish_post', array( $this, 'callback_action_flush_news_sitemap_cache' ), 10 );

		// Hook that lets all stored sitemap data be purged on demand.
		add_action( 'jetpack_sitemaps_purge_data', array( $this, 'callback_action_purge_data' ) );

		/*
		 * Module parameters are stored as options in the database so that
		 * sitemap data can be served without processing all of init first.
		 * This late init callback (priority 999) runs the location filter
		 * and stores its result.
		 */
		add_action( 'init', array( $this, 'callback_action_filter_sitemap_location' ), 999 );
	}
133
134
	/**
135
	 * Echo a raw string of given content-type.
136
	 *
137
	 * @access private
138
	 * @since 4.8.0
139
	 *
140
	 * @param string $the_content_type The content type to be served.
141
	 * @param string $the_content The string to be echoed.
142
	 */
143
	private function serve_raw_and_die( $the_content_type, $the_content ) {
		global $wp_query;

		// Announce the payload type before anything is written to the response.
		$content_type_header = 'Content-Type: ' . $the_content_type . '; charset=UTF-8';
		header( $content_type_header );

		// Mark the current query as a feed named 'sitemap'.
		$wp_query->is_feed = true;
		set_query_var( 'feed', 'sitemap' );

		// An empty string means there is nothing to serve: answer with a 404.
		$nothing_to_serve = ( '' === $the_content );
		if ( $nothing_to_serve ) {
			wp_die(
				esc_html__( "No sitemap found. Maybe it's being generated. Please try again later.", 'jetpack' ),
				esc_html__( 'Sitemaps', 'jetpack' ),
				array( 'response' => 404 )
			);
		}

		// The content is emitted verbatim, with no escaping applied here.
		echo $the_content;

		// Terminating the request is this method's contract (hence the name);
		// note that the exit makes the method awkward to unit test.
		exit;
	}
164
165
	/**
166
	 * Callback to intercept sitemap url requests and serve sitemap files.
167
	 *
168
	 * @access public
169
	 * @since 4.8.0
170
	 */
171
	public function callback_action_catch_sitemap_urls() {
		// The raw path(+query) of the requested URI.
		$raw_uri = '';
		if ( isset( $_SERVER['REQUEST_URI'] ) ) { // WPCS: Input var okay.
			$raw_uri = sanitize_text_field(
				wp_unslash( $_SERVER['REQUEST_URI'] ) // WPCS: Input var okay.
			);
		}

		$request = $this->finder->recognize_sitemap_uri( $raw_uri );

		// Not a sitemap request: leave it for the rest of WordPress.
		if ( ! isset( $request['sitemap_name'] ) ) {
			return;
		}

		$requested_name = $request['sitemap_name'];

		/**
		 * Filter the content type used to serve the sitemap XML files.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param string $xml_content_type By default, it's 'text/xml'.
		 */
		$xml_content_type = apply_filters( 'jetpack_sitemap_content_type', 'text/xml' );

		/*
		 * All patterns below are anchored and mutually exclusive, and every
		 * serve_raw_and_die() call ends the request, so at most one route
		 * is ever served.
		 */

		// Master sitemap: always read under its canonical stored name.
		if ( preg_match( '/^sitemap\.xml$/', $requested_name ) ) {
			$this->serve_raw_and_die(
				$xml_content_type,
				$this->librarian->get_sitemap_text(
					jp_sitemap_filename( JP_MASTER_SITEMAP_TYPE, 0 ),
					JP_MASTER_SITEMAP_TYPE
				)
			);
		}

		// Stored sitemaps and sitemap indexes: pattern => sitemap type.
		$stored_routes = array(
			'/^sitemap-[1-9][0-9]*\.xml$/'             => JP_PAGE_SITEMAP_TYPE,
			'/^sitemap-index-[1-9][0-9]*\.xml$/'       => JP_PAGE_SITEMAP_INDEX_TYPE,
			'/^image-sitemap-[1-9][0-9]*\.xml$/'       => JP_IMAGE_SITEMAP_TYPE,
			'/^image-sitemap-index-[1-9][0-9]*\.xml$/' => JP_IMAGE_SITEMAP_INDEX_TYPE,
			'/^video-sitemap-[1-9][0-9]*\.xml$/'       => JP_VIDEO_SITEMAP_TYPE,
			'/^video-sitemap-index-[1-9][0-9]*\.xml$/' => JP_VIDEO_SITEMAP_INDEX_TYPE,
		);

		foreach ( $stored_routes as $pattern => $sitemap_type ) {
			if ( preg_match( $pattern, $requested_name ) ) {
				$this->serve_raw_and_die(
					$xml_content_type,
					$this->librarian->get_sitemap_text( $requested_name, $sitemap_type )
				);
			}
		}

		// News sitemap: produced by the builder rather than read via the librarian.
		if ( preg_match( '/^news-sitemap\.xml$/', $requested_name ) ) {
			$sitemap_builder = new Jetpack_Sitemap_Builder();
			$this->serve_raw_and_die(
				$xml_content_type,
				$sitemap_builder->news_sitemap_xml()
			);
		}

		// XSL stylesheets: pattern => static Jetpack_Sitemap_Stylist method.
		$style_routes = array(
			'/^sitemap\.xsl$/'       => 'sitemap_xsl',
			'/^sitemap-index\.xsl$/' => 'sitemap_index_xsl',
			'/^image-sitemap\.xsl$/' => 'image_sitemap_xsl',
			'/^video-sitemap\.xsl$/' => 'video_sitemap_xsl',
			'/^news-sitemap\.xsl$/'  => 'news_sitemap_xsl',
		);

		foreach ( $style_routes as $pattern => $stylist_method ) {
			if ( preg_match( $pattern, $requested_name ) ) {
				// Only the matching stylesheet is generated.
				$this->serve_raw_and_die(
					'application/xml',
					call_user_func( array( 'Jetpack_Sitemap_Stylist', $stylist_method ) )
				);
			}
		}

		// The name matched none of the sitemap patterns; fall through.
	}
343
344
	/**
345
	 * Callback for adding sitemap-interval to the list of schedules.
346
	 *
347
	 * @access public
348
	 * @since 4.8.0
349
	 *
350
	 * @param array $schedules The array of WP_Cron schedules.
351
	 *
352
	 * @return array The updated array of WP_Cron schedules.
353
	 */
354
	public function callback_add_sitemap_schedule( $schedules ) {
		// Custom recurrence: period taken from JP_SITEMAP_INTERVAL, plus a translatable label.
		$sitemap_schedule = array(
			'interval' => JP_SITEMAP_INTERVAL,
			'display'  => __( 'Sitemap Interval', 'jetpack' ),
		);

		// Register (or overwrite) it under the 'sitemap-interval' key.
		$schedules['sitemap-interval'] = $sitemap_schedule;

		return $schedules;
	}
361
362
	/**
363
	 * Add actions to schedule sitemap generation.
364
	 * Should only be called once, in the constructor.
365
	 *
366
	 * @access private
367
	 * @since 4.8.0
368
	 */
369
	private function schedule_sitemap_generation() {
		$cron_hook = 'jp_sitemap_cron_hook';

		// Make the 'sitemap-interval' recurrence known to WP-Cron.
		add_filter( 'cron_schedules', array( $this, 'callback_add_sitemap_schedule' ) );

		// When the cron hook fires, a builder instance updates the sitemap.
		add_action( $cron_hook, array( new Jetpack_Sitemap_Builder(), 'update_sitemap' ) );

		// An event is already queued for this hook; nothing more to do.
		if ( wp_next_scheduled( $cron_hook ) ) {
			return;
		}

		// Otherwise queue the recurring event, first run due immediately.
		wp_schedule_event( time(), 'sitemap-interval', $cron_hook );
	}
390
391
	/**
392
	 * Callback to add sitemap to robots.txt.
393
	 *
394
	 * @access public
395
	 * @since 4.8.0
396
	 */
397
	public function callback_action_do_robotstxt() {

		/**
		 * Filter whether to make the default sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_sitemap Make default sitemap discoverable to robots.
		 */
		$discover_sitemap = apply_filters( 'jetpack_sitemap_generate', true );

		// Only a strict boolean true enables the line; other truthy values do not.
		if ( true === $discover_sitemap ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'sitemap.xml' ) ) . "\n";
		}

		/**
		 * Filter whether to make the news sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
		 */
		$discover_news_sitemap = apply_filters( 'jetpack_news_sitemap_generate', true );

		// Same strict check for the news sitemap entry.
		if ( true === $discover_news_sitemap ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'news-sitemap.xml' ) ) . "\n";
		}
	}
431
432
	/**
433
	 * Callback to delete the news sitemap cache.
434
	 *
435
	 * @access public
436
	 * @since 4.8.0
437
	 */
438
	public function callback_action_flush_news_sitemap_cache() {
		// Name of the transient this callback removes to flush the news sitemap cache.
		$news_cache_key = 'jetpack_news_sitemap_xml';

		delete_transient( $news_cache_key );
	}
441
442
	/**
443
	 * Callback for resetting stored sitemap data.
444
	 *
445
	 * @access public
446
	 * @since 5.3.0
447
	 */
448
	public function callback_action_purge_data() {
		// Drop the news sitemap transient first...
		$this->callback_action_flush_news_sitemap_cache();

		// ...then have the librarian delete all stored sitemap data.
		$this->librarian->delete_all_stored_sitemap_data();
	}
452
453
	/**
454
	 * Callback to set the sitemap location.
455
	 *
456
	 * @access public
457
	 * @since 4.8.0
458
	 */
459
	public function callback_action_filter_sitemap_location() {
		/**
		 * Additional path for sitemap URIs. Default value is empty.
		 *
		 * This string is any additional path fragment you want included between
		 * the home URL and the sitemap filenames. Exactly how this fragment is
		 * interpreted depends on your permalink settings. For example:
		 *
		 *   Pretty permalinks:
		 *     home_url() . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 *   Plain ("ugly") permalinks:
		 *     home_url() . jetpack_sitemap_location . '/?jetpack-sitemap=sitemap.xml'
		 *
		 *   PATHINFO permalinks:
		 *     home_url() . '/index.php' . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 * where 'sitemap.xml' is the name of a specific sitemap file.
		 * The value of this filter must be a valid path fragment per RFC 3986;
		 * in particular it must either be empty or begin with a '/'.
		 * Also take care that any restrictions on sitemap location imposed by
		 * the sitemap protocol are satisfied.
		 *
		 * The result of this filter is stored in an option, 'jetpack_sitemap_location';
		 * that option is what gets read when the sitemap location is needed.
		 * This way we don't have to wait for init to finish before building sitemaps.
		 *
		 * @link https://tools.ietf.org/html/rfc3986#section-3.3 RFC 3986
		 * @link http://www.sitemaps.org/ The sitemap protocol
		 *
		 * @since 4.8.0
		 */
		$sitemap_location = apply_filters( 'jetpack_sitemap_location', '' );

		// Persist the filtered fragment so it can be read back without running the filter.
		update_option( 'jetpack_sitemap_location', $sitemap_location );
	}
501
502
} // End Jetpack_Sitemap_Manager class.
503
504
// Instantiate the manager as soon as this file loads; its constructor registers
// the sitemap hooks, so the instance itself does not need to be kept.
new Jetpack_Sitemap_Manager();
505
506
/**
507
 * Absolute URL of the current blog's sitemap.
508
 *
509
 * @module sitemaps
510
 *
511
 * @since  3.9.0
512
 * @since  4.8.1 Code uses method found in Jetpack_Sitemap_Finder::construct_sitemap_url in 4.8.0.
513
 *                It has been moved here to avoid fatal errors with other plugins that were expecting to find this function.
514
 *
515
 * @param string $filename Sitemap file name. Defaults to 'sitemap.xml', the initial sitemaps page.
516
 *
517
 * @return string Sitemap URL.
518
 */
519
function jetpack_sitemap_uri( $filename = 'sitemap.xml' ) {
	global $wp_rewrite;

	// Optional extra path fragment, read from the stored option (empty by default).
	$location = Jetpack_Options::get_option_and_ensure_autoload( 'jetpack_sitemap_location', '' );

	// The path below the home URL depends on the permalink structure in use.
	if ( $wp_rewrite->using_index_permalinks() ) {
		// PATHINFO permalinks: everything hangs off /index.php.
		$sitemap_path = '/index.php' . $location . '/' . $filename;
	} elseif ( $wp_rewrite->using_permalinks() ) {
		// Pretty permalinks: the file name is a plain path segment.
		$sitemap_path = $location . '/' . $filename;
	} else {
		// Plain permalinks: the file name travels in the query string.
		$sitemap_path = $location . '/?jetpack-sitemap=' . $filename;
	}

	$sitemap_url = home_url( $sitemap_path );

	/*
	 * NOTE(review): the filter name below is the same one that
	 * Jetpack_Sitemap_Manager::callback_action_filter_sitemap_location()
	 * applies to a path fragment, while here it receives the complete URL.
	 * Callbacks hooked to it therefore see two different kinds of value;
	 * confirm this is intended before relying on either form.
	 */

	/**
	 * Filter sitemap URL relative to home URL.
	 *
	 * @module sitemaps
	 *
	 * @since 3.9.0
	 *
	 * @param string $sitemap_url Sitemap URL.
	 */
	return apply_filters( 'jetpack_sitemap_location', $sitemap_url );
}
543