Completed
Push — update/activity-log-links ( 92fe50...3b9362 )
by
unknown
12:21
created

callback_action_do_robotstxt()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 34

Duplication

Lines 8
Ratio 23.53 %

Importance

Changes 0
Metric Value
cc 3
nc 4
nop 0
dl 8
loc 34
rs 9.376
c 0
b 0
f 0
1
<?php
2
/**
3
 * Generate sitemap files in base XML as well as some namespace extensions.
4
 *
5
 * This module generates two different base sitemaps.
6
 *
7
 * 1. sitemap.xml
8
 *    The basic sitemap is updated regularly by wp-cron. It is stored in the
9
 *    database and retrieved when requested. This sitemap aims to include canonical
10
 *    URLs for all published content and abide by the sitemap spec. This is the root
11
 *    of a tree of sitemap and sitemap index xml files, depending on the number of URLs.
12
 *
13
 *    By default the sitemap contains published posts of type 'post' and 'page', as
14
 *    well as the home url. To include other post types use the 'jetpack_sitemap_post_types'
15
 *    filter.
16
 *
17
 * @link http://sitemaps.org/protocol.php Base sitemaps protocol.
18
 * @link https://support.google.com/webmasters/answer/178636 Image sitemap extension.
19
 * @link https://developers.google.com/webmasters/videosearch/sitemaps Video sitemap extension.
20
 *
21
 * 2. news-sitemap.xml
22
 *    The news sitemap is generated on the fly when requested. It does not aim for
23
 *    completeness, instead including at most 1000 of the most recent published posts
24
 *    from the previous 2 days, per the news-sitemap spec.
25
 *
26
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=74288 News sitemap extension.
27
 *
28
 * @package Jetpack
29
 * @since 3.9.0
30
 * @since 4.8.0 Remove 1000 post limit.
31
 * @author Automattic
32
 */
33
34
require_once dirname( __FILE__ ) . '/sitemap-constants.php';
35
require_once dirname( __FILE__ ) . '/sitemap-buffer.php';
36
require_once dirname( __FILE__ ) . '/sitemap-stylist.php';
37
require_once dirname( __FILE__ ) . '/sitemap-librarian.php';
38
require_once dirname( __FILE__ ) . '/sitemap-finder.php';
39
require_once dirname( __FILE__ ) . '/sitemap-builder.php';
40
41
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
42
	require_once dirname( __FILE__ ) . '/sitemap-logger.php';
43
}
44
45
/**
46
 * Governs the generation, storage, and serving of sitemaps.
47
 *
48
 * @since 4.8.0
49
 */
50
class Jetpack_Sitemap_Manager {
51
52
	/**
53
	 * @see Jetpack_Sitemap_Librarian
54
	 * @since 4.8.0
55
	 * @var Jetpack_Sitemap_Librarian $librarian Librarian object for storing and retrieving sitemap data.
56
	 */
57
	private $librarian;
58
59
	/**
60
	 * @see Jetpack_Sitemap_Logger
61
	 * @since 4.8.0
62
	 * @var Jetpack_Sitemap_Logger $logger Logger object for reporting debug messages.
63
	 */
64
	private $logger;
65
66
	/**
67
	 * @see Jetpack_Sitemap_Finder
68
	 * @since 4.8.0
69
	 * @var Jetpack_Sitemap_Finder $finder Finder object for dealing with sitemap URIs.
70
	 */
71
	private $finder;
72
73
	/**
	 * Set up the sitemap manager.
	 *
	 * Creates the librarian and finder helpers (plus the logger when WP_DEBUG
	 * is strictly true) and registers every hook this module relies on.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function __construct() {
		$this->librarian = new Jetpack_Sitemap_Librarian();
		$this->finder    = new Jetpack_Sitemap_Finder();

		// The logger is only created when WP_DEBUG is exactly boolean true.
		if ( defined( 'WP_DEBUG' ) && true === WP_DEBUG ) {
			$this->logger = new Jetpack_Sitemap_Logger();
		}

		// Sitemap URL handler: priority 100 when IS_WPCOM is defined and truthy, 10 otherwise.
		$catch_priority = ( defined( 'IS_WPCOM' ) && IS_WPCOM ) ? 100 : 10;
		add_action( 'init', array( $this, 'callback_action_catch_sitemap_urls' ), $catch_priority );

		// Register the cron schedule and the generation task.
		$this->schedule_sitemap_generation();

		// Advertise the sitemaps in robots.txt.
		add_action( 'do_robotstxt', array( $this, 'callback_action_do_robotstxt' ), 20 );

		// The news sitemap is cached; drop that cache whenever a post is published.
		add_action( 'publish_post', array( $this, 'callback_action_flush_news_sitemap_cache' ), 10 );

		// Allow all stored sitemap data to be purged on demand.
		add_action( 'jetpack_sitemaps_purge_data', array( $this, 'callback_action_purge_data' ) );

		/*
		 * Module parameters are stored as options in the database so the
		 * sitemap can be served without processing all of init first.
		 * The late (999) init callback below evaluates the location filter
		 * and stores its result.
		 */
		add_action( 'init', array( $this, 'callback_action_filter_sitemap_location' ), 999 );
	}
134
135
	/**
	 * Send a response body with the given content type, then stop execution.
	 *
	 * The Content-Type header (always with a UTF-8 charset) is emitted first and
	 * the main query is flagged as a 'sitemap' feed. An empty-string body is
	 * answered with a 404 wp_die() message rather than being echoed.
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param string $the_content_type The content type to be served.
	 * @param string $the_content The string to be echoed.
	 */
	private function serve_raw_and_die( $the_content_type, $the_content ) {
		global $wp_query;

		header( 'Content-Type: ' . $the_content_type . '; charset=UTF-8' );

		// Mark the request as a feed named 'sitemap'.
		$wp_query->is_feed = true;
		set_query_var( 'feed', 'sitemap' );

		// Nothing to serve: report a 404 instead of an empty document.
		if ( '' === $the_content ) {
			$message = esc_html__( "No sitemap found. Maybe it's being generated. Please try again later.", 'jetpack' );
			$title   = esc_html__( 'Sitemaps', 'jetpack' );

			wp_die( $message, $title, array( 'response' => 404 ) );
		}

		echo $the_content;

		exit;
	}
165
166
	/**
	 * Callback that recognizes sitemap URL requests and serves the matching file.
	 *
	 * The request URI is handed to the finder; when it yields a sitemap name,
	 * that name is tested against an ordered list of routes. A matching route
	 * is served through serve_raw_and_die(). Requests that yield no sitemap
	 * name, or a name that matches no route, fall through untouched.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_catch_sitemap_urls() {
		// The raw path(+query) of the requested URI, or '' when unavailable.
		$raw_uri = '';
		if ( isset( $_SERVER['REQUEST_URI'] ) ) { // WPCS: Input var okay.
			$raw_uri = sanitize_text_field(
				wp_unslash( $_SERVER['REQUEST_URI'] ) // WPCS: Input var okay.
			);
		}

		$request = $this->finder->recognize_sitemap_uri( $raw_uri );

		// Not a sitemap request: nothing to do.
		if ( ! isset( $request['sitemap_name'] ) ) {
			return;
		}

		$sitemap_name = $request['sitemap_name'];

		/**
		 * Filter the content type used to serve the sitemap XML files.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param string $xml_content_type By default, it's 'text/xml'.
		 */
		$xml_content_type = apply_filters( 'jetpack_sitemap_content_type', 'text/xml' );

		/*
		 * Ordered routing table: array( pattern, kind, target ).
		 *   'master' - stored master sitemap; target is its sitemap type.
		 *   'stored' - stored sitemap looked up by the requested name; target is its sitemap type.
		 *   'style'  - XSL stylesheet; target is the Jetpack_Sitemap_Stylist static method producing it.
		 *   'news'   - news sitemap built on request; no target.
		 */
		$routes = array(
			array( '/^sitemap\.xml$/', 'master', JP_MASTER_SITEMAP_TYPE ),
			array( '/^sitemap-[1-9][0-9]*\.xml$/', 'stored', JP_PAGE_SITEMAP_TYPE ),
			array( '/^sitemap-index-[1-9][0-9]*\.xml$/', 'stored', JP_PAGE_SITEMAP_INDEX_TYPE ),
			array( '/^sitemap\.xsl$/', 'style', 'sitemap_xsl' ),
			array( '/^sitemap-index\.xsl$/', 'style', 'sitemap_index_xsl' ),
			array( '/^image-sitemap-[1-9][0-9]*\.xml$/', 'stored', JP_IMAGE_SITEMAP_TYPE ),
			array( '/^image-sitemap-index-[1-9][0-9]*\.xml$/', 'stored', JP_IMAGE_SITEMAP_INDEX_TYPE ),
			array( '/^image-sitemap\.xsl$/', 'style', 'image_sitemap_xsl' ),
			array( '/^video-sitemap-[1-9][0-9]*\.xml$/', 'stored', JP_VIDEO_SITEMAP_TYPE ),
			array( '/^video-sitemap-index-[1-9][0-9]*\.xml$/', 'stored', JP_VIDEO_SITEMAP_INDEX_TYPE ),
			array( '/^video-sitemap\.xsl$/', 'style', 'video_sitemap_xsl' ),
			array( '/^news-sitemap\.xml$/', 'news', null ),
			array( '/^news-sitemap\.xsl$/', 'style', 'news_sitemap_xsl' ),
		);

		foreach ( $routes as $route ) {
			list( $pattern, $kind, $target ) = $route;

			if ( ! preg_match( $pattern, $sitemap_name ) ) {
				continue;
			}

			switch ( $kind ) {
				case 'master':
					// The master sitemap is stored under a generated filename, not the requested one.
					$this->serve_raw_and_die(
						$xml_content_type,
						$this->librarian->get_sitemap_text( jp_sitemap_filename( $target, 0 ), $target )
					);
					break;

				case 'stored':
					$this->serve_raw_and_die(
						$xml_content_type,
						$this->librarian->get_sitemap_text( $sitemap_name, $target )
					);
					break;

				case 'style':
					// Stylesheets are always served as application/xml, bypassing the content-type filter.
					$this->serve_raw_and_die(
						'application/xml',
						call_user_func( array( 'Jetpack_Sitemap_Stylist', $target ) )
					);
					break;

				case 'news':
					$news_builder = new Jetpack_Sitemap_Builder();
					$this->serve_raw_and_die(
						$xml_content_type,
						$news_builder->news_sitemap_xml()
					);
					break;
			}
		}

		// URL did not match any sitemap patterns.
	}
344
345
	/**
	 * Callback that registers the 'sitemap-interval' entry in the cron schedules.
	 *
	 * @access public
	 * @since 4.8.0
	 *
	 * @param array $schedules The array of WP_Cron schedules.
	 *
	 * @return array The schedules array with the 'sitemap-interval' entry set.
	 */
	public function callback_add_sitemap_schedule( $schedules ) {
		$sitemap_schedule = array(
			'interval' => JP_SITEMAP_INTERVAL,
			'display'  => __( 'Sitemap Interval', 'jetpack' ),
		);

		$schedules['sitemap-interval'] = $sitemap_schedule;

		return $schedules;
	}
362
363
	/**
	 * Callback for the sitemap cron hook.
	 *
	 * Creates a fresh Jetpack_Sitemap_Builder and asks it to update the sitemap.
	 *
	 * @access public
	 */
	public function callback_sitemap_cron_hook() {
		$builder = new Jetpack_Sitemap_Builder();
		$builder->update_sitemap();
	}
372
373
	/**
	 * Register the hooks that drive scheduled sitemap generation.
	 *
	 * Adds the custom cron schedule, attaches the generation callback to the
	 * cron hook, and schedules the recurring event if none is pending yet.
	 * Should only be called once, in the constructor.
	 *
	 * @access private
	 * @since 4.8.0
	 */
	private function schedule_sitemap_generation() {
		$cron_hook = 'jp_sitemap_cron_hook';

		// Make the 'sitemap-interval' recurrence available to WP-Cron.
		add_filter( 'cron_schedules', array( $this, 'callback_add_sitemap_schedule' ) );

		add_action( $cron_hook, array( $this, 'callback_sitemap_cron_hook' ) );

		// An event is already queued; do not schedule a second one.
		if ( wp_next_scheduled( $cron_hook ) ) {
			return;
		}

		wp_schedule_event( time(), 'sitemap-interval', $cron_hook );
	}
397
398
	/**
	 * Callback that prints "Sitemap:" lines into robots.txt.
	 *
	 * One line is printed for the default sitemap and one for the news sitemap.
	 * Each is emitted only when its filter returns exactly boolean true.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_do_robotstxt() {

		/**
		 * Filter whether to make the default sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_sitemap Make default sitemap discoverable to robots.
		 */
		if ( true === apply_filters( 'jetpack_sitemap_generate', true ) ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'sitemap.xml' ) ) . "\n";
		}

		/**
		 * Filter whether to make the news sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
		 */
		if ( true === apply_filters( 'jetpack_news_sitemap_generate', true ) ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'news-sitemap.xml' ) ) . "\n";
		}
	}
438
439
	/**
	 * Callback that discards the cached news sitemap.
	 *
	 * Deletes the 'jetpack_news_sitemap_xml' transient.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_flush_news_sitemap_cache() {
		$news_cache_key = 'jetpack_news_sitemap_xml';

		delete_transient( $news_cache_key );
	}
448
449
	/**
	 * Callback that resets all stored sitemap data.
	 *
	 * Flushes the news sitemap cache first, then asks the librarian to delete
	 * everything it has stored.
	 *
	 * @access public
	 * @since 5.3.0
	 */
	public function callback_action_purge_data() {
		// Cached news sitemap.
		$this->callback_action_flush_news_sitemap_cache();

		// Sitemap data kept by the librarian.
		$this->librarian->delete_all_stored_sitemap_data();
	}
459
460
	/**
	 * Callback that evaluates the sitemap location filter and stores the result.
	 *
	 * The filtered value is written to the 'jetpack_sitemap_location' option.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_filter_sitemap_location() {
		/**
		 * Additional path for sitemap URIs. Default value is empty.
		 *
		 * This string is any additional path fragment you want included between
		 * the home URL and the sitemap filenames. Exactly how this fragment is
		 * interpreted depends on your permalink settings. For example:
		 *
		 *   Pretty permalinks:
		 *     home_url() . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 *   Plain ("ugly") permalinks:
		 *     home_url() . jetpack_sitemap_location . '/?jetpack-sitemap=sitemap.xml'
		 *
		 *   PATHINFO permalinks:
		 *     home_url() . '/index.php' . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 * where 'sitemap.xml' is the name of a specific sitemap file.
		 * The value of this filter must be a valid path fragment per RFC 3986;
		 * in particular it must either be empty or begin with a '/'.
		 * Also take care that any restrictions on sitemap location imposed by
		 * the sitemap protocol are satisfied.
		 *
		 * The result of this filter is stored in an option, 'jetpack_sitemap_location';
		 * that option is what gets read when the sitemap location is needed.
		 * This way we don't have to wait for init to finish before building sitemaps.
		 *
		 * @link https://tools.ietf.org/html/rfc3986#section-3.3 RFC 3986
		 * @link http://www.sitemaps.org/ The sitemap protocol
		 *
		 * @since 4.8.0
		 */
		$sitemap_location = apply_filters( 'jetpack_sitemap_location', '' );

		update_option( 'jetpack_sitemap_location', $sitemap_location );
	}
508
509
} // End Jetpack_Sitemap_Manager class.
510
511
// Instantiate the manager as soon as this file loads; its constructor registers the sitemap hooks.
new Jetpack_Sitemap_Manager();
512
513
/**
 * Absolute URL of the current blog's sitemap.
 *
 * The URL is built from the stored 'jetpack_sitemap_location' option and the
 * active permalink structure (index permalinks, regular permalinks, or none).
 *
 * @module sitemaps
 *
 * @since  3.9.0
 * @since  4.8.1 Code uses method found in Jetpack_Sitemap_Finder::construct_sitemap_url in 4.8.0.
 *                It has been moved here to avoid fatal errors with other plugins that were expecting to find this function.
 *
 * @param string $filename Sitemap file name. Defaults to 'sitemap.xml', the initial sitemaps page.
 *
 * @return string Sitemap URL.
 */
function jetpack_sitemap_uri( $filename = 'sitemap.xml' ) {
	global $wp_rewrite;

	$location = Jetpack_Options::get_option_and_ensure_autoload( 'jetpack_sitemap_location', '' );

	// Pick the path (relative to the home URL) that matches the permalink structure.
	if ( $wp_rewrite->using_index_permalinks() ) {
		$relative_path = '/index.php' . $location . '/' . $filename;
	} elseif ( $wp_rewrite->using_permalinks() ) {
		$relative_path = $location . '/' . $filename;
	} else {
		$relative_path = $location . '/?jetpack-sitemap=' . $filename;
	}

	$sitemap_url = home_url( $relative_path );

	// NOTE(review): this filter name is also applied to the bare location path fragment
	// elsewhere in this file, while here it receives a full URL — confirm callbacks cope with both.
	/**
	 * Filter sitemap URL relative to home URL.
	 *
	 * @module sitemaps
	 *
	 * @since 3.9.0
	 *
	 * @param string $sitemap_url Sitemap URL.
	 */
	return apply_filters( 'jetpack_sitemap_location', $sitemap_url );
}
550