Completed
Push — add/double-encode-message ( 8b6530...2d4e84 )
by
unknown
14:26 queued 05:57
created

o_simplexml()   D

Complexity

Conditions 9
Paths 13

Size

Total Lines 30
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
eloc 19
nc 13
nop 2
dl 0
loc 30
rs 4.909
c 0
b 0
f 0
1
<?php
2
/**
3
 * Generate sitemap files in base XML as well as some namespace extensions.
4
 *
5
 * This module generates two different base sitemaps.
6
 *
7
 * 1. sitemap.xml
8
 *    The basic sitemap is updated regularly by wp-cron. It is stored in the
9
 *    database and retrieved when requested. This sitemap aims to include canonical
10
 *    URLs for all published content and abide by the sitemap spec. This is the root
11
 *    of a tree of sitemap and sitemap index xml files, depending on the number of URLs.
12
 *
13
 *    By default the sitemap contains published posts of type 'post' and 'page', as
14
 *    well as the home url. To include other post types use the 'jetpack_sitemap_post_types'
15
 *    filter.
16
 *
17
 * @link http://sitemaps.org/protocol.php Base sitemaps protocol.
18
 * @link https://support.google.com/webmasters/answer/178636 Image sitemap extension.
19
 * @link https://developers.google.com/webmasters/videosearch/sitemaps Video sitemap extension.
20
 *
21
 * 2. news-sitemap.xml
22
 *    The news sitemap is generated on the fly when requested. It does not aim for
23
 *    completeness, instead including at most 1000 of the most recent published posts
24
 *    from the previous 2 days, per the news-sitemap spec.
25
 *
26
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=74288 News sitemap extension.
27
 *
28
 * @package Jetpack
29
 * @since 3.9.0
30
 * @since 4.8.0 Remove 1000 post limit.
31
 * @author Automattic
32
 */
33
34
/* Include all of the sitemap subclasses. */
35
require_once dirname( __FILE__ ) . '/sitemap-constants.php';
36
require_once dirname( __FILE__ ) . '/sitemap-buffer.php';
37
require_once dirname( __FILE__ ) . '/sitemap-stylist.php';
38
require_once dirname( __FILE__ ) . '/sitemap-librarian.php';
39
require_once dirname( __FILE__ ) . '/sitemap-finder.php';
40
require_once dirname( __FILE__ ) . '/sitemap-builder.php';
41
42
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
43
	require_once dirname( __FILE__ ) . '/sitemap-logger.php';
44
}
45
46
/**
47
 * Governs the generation, storage, and serving of sitemaps.
48
 *
49
 * @since 4.8.0
50
 */
51
class Jetpack_Sitemap_Manager {
52
53
	/**
54
	 * Librarian object for storing and retrieving sitemap data.
55
	 *
56
	 * @see Jetpack_Sitemap_Librarian
57
	 * @since 4.8.0
58
	 * @var Jetpack_Sitemap_Librarian $librarian Librarian object for storing and retrieving sitemap data.
59
	 */
60
	private $librarian;
61
62
	/**
63
	 * Logger object for reporting debug messages.
64
	 *
65
	 * @see Jetpack_Sitemap_Logger
66
	 * @since 4.8.0
67
	 * @var Jetpack_Sitemap_Logger $logger Logger object for reporting debug messages.
68
	 */
69
	private $logger;
70
71
	/**
72
	 * Finder object for handling sitemap URIs.
73
	 *
74
	 * @see Jetpack_Sitemap_Finder
75
	 * @since 4.8.0
76
	 * @var Jetpack_Sitemap_Finder $finder Finder object for handling sitemap URIs.
77
	 */
78
	private $finder;
79
80
	/**
81
	 * Construct a new Jetpack_Sitemap_Manager.
82
	 *
83
	 * @access public
84
	 * @since 4.8.0
85
	 */
86
	public function __construct() {
		// Collaborators used by the request-serving and purge callbacks.
		$this->librarian = new Jetpack_Sitemap_Librarian();
		$this->finder    = new Jetpack_Sitemap_Finder();

		// The logger class file is only required in debug mode, so only build it then.
		$debugging = defined( 'WP_DEBUG' ) && ( true === WP_DEBUG );
		if ( $debugging ) {
			$this->logger = new Jetpack_Sitemap_Logger();
		}

		// Intercept sitemap URLs on init; a later priority is used when IS_WPCOM is set.
		$catch_priority = ( defined( 'IS_WPCOM' ) && IS_WPCOM ) ? 100 : 10;
		add_action( 'init', array( $this, 'callback_action_catch_sitemap_urls' ), $catch_priority );

		// Register the cron schedule, the cron hook and (when missing) the recurring event.
		$this->schedule_sitemap_generation();

		// Advertise the sitemaps in robots.txt.
		add_action( 'do_robotstxt', array( $this, 'callback_action_do_robotstxt' ), 20 );

		// The news sitemap is cached; drop the cached copy whenever a post is published.
		add_action( 'publish_post', array( $this, 'callback_action_flush_news_sitemap_cache' ), 10 );

		// Allow all stored sitemap data to be purged on demand.
		add_action( 'jetpack_sitemaps_purge_data', array( $this, 'callback_action_purge_data' ) );

		/*
		 * Module parameters are stored as options in the database so that
		 * sitemap data can be served without processing all of init first.
		 * This late init callback evaluates the location filter and stores
		 * its result.
		 */
		add_action( 'init', array( $this, 'callback_action_filter_sitemap_location' ), 999 );
	}
139
140
	/**
141
	 * Echo a raw string of given content-type.
142
	 *
143
	 * @access private
144
	 * @since 4.8.0
145
	 *
146
	 * @param string $the_content_type The content type to be served.
147
	 * @param string $the_content The string to be echoed.
148
	 */
149
	private function serve_raw_and_die( $the_content_type, $the_content ) {
		header( 'Content-Type: ' . $the_content_type . '; charset=UTF-8' );

		// Mark the main query as a 'sitemap' feed request.
		global $wp_query;
		$wp_query->is_feed = true;
		set_query_var( 'feed', 'sitemap' );

		// An empty string means there is nothing to serve: answer with a 404 page.
		if ( '' === $the_content ) {
			$error = __( 'No sitemap found. Please try again later.', 'jetpack' );

			if ( current_user_can( 'manage_options' ) ) {
				$next_run = wp_next_scheduled( 'jp_sitemap_cron_hook' );

				/*
				 * wp_next_scheduled() returns false when no generation event is
				 * queued. Feeding that to human_time_diff() would measure from the
				 * Unix epoch and announce a wait of several decades, so the more
				 * specific message is only used when a real timestamp is available.
				 */
				if ( false !== $next_run ) {
					/* translators: %s is a human_time_diff until next sitemap generation. */
					$error = sprintf( __( 'No sitemap found. The system will try to build it again in %s.', 'jetpack' ), human_time_diff( $next_run ) );
				}
			}

			wp_die(
				esc_html( $error ),
				esc_html__( 'Sitemaps', 'jetpack' ),
				array(
					'response' => 404,
				)
			);
		}

		// The content is emitted as-is; callers in this class pass complete XML/XSL documents.
		echo $the_content;

		die();
	}
177
178
	/**
179
	 * Callback to intercept sitemap url requests and serve sitemap files.
180
	 *
181
	 * @access public
182
	 * @since 4.8.0
183
	 */
184
	public function callback_action_catch_sitemap_urls() {
		// The raw path(+query) of the requested URI, or an empty string when unavailable.
		$raw_uri = '';
		if ( isset( $_SERVER['REQUEST_URI'] ) ) { // WPCS: Input var okay.
			$raw_uri = sanitize_text_field(
				wp_unslash( $_SERVER['REQUEST_URI'] ) // WPCS: Input var okay.
			);
		}

		$request = $this->finder->recognize_sitemap_uri( $raw_uri );

		// Not recognized as a sitemap request: do nothing and let the request continue.
		if ( ! isset( $request['sitemap_name'] ) ) {
			return;
		}

		$name = $request['sitemap_name'];

		/**
		 * Filter the content type used to serve the sitemap XML files.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param string $xml_content_type By default, it's 'text/xml'.
		 */
		$xml_content_type = apply_filters( 'jetpack_sitemap_content_type', 'text/xml' );

		// Master sitemap xml.
		if ( preg_match( '/^sitemap\.xml$/', $name ) ) {
			$master = $this->librarian->get_sitemap_text(
				jp_sitemap_filename( JP_MASTER_SITEMAP_TYPE, 0 ),
				JP_MASTER_SITEMAP_TYPE
			);

			// If there is no master sitemap yet, serve an empty sitemap instead of a 404.
			if ( empty( $master ) ) {
				$empty_builder = new Jetpack_Sitemap_Builder();
				$master        = $empty_builder->empty_sitemap_xml();
			}

			$this->serve_raw_and_die( $xml_content_type, $master );
		}

		// Sitemaps and sitemap indices kept by the librarian: URL pattern => stored type.
		$stored_types = array(
			'/^sitemap-[1-9][0-9]*\.xml$/'             => JP_PAGE_SITEMAP_TYPE,
			'/^sitemap-index-[1-9][0-9]*\.xml$/'       => JP_PAGE_SITEMAP_INDEX_TYPE,
			'/^image-sitemap-[1-9][0-9]*\.xml$/'       => JP_IMAGE_SITEMAP_TYPE,
			'/^image-sitemap-index-[1-9][0-9]*\.xml$/' => JP_IMAGE_SITEMAP_INDEX_TYPE,
			'/^video-sitemap-[1-9][0-9]*\.xml$/'       => JP_VIDEO_SITEMAP_TYPE,
			'/^video-sitemap-index-[1-9][0-9]*\.xml$/' => JP_VIDEO_SITEMAP_INDEX_TYPE,
		);

		foreach ( $stored_types as $pattern => $sitemap_type ) {
			if ( preg_match( $pattern, $name ) ) {
				$this->serve_raw_and_die(
					$xml_content_type,
					$this->librarian->get_sitemap_text( $name, $sitemap_type )
				);
			}
		}

		// XSL stylesheets: URL pattern => static Jetpack_Sitemap_Stylist method producing it.
		$stylesheets = array(
			'/^sitemap\.xsl$/'       => 'sitemap_xsl',
			'/^sitemap-index\.xsl$/' => 'sitemap_index_xsl',
			'/^image-sitemap\.xsl$/' => 'image_sitemap_xsl',
			'/^video-sitemap\.xsl$/' => 'video_sitemap_xsl',
			'/^news-sitemap\.xsl$/'  => 'news_sitemap_xsl',
		);

		foreach ( $stylesheets as $pattern => $stylist_method ) {
			if ( preg_match( $pattern, $name ) ) {
				// The stylesheet is only generated once its pattern has matched.
				$this->serve_raw_and_die(
					'application/xml',
					call_user_func( array( 'Jetpack_Sitemap_Stylist', $stylist_method ) )
				);
			}
		}

		// News sitemap xml, produced by the builder at request time.
		if ( preg_match( '/^news-sitemap\.xml$/', $name ) ) {
			$news_builder = new Jetpack_Sitemap_Builder();
			$this->serve_raw_and_die(
				$xml_content_type,
				$news_builder->news_sitemap_xml()
			);
		}
	}
361
362
	/**
363
	 * Callback for adding sitemap-interval to the list of schedules.
364
	 *
365
	 * @access public
366
	 * @since 4.8.0
367
	 *
368
	 * @param array $schedules The array of WP_Cron schedules.
369
	 *
370
	 * @return array The updated array of WP_Cron schedules.
371
	 */
372
	public function callback_add_sitemap_schedule( $schedules ) {
		// Add (or overwrite) the 'sitemap-interval' recurrence used by the sitemap cron event.
		$display_name = __( 'Sitemap Interval', 'jetpack' );

		$schedules['sitemap-interval'] = array(
			'interval' => JP_SITEMAP_INTERVAL,
			'display'  => $display_name,
		);

		return $schedules;
	}
379
380
	/**
381
	 * Callback handler for sitemap cron hook
382
	 *
383
	 * @access public
384
	 */
385
	public function callback_sitemap_cron_hook() {
		// Each cron run gets its own builder, which performs the sitemap update.
		$builder = new Jetpack_Sitemap_Builder();
		$builder->update_sitemap();
	}
389
390
	/**
391
	 * Add actions to schedule sitemap generation.
392
	 * Should only be called once, in the constructor.
393
	 *
394
	 * @access private
395
	 * @since 4.8.0
396
	 */
397
	private function schedule_sitemap_generation() {
		// Make the 'sitemap-interval' recurrence available to WP-Cron.
		add_filter( 'cron_schedules', array( $this, 'callback_add_sitemap_schedule' ) ); // phpcs:ignore WordPress.WP.CronInterval.ChangeDetected

		// Run the sitemap update whenever the cron hook fires.
		add_action( 'jp_sitemap_cron_hook', array( $this, 'callback_sitemap_cron_hook' ) );

		// An event is already queued for the hook: nothing left to schedule.
		if ( wp_next_scheduled( 'jp_sitemap_cron_hook' ) ) {
			return;
		}

		/**
		 * Filter the delay in seconds until sitemap generation cron job is started.
		 *
		 * This filter allows a site operator or hosting provider to potentially spread out sitemap generation for a
		 * lot of sites over time. By default, it will be randomly done over 15 minutes.
		 *
		 * @module sitemaps
		 * @since 6.6.1
		 *
		 * @param int $delay Time to delay in seconds.
		 */
		$delay = apply_filters( 'jetpack_sitemap_generation_delay', MINUTE_IN_SECONDS * wp_rand( 1, 15 ) ); // Randomly space it out to start within next fifteen minutes.

		$first_run = time() + $delay;
		wp_schedule_event( $first_run, 'sitemap-interval', 'jp_sitemap_cron_hook' );
	}
426
427
	/**
428
	 * Callback to add sitemap to robots.txt.
429
	 *
430
	 * @access public
431
	 * @since 4.8.0
432
	 */
433
	public function callback_action_do_robotstxt() {

		/**
		 * Filter whether to make the default sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_sitemap Make default sitemap discoverable to robots.
		 */
		if ( true === apply_filters( 'jetpack_sitemap_generate', true ) ) {
			// Only a strict boolean true from the filter prints the line.
			printf(
				"Sitemap: %s\n",
				esc_url( $this->finder->construct_sitemap_url( 'sitemap.xml' ) )
			);
		}

		/**
		 * Filter whether to make the news sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
		 */
		if ( true === apply_filters( 'jetpack_news_sitemap_generate', true ) ) {
			printf(
				"Sitemap: %s\n",
				esc_url( $this->finder->construct_sitemap_url( 'news-sitemap.xml' ) )
			);
		}
	}
465
466
	/**
467
	 * Callback to delete the news sitemap cache.
468
	 *
469
	 * @access public
470
	 * @since 4.8.0
471
	 */
472
	public function callback_action_flush_news_sitemap_cache() {
		// Remove the transient that holds the cached news sitemap XML.
		delete_transient( 'jetpack_news_sitemap_xml' );
	}
475
476
	/**
477
	 * Callback for resetting stored sitemap data.
478
	 *
479
	 * @access public
480
	 * @since 5.3.0
481
	 * @since 6.7.0 Schedules a regeneration.
482
	 */
483
	public function callback_action_purge_data() {
		// Drop the cached news sitemap, then everything the librarian has stored.
		$this->callback_action_flush_news_sitemap_cache();
		$this->librarian->delete_all_stored_sitemap_data();

		/** This filter is documented in modules/sitemaps/sitemaps.php */
		$delay = apply_filters( 'jetpack_sitemap_generation_delay', MINUTE_IN_SECONDS * wp_rand( 1, 15 ) ); // Randomly space it out to start within next fifteen minutes.

		// Queue a one-off regeneration so the purged data gets rebuilt.
		$regenerate_at = time() + $delay;
		wp_schedule_single_event( $regenerate_at, 'jp_sitemap_cron_hook' );
	}
490
491
	/**
492
	 * Callback to set the sitemap location.
493
	 *
494
	 * @access public
495
	 * @since 4.8.0
496
	 */
497
	public function callback_action_filter_sitemap_location() {
		/**
		 * Additional path for sitemap URIs. Default value is empty.
		 *
		 * This string is any additional path fragment you want included between
		 * the home URL and the sitemap filenames. Exactly how this fragment is
		 * interpreted depends on your permalink settings. For example:
		 *
		 *   Pretty permalinks:
		 *     home_url() . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 *   Plain ("ugly") permalinks:
		 *     home_url() . jetpack_sitemap_location . '/?jetpack-sitemap=sitemap.xml'
		 *
		 *   PATHINFO permalinks:
		 *     home_url() . '/index.php' . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 * where 'sitemap.xml' is the name of a specific sitemap file.
		 * The value of this filter must be a valid path fragment per RFC 3986;
		 * in particular it must either be empty or begin with a '/'.
		 * Also take care that any restrictions on sitemap location imposed by
		 * the sitemap protocol are satisfied.
		 *
		 * The result of this filter is stored in an option, 'jetpack_sitemap_location';
		 * that option is what gets read when the sitemap location is needed.
		 * This way we don't have to wait for init to finish before building sitemaps.
		 *
		 * @link https://tools.ietf.org/html/rfc3986#section-3.3 RFC 3986
		 * @link http://www.sitemaps.org/ The sitemap protocol
		 *
		 * @since 4.8.0
		 */
		$location = apply_filters( 'jetpack_sitemap_location', '' );

		// Persist the filtered fragment under the option name the URL builders read.
		update_option( 'jetpack_sitemap_location', $location );
	}
537
538
} // End Jetpack_Sitemap_Manager class.
539
540
// Bootstrap the module: the constructor registers the sitemap hooks and cron wiring.
new Jetpack_Sitemap_Manager();
541
542
/**
543
 * Absolute URL of the current blog's sitemap.
544
 *
545
 * @module sitemaps
546
 *
547
 * @since  3.9.0
548
 * @since  4.8.1 Code uses method found in Jetpack_Sitemap_Finder::construct_sitemap_url in 4.8.0.
549
 *                It has been moved here to avoid fatal errors with other plugins that were expecting to find this function.
550
 *
551
 * @param string $filename Sitemap file name. Defaults to 'sitemap.xml', the initial sitemaps page.
552
 *
553
 * @return string Sitemap URL.
554
 */
555
function jetpack_sitemap_uri( $filename = 'sitemap.xml' ) {
	global $wp_rewrite;

	// Extra path fragment stored in the 'jetpack_sitemap_location' option (empty by default).
	$location = Jetpack_Options::get_option_and_ensure_autoload( 'jetpack_sitemap_location', '' );

	// Build the path relative to the home URL according to the permalink structure.
	if ( $wp_rewrite->using_index_permalinks() ) {
		// PATHINFO permalinks.
		$relative_path = '/index.php' . $location . '/' . $filename;
	} elseif ( $wp_rewrite->using_permalinks() ) {
		// Pretty permalinks.
		$relative_path = $location . '/' . $filename;
	} else {
		// Plain permalinks: the file name travels in a query argument.
		$relative_path = $location . '/?jetpack-sitemap=' . $filename;
	}

	$sitemap_url = home_url( $relative_path );

	/**
	 * Filter sitemap URL relative to home URL.
	 *
	 * NOTE(review): this hook shares its name with the filter applied in
	 * Jetpack_Sitemap_Manager::callback_action_filter_sitemap_location(),
	 * which filters a path fragment rather than a full URL. The name is kept
	 * unchanged for backward compatibility.
	 *
	 * @module sitemaps
	 *
	 * @since 3.9.0
	 *
	 * @param string $sitemap_url Sitemap URL.
	 */
	return apply_filters( 'jetpack_sitemap_location', $sitemap_url );
}
579