Completed
Push — add/gdpr-ads-compliance ( c52a1e...cda5c5 )
by
unknown
26:32 queued 14:46
created

Jetpack_Sitemap_Manager   A

Complexity

Total Complexity 32

Size/Duplication

Total Lines 454
Duplicated Lines 13.66 %

Coupling/Cohesion

Components 1
Dependencies 5

Importance

Changes 0
Metric Value
dl 62
loc 454
rs 9.6
c 0
b 0
f 0
wmc 32
lcom 1
cbo 5

9 Methods

Rating   Name   Duplication   Size   Complexity  
A callback_add_sitemap_schedule() 0 7 1
B __construct() 0 55 5
A serve_raw_and_die() 0 21 2
F callback_action_catch_sitemap_urls() 54 172 16
A schedule_sitemap_generation() 0 21 2
B callback_action_do_robotstxt() 8 34 3
A callback_action_flush_news_sitemap_cache() 0 3 1
A callback_action_purge_data() 0 4 1
B callback_action_filter_sitemap_location() 0 42 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
/**
3
 * Generate sitemap files in base XML as well as some namespace extensions.
4
 *
5
 * This module generates two different base sitemaps.
6
 *
7
 * 1. sitemap.xml
8
 *    The basic sitemap is updated regularly by wp-cron. It is stored in the
9
 *    database and retrieved when requested. This sitemap aims to include canonical
10
 *    URLs for all published content and abide by the sitemap spec. This is the root
11
 *    of a tree of sitemap and sitemap index xml files, depending on the number of URLs.
12
 *
13
 *    By default the sitemap contains published posts of type 'post' and 'page', as
14
 *    well as the home url. To include other post types use the 'jetpack_sitemap_post_types'
15
 *    filter.
16
 *
17
 * @link http://sitemaps.org/protocol.php Base sitemaps protocol.
18
 * @link https://support.google.com/webmasters/answer/178636 Image sitemap extension.
19
 * @link https://developers.google.com/webmasters/videosearch/sitemaps Video sitemap extension.
20
 *
21
 * 2. news-sitemap.xml
22
 *    The news sitemap is generated on the fly when requested. It does not aim for
23
 *    completeness, instead including at most 1000 of the most recent published posts
24
 *    from the previous 2 days, per the news-sitemap spec.
25
 *
26
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=74288 News sitemap extension.
27
 *
28
 * @package Jetpack
29
 * @since 3.9.0
30
 * @since 4.8.0 Remove 1000 post limit.
31
 * @author Automattic
32
 */
33
34
require_once dirname( __FILE__ ) . '/sitemap-constants.php';
35
require_once dirname( __FILE__ ) . '/sitemap-buffer.php';
36
require_once dirname( __FILE__ ) . '/sitemap-stylist.php';
37
require_once dirname( __FILE__ ) . '/sitemap-librarian.php';
38
require_once dirname( __FILE__ ) . '/sitemap-finder.php';
39
require_once dirname( __FILE__ ) . '/sitemap-builder.php';
40
41
if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) {
42
	require_once dirname( __FILE__ ) . '/sitemap-logger.php';
43
}
44
45
/**
46
 * Governs the generation, storage, and serving of sitemaps.
47
 *
48
 * @since 4.8.0
49
 */
50
class Jetpack_Sitemap_Manager {
51
52
	/**
53
	 * @see Jetpack_Sitemap_Librarian
54
	 * @since 4.8.0
55
	 * @var Jetpack_Sitemap_Librarian $librarian Librarian object for storing and retrieving sitemap data.
56
	 */
57
	private $librarian;
58
59
	/**
60
	 * @see Jetpack_Sitemap_Logger
61
	 * @since 4.8.0
62
	 * @var Jetpack_Sitemap_Logger $logger Logger object for reporting debug messages.
63
	 */
64
	private $logger;
65
66
	/**
67
	 * @see Jetpack_Sitemap_Finder
68
	 * @since 4.8.0
69
	 * @var Jetpack_Sitemap_Finder $finder Finder object for dealing with sitemap URIs.
70
	 */
71
	private $finder;
72
73
	/**
	 * Build the manager: create its collaborators and register all of its hooks.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function __construct() {
		$this->librarian = new Jetpack_Sitemap_Librarian();
		$this->finder = new Jetpack_Sitemap_Finder();

		// A logger is only created when WP_DEBUG is defined and strictly true.
		$debugging = defined( 'WP_DEBUG' ) && ( true === WP_DEBUG );
		if ( $debugging ) {
			$this->logger = new Jetpack_Sitemap_Logger();
		}

		// Sitemap URL handler. It runs at priority 100 when IS_WPCOM is
		// defined and truthy, and at priority 10 otherwise.
		$catch_priority = ( defined( 'IS_WPCOM' ) && IS_WPCOM ) ? 100 : 10;
		add_action( 'init', array( $this, 'callback_action_catch_sitemap_urls' ), $catch_priority );

		// Add generator to wp_cron task list.
		$this->schedule_sitemap_generation();

		// Add sitemap to robots.txt.
		add_action( 'do_robotstxt', array( $this, 'callback_action_do_robotstxt' ), 20 );

		// The news sitemap is cached; flush that cache when a post is published.
		add_action( 'publish_post', array( $this, 'callback_action_flush_news_sitemap_cache' ), 10 );

		// Lets other code purge all sitemap data by firing this action.
		add_action( 'jetpack_sitemaps_purge_data', array( $this, 'callback_action_purge_data' ) );

		/*
		 * Module parameters are stored as options in the database.
		 * This allows us to avoid having to process all of init
		 * before serving the sitemap data. The callback below runs
		 * the location filter and stores its result.
		 */
		add_action( 'init', array( $this, 'callback_action_filter_sitemap_location' ), 999 );
	}
134
135
	/**
	 * Send a raw string to the client with the given content type, then stop.
	 *
	 * An empty string is treated as "no sitemap available": wp_die() is called
	 * with a 404 response code and a translated explanation.
	 *
	 * @access private
	 * @since 4.8.0
	 *
	 * @param string $the_content_type The content type to be served.
	 * @param string $the_content The string to be echoed.
	 */
	private function serve_raw_and_die( $the_content_type, $the_content ) {
		global $wp_query;

		header( 'Content-Type: ' . $the_content_type . '; charset=UTF-8' );

		// Mark the current query as a feed named 'sitemap'.
		$wp_query->is_feed = true;
		set_query_var( 'feed', 'sitemap' );

		if ( '' === $the_content ) {
			$message = esc_html__( "No sitemap found. Maybe it's being generated. Please try again later.", 'jetpack' );
			$title = esc_html__( 'Sitemaps', 'jetpack' );

			wp_die( $message, $title, array( 'response' => 404 ) );
		}

		echo $the_content;

		// Static analysis flags this exit expression because exiting makes the
		// method hard to test. It is kept: the request ends once the content
		// has been echoed.
		exit;
	}
165
166
	/**
	 * Callback to intercept sitemap url requests and serve sitemap files.
	 *
	 * The requested URI is handed to the finder. When it recognizes a sitemap
	 * name, that name is compared against the known sitemap patterns and the
	 * matching XML document or XSL stylesheet is passed to serve_raw_and_die().
	 * Requests that match no pattern simply return.
	 *
	 * All patterns are anchored and no name can match more than one of them,
	 * so the order in which they are tested does not change the outcome.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_catch_sitemap_urls() {
		// The raw path(+query) of the requested URI.
		$raw_uri = '';
		if ( isset( $_SERVER['REQUEST_URI'] ) ) { // WPCS: Input var okay.
			$raw_uri = sanitize_text_field(
				wp_unslash( $_SERVER['REQUEST_URI'] ) // WPCS: Input var okay.
			);
		}

		$request = $this->finder->recognize_sitemap_uri( $raw_uri );

		// Not a sitemap request: nothing to do.
		if ( ! isset( $request['sitemap_name'] ) ) {
			return;
		}

		$sitemap_name = $request['sitemap_name'];

		/**
		 * Filter the content type used to serve the sitemap XML files.
		 *
		 * @module sitemaps
		 *
		 * @since 3.9.0
		 *
		 * @param string $xml_content_type By default, it's 'text/xml'.
		 */
		$xml_content_type = apply_filters( 'jetpack_sitemap_content_type', 'text/xml' );

		// Master sitemap: looked up under its canonical stored filename
		// rather than under the requested name.
		if ( preg_match( '/^sitemap\.xml$/', $sitemap_name ) ) {
			$this->serve_raw_and_die(
				$xml_content_type,
				$this->librarian->get_sitemap_text(
					jp_sitemap_filename( JP_MASTER_SITEMAP_TYPE, 0 ),
					JP_MASTER_SITEMAP_TYPE
				)
			);
		}

		// Stored sitemaps and sitemap indexes: URL pattern => sitemap type.
		$stored_sitemaps = array(
			'/^sitemap-[1-9][0-9]*\.xml$/'             => JP_PAGE_SITEMAP_TYPE,
			'/^sitemap-index-[1-9][0-9]*\.xml$/'       => JP_PAGE_SITEMAP_INDEX_TYPE,
			'/^image-sitemap-[1-9][0-9]*\.xml$/'       => JP_IMAGE_SITEMAP_TYPE,
			'/^image-sitemap-index-[1-9][0-9]*\.xml$/' => JP_IMAGE_SITEMAP_INDEX_TYPE,
			'/^video-sitemap-[1-9][0-9]*\.xml$/'       => JP_VIDEO_SITEMAP_TYPE,
			'/^video-sitemap-index-[1-9][0-9]*\.xml$/' => JP_VIDEO_SITEMAP_INDEX_TYPE,
		);

		foreach ( $stored_sitemaps as $pattern => $sitemap_type ) {
			if ( preg_match( $pattern, $sitemap_name ) ) {
				$this->serve_raw_and_die(
					$xml_content_type,
					$this->librarian->get_sitemap_text( $sitemap_name, $sitemap_type )
				);
			}
		}

		// News sitemap: produced by the builder instead of read via the librarian.
		if ( preg_match( '/^news-sitemap\.xml$/', $sitemap_name ) ) {
			$sitemap_builder = new Jetpack_Sitemap_Builder();
			$this->serve_raw_and_die(
				$xml_content_type,
				$sitemap_builder->news_sitemap_xml()
			);
		}

		// XSL stylesheets: URL pattern => static Jetpack_Sitemap_Stylist method.
		// Only the method for the matching pattern is ever invoked.
		$stylesheets = array(
			'/^sitemap\.xsl$/'       => 'sitemap_xsl',
			'/^sitemap-index\.xsl$/' => 'sitemap_index_xsl',
			'/^image-sitemap\.xsl$/' => 'image_sitemap_xsl',
			'/^video-sitemap\.xsl$/' => 'video_sitemap_xsl',
			'/^news-sitemap\.xsl$/'  => 'news_sitemap_xsl',
		);

		foreach ( $stylesheets as $pattern => $stylist_method ) {
			if ( preg_match( $pattern, $sitemap_name ) ) {
				$this->serve_raw_and_die(
					'application/xml',
					call_user_func( array( 'Jetpack_Sitemap_Stylist', $stylist_method ) )
				);
			}
		}

		// URL did not match any sitemap patterns.
	}
344
345
	/**
	 * Register the 'sitemap-interval' recurrence in the list of cron schedules.
	 *
	 * @access public
	 * @since 4.8.0
	 *
	 * @param array $schedules The array of WP_Cron schedules.
	 *
	 * @return array The updated array of WP_Cron schedules.
	 */
	public function callback_add_sitemap_schedule( $schedules ) {
		$sitemap_schedule = array(
			'interval' => JP_SITEMAP_INTERVAL,
			'display'  => __( 'Sitemap Interval', 'jetpack' ),
		);

		$schedules['sitemap-interval'] = $sitemap_schedule;

		return $schedules;
	}
362
363
	/**
	 * Hook sitemap generation into WP-Cron.
	 *
	 * Registers the custom schedule, attaches the builder's update_sitemap()
	 * method to the cron hook, and schedules the recurring event unless one is
	 * already scheduled. Should only be called once, in the constructor.
	 *
	 * @access private
	 * @since 4.8.0
	 */
	private function schedule_sitemap_generation() {
		$cron_hook = 'jp_sitemap_cron_hook';

		// Make the 'sitemap-interval' recurrence available.
		add_filter( 'cron_schedules', array( $this, 'callback_add_sitemap_schedule' ) );

		$builder = new Jetpack_Sitemap_Builder();
		add_action( $cron_hook, array( $builder, 'update_sitemap' ) );

		// An event is already queued; do not schedule a second one.
		if ( wp_next_scheduled( $cron_hook ) ) {
			return;
		}

		wp_schedule_event( time(), 'sitemap-interval', $cron_hook );
	}
391
392
	/**
	 * Callback that prints "Sitemap:" lines for robots.txt.
	 *
	 * One line is echoed for sitemap.xml and one for news-sitemap.xml. Each line
	 * is printed only when its filter returns exactly true.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_do_robotstxt() {

		/**
		 * Filter whether to make the default sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_sitemap Make default sitemap discoverable to robots.
		 */
		$discover_sitemap = apply_filters( 'jetpack_sitemap_generate', true );

		if ( true === $discover_sitemap ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'sitemap.xml' ) ) . "\n";
		}

		/**
		 * Filter whether to make the news sitemap discoverable to robots or not. Default true.
		 *
		 * @module sitemaps
		 * @since 3.9.0
		 *
		 * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
		 */
		$discover_news_sitemap = apply_filters( 'jetpack_news_sitemap_generate', true );

		if ( true === $discover_news_sitemap ) {
			echo 'Sitemap: ' . esc_url( $this->finder->construct_sitemap_url( 'news-sitemap.xml' ) ) . "\n";
		}
	}
432
433
	/**
	 * Callback that discards the cached news sitemap.
	 *
	 * Deletes the 'jetpack_news_sitemap_xml' transient.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_flush_news_sitemap_cache() {
		$transient_key = 'jetpack_news_sitemap_xml';

		delete_transient( $transient_key );
	}
442
443
	/**
	 * Callback that resets all stored sitemap data.
	 *
	 * Flushes the cached news sitemap first, then asks the librarian to delete
	 * everything it has stored.
	 *
	 * @access public
	 * @since 5.3.0
	 */
	public function callback_action_purge_data() {
		// Cached news sitemap.
		$this->callback_action_flush_news_sitemap_cache();

		// Sitemap data held by the librarian.
		$this->librarian->delete_all_stored_sitemap_data();
	}
453
454
	/**
	 * Callback that runs the sitemap location filter and stores its result.
	 *
	 * The filtered value is saved in the 'jetpack_sitemap_location' option.
	 *
	 * @access public
	 * @since 4.8.0
	 */
	public function callback_action_filter_sitemap_location() {
		/**
		 * Additional path for sitemap URIs. Default value is empty.
		 *
		 * This string is any additional path fragment you want included between
		 * the home URL and the sitemap filenames. Exactly how this fragment is
		 * interpreted depends on your permalink settings. For example:
		 *
		 *   Pretty permalinks:
		 *     home_url() . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 *   Plain ("ugly") permalinks:
		 *     home_url() . jetpack_sitemap_location . '/?jetpack-sitemap=sitemap.xml'
		 *
		 *   PATHINFO permalinks:
		 *     home_url() . '/index.php' . jetpack_sitemap_location . '/sitemap.xml'
		 *
		 * where 'sitemap.xml' is the name of a specific sitemap file.
		 * The value of this filter must be a valid path fragment per RFC 3986;
		 * in particular it must either be empty or begin with a '/'.
		 * Also take care that any restrictions on sitemap location imposed by
		 * the sitemap protocol are satisfied.
		 *
		 * The result of this filter is stored in an option, 'jetpack_sitemap_location';
		 * that option is what gets read when the sitemap location is needed.
		 * This way we don't have to wait for init to finish before building sitemaps.
		 *
		 * @link https://tools.ietf.org/html/rfc3986#section-3.3 RFC 3986
		 * @link http://www.sitemaps.org/ The sitemap protocol
		 *
		 * @since 4.8.0
		 */
		$sitemap_location = apply_filters( 'jetpack_sitemap_location', '' );

		update_option( 'jetpack_sitemap_location', $sitemap_location );
	}
502
503
} // End Jetpack_Sitemap_Manager class.
504
505
new Jetpack_Sitemap_Manager();
506
507
/**
 * Absolute URL of the current blog's sitemap.
 *
 * The stored 'jetpack_sitemap_location' option is combined with the file name
 * according to the active permalink structure, and the result is passed to
 * home_url().
 *
 * NOTE(review): the filter applied to the finished URL has the same name,
 * 'jetpack_sitemap_location', as the filter that
 * Jetpack_Sitemap_Manager::callback_action_filter_sitemap_location() runs with
 * an empty-string default to obtain a path fragment. A callback attached to
 * that hook therefore receives a full URL here. Confirm this is intended
 * before relying on either value.
 *
 * @module sitemaps
 *
 * @since  3.9.0
 * @since  4.8.1 Code uses method found in Jetpack_Sitemap_Finder::construct_sitemap_url in 4.8.0.
 *                It has been moved here to avoid fatal errors with other plugins that were expecting to find this function.
 *
 * @param string $filename Sitemap file name. Defaults to 'sitemap.xml', the initial sitemaps page.
 *
 * @return string Sitemap URL.
 */
function jetpack_sitemap_uri( $filename = 'sitemap.xml' ) {
	global $wp_rewrite;

	$location = Jetpack_Options::get_option_and_ensure_autoload( 'jetpack_sitemap_location', '' );

	// Work out the path relative to the home URL for the permalink mode in use.
	if ( $wp_rewrite->using_index_permalinks() ) {
		// PATHINFO permalinks.
		$relative_path = '/index.php' . $location . '/' . $filename;
	} elseif ( $wp_rewrite->using_permalinks() ) {
		// Pretty permalinks.
		$relative_path = $location . '/' . $filename;
	} else {
		// Plain permalinks: the file name travels in the query string.
		$relative_path = $location . '/?jetpack-sitemap=' . $filename;
	}

	$sitemap_url = home_url( $relative_path );

	/**
	 * Filter sitemap URL relative to home URL.
	 *
	 * @module sitemaps
	 *
	 * @since 3.9.0
	 *
	 * @param string $sitemap_url Sitemap URL.
	 */
	return apply_filters( 'jetpack_sitemap_location', $sitemap_url );
}
544