Completed
Push — deprecate/render-nudge ( a7bfad...8c35c9 )
by
unknown
58:24 queued 49:50
created

Table_Checksum::build_checksum_query()   B

Complexity

Conditions 5
Paths 16

Size

Total Lines 67

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
nc 16
nop 4
dl 0
loc 67
rs 8.4088
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Table Checksums Class.
4
 *
5
 * @package automattic/jetpack-sync
6
 */
7
8
namespace Automattic\Jetpack\Sync\Replicastore;
9
10
use Automattic\Jetpack\Sync;
11
use Exception;
12
use WP_Error;
13
14
// TODO add rest endpoints to work with this, hopefully in the same folder.
15
/**
16
 * Class to handle Table Checksums.
17
 */
18
class Table_Checksum {
19
20
	/**
21
	 * Table to be checksummed.
22
	 *
23
	 * @var string
24
	 */
25
	public $table = '';
26
27
	/**
28
	 * Table Checksum Configuration.
29
	 *
30
	 * @var array
31
	 */
32
	public $table_configuration = array();
33
34
	/**
35
	 * Field to be used for range queries.
36
	 *
37
	 * @var string
38
	 */
39
	public $range_field = '';
40
41
	/**
42
	 * ID Field(s) to be used.
43
	 *
44
	 * @var array
45
	 */
46
	public $key_fields = array();
47
48
	/**
49
	 * Field(s) to be used in generating the checksum value.
50
	 *
51
	 * @var array
52
	 */
53
	public $checksum_fields = array();
54
55
	/**
56
	 * Default filter values for the table
57
	 *
58
	 * @var array
59
	 */
60
	public $filter_values = array();
61
62
	/**
63
	 * SQL Query to be used to filter results (allow/disallow).
64
	 *
65
	 * @var string
66
	 */
67
	public $additional_filter_sql = '';
68
69
	/**
70
	 * Default Checksum Table Configurations.
71
	 *
72
	 * @var array
73
	 */
74
	public $default_tables = array();
75
76
	/**
77
	 * Salt to be used when generating checksum.
78
	 *
79
	 * @var string
80
	 */
81
	public $salt = '';
82
83
	/**
84
	 * Tables which are allowed to be checksummed.
85
	 *
86
	 * @var string
87
	 */
88
	public $allowed_tables = array();
89
90
	/**
91
	 * If the table has a "parent" table that it's related to.
92
	 *
93
	 * @var mixed|null
94
	 */
95
	private $parent_table = null;
96
97
	/**
98
	 * Table_Checksum constructor.
99
	 *
100
	 * @param string $table The table to calculate checksums for.
101
	 * @param string $salt  Optional salt to add to the checksum.
0 ignored issues
show
Documentation introduced by
Should the type for parameter $salt not be string|null?

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
102
	 *
103
	 * @throws Exception Throws exception from inner functions.
104
	 */
105
	public function __construct( $table, $salt = null ) {
106
107
		if ( ! Sync\Settings::is_checksum_enabled() ) {
108
			throw new Exception( 'Checksums are currently disabled.' );
109
		}
110
111
		$this->salt = $salt;
112
113
		$this->default_tables = $this->get_default_tables();
114
115
		// TODO change filters to allow the array format.
116
		// TODO add get_fields or similar method to get things out of the table.
117
		// TODO extract this configuration in a better way, still make it work with `$wpdb` names.
118
		// TODO take over the replicastore functions and move them over to this class.
119
		// TODO make the API work.
120
121
		$this->allowed_tables = apply_filters( 'jetpack_sync_checksum_allowed_tables', $this->default_tables );
122
123
		$this->table               = $this->validate_table_name( $table );
124
		$this->table_configuration = $this->allowed_tables[ $table ];
125
126
		$this->prepare_fields( $this->table_configuration );
127
128
	}
129
130
	/**
131
	 * Get Default Table configurations.
132
	 *
133
	 * @return array
134
	 */
135
	private function get_default_tables() {
136
		global $wpdb;
137
138
		return array(
139
			'posts'              => array(
140
				'table'           => $wpdb->posts,
141
				'range_field'     => 'ID',
142
				'key_fields'      => array( 'ID' ),
143
				'checksum_fields' => array( 'post_modified_gmt' ),
144
				'filter_values'   => Sync\Settings::get_disallowed_post_types_structured(),
145
			),
146
			'postmeta'           => array(
147
				'table'           => $wpdb->postmeta,
148
				'range_field'     => 'post_id',
149
				'key_fields'      => array( 'post_id', 'meta_key' ),
150
				'checksum_fields' => array( 'meta_key', 'meta_value' ),
151
				'filter_values'   => Sync\Settings::get_allowed_post_meta_structured(),
152
				'parent_table'    => 'posts',
153
			),
154
			'comments'           => array(
155
				'table'           => $wpdb->comments,
156
				'range_field'     => 'comment_ID',
157
				'key_fields'      => array( 'comment_ID' ),
158
				'checksum_fields' => array( 'comment_content' ),
159
				'filter_values'   => array(
160
					'comment_type' => array(
161
						'operator' => 'IN',
162
						'values'   => apply_filters(
163
							'jetpack_sync_whitelisted_comment_types',
164
							array( '', 'comment', 'trackback', 'pingback', 'review' )
165
						),
166
					),
167
				),
168
				'filter_sql'      => Sync\Settings::get_comments_filter_sql(),
169
			),
170
			'commentmeta'        => array(
171
				'table'           => $wpdb->commentmeta,
172
				'range_field'     => 'comment_id',
173
				'key_fields'      => array( 'comment_id', 'meta_key' ),
174
				'checksum_fields' => array( 'meta_key', 'meta_value' ),
175
				'filter_values'   => Sync\Settings::get_allowed_comment_meta_structured(),
176
				'parent_table'    => 'comments',
177
			),
178
			'terms'              => array(
179
				'table'           => $wpdb->terms,
180
				'range_field'     => 'term_id',
181
				'key_fields'      => array( 'term_id' ),
182
				'checksum_fields' => array( 'term_id', 'name', 'slug' ),
183
			),
184
			'termmeta'           => array(
185
				'table'           => $wpdb->termmeta,
186
				'range_field'     => 'term_id',
187
				'key_fields'      => array( 'term_id', 'meta_key' ),
188
				'checksum_fields' => array( 'meta_key', 'meta_value' ),
189
				'parent_table'    => 'terms',
190
			),
191
			'term_relationships' => array(
192
				'table'           => $wpdb->term_relationships,
193
				'range_field'     => 'object_id',
194
				'key_fields'      => array( 'object_id' ),
195
				'checksum_fields' => array( 'object_id', 'term_taxonomy_id' ),
196
			),
197
			'term_taxonomy'      => array(
198
				'table'           => $wpdb->term_taxonomy,
199
				'range_field'     => 'term_taxonomy_id',
200
				'key_fields'      => array( 'term_taxonomy_id' ),
201
				'checksum_fields' => array( 'term_taxonomy_id', 'term_id', 'taxonomy', 'description', 'parent' ),
202
			),
203
			'links'              => $wpdb->links, // TODO describe in the array format or add exceptions.
204
			'options'            => $wpdb->options, // TODO describe in the array format or add exceptions.
205
		);
206
	}
207
208
	/**
209
	 * Prepare field params based off provided configuration.
210
	 *
211
	 * @param array $table_configuration The table configuration array.
212
	 */
213
	private function prepare_fields( $table_configuration ) {
214
		$this->key_fields            = $table_configuration['key_fields'];
215
		$this->range_field           = $table_configuration['range_field'];
216
		$this->checksum_fields       = $table_configuration['checksum_fields'];
217
		$this->filter_values         = isset( $table_configuration['filter_values'] ) ? $table_configuration['filter_values'] : null;
218
		$this->additional_filter_sql = ! empty( $table_configuration['filter_sql'] ) ? $table_configuration['filter_sql'] : '';
219
		$this->parent_table          = isset( $table_configuration['parent_table'] ) ? $table_configuration['parent_table'] : null;
220
	}
221
222
	/**
223
	 * Verify provided table name is valid for checksum processing.
224
	 *
225
	 * @param string $table Table name to validate.
226
	 *
227
	 * @return mixed|string
228
	 * @throws Exception Throw an exception on validation failure.
229
	 */
230
	private function validate_table_name( $table ) {
231
		if ( empty( $table ) ) {
232
			throw new Exception( 'Invalid table name: empty' );
233
		}
234
235
		if ( ! array_key_exists( $table, $this->allowed_tables ) ) {
236
			throw new Exception( "Invalid table name: $table not allowed" );
237
		}
238
239
		// TODO other checks if such are needed.
240
241
		return $this->allowed_tables[ $table ]['table'];
242
	}
243
244
	/**
245
	 * Verify provided fields are proper names.
246
	 *
247
	 * @param array $fields Array of field names to validate.
248
	 *
249
	 * @throws Exception Throw an exception on failure to validate.
250
	 */
251
	private function validate_fields( $fields ) {
252
		foreach ( $fields as $field ) {
253
			if ( ! preg_match( '/^[0-9,a-z,A-Z$_]+$/i', $field ) ) {
254
				throw new Exception( "Invalid field name: $field is not allowed" );
255
			}
256
257
			// TODO other verifications of the field names.
258
		}
259
	}
260
261
	/**
262
	 * Verify the fields exist in the table.
263
	 *
264
	 * @param array $fields Array of fields to validate.
265
	 *
266
	 * @return bool
267
	 * @throws Exception Throw an exception on failure to validate.
268
	 */
269
	private function validate_fields_against_table( $fields ) {
270
		global $wpdb;
271
272
		// phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared
273
		$result = $wpdb->get_row( "SELECT * FROM {$this->table} LIMIT 1", ARRAY_A );
274
		if ( ! is_array( $result ) ) {
275
			throw new Exception( 'Unexpected $wpdb->query output: not array' );
276
		}
277
278
		// Check if the fields are actually contained in the table.
279
		foreach ( $fields as $field_to_check ) {
280
			if ( ! array_key_exists( $field_to_check, $result ) ) {
281
				throw new Exception( "Invalid field name: field '{$field_to_check}' doesn't exist in table {$this->table}" );
282
			}
283
		}
284
285
		return true;
286
	}
287
288
	/**
289
	 * Verify the configured fields.
290
	 *
291
	 * @throws Exception Throw an exception on failure to validate in the internal functions.
292
	 */
293
	private function validate_input() {
294
		$fields = array_merge( array( $this->range_field ), $this->key_fields, $this->checksum_fields );
295
296
		$this->validate_fields( $fields );
297
		$this->validate_fields_against_table( $fields );
298
	}
299
300
	/**
301
	 * Prepare filter values as SQL statements to be added to the other filters.
302
	 *
303
	 * @param array  $filter_values The filter values array.
304
	 * @param string $table_prefix  If the values are going to be used in a sub-query, add a prefix with the table alias.
305
	 *
306
	 * @return array|null
307
	 */
308
	private function prepare_filter_values_as_sql( $filter_values = array(), $table_prefix = '' ) {
309
		global $wpdb;
310
311
		if ( ! is_array( $filter_values ) ) {
312
			return null;
313
		}
314
315
		$result = array();
316
317
		foreach ( $filter_values as $field => $filter ) {
318
			$key = ( ! empty( $table_prefix ) ? $table_prefix : $this->table ) . '.' . $field;
319
320
			switch ( $filter['operator'] ) {
321
				case 'IN':
322
				case 'NOT IN':
323
					$values_placeholders = implode( ',', array_fill( 0, count( $filter['values'] ), '%s' ) );
324
					$statement           = "{$key} {$filter['operator']} ( $values_placeholders )";
325
326
					// phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
327
					$prepared_statement = $wpdb->prepare( $statement, $filter['values'] );
328
329
					$result[] = $prepared_statement;
330
					break;
331
				// TODO implement other operators if needed.
332
			}
333
		}
334
335
		return $result;
336
	}
337
338
	/**
339
	 * Build the filter query baased off range fields and values and the additional sql.
340
	 *
341
	 * @param int|null   $range_from    Start of the range.
342
	 * @param int|null   $range_to      End of the range.
343
	 * @param array|null $filter_values Additional filter values. Not used at the moment.
344
	 * @param string     $table_prefix  Table name to be prefixed to the columns. Used in sub-queries where columns can clash.
345
	 *
346
	 * @return string
347
	 */
348
	public function build_filter_statement( $range_from = null, $range_to = null, $filter_values = null, $table_prefix = '' ) {
349
		global $wpdb;
350
351
		// If there is a field prefix that we want to use with table aliases.
352
		$parent_prefix = ( ! empty( $table_prefix ) ? $table_prefix : $this->table ) . '.';
353
354
		/**
355
		 * Prepare the ranges.
356
		 */
357
358
		$filter_array = array( '1 = 1' );
359 View Code Duplication
		if ( null !== $range_from ) {
360
			// phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared
361
			$filter_array[] = $wpdb->prepare( "{$parent_prefix}{$this->range_field} >= %d", array( intval( $range_from ) ) );
362
		}
363 View Code Duplication
		if ( null !== $range_to ) {
364
			// phpcs:ignore WordPress.DB.PreparedSQL.InterpolatedNotPrepared
365
			$filter_array[] = $wpdb->prepare( "{$parent_prefix}{$this->range_field} <= %d", array( intval( $range_to ) ) );
366
		}
367
368
		/**
369
		 * End prepare the ranges.
370
		 */
371
372
		/**
373
		 * Prepare data filters.
374
		 */
375
376
		// Default filters.
377
		if ( $this->filter_values ) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->filter_values of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
378
			$prepared_values_statements = $this->prepare_filter_values_as_sql( $this->filter_values, $table_prefix );
379
			if ( $prepared_values_statements ) {
380
				$filter_array = array_merge( $filter_array, $prepared_values_statements );
381
			}
382
		}
383
384
		// Additional filters.
385
		if ( ! empty( $filter_values ) ) {
386
			// Prepare filtering.
387
			$prepared_values_statements = $this->prepare_filter_values_as_sql( $filter_values, $table_prefix );
388
			if ( $prepared_values_statements ) {
389
				$filter_array = array_merge( $filter_array, $prepared_values_statements );
390
			}
391
		}
392
393
		// Add any additional filters via direct SQL statement.
394
		// Currently used only because we haven't converted all filtering to happen via `filter_values`.
395
		// This SQL is NOT prefixed and column clashes can occur when used in sub-queries.
396
		if ( $this->additional_filter_sql ) {
397
			$filter_array[] = $this->additional_filter_sql;
398
		}
399
400
		/**
401
		 * End prepare data filters.
402
		 */
403
		return implode( ' AND ', $filter_array );
404
	}
405
406
	/**
407
	 * Returns the checksum query. All validation of fields and configurations are expected to occur prior to usage.
408
	 *
409
	 * @param int|null   $range_from      The start of the range.
410
	 * @param int|null   $range_to        The end of the range.
411
	 * @param array|null $filter_values   Additional filter values. Not used at the moment.
412
	 * @param bool       $granular_result If the function should return a granular result.
413
	 *
414
	 * @return string
415
	 *
416
	 * @throws Exception Throws and exception if validation fails in the internal function calls.
417
	 */
418
	private function build_checksum_query( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false ) {
419
		global $wpdb;
420
421
		// Escape the salt.
422
		$salt = $wpdb->prepare( '%s', $this->salt ); // TODO escape or prepare statement.
423
424
		// Prepare the compound key.
425
		$key_fields = array();
426
427
		// Prefix the fields with the table name, to avoid clashes in queries with sub-queries (e.g. meta tables).
428
		foreach ( $this->key_fields as $field ) {
429
			$key_fields[] = $this->table . '.' . $field;
430
		}
431
432
		$key_fields = implode( ',', $key_fields );
433
434
		// Prepare the checksum fields.
435
		$checksum_fields_string = implode( ',', array_merge( $this->checksum_fields, array( $salt ) ) );
436
437
		$additional_fields = '';
438
		if ( $granular_result ) {
439
			// TODO uniq the fields as sometimes(most) range_index is the key and there's no need to select the same field twice.
440
			$additional_fields = "
441
				{$this->table}.{$this->range_field} as range_index,
442
			    {$key_fields},
443
			";
444
		}
445
446
		$filter_stamenet = $this->build_filter_statement( $range_from, $range_to, $filter_values );
447
448
		$join_statement = '';
449
		if ( $this->parent_table ) {
450
			$parent_table_obj    = new Table_Checksum( $this->parent_table );
451
			$parent_filter_query = $parent_table_obj->build_filter_statement( $range_from, $range_to, null, 'parent_table' );
452
453
			$join_statement = "
454
				INNER JOIN {$parent_table_obj->table} as parent_table ON ({$this->table}.{$this->range_field} = parent_table.{$parent_table_obj->range_field} AND {$parent_filter_query})
455
			";
456
		}
457
458
		$query = "
459
			SELECT
460
				{$additional_fields}
461
				SUM(
462
					CRC32(
463
						CONCAT_WS( '#', {$salt}, {$checksum_fields_string} )
464
					)
465
				)  AS checksum
466
			 FROM
467
			    {$this->table}
468
				{$join_statement}
469
			 WHERE
470
				{$filter_stamenet}
471
		";
472
473
		/**
474
		 * We need the GROUP BY only for compound keys.
475
		 */
476
		if ( $granular_result ) {
477
			$query .= "
478
				GROUP BY {$key_fields}
479
				LIMIT 9999999
480
			";
481
		}
482
483
		return $query;
484
	}
485
486
	/**
487
	 * Obtain the min-max values (edges) of the range.
488
	 *
489
	 * @param int|null $range_from The start of the range.
490
	 * @param int|null $range_to   The end of the range.
491
	 * @param int|null $limit      How many values to return.
492
	 *
493
	 * @return array|object|void
494
	 * @throws Exception Throws an exception if validation fails on the internal function calls.
495
	 */
496
	public function get_range_edges( $range_from = null, $range_to = null, $limit = null ) {
497
		global $wpdb;
498
499
		$this->validate_fields( array( $this->range_field ) );
500
501
		// `trim()` to make sure we don't add the statement if it's empty.
502
		$filters = trim( $this->build_filter_statement( $range_from, $range_to ) );
503
504
		$filter_statement = '';
505
		if ( ! empty( $filters ) ) {
506
			$filter_statement = "
507
				WHERE
508
					{$filters}
509
			";
510
		}
511
512
		// Only make the distinct count when we know there can be multiple entries for the range column.
513
		$distinct_count = count( $this->key_fields ) > 1 ? 'DISTINCT' : '';
514
515
		$query = "
516
			SELECT
517
			       MIN({$this->range_field}) as min_range,
518
			       MAX({$this->range_field}) as max_range,
519
			       COUNT( {$distinct_count} {$this->range_field}) as item_count
520
			FROM
521
		";
522
523
		/**
524
		 * If `$limit` is not specified, we can directly use the table.
525
		 */
526
		if ( ! $limit ) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $limit of type integer|null is loosely compared to false; this is ambiguous if the integer can be zero. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
527
			$query .= "
528
				{$this->table}
529
	            {$filter_statement}
530
			";
531
		} else {
532
			/**
533
			 * If there is `$limit` specified, we can't directly use `MIN/MAX()` as they don't work with `LIMIT`.
534
			 * That's why we will alter the query for this case.
535
			 */
536
			$limit = intval( $limit );
537
538
			$query .= "
539
				(
540
					SELECT
541
						{$distinct_count} {$this->range_field}
542
					FROM
543
						{$this->table}
544
						{$filter_statement}
545
					ORDER BY
546
						{$this->range_field} ASC
547
					LIMIT {$limit}
548
				) as ids_query
549
			";
550
		}
551
552
		// phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
553
		$result = $wpdb->get_row( $query, ARRAY_A );
554
555
		if ( ! $result || ! is_array( $result ) ) {
556
			throw new Exception( 'Unable to get range edges' );
557
		}
558
559
		return $result;
560
	}
561
562
	/**
563
	 * Update the results to have key/checksum format.
564
	 *
565
	 * @param array $results Prepare the results for output of granular results.
566
	 */
567
	protected function prepare_results_for_output( &$results ) {
568
		// get the compound key.
569
		// only return range and compound key for granular results.
570
571
		$return_value = array();
572
573
		foreach ( $results as &$result ) {
574
			// Working on reference to save memory here.
575
576
			$key = array();
577
			foreach ( $this->key_fields as $field ) {
578
				$key[] = $result[ $field ];
579
			}
580
581
			$return_value[ implode( '-', $key ) ] = $result['checksum'];
582
		}
583
584
		return $return_value;
585
	}
586
587
	/**
588
	 * Calculate the checksum based on provided range and filters.
589
	 *
590
	 * @param int|null   $range_from          The start of the range.
591
	 * @param int|null   $range_to            The end of the range.
592
	 * @param array|null $filter_values       Additional filter values. Not used at the moment.
593
	 * @param bool       $granular_result     If the returned result should be granular or only the checksum.
594
	 * @param bool       $simple_return_value If we want to use a simple return value for non-granular results (return only the checksum, without wrappers).
595
	 *
596
	 * @return array|mixed|object|WP_Error|null
597
	 */
598
	public function calculate_checksum( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false, $simple_return_value = true ) {
599
600
		if ( ! Sync\Settings::is_checksum_enabled() ) {
601
			return new WP_Error( 'checksum_disabled', 'Checksums are currently disabled.' );
602
		}
603
604
		try {
605
			$this->validate_input();
606
		} catch ( Exception $ex ) {
607
			return new WP_Error( 'invalid_input', $ex->getMessage() );
608
		}
609
610
		$query = $this->build_checksum_query( $range_from, $range_to, $filter_values, $granular_result );
611
612
		global $wpdb;
613
614
		if ( ! $granular_result ) {
615
			// phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
616
			$result = $wpdb->get_row( $query, ARRAY_A );
617
618
			if ( ! is_array( $result ) ) {
619
				return new WP_Error( 'invalid_query', "Result wasn't an array" );
620
			}
621
622
			if ( $simple_return_value ) {
623
				return $result['checksum'];
624
			}
625
626
			return array(
627
				'range'    => $range_from . '-' . $range_to,
628
				'checksum' => $result['checksum'],
629
			);
630
		} else {
631
			// phpcs:ignore WordPress.DB.PreparedSQL.NotPrepared
632
			$result = $wpdb->get_results( $query, ARRAY_A );
633
			return $this->prepare_results_for_output( $result );
634
		}
635
	}
636
}
637