Completed
Push — add/sync-partial-sync-checksum... ( 93f4e2...c9ed3f )
by
unknown
253:26 queued 243:12
created

Table_Checksum::build_checksum_query()   B

Complexity

Conditions 4
Paths 8

Size

Total Lines 60

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
nc 8
nop 4
dl 0
loc 60
rs 8.8727
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Table Checksums Class.
4
 *
5
 * @package automattic/jetpack-sync
6
 */
7
8
namespace Automattic\Jetpack\Sync\Replicastore;
9
10
use Automattic\Jetpack\Sync\Settings;
11
use Exception;
12
use WP_Error;
13
14
// TODO add rest endpoints to work with this, hopefully in the same folder.
15
/**
16
 * Class to handle Table Checksums.
17
 */
18
class Table_Checksum {
19
20
	/**
21
	 * Table to be checksummed.
22
	 *
23
	 * @var string
24
	 */
25
	public $table = '';
26
27
	/**
28
	 * Table Checksum Configuration.
29
	 *
30
	 * @var array
31
	 */
32
	public $table_configuration = array();
33
34
	/**
35
	 * Field to be used for range queries.
36
	 *
37
	 * @var string
38
	 */
39
	public $range_field = '';
40
41
	/**
42
	 * ID Field(s) to be used.
43
	 *
44
	 * @var array
45
	 */
46
	public $key_fields = array();
47
48
	/**
49
	 * Field(s) to be used in generating the checksum value.
50
	 *
51
	 * @var array
52
	 */
53
	public $checksum_fields = array();
54
55
	/**
56
	 * SQL Query to be used to filter results (allow/disallow).
57
	 *
58
	 * @var string
59
	 */
60
	public $additional_filter_sql = '';
61
62
	/**
63
	 * Default Checksum Table Configurations.
64
	 *
65
	 * @var array
66
	 */
67
	public $default_tables = array();
68
69
	/**
70
	 * Salt to be used when generating checksum.
71
	 *
72
	 * @var string
73
	 */
74
	public $salt = '';
75
76
	/**
77
	 * Tables which are allowed to be checksummed.
78
	 *
79
	 * @var string
80
	 */
81
	public $allowed_tables = array();
82
83
	/**
84
	 * If the table has a "parent" table that it's related to.
85
	 *
86
	 * @var mixed|null
87
	 */
88
	private $parent_table = null;
89
90
	/**
91
	 * Table_Checksum constructor.
92
	 *
93
	 * @param string $table The table to calculate checksums for.
94
	 * @param string $salt  Optional salt to add to the checksum.
0 ignored issues
show
Documentation introduced by
Should the type for parameter $salt not be string|null?

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
95
	 *
96
	 * @throws Exception Throws exception from inner functions.
97
	 */
98
	public function __construct( $table, $salt = null ) {
99
		$this->salt = $salt;
100
101
		$this->default_tables = $this->get_default_tables();
102
103
		// TODO change filters to allow the array format.
104
		// TODO add get_fields or similar method to get things out of the table.
105
		// TODO extract this configuration in a better way, still make it work with `$wpdb` names.
106
		// TODO take over the replicastore functions and move them over to this class.
107
		// TODO make the API work.
108
109
		$this->allowed_tables = apply_filters( 'jetpack_sync_checksum_allowed_tables', $this->default_tables );
110
111
		$this->table               = $this->validate_table_name( $table );
112
		$this->table_configuration = $this->allowed_tables[ $table ];
113
114
		$this->prepare_fields( $this->table_configuration );
115
116
	}
117
118
	/**
119
	 * Get Default Table configurations.
120
	 *
121
	 * @return array
122
	 */
123
	private function get_default_tables() {
124
		global $wpdb;
125
126
		 return array(
127
			 'posts'              => array(
128
				 'table'           => $wpdb->posts,
129
				 'range_field'     => 'ID',
130
				 'key_fields'      => array( 'ID' ),
131
				 'checksum_fields' => array( 'post_modified_gmt' ),
132
				 'filter_sql'      => Settings::get_blacklisted_post_types_sql(),
133
			 ),
134
			 'postmeta'           => array(
135
				 'table'           => $wpdb->postmeta,
136
				 'range_field'     => 'post_id',
137
				 'key_fields'      => array( 'post_id', 'meta_key' ),
138
				 'checksum_fields' => array( 'meta_key', 'meta_value' ),
139
				 'filter_sql'      => Settings::get_whitelisted_post_meta_sql(),
140
				 'parent_table'    => 'posts',
141
			 ),
142
			 'comments'           => array(
143
				 'table'           => $wpdb->comments,
144
				 'range_field'     => 'comment_ID',
145
				 'key_fields'      => array( 'comment_ID' ),
146
				 'checksum_fields' => array( 'comment_content' ),
147
				 'filter_sql'      => Settings::get_comments_filter_sql(),
148
			 ),
149
			 'commentmeta'        => array(
150
				 'table'           => $wpdb->commentmeta,
151
				 'range_field'     => 'comment_id',
152
				 'key_fields'      => array( 'comment_id', 'meta_key' ),
153
				 'checksum_fields' => array( 'meta_key', 'meta_value' ),
154
				 'filter_sql'      => Settings::get_whitelisted_comment_meta_sql(),
155
				 'parent_table'    => 'comments',
156
			 ),
157
			 'terms'              => array(
158
				 'table'           => $wpdb->terms,
159
				 'range_field'     => 'term_id',
160
				 'key_fields'      => array( 'term_id' ),
161
				 'checksum_fields' => array( 'term_id', 'name', 'slug' ),
162
			 ),
163
			 'termmeta'           => array(
164
				 'table'           => $wpdb->termmeta,
165
				 'range_field'     => 'term_id',
166
				 'key_fields'      => array( 'term_id', 'meta_key' ),
167
				 'checksum_fields' => array( 'meta_key', 'meta_value' ),
168
				 'parent_table'    => 'terms',
169
			 ),
170
			 'term_relationships' => array(
171
				 'table'           => $wpdb->term_relationships,
172
				 'range_field'     => 'object_id',
173
				 'key_fields'      => array( 'object_id' ),
174
				 'checksum_fields' => array( 'object_id', 'term_taxonomy_id' ),
175
			 ),
176
			 'term_taxonomy'      => array(
177
				 'table'           => $wpdb->term_taxonomy,
178
				 'range_field'     => 'term_taxonomy_id',
179
				 'key_fields'      => array( 'term_taxonomy_id' ),
180
				 'checksum_fields' => array( 'term_taxonomy_id', 'term_id', 'taxonomy', 'description', 'parent' ),
181
			 ),
182
			 'links'              => $wpdb->links, // TODO describe in the array format or add exceptions.
183
			 'options'            => $wpdb->options, // TODO describe in the array format or add exceptions.
184
		 );
185
	}
186
187
	/**
188
	 * Prepare field params based off provided configuration.
189
	 *
190
	 * @param array $table_configuration The table configuration array.
191
	 */
192
	private function prepare_fields( $table_configuration ) {
193
		$this->key_fields            = $table_configuration['key_fields'];
194
		$this->range_field           = $table_configuration['range_field'];
195
		$this->checksum_fields       = $table_configuration['checksum_fields'];
196
		$this->additional_filter_sql = ! empty( $table_configuration['filter_sql'] ) ? $table_configuration['filter_sql'] : '';
197
		$this->parent_table          = isset( $table_configuration['parent_table'] ) ? $table_configuration['parent_table'] : null;
198
	}
199
200
	/**
201
	 * Verify provided table name is valid for checksum processing.
202
	 *
203
	 * @param string $table Table name to validate.
204
	 *
205
	 * @return mixed|string
206
	 * @throws Exception Throw an exception on validation failure.
207
	 */
208
	private function validate_table_name( $table ) {
209
		if ( empty( $table ) ) {
210
			throw new Exception( 'Invalid table name: empty' );
211
		}
212
213
		if ( ! array_key_exists( $table, $this->allowed_tables ) ) {
214
			throw new Exception( 'Invalid table name: not allowed' );
215
		}
216
217
		// TODO other checks if such are needed.
218
219
		return $this->allowed_tables[ $table ]['table'];
220
	}
221
222
	/**
223
	 * Verify provided fields are proper names.
224
	 *
225
	 * @param array $fields Array of field names to validate.
226
	 *
227
	 * @throws Exception Throw an exception on failure to validate.
228
	 */
229
	private function validate_fields( $fields ) {
230
		foreach ( $fields as $field ) {
231
			if ( ! preg_match( '/^[0-9,a-z,A-Z$_]+$/i', $field ) ) {
232
				throw new Exception( "Invalid field name: {$field} is not allowed" );
233
			}
234
235
			// TODO other verifications of the field names.
236
		}
237
	}
238
239
	/**
240
	 * Verify the fields exist in the table.
241
	 *
242
	 * @param array $fields Array of fields to validate.
243
	 *
244
	 * @return bool
245
	 * @throws Exception Throw an exception on failure to validate.
246
	 */
247
	private function validate_fields_against_table( $fields ) {
248
		global $wpdb;
249
250
		// TODO: Is this safe enough?
251
		$result = $wpdb->get_row( "SELECT * FROM {$this->table} LIMIT 1", ARRAY_A );
252
		if ( ! is_array( $result ) ) {
253
			throw new Exception( 'Unexpected $wpdb->query output: not array' );
254
		}
255
256
		// Check if the fields are actually contained in the table.
257
		foreach ( $fields as $field_to_check ) {
258
			if ( ! array_key_exists( $field_to_check, $result ) ) {
259
				throw new Exception( "Invalid field name: field '{$field_to_check}' doesn't exist in table {$this->table}" );
260
			}
261
		}
262
263
		return true;
264
	}
265
266
	/**
267
	 * Verify the configured fields.
268
	 *
269
	 * @throws Exception Throw an exception on failure to validate in the internal functions.
270
	 */
271
	private function validate_input() {
272
		$fields = array_merge( array( $this->range_field ), $this->key_fields, $this->checksum_fields );
273
274
		$this->validate_fields( $fields );
275
		$this->validate_fields_against_table( $fields );
276
	}
277
278
	/**
279
	 * Build the filter query baased off range fields and values and the additional sql.
280
	 *
281
	 * @param int|null   $range_from Start of the range.
282
	 * @param int|null   $range_to End of the range.
283
	 * @param array|null $filter_values Additional filter values. Not used at the moment.
284
	 *
285
	 * @return string
286
	 */
287
	public function build_filter_statement( $range_from = null, $range_to = null, $filter_values = null ) {
288
		global $wpdb;
289
290
		/**
291
		 * Prepare the ranges.
292
		 */
293
294
		$filter_array = array();
295 View Code Duplication
		if ( $range_from !== null ) {
296
			$filter_array[] = $wpdb->prepare( "{$this->range_field} >= %d", array( intval( $range_from ) ) );
297
		}
298 View Code Duplication
		if ( $range_to !== null ) {
299
			$filter_array[] = $wpdb->prepare( "{$this->range_field} <= %d", array( intval( $range_to ) ) );
300
		}
301
302
		/**
303
		 * End prepare the ranges.
304
		 */
305
306
		/**
307
		 * Prepare data filters.
308
		 */
309
		// TODO add support for multiple filter fields from array syntax (i.e. filter => values, filter => values, ...).
310
		// TODO this doesn't work right now, until we properly migrate all the filtering functions to array syntax.
311
		$filter_prepared_statement = '';
0 ignored issues
show
Unused Code introduced by
$filter_prepared_statement is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
312
		if ( 0 & ! empty( $filter_values ) ) {
313
			// Prepare filtering.
314
			$filter_placeholders = "AND {$this->filter_field} IN(" . implode( ',', array_fill( 0, count( $filter_values ), '%s' ) ) . ')';
0 ignored issues
show
Bug introduced by
The property filter_field does not exist. Did you maybe forget to declare it?

In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:

class MyClass { }

$x = new MyClass();
$x->foo = true;

Generally, it is a good practice to explictly declare properties to avoid accidental typos and provide IDE auto-completion:

class MyClass {
    public $foo;
}

$x = new MyClass();
$x->foo = true;
Loading history...
315
			$filter_array[]      = $wpdb->prepare( $filter_placeholders, $filter_values );
316
		}
317
318
		// Add any additional filters via direct SQL statement.
319
		// Currently used only because the above isn't done ( `$filter_values` ).
320
		$additional_filter_sql = '';
0 ignored issues
show
Unused Code introduced by
$additional_filter_sql is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
321
		if ( $this->additional_filter_sql ) {
322
			$filter_array[] = $this->additional_filter_sql;
323
		}
324
325
		/**
326
		 * End prepare data filters.
327
		 */
328
		return implode( ' AND ', $filter_array );
329
	}
330
331
	/**
332
	 * Returns the checksum query. All validation of fields and configurations are expected to occur prior to usage.
333
	 *
334
	 * @param int|null   $range_from      The start of the range.
335
	 * @param int|null   $range_to        The end of the range.
336
	 * @param array|null $filter_values   Additional filter values. Not used at the moment.
337
	 * @param bool       $granular_result If the function should return a granular result.
338
	 *
339
	 * @return string
340
	 *
341
	 * @throws Exception Throws and exception if validation fails in the internal function calls.
342
	 */
343
	private function build_checksum_query( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false ) {
344
		global $wpdb;
345
346
		// Escape the salt.
347
		$salt = $wpdb->prepare( '%s', $this->salt ); // TODO escape or prepare statement.
348
349
		// Prepare the compound key.
350
		$key_fields = implode( ',', $this->key_fields );
351
352
		// Prepare the checksum fields.
353
		$checksum_fields_string = implode( ',', array_merge( $this->checksum_fields, array( $salt ) ) );
354
355
		$additional_fields = '';
356
		if ( $granular_result ) {
357
			// TODO uniq the fields as sometimes(most) range_index is the key and there's no need to select the same field twice.
358
			$additional_fields = "
359
				{$this->range_field} as range_index,
360
			    {$key_fields},
361
			";
362
		}
363
364
		$filter_stamenet = $this->build_filter_statement( $range_from, $range_to, $filter_values );
365
366
		$join_statement = '';
367
		if ( $this->parent_table ) {
368
			$parent_table_obj    = new Table_Checksum( $this->parent_table );
369
			$parent_filter_query = $parent_table_obj->build_filter_statement( $range_from, $range_to );
370
371
			$join_statement = "
372
				INNER JOIN {$parent_table_obj->table} ON ({$this->table}.{$this->range_field} = {$parent_table_obj->table}.{$parent_table_obj->range_field} AND {$parent_filter_query})
373
			";
374
		}
375
376
		$query = "
377
			SELECT
378
				{$additional_fields}
379
				SUM(
380
					CRC32(
381
						CONCAT_WS( '#', {$salt}, {$checksum_fields_string} )
382
					)
383
				)  AS checksum
384
			 FROM
385
			    {$this->table}
386
				{$join_statement}
387
			 WHERE
388
				{$filter_stamenet}
389
		";
390
391
		/**
392
		 * We need the GROUP BY only for compound keys.
393
		 */
394
		if ( $granular_result ) {
395
			$query .= "
396
				GROUP BY {$key_fields}
397
			";
398
		}
399
400
		return $query;
401
402
	}
403
404
	/**
405
	 * Obtain the min-max values (edges) of the range.
406
	 *
407
	 * @param int|null $range_from The start of the range.
408
	 * @param int|null $range_to   The end of the range.
409
	 * @param int|null $limit      How many values to return.
410
	 *
411
	 * @return array|object|void
412
	 * @throws Exception Throws an exception if validation fails on the internal function calls.
413
	 */
414
	public function get_range_edges( $range_from = null, $range_to = null, $limit = null ) {
415
		global $wpdb;
416
417
		$this->validate_fields( array( $this->range_field ) );
418
419
		// `trim()` to make sure we don't add the statement if it's empty.
420
		$filters = trim( $this->build_filter_statement( $range_from, $range_to ) );
421
422
		$filter_statement = '';
423
		if ( ! empty( $filters ) ) {
424
			$filter_statement = "
425
				WHERE
426
					{$filters}
427
			";
428
		}
429
430
		// Only make the distinct count when we know there can be multiple entries for the range column.
431
		$distinct_count = count( $this->key_fields ) > 1 ? 'DISTINCT' : '';
432
433
		$query = "
434
			SELECT
435
			       MIN({$this->range_field}) as min_range,
436
			       MAX({$this->range_field}) as max_range,
437
			       COUNT( {$distinct_count} {$this->range_field}) as item_count
438
			FROM
439
		";
440
441
		/**
442
		 * If `$limit` is not specified, we can directly use the table.
443
		 */
444
		if ( ! $limit ) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $limit of type integer|null is loosely compared to false; this is ambiguous if the integer can be zero. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
445
			$query .= "
446
				{$this->table}
447
	            {$filter_statement}
448
			";
449
		} else {
450
			/**
451
			 * If there is `$limit` specified, we can't directly use `MIN/MAX()` as they don't work with `LIMIT`.
452
			 * That's why we will alter the query for this case.
453
			 */
454
			$limit = intval( $limit );
455
456
			$query .= "
457
				(
458
					SELECT
459
						{$distinct_count} {$this->range_field}
460
					FROM
461
						{$this->table}
462
						{$filter_statement}
463
					ORDER BY
464
						{$this->range_field} ASC
465
					LIMIT {$limit}
466
				) as ids_query
467
			";
468
		}
469
470
		$result = $wpdb->get_row( $query, ARRAY_A );
471
472
		if ( ! $result || ! is_array( $result ) ) {
473
			throw new Exception( 'Unable to get range edges' );
474
		}
475
476
		return $result;
477
	}
478
479
	/**
480
	 * Update the results to have key/checksum format.
481
	 *
482
	 * @param array $results Prepare the results for output of granular results.
483
	 */
484
	public function prepare_results_for_output( &$results ) {
485
		// get the compound key.
486
		// only return range and compound key for granular results.
487
488
		foreach ( $results as &$result ) {
489
			// Working on reference to save memory here.
490
491
			$key = array();
492
			foreach ( $this->key_fields as $field ) {
493
				$key[] = $result[ $field ];
494
			}
495
496
			$result = array(
497
				'key'      => implode( '-', $key ),
498
				'checksum' => $result['checksum'],
499
			);
500
501
		}
502
	}
503
504
	/**
505
	 * Calculate the checksum based on provided range and filters.
506
	 *
507
	 * @param int|null   $range_from          The start of the range.
508
	 * @param int|null   $range_to            The end of the range.
509
	 * @param array|null $filter_values       Additional filter values. Not used at the moment.
510
	 * @param bool       $granular_result     If the returned result should be granular or only the checksum.
511
	 * @param bool       $simple_return_value If we want to use a simple return value for non-granular results (return only the checksum, without wrappers).
512
	 *
513
	 * @return array|mixed|object|WP_Error|null
514
	 */
515
	public function calculate_checksum( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false, $simple_return_value = true ) {
516
		try {
517
			$this->validate_input();
518
		} catch ( Exception $ex ) {
519
			return new WP_Error( 'invalid_input', $ex->getMessage() );
0 ignored issues
show
Unused Code introduced by
The call to WP_Error::__construct() has too many arguments starting with 'invalid_input'.

This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.

If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is Wordpress.

In this case you can add the @ignore PhpDoc annotation to the duplicate definition and it will be ignored.

Loading history...
520
		}
521
522
		$query = $this->build_checksum_query( $range_from, $range_to, $filter_values, $granular_result );
523
524
		global $wpdb;
525
526
		if ( ! $granular_result ) {
527
			$result = $wpdb->get_row( $query, ARRAY_A );
528
529
			if ( ! is_array( $result ) ) {
530
				return new WP_Error( 'invalid_query', "Result wasn't an array" );
0 ignored issues
show
Unused Code introduced by
The call to WP_Error::__construct() has too many arguments starting with 'invalid_query'.

This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.

If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is Wordpress.

In this case you can add the @ignore PhpDoc annotation to the duplicate definition and it will be ignored.

Loading history...
531
			}
532
533
			if ( $simple_return_value ) {
534
				return $result['checksum'];
535
			}
536
537
			return array(
538
				'range'    => $range_from . '-' . $range_to,
539
				'checksum' => $result['checksum'],
540
			);
541
		} else {
542
			$result = $wpdb->get_results( $query, ARRAY_A );
543
			$this->prepare_results_for_output( $result );
544
545
			return $result;
546
		}
547
	}
548
}
549