Completed
Push — add/sync-partial-sync-checksum... ( 4393a8...c3d856 )
by
unknown
87:38 queued 79:06
created

Table_Checksum::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 2
dl 0
loc 19
rs 9.6333
c 0
b 0
f 0
1
<?php
2
/**
3
 * Table Checksums Class.
4
 *
5
 * @package automattic/jetpack-sync
6
 */
7
8
namespace Automattic\Jetpack\Sync\Replicastore;
9
10
use Automattic\Jetpack\Sync\Settings;
11
use Exception;
12
use WP_error;
13
14
// TODO add rest endpoints to work with this, hopefully in the same folder
15
/**
16
 * Class to handle Table Checksums.
17
 */
18
class Table_Checksum {
19
20
	/**
21
	 * Table to be checksummed.
22
	 *
23
	 * @var string
24
	 */
25
	public $table = '';
26
27
	/**
28
	 * Table Checksum Configuration.
29
	 *
30
	 * @var array
31
	 */
32
	public $table_configuration = array();
33
34
	/**
35
	 * Field to be used for range queries.
36
	 *
37
	 * @var string
38
	 */
39
	public $range_field = '';
40
41
	/**
42
	 * ID Field(s) to be used.
43
	 *
44
	 * @var array
45
	 */
46
	public $key_fields = array();
47
48
	/**
49
	 * Field(s) to be used in generating the checksum value.
50
	 *
51
	 * @var array
52
	 */
53
	public $checksum_fields = array();
54
55
	/**
56
	 * SQL Query to be used to filter results (allow/disallow).
57
	 *
58
	 * @var string
59
	 */
60
	public $additional_filter_sql = '';
61
62
	/**
63
	 * Default Checksum Table Configurations.
64
	 *
65
	 * @var array
66
	 */
67
	public $default_tables = array();
68
69
	/**
70
	 * Salt to be used when generating checksum.
71
	 *
72
	 * @var string
73
	 */
74
	public $salt = '';
75
76
	/**
77
	 * Tables which are allowed to be checksummed.
78
	 *
79
	 * @var string
80
	 */
81
	public $allowed_tables = array();
82
83
	/**
84
	 * Table_Checksum constructor.
85
	 *
86
	 * @param string $table
87
	 * @param string $salt
0 ignored issues
show
Documentation introduced by
Should the type for parameter $salt not be string|null?

This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type.

It makes a suggestion as to what type it considers more descriptive.

Most often this is a case of a parameter that can be null in addition to its declared types.

Loading history...
88
	 *
89
	 * @throws Exception
90
	 */
91
	public function __construct( $table, $salt = null ) {
92
		$this->salt = $salt;
93
94
		$this->default_tables = $this->get_default_tables();
95
96
		// TODO change filters to allow the array format.
97
		// TODO add get_fields or similar method to get things out of the table.
98
		// TODO extract this configuration in a better way, still make it work with `$wpdb` names.
99
		// TODO take over the replicastore functions and move them over to this class.
100
		// TODO make the API work.
101
102
		$this->allowed_tables = apply_filters( 'jetpack_sync_checksum_allowed_tables', $this->default_tables );
103
104
		$this->table               = $this->validate_table_name( $table );
105
		$this->table_configuration = $this->allowed_tables[ $table ];
106
107
		$this->prepare_fields( $this->table_configuration );
108
109
	}
110
111
	/**
112
	 * Get Default Table configurations.
113
	 *
114
	 * @return array
115
	 */
116
	private function get_default_tables() {
117
		global $wpdb;
118
119
		 return array(
120
			 'posts'              => array(
121
				 'table'           => $wpdb->posts,
122
				 'range_field'     => 'ID',
123
				 'key_fields'      => array( 'ID' ),
124
				 'checksum_fields' => array( 'post_modified_gmt' ),
125
				 'filter_sql'      => Settings::get_blacklisted_post_types_sql(),
126
			 ),
127
			 'postmeta'           => array(
128
				 'table'           => $wpdb->postmeta,
129
				 'range_field'     => 'post_id',
130
				 'key_fields'      => array( 'post_id', 'meta_key' ),
131
				 'checksum_fields' => array( 'meta_key', 'meta_value' ),
132
				 'filter_sql'      => Settings::get_whitelisted_post_meta_sql(),
133
			 ),
134
			 'comments'           => array(
135
				 'table'           => $wpdb->comments,
136
				 'range_field'     => 'comment_ID',
137
				 'key_fields'      => array( 'comment_ID' ),
138
				 'checksum_fields' => array( 'comment_content' ),
139
				 'filter_sql'      => Settings::get_comments_filter_sql(),
140
			 ),
141
			 'commentmeta'        => array(
142
				 'table'           => $wpdb->commentmeta,
143
				 'range_field'     => 'comment_id',
144
				 'key_fields'      => array( 'comment_id', 'meta_key' ),
145
				 'checksum_fields' => array( 'meta_key', 'meta_value' ),
146
				 'filter_sql'      => Settings::get_whitelisted_comment_meta_sql(),
147
			 ),
148
			 'terms'              => array(
149
				 'table'           => $wpdb->terms,
150
				 'range_field'     => 'term_id',
151
				 'key_fields'      => array( 'term_id' ),
152
				 'checksum_fields' => array( 'term_id', 'name', 'slug' ),
153
			 ),
154
			 'termmeta'           => array(
155
				 'table'           => $wpdb->termmeta,
156
				 'range_field'     => 'term_id',
157
				 'key_fields'      => array( 'term_id', 'meta_key' ),
158
				 'checksum_fields' => array( 'meta_key', 'meta_value' ),
159
			 ),
160
			 'term_relationships' => array(
161
				 'table'           => $wpdb->term_relationships,
162
				 'range_field'     => 'object_id',
163
				 'key_fields'      => array( 'object_id' ),
164
				 'checksum_fields' => array( 'object_id', 'term_taxonomy_id' ),
165
			 ),
166
			 'term_taxonomy'      => array(
167
				 'table'           => $wpdb->term_taxonomy,
168
				 'range_field'     => 'term_taxonomy_id',
169
				 'key_fields'      => array( 'term_taxonomy_id' ),
170
				 'checksum_fields' => array( 'term_taxonomy_id', 'term_id', 'taxonomy', 'description', 'parent' ),
171
			 ),
172
			 'links'              => $wpdb->links, // TODO describe in the array format or add exceptions.
173
			 'options'            => $wpdb->options, // TODO describe in the array format or add exceptions.
174
		 );
175
	}
176
177
	/**
178
	 * Prepare field params based off provided configuration.
179
	 *
180
	 * @param $table_configuration
181
	 */
182
	private function prepare_fields( $table_configuration ) {
183
		$this->key_fields            = $table_configuration['key_fields'];
184
		$this->range_field           = $table_configuration['range_field'];
185
		$this->checksum_fields       = $table_configuration['checksum_fields'];
186
		$this->additional_filter_sql = ! empty( $table_configuration['filter_sql'] ) ? $table_configuration['filter_sql'] : '';
187
	}
188
189
	/**
190
	 * Verify provided table name is valid for checksum processing.
191
	 *
192
	 * @param $table
193
	 *
194
	 * @return mixed|string
195
	 * @throws Exception
196
	 */
197
	private function validate_table_name( $table ) {
198
		if ( empty( $table ) ) {
199
			throw new Exception( 'Invalid table name: empty' );
200
		}
201
202
		if ( ! array_key_exists( $table, $this->allowed_tables ) ) {
203
			throw new Exception( 'Invalid table name: not allowed' );
204
		}
205
206
		// TODO other checks if such are needed.
207
208
		return $this->allowed_tables[ $table ]['table'];
209
	}
210
211
	/**
212
	 * Verify provided fields are proper names.
213
	 *
214
	 * @param $fields
215
	 *
216
	 * @throws Exception
217
	 */
218
	private function validate_fields( $fields ) {
219
		foreach ( $fields as $field ) {
220
			if ( ! preg_match( '/^[0-9,a-z,A-Z$_]+$/i', $field ) ) {
221
				throw new Exception( "Invalid field name: {$field} is not allowed" );
222
			}
223
224
			// TODO other verifications of the field names.
225
		}
226
	}
227
228
	/**
229
	 * Verify the fields exist in the table.
230
	 *
231
	 * @param $fields
232
	 *
233
	 * @return bool
234
	 * @throws Exception
235
	 */
236
	private function validate_fields_against_table( $fields ) {
237
		global $wpdb;
238
239
		// TODO: Is this safe enough?
240
		$result = $wpdb->get_row( "SELECT * FROM {$this->table} LIMIT 1", ARRAY_A );
241
		if ( ! is_array( $result ) ) {
242
			throw new Exception( 'Unexpected $wpdb->query output: not array' );
243
		}
244
245
		// Check if the fields are actually contained in the table.
246
		foreach ( $fields as $field_to_check ) {
247
			if ( ! array_key_exists( $field_to_check, $result ) ) {
248
				throw new Exception( "Invalid field name: field '{$field_to_check}' doesn't exist in table {$this->table}" );
249
			}
250
		}
251
252
		return true;
253
	}
254
255
	/**
256
	 * Verify the configured fields.
257
	 *
258
	 * @throws Exception
259
	 */
260
	private function validate_input() {
261
		$fields = array_merge( array( $this->range_field ), $this->key_fields, $this->checksum_fields );
262
263
		$this->validate_fields( $fields );
264
		$this->validate_fields_against_table( $fields );
265
	}
266
267
	/**
268
	 * Build the filter query baased off range fields and values and the additional sql.
269
	 *
270
	 * @param null $range_from
271
	 * @param null $range_to
272
	 * @param null $filter_values
273
	 *
274
	 * @return string
275
	 */
276
	private function build_filter_statement( $range_from = null, $range_to = null, $filter_values = null ) {
277
		global $wpdb;
278
279
		/**
280
		 * Prepare the ranges.
281
		 */
282
283
		$filter_array = array();
284 View Code Duplication
		if ( $range_from !== null ) {
285
			$filter_array[] = $wpdb->prepare( "{$this->range_field} > %d", array( intval( $range_from ) ) );
286
		}
287 View Code Duplication
		if ( $range_to !== null ) {
288
			$filter_array[] = $wpdb->prepare( "{$this->range_field} < %d", array( intval( $range_to ) ) );
289
		}
290
291
		/**
292
		 * End prepare the ranges.
293
		 */
294
295
		/**
296
		 * Prepare data filters.
297
		 */
298
		// TODO add support for multiple filter fields from array syntax (i.e. filter => values, filter => values, ...).
299
		// TODO this doesn't work right now, until we properly migrate all the filtering functions to array syntax.
300
		$filter_prepared_statement = '';
0 ignored issues
show
Unused Code introduced by
$filter_prepared_statement is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
301
		if ( 0 & ! empty( $filter_values ) ) {
302
			// Prepare filtering.
303
			$filter_placeholders = "AND {$this->filter_field} IN(" . implode( ',', array_fill( 0, count( $filter_values ), '%s' ) ) . ')';
0 ignored issues
show
Bug introduced by
The property filter_field does not exist. Did you maybe forget to declare it?

In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:

class MyClass { }

$x = new MyClass();
$x->foo = true;

Generally, it is a good practice to explictly declare properties to avoid accidental typos and provide IDE auto-completion:

class MyClass {
    public $foo;
}

$x = new MyClass();
$x->foo = true;
Loading history...
304
			$filter_array[]      = $wpdb->prepare( $filter_placeholders, $filter_values );
305
		}
306
307
		// Add any additional filters via direct SQL statement.
308
		// Currently used only because the above isn't done ( `$filter_values` ).
309
		$additional_filter_sql = '';
0 ignored issues
show
Unused Code introduced by
$additional_filter_sql is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
310
		if ( $this->additional_filter_sql ) {
311
			$filter_array[] = $this->additional_filter_sql;
312
		}
313
314
		/**
315
		 * End prepare data filters.
316
		 */
317
		return implode( ' AND ', $filter_array );
318
	}
319
320
	/**
321
	 * Returns the checksum query. All validation of fields and configurations are expected to occur prior to usage.
322
	 *
323
	 * @param null $range_from
324
	 * @param null $range_to
325
	 * @param null $filter_values
326
	 * @param bool $granular_result
327
	 *
328
	 * @return string
329
	 */
330
	private function build_checksum_query( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false ) {
331
		global $wpdb;
332
333
		// Escape the salt.
334
		$salt = $wpdb->prepare( '%s', $this->salt ); // TODO escape or prepare statement
335
336
		// Prepare the compound key.
337
		$key_fields = implode( ',', $this->key_fields );
338
339
		// Prepare the checksum fields.
340
		$checksum_fields_string = implode( ',', array_merge( $this->checksum_fields, array( $salt ) ) );
341
342
		$additional_fields = '';
343
		if ( $granular_result ) {
344
			// TODO uniq the fields as sometimes(most) range_index is the key and there's no need to select the same field twice.
345
			$additional_fields = "
346
				{$this->range_field} as range_index,
347
			    {$key_fields},
348
			";
349
		}
350
351
		$filter_stamenet = $this->build_filter_statement( $range_from, $range_to, $filter_values );
352
353
		$query = "
354
			SELECT
355
				{$additional_fields}
356
				SUM(
357
					CRC32(
358
						CONCAT_WS( '#', {$salt}, {$checksum_fields_string} )
359
					)
360
				)  AS checksum
361
			 FROM
362
			    {$this->table}
363
			 WHERE
364
				{$filter_stamenet}
365
		";
366
367
		/**
368
		 * We need the GROUP BY only for compound keys.
369
		 */
370
		if ( $granular_result ) {
371
			$query .= "
372
				GROUP BY {$key_fields}
373
			";
374
		}
375
376
		return $query;
377
378
	}
379
380
	/**
381
	 * Obtain the min-max values (edges) of the range.
382
	 *
383
	 * @param null $range_from
384
	 * @param null $range_to
385
	 * @param null $limit
386
	 *
387
	 * @return array|object|void
388
	 * @throws Exception
389
	 */
390
	public function get_range_edges( $range_from = null, $range_to = null, $limit = null ) {
391
		global $wpdb;
392
393
		$this->validate_fields( array( $this->range_field ) );
394
395
		// `trim()` to make sure we don't add the statement if it's empty.
396
		$filters = trim( $this->build_filter_statement( $range_from, $range_to ) );
397
398
		$filter_statement = '';
399
		if ( ! empty( $filters ) ) {
400
			$filter_statement = "
401
				WHERE
402
					{$filters}
403
			";
404
		}
405
406
		// Only make the distinct count when we know there can be multiple entries for the range column.
407
		$distinct_count = count( $this->key_fields ) > 1 ? 'DISTINCT' : '';
408
409
		$query = "
410
			SELECT
411
			       MIN({$this->range_field}) as min_range,
412
			       MAX({$this->range_field}) as max_range,
413
			       COUNT( {$distinct_count} {$this->range_field}) as item_count
414
			FROM
415
		";
416
417
		/**
418
		 * If `$limit` is not specified, we can directly use the table.
419
		 */
420
		if ( ! $limit ) {
421
			$query .= "
422
				{$this->table}
423
	            {$filter_statement}
424
			";
425
		} else {
426
			/**
427
			 * If there is `$limit` specified, we can't directly use `MIN/MAX()` as they don't work with `LIMIT`.
428
			 * That's why we will alter the query for this case.
429
			 */
430
			$limit = intval( $limit );
431
432
			$query .= "
433
				(
434
					SELECT
435
						{$distinct_count} {$this->range_field}
436
					FROM
437
						{$this->table}
438
						{$filter_statement}
439
					ORDER BY
440
						{$this->range_field} ASC
441
					LIMIT {$limit}
442
				) as ids_query
443
			";
444
		}
445
446
		$result = $wpdb->get_row( $query, ARRAY_A );
447
448
		if ( ! $result || ! is_array( $result ) ) {
449
			throw new Exception( 'Unable to get range edges' );
450
		}
451
452
		return $result;
453
	}
454
455
	/**
456
	 * Update the results to have key/checksum format.
457
	 *
458
	 * @param $results
459
	 */
460
	public function prepare_results_for_output( &$results ) {
461
		// get the compound key.
462
		// only return range and compound key for granular results.
463
464
		foreach ( $results as &$result ) {
465
			// Working on reference to save memory here.
466
467
			$key = array();
468
			foreach ( $this->key_fields as $field ) {
469
				$key[] = $result[ $field ];
470
			}
471
472
			$result = array(
473
				'key'      => implode( '-', $key ),
474
				'checksum' => $result['checksum'],
475
			);
476
477
		}
478
	}
479
480
	/**
481
	 * Calculate the checksum based on provided range and filters.
482
	 *
483
	 * @param null $range_from
484
	 * @param null $range_to
485
	 * @param null $filter_values
486
	 * @param bool $granular_result
487
	 * @param bool $simple_return_value
488
	 *
489
	 * @return array|mixed|object|WP_error|null
490
	 */
491
	public function calculate_checksum( $range_from = null, $range_to = null, $filter_values = null, $granular_result = false, $simple_return_value = true ) {
492
		try {
493
			$this->validate_input();
494
		} catch ( Exception $ex ) {
495
			return new WP_error( 'invalid_input', $ex->getMessage() );
0 ignored issues
show
Unused Code introduced by
The call to WP_Error::__construct() has too many arguments starting with 'invalid_input'.

This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.

If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is Wordpress.

In this case you can add the @ignore PhpDoc annotation to the duplicate definition and it will be ignored.

Loading history...
496
		}
497
498
		$query = $this->build_checksum_query( $range_from, $range_to, $filter_values, $granular_result );
499
500
		global $wpdb;
501
502
		if ( ! $granular_result ) {
503
			$result = $wpdb->get_row( $query, ARRAY_A );
504
505
			if ( ! is_array( $result ) ) {
506
				return new WP_Error( 'invalid_query', "Result wasn't an array" );
0 ignored issues
show
Unused Code introduced by
The call to WP_Error::__construct() has too many arguments starting with 'invalid_query'.

This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.

If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is Wordpress.

In this case you can add the @ignore PhpDoc annotation to the duplicate definition and it will be ignored.

Loading history...
507
			}
508
509
			if ( $simple_return_value ) {
510
				return $result['checksum'];
511
			}
512
513
			return array(
514
				'range'    => $range_from . '-' . $range_to,
515
				'checksum' => $result['checksum'],
516
			);
517
		} else {
518
			$result = $wpdb->get_results( $query, ARRAY_A );
519
			$this->prepare_results_for_output( $result );
520
521
			return $result;
522
		}
523
	}
524
}
525