Completed
Push — try/sync-backups ( f379e6 )
by
unknown
13:32 queued 04:30
created

Jetpack_Sync_Files::backup()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 19
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 10
nc 4
nop 1
dl 0
loc 19
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Experimental file sync
5
 */
6
7
require_once dirname( __FILE__ ) . '/class.jetpack-sync-queue.php';
8
9
class Jetpack_Sync_Files {
10
	const FILE_HASH_SIZE = 4096;
11
	const POST_TYPE_BACKUP = 'jetpack_backup';
12
	const QUEUE_NAME = 'backup';
13
14
	// singleton functions
15
	private static $instance;
16
17
	public static function get_instance() {
18
		if ( null === self::$instance ) {
19
			self::$instance = new self();
20
		}
21
22
		return self::$instance;
23
	}
24
25
	// this is necessary because you can't use "new" when you declare instance properties >:(
26
	protected function __construct() {
27
		$this->init();
28
	}
29
30
	private function init() {
31
		// register Backup, Directory content types
32
		register_post_type( self::POST_TYPE_BACKUP, array(
33
			'description' => __( 'Jetpack Backup', 'jetpack' ),
34
			'rewrite' => array(
35
				'slug'       => 'backup',
36
				'with_front' => false,
37
				'feeds'      => false,
38
				'pages'      => false,
39
			),
40
			'public'          => false,
41
			'show_ui'         => false,
42
			'capability_type' => 'backup',
43
			'map_meta_cap'    => false,
44
			'has_archive'     => false,
45
			'query_var'       => 'backup',
46
			'show_in_rest'    => false,
47
		) );
48
	}
49
50
	public function backup( $label = null ) {
51
		if ( ! $label ) {
52
			$label = "Backup " . current_time( 'mysql' );
53
		}
54
55
		// TODO: check if another backup is already in progress
0 ignored issues
show
Coding Style Best Practice introduced by
Comments for TODO tasks are often forgotten in the code; it might be better to use a dedicated issue tracker.
Loading history...
56
		$backup = $this->get_running_backup();
57
58
		if ( $backup ) {
59
			return new WP_Error( 'multiple_backups', 'There is already a running backup' );
60
		}
61
62
		return wp_insert_post(
63
			array (
64
				'post_title' => $label,
65
				'post_type' => self::POST_TYPE_BACKUP,
66
			)
67
		);
68
	}
69
70
	public function cancel_backup( $post_id ) {
71
		$backup = get_post( $post_id );
72
73
		if ( ! $backup || self::POST_TYPE_BACKUP !== $backup->post_type ) {
74
			return new WP_Error( 'no_such_backup', 'No such backup with that ID' );
75
		}
76
77
		return wp_trash_post( $post_id );
78
	}
79
80
	private function get_running_backup() {
81
		$running_backups = get_posts( array(
82
			'post_type' => self::POST_TYPE_BACKUP,
83
			'post_status' => 'draft'
84
		) );
85
86
		if ( empty( $running_backups ) ) {
87
			return false;
88
		} elseif ( count( $running_backups ) > 1 ) {
89
			return new WP_Error( 'multiple_backups', 'More than one running backup - this should never happen' );
90
		} else {
91
			return reset( $running_backups ); // first element
92
		}
93
	}
94
95
	public function scan_directories() {
96
		$backup = $this->get_running_backup();
97
98
		if ( ! $backup || is_wp_error( $backup ) ) {
99
			return $backup;
100
		}
101
102
		global $scanned_dirs_count;
103
		global $scanned_files_count;
104
105
		$scanned_dirs_count = 0;
106
		$scanned_files_count = 0;
107
108
		$begin = microtime( true );
109
		list( $hash, $children ) = $this->get_directory_hash_and_children( ABSPATH );
0 ignored issues
show
Unused Code introduced by
The assignment to $hash is unused. Consider omitting it like so list($first,,$third).

This checks looks for assignemnts to variables using the list(...) function, where not all assigned variables are subsequently used.

Consider the following code example.

<?php

function returnThreeValues() {
    return array('a', 'b', 'c');
}

list($a, $b, $c) = returnThreeValues();

print $a . " - " . $c;

Only the variables $a and $c are used. There was no need to assign $b.

Instead, the list call could have been.

list($a,, $c) = returnThreeValues();
Loading history...
110
		echo "Scan duration: ".(microtime(true)-$begin)." seconds\n";
111
		echo "Scanned $scanned_dirs_count directories and $scanned_files_count files\n";
112
		// print_r($children);
113
114
		// flatten hash and send to server for comparison
115
		// perhaps we should send the tree, so the server can also confirm that the files have the same
116
		// names, and also do security scanning, e.g. be aware which files should _not_ have had their hashes modified?
117
		// but for now let's go with the naive, compact, stateless approach...
118
		$keys = $this->get_all_child_hashes( $children );
119
		// print_r($keys);
120
		echo "JSON size: ".strlen(json_encode($keys))."\n";
121
122
		return $keys;
123
	}
124
125
	/**
126
	 * Find out which of my hashes are already present on the server
127
	 */
128
	public function check_server( $keys ) {
129
		Jetpack::load_xml_rpc_client();
130
131
		$query_args = array( 'timeout' => 60 );
132
		$url = add_query_arg( $query_args, Jetpack::xmlrpc_api_url() );
133
		$rpc = new Jetpack_IXR_Client( array(
134
			'url'     => $url,
135
			'user_id' => JETPACK_MASTER_USER,
136
			'timeout' => $query_args['timeout'],
137
		) );
138
139
		$response = array();
140
		error_log("contacting server at $url");
141
		$start = microtime( true );
142
		foreach( array_chunk( $keys, 100 ) as $keys_chunk ) {
143
			$result = $rpc->query( 'jetpack.checkFiles', $keys_chunk );
144
			error_log("received response for " . count($keys_chunk) . " keys in ".(microtime(true)-$start)." seconds");
145
			if ( ! $result ) {
146
				return $rpc->get_jetpack_error();
147
			}
148
			// queue up any files that are missing
149
			$hashes_to_upload = $rpc->getResponse();
0 ignored issues
show
Unused Code introduced by
$hashes_to_upload is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
150
		}
151
152
		return $response;
153
	}
154
155
	/**
156
	 * Recursively flattens the directory hierarchy into a simple array of hashes to
157
	 * check against the server
158
	 */
159
	private function get_all_child_hashes( $directory, &$hashes = array() ) {
160
		foreach ( $directory as $filename => $hash_or_children ) {
161
			if ( is_scalar( $hash_or_children ) ) {
162
				$hashes[] = $hash_or_children;
163
			} else {
164
				$hashes[] = $hash_or_children['hash'];
165
				$this->get_all_child_hashes( $hash_or_children['children'], $hashes );
166
			}
167
		}
168
		return $hashes;
169
	}
170
171
	/**
172
	 * Recursively fetches the hash of a directory and all its children, except certain blacklisted directories
173
	 */
174
	private function get_directory_hash_and_children( $path ) {
175
		$children = array();
176
177
		// normalize path
178
		if ( $path[ strlen( $path ) - 1 ] !== DIRECTORY_SEPARATOR ) {
179
			$path .= DIRECTORY_SEPARATOR;
180
		}
181
182
		// we concatenate hashes of all files and subdirs to generate
183
		// a unique hash for this directory
184
		$directory_hash_content = '';
185
186
		// NOTE a directory's mtime is changed when a file is added or removed, but NOT when it's changed.
187
		// - useful!
188
		// ... but can be faked :( and so should not be trusted?
189
		$files = scandir( $path ); // we use scandir rather than opendir because it sorts alphabetically
190
191
		global $scanned_dirs_count;
192
		global $scanned_files_count;
193
194
		foreach( $files as $entry ) {
195
			$fullpath = $path . $entry;
196
			if ( is_dir( $fullpath )
197
				&& $entry != '.'
198
				&& $entry != '..'
199
				&& $entry != 'node_modules' // TODO: make configurable
0 ignored issues
show
Coding Style Best Practice introduced by
Comments for TODO tasks are often forgotten in the code; it might be better to use a dedicated issue tracker.
Loading history...
200
				&& $entry[0] != '.' ) {     // is this desirable? can we miss files necessary for the site to function this way?
201
				$scanned_dirs_count += 1;
202
				list( $hash, $subchildren ) = $this->get_directory_hash_and_children( $fullpath );
203
				$children[$entry] = array(
204
					'hash' => $hash,
205
					'children' => $subchildren
206
				);
207
				$directory_hash_content .= $hash;
208
				continue;
209
			}
210
211
			if ( is_file( $fullpath ) && $entry[0] != '.' ) {
212
				$scanned_files_count += 1;
213
				$hash = $this->get_file_hash( $fullpath );
214
				$children[$entry] = $hash;
215
				$directory_hash_content .= $hash;
216
			}
217
		}
218
		return array( sha1( $directory_hash_content ), $children );
219
	}
220
221
	/**
222
	 * Generates a hash for a file as quickly as possible
223
	 * It does this by only hashing the first FILE_HASH_SIZE bytes of the file and, if
224
	 * the file size is > FILE_HASH_SIZE bytes, also the last FILE_HASH_SIZE bytes of the
225
	 * file, or ( $filesize - FILE_HASH_SIZE ) bytes, if that is smaller.
226
	 *
227
	 * It includes the full path in the hash, minus ABSPATH, since the same file often ends up
228
	 * in different parts of the tree.
229
	 * TODO: test and account for 32 bit precision http://php.net/manual/en/function.filesize.php#121406
0 ignored issues
show
Coding Style Best Practice introduced by
Comments for TODO tasks are often forgotten in the code; it might be better to use a dedicated issue tracker.
Loading history...
230
	 */
231
	private function get_file_hash( $fullpath ) {
232
		$filehandle = fopen( $fullpath, 'r' );
233
		if ( ! $filehandle ) {
234
			return false;
235
		}
236
237
		$firstPart = fread( $filehandle, self::FILE_HASH_SIZE );
238
239
		// if the bytes retrieved is < self::FILE_HASH_SIZE, just return, otherwise also load the last self::FILE_HASH_SIZE bytes
240
		// to maximise the chance of picking up changes
241
		if ( strlen( $firstPart ) <= self::FILE_HASH_SIZE ) {
242
			fclose( $filehandle );
243
			return sha1( $firstPart );
244
		}
245
246
		// calculate how much more of the file to read. Note that fseek uses INT internally so files > 2GB in size
247
		// will break on 32 bit PHP builds
248
		$lastPartBytesToRead = self::FILE_HASH_SIZE;
249
		$filesize = filesize( $fullpath );
250
251
		// avoid reading overlapping bytes with the first hash, if the file is smaller than 2*FILE_HASH_SIZE
252
		if ( $filesize < 2*self::FILE_HASH_SIZE ) {
253
			$lastPartBytesToRead = $filesize - self::FILE_HASH_SIZE;
254
		}
255
256
		fseek($filehandle, -$lastPartBytesToRead);
257
258
		$lastPart = fread( $filehandle, $lastPartBytesToRead );
259
260
		fclose( $filehandle );
261
		return sha1( $firstPart . $lastPart );
262
	}
263
}
264
265
add_action( 'init', array('Jetpack_Sync_Files', 'get_instance' ) );