ConvertLinks::createTempTable()   B
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 28
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 21
nc 3
nop 0
dl 0
loc 28
rs 8.8571
c 0
b 0
f 0
1
<?php
0 ignored issues
show
Coding Style Compatibility introduced by
For compatibility and reusability of your code, PSR1 recommends that a file should introduce either new symbols (like classes, functions, etc.) or have side-effects (like outputting something, or including other files), but not both at the same time. The first symbol is defined on line 34 and the first side effect is on line 24.

The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.

The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.

To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.

Loading history...
2
/**
3
 * Convert from the old links schema (string->ID) to the new schema (ID->ID).
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Maintenance
22
 */
23
24
require_once __DIR__ . '/Maintenance.php';
25
26
/**
 * Maintenance script to convert from the old links schema (string->ID)
 * to the new schema (ID->ID).
 *
 * The wiki should be put into read-only mode while this script executes.
 *
 * Supported options (all optional):
 *  - --logperformance: write timing information to the performance log file.
 *  - --perfLogFilename=<file>: performance log file (default 'convLinksPerf.txt').
 *  - --keep-links-table: leave the converted table at links_temp instead of
 *    swapping it in place of the links table.
 *  - --nokeys: create links_temp without keys (duplicates are then possible).
 *
 * @ingroup Maintenance
 */
class ConvertLinks extends Maintenance {
	/** @var bool Whether timing lines are written to the performance log. */
	private $logPerformance;

	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Convert from the old links schema (string->ID) to the new schema (ID->ID). '
				. 'The wiki should be put into read-only mode while this script executes' );

		// All of these are read with hasOption()/getOption() in execute(), so they
		// have to be registered as named options, not as positional arguments.
		$this->addOption( 'logperformance', "Log performance to perfLogFilename." );
		$this->addOption(
			'perfLogFilename',
			"Filename where performance is logged if --logperformance was set "
				. "(defaults to 'convLinksPerf.txt').",
			false,
			true
		);
		$this->addOption(
			'keep-links-table',
			"Don't overwrite the old links table with the new one, leave the new table at links_temp."
		);
		$this->addOption(
			'nokeys',
			/* (What about InnoDB?) */
			"Don't create keys, and so allow duplicates in the new links table.\n"
				. "This gives a huge speed improvement for very large links tables which are MyISAM."
		);
	}

	/**
	 * @return int The Maintenance::DB_ADMIN constant
	 */
	public function getDbType() {
		return Maintenance::DB_ADMIN;
	}

	/**
	 * Run the conversion.
	 *
	 * Exits early (after printing a message) when the database is not MySQL,
	 * when a pagelinks table exists, or when links.l_from is already an integer
	 * column. Otherwise builds links_temp from links and, unless
	 * --keep-links-table was given, swaps it in place of links.
	 */
	public function execute() {
		$dbw = $this->getDB( DB_MASTER );

		$type = $dbw->getType();
		if ( $type != 'mysql' ) {
			$this->output( "Link table conversion not necessary for $type\n" );

			return;
		}

		$overwriteLinksTable = !$this->hasOption( 'keep-links-table' );
		// Option names are case-sensitive; this must match the name registered
		// in the constructor ('nokeys').
		$noKeys = $this->hasOption( 'nokeys' );
		$this->logPerformance = $this->hasOption( 'logperformance' );
		$perfLogFilename = $this->getOption( 'perfLogFilename', 'convLinksPerf.txt' );

		# --------------------------------------------------------------------

		list( $cur, $links, $links_temp, $links_backup ) =
			$dbw->tableNamesN( 'cur', 'links', 'links_temp', 'links_backup' );

		if ( $dbw->tableExists( 'pagelinks' ) ) {
			$this->output( "...have pagelinks; skipping old links table updates\n" );

			return;
		}

		$res = $dbw->query( "SELECT l_from FROM $links LIMIT 1", __METHOD__ );
		if ( $dbw->fieldType( $res, 0 ) == "int" ) {
			$this->output( "Schema already converted\n" );

			return;
		}

		$res = $dbw->query( "SELECT COUNT(*) AS count FROM $links", __METHOD__ );
		$row = $dbw->fetchObject( $res );
		$numRows = $row->count;
		$dbw->freeResult( $res );

		if ( $numRows == 0 ) {
			$this->output( "Updating schema (no rows to convert)...\n" );
			if ( !$this->createTempTable() ) {
				// Nothing was created, so there is nothing to swap in.
				return;
			}
		} else {
			$fh = false;
			if ( $this->logPerformance ) {
				$fh = fopen( $perfLogFilename, "w" );
				if ( !$fh ) {
					$this->error( "Couldn't open $perfLogFilename" );
					// Carry on without performance logging.
					$this->logPerformance = false;
				}
			}
			$startTime = microtime( true );

			# Create a title -> cur_id map
			$ids = $this->loadTitleIds( $dbw, $cur, $numRows, $fh );

			# --------------------------------------------------------------------

			# Now, step through the links table in chunks, convert, and write
			# to the new table.
			if ( !$this->createTempTable() ) {
				if ( $this->logPerformance ) {
					fclose( $fh );
				}

				return;
			}
			list( $totalTuplesInserted, $numBadLinks ) =
				$this->copyLinks( $dbw, $links, $links_temp, $ids, $numRows, $noKeys, $fh );

			$this->output( "$totalTuplesInserted valid titles and "
				. "$numBadLinks invalid titles were processed.\n\n" );
			$this->performanceLog(
				$fh,
				"$totalTuplesInserted valid titles and $numBadLinks invalid titles were processed.\n"
			);
			$this->performanceLog(
				$fh,
				"Total execution time: " . ( microtime( true ) - $startTime ) . " seconds.\n"
			);
			if ( $this->logPerformance ) {
				fclose( $fh );
			}
		}
		# --------------------------------------------------------------------

		if ( $overwriteLinksTable ) {
			$this->swapInNewTable( $dbw, $links, $links_temp, $links_backup );
		} else {
			$this->output( "Conversion complete.  The converted table is at $links_temp;\n" );
			$this->output( "the original links table is unchanged.\n" );
		}
	}

	/**
	 * Read the whole cur table and build a map from prefixed title to cur_id.
	 *
	 * Titles in a non-zero namespace are keyed as "<namespace text>:<title>".
	 *
	 * @param mixed $dbw Connection returned by getDB( DB_MASTER )
	 * @param string $cur Table name of the cur table, as returned by tableNamesN()
	 * @param int|string $numRows Row count of the links table (only used in a log line)
	 * @param resource|bool $fh Performance log handle, or false
	 * @return array Map of title string => cur_id
	 */
	private function loadTitleIds( $dbw, $cur, $numRows, $fh ) {
		global $wgContLang;

		# whether or not to give progress reports while reading IDs from cur table
		$reportProgress = true;
		# number of rows between progress reports
		$reportInterval = 1000;

		$baseTime = microtime( true );
		$this->output( "Loading IDs from $cur table...\n" );
		// NOTE(review): $numRows is the row count of the links table, not of cur,
		// so the figure in this log line looks misleading - confirm the intent.
		$this->performanceLog( $fh, "Reading $numRows rows from cur table...\n" );
		$this->performanceLog( $fh, "rows read vs seconds elapsed:\n" );

		$dbw->bufferResults( false );
		$res = $dbw->query( "SELECT cur_namespace,cur_title,cur_id FROM $cur", __METHOD__ );
		$ids = [];
		$rowsRead = 0;

		foreach ( $res as $row ) {
			$title = $row->cur_title;
			if ( $row->cur_namespace ) {
				$title = $wgContLang->getNsText( $row->cur_namespace ) . ":$title";
			}
			$ids[$title] = $row->cur_id;
			$rowsRead++;
			if ( $reportProgress && ( $rowsRead % $reportInterval ) == 0 ) {
				$this->performanceLog(
					$fh,
					$rowsRead . " " . ( microtime( true ) - $baseTime ) . "\n"
				);
				$this->output( "\t$rowsRead rows of $cur table read.\n" );
			}
		}
		$dbw->freeResult( $res );
		$dbw->bufferResults( true );
		$this->output( "Finished loading IDs.\n\n" );
		$this->performanceLog(
			$fh,
			"Took " . ( microtime( true ) - $baseTime ) . " seconds to load IDs.\n\n"
		);

		return $ids;
	}

	/**
	 * Copy the links table into links_temp, replacing each l_from title by its ID.
	 *
	 * Rows whose l_from is not a key of $ids are counted as bad links and skipped.
	 *
	 * @param mixed $dbw Connection returned by getDB( DB_MASTER )
	 * @param string $links Table name of the source links table
	 * @param string $links_temp Table name of the destination table
	 * @param array $ids Map of title string => cur_id
	 * @param int|string $numRows Number of rows in the links table
	 * @param bool $noKeys True for a plain INSERT, false for INSERT IGNORE
	 * @param resource|bool $fh Performance log handle, or false
	 * @return array Two elements: total tuples inserted, number of bad links
	 */
	private function copyLinks( $dbw, $links, $links_temp, array $ids, $numRows, $noKeys, $fh ) {
		# whether or not to give progress reports during conversion
		$reportProgress = true;
		# number of rows per SELECT chunk / per INSERT
		$insertInterval = 1000;
		$initialRowOffset = 0;

		$totalTuplesInserted = 0;
		$numBadLinks = 0;

		$this->performanceLog( $fh, "Resetting timer.\n\n" );
		$baseTime = microtime( true );
		$this->output( "Processing $numRows rows from $links table...\n" );
		$this->performanceLog( $fh, "Processing $numRows rows from $links table...\n" );
		$this->performanceLog( $fh, "rows inserted vs seconds elapsed:\n" );

		$insertPrefix = $noKeys
			? "INSERT INTO $links_temp (l_from,l_to) VALUES "
			: "INSERT IGNORE INTO $links_temp (l_from,l_to) VALUES ";

		// NOTE(review): the chunks are selected with LIMIT/OFFSET and no ORDER BY,
		// exactly as before; confirm the row order is stable enough for paging.
		for ( $rowOffset = $initialRowOffset; $rowOffset < $numRows;
			$rowOffset += $insertInterval
		) {
			$sqlRead = $dbw->limitResult( "SELECT * FROM $links ", $insertInterval, $rowOffset );
			$res = $dbw->query( $sqlRead, __METHOD__ );

			$tuples = [];
			foreach ( $res as $row ) {
				$fromTitle = $row->l_from;
				if ( array_key_exists( $fromTitle, $ids ) ) { # valid title
					$from = $ids[$fromTitle];
					$to = $row->l_to;
					$tuples[] = "($from,$to)";
				} else { # invalid title
					$numBadLinks++;
				}
			}
			$dbw->freeResult( $res );

			$tuplesAdded = count( $tuples );
			if ( $tuplesAdded == 0 ) {
				continue;
			}

			if ( $reportProgress ) {
				$this->output( "Inserting $tuplesAdded tuples into $links_temp..." );
			}
			$dbw->query( $insertPrefix . implode( ",", $tuples ), __METHOD__ );
			$totalTuplesInserted += $tuplesAdded;
			if ( $reportProgress ) {
				$this->output( " done. Total $totalTuplesInserted tuples inserted.\n" );
				$this->performanceLog(
					$fh,
					$totalTuplesInserted . " " . ( microtime( true ) - $baseTime ) . "\n"
				);
			}
		}

		return [ $totalTuplesInserted, $numBadLinks ];
	}

	/**
	 * Drop any existing links_backup, then rename links to links_backup and
	 * links_temp to links.
	 *
	 * @param mixed $dbw Connection returned by getDB( DB_MASTER )
	 * @param string $links Table name of the links table
	 * @param string $links_temp Table name of the converted table
	 * @param string $links_backup Table name to keep the old links table under
	 */
	private function swapInNewTable( $dbw, $links, $links_temp, $links_backup ) {
		# Check for existing links_backup, and delete it if it exists.
		$this->output( "Dropping backup links table if it exists..." );
		$dbw->query( "DROP TABLE IF EXISTS $links_backup", __METHOD__ );
		$this->output( " done.\n" );

		# Swap in the new table, and move old links table to links_backup.
		# Use $links rather than the bare name so the same table name that was
		# read from above is the one that gets renamed.
		$this->output( "Swapping tables '$links' to '$links_backup'; '$links_temp' to '$links'..." );
		$dbw->query( "RENAME TABLE $links TO $links_backup, $links_temp TO $links", __METHOD__ );
		$this->output( " done.\n\n" );

		$this->output( "Conversion complete. The old table remains at $links_backup;\n" );
		$this->output( "delete at your leisure.\n" );
	}

	/**
	 * Drop and re-create the links_temp table, with keys unless --nokeys was given.
	 *
	 * @return bool False if the database connection is not open (nothing is
	 *  created in that case), true otherwise
	 */
	private function createTempTable() {
		$dbConn = $this->getDB( DB_MASTER );

		if ( !$dbConn->isOpen() ) {
			$this->output( "Opening connection to database failed.\n" );

			return false;
		}
		$links_temp = $dbConn->tableName( 'links_temp' );

		$this->output( "Dropping temporary links table if it exists..." );
		$dbConn->query( "DROP TABLE IF EXISTS $links_temp", __METHOD__ );
		$this->output( " done.\n" );

		$this->output( "Creating temporary links table..." );
		$definition = "l_from int(8) unsigned NOT NULL default '0', " .
			"l_to int(8) unsigned NOT NULL default '0'";
		if ( !$this->hasOption( 'nokeys' ) ) {
			$definition .= ", " .
				"UNIQUE KEY l_from(l_from,l_to), " .
				"KEY (l_to)";
		}
		$dbConn->query( "CREATE TABLE $links_temp ( " . $definition . ")", __METHOD__ );
		$this->output( " done.\n\n" );

		return true;
	}

	/**
	 * Write a line to the performance log when performance logging is enabled.
	 *
	 * @param resource|bool $fh Performance log handle; only used when logging is enabled
	 * @param string $text Text to write
	 */
	private function performanceLog( $fh, $text ) {
		if ( $this->logPerformance ) {
			fwrite( $fh, $text );
		}
	}
}
304
305
// Name the maintenance class defined in this file, then include the script
// referenced by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = 'ConvertLinks';
require_once RUN_MAINTENANCE_IF_MAIN;
307