1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare( strict_types = 1 ); |
4
|
|
|
namespace Wikibase\Repo\Maintenance; |
5
|
|
|
|
6
|
|
|
use Maintenance; |
7
|
|
|
use MediaWiki\MediaWikiServices; |
8
|
|
|
use Wikibase\DataModel\Entity\Item; |
9
|
|
|
use Wikibase\Lib\WikibaseSettings; |
10
|
|
|
use Wikibase\Repo\WikibaseRepo; |
11
|
|
|
use Wikimedia\Rdbms\IDatabase; |
12
|
|
|
use Wikimedia\Rdbms\ILBFactory; |
13
|
|
|
|
14
|
|
|
// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment variable,
// otherwise fall back to the path four directories above this file.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}

require_once $basePath . '/maintenance/Maintenance.php';
17
|
|
|
|
18
|
|
|
/**
 * Maintenance script for pruning rows belonging to deleted or redirected items
 * from the wb_items_per_site table.
 *
 * @license GPL-2.0-or-later
 * @author Marius Hoch < [email protected] >
 */
class PruneItemsPerSite extends Maintenance {

	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Prune rows belonging to deleted or redirected Items from the wb_items_per_site table' );

		$this->addOption( 'select-batch-size', "Number of table rows to scan per select (100000 by default)", false, true );
	}

	/**
	 * @inheritDoc
	 */
	public function execute() {
		if ( !WikibaseSettings::isRepoEnabled() ) {
			$this->fatalError( "You need to have Wikibase enabled in order to use this maintenance script!\n\n" );
		}

		$wikibaseRepo = WikibaseRepo::getDefaultInstance();
		$localEntityTypes = $wikibaseRepo->getLocalEntitySource()->getEntityTypes();
		if ( !in_array( Item::ENTITY_TYPE, $localEntityTypes, true ) ) {
			$this->fatalError(
				"This script assumes Items to be part of the local entity source."
			);
		}

		$itemNamespace = $wikibaseRepo->getEntityNamespaceLookup()->getEntityNamespace( Item::ENTITY_TYPE );

		$loadBalancerFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		$selectBatchSize = (int)$this->getOption( 'select-batch-size', 100000 );
		if ( $selectBatchSize < 1 ) {
			// A window of zero or negative width would never advance the scan in prune().
			$this->fatalError( "The select-batch-size option must be a positive integer.\n" );
		}

		$this->prune( $loadBalancerFactory, $itemNamespace, $selectBatchSize );
	}

	/**
	 * Scan wb_items_per_site in consecutive ips_row_id windows of $selectBatchSize and
	 * delete every row for which selectInRange() finds no matching non-redirect page.
	 *
	 * @param ILBFactory $loadBalancerFactory Provides the replica (read) and master (write) connections
	 * @param int $itemNamespace Namespace id used to match item pages
	 * @param int $selectBatchSize Width of each ips_row_id window; must be at least 1
	 */
	private function prune(
		ILBFactory $loadBalancerFactory,
		int $itemNamespace,
		int $selectBatchSize
	) {
		$dbr = $loadBalancerFactory->getMainLB()->getConnection( DB_REPLICA, [ 'vslow' ] );
		$dbw = $loadBalancerFactory->getMainLB()->getConnection( DB_MASTER );

		$maxIpsRowId = (int)$dbr->selectField( 'wb_items_per_site', 'MAX(ips_row_id)', '', __METHOD__ );
		// Add 1%, but at least 50, to the maxIpsRowId to use, for items created during the script run.
		// Rounding up keeps the bound an integer without changing which windows are scanned.
		$maxIpsRowId = (int)ceil( max( $maxIpsRowId * 1.01, $maxIpsRowId + 50 ) );

		$startRowId = (int)$dbr->selectField( 'wb_items_per_site', 'MIN(ips_row_id)', '', __METHOD__ );
		while ( $startRowId < $maxIpsRowId ) {
			$endRowId = $startRowId + $selectBatchSize;
			$rowsToDelete = $this->selectInRange( $dbr, $itemNamespace, $startRowId, $endRowId );
			$this->output( "Read up to ips_row_id $endRowId.\n" );

			if ( $rowsToDelete !== [] ) {
				$affectedRows = $this->deleteRows( $dbw, $rowsToDelete );
				$this->output( "Deleted $affectedRows rows.\n" );
				// Let replicas catch up before reading the next window.
				$loadBalancerFactory->waitForReplication();
			}

			$startRowId = $endRowId;
		}
	}

	/**
	 * Find the ips_row_id values in [$startRowId, $endRowId) that have no page row with
	 * title "Q" + ips_item_id in the item namespace that is not a redirect.
	 *
	 * @param IDatabase $dbr Connection to read from
	 * @param int $itemNamespace Namespace id used to match item pages
	 * @param int $startRowId Inclusive lower bound of the ips_row_id window
	 * @param int $endRowId Exclusive upper bound of the ips_row_id window
	 * @return array The ips_row_id values of the rows to delete
	 */
	private function selectInRange( IDatabase $dbr, int $itemNamespace, int $startRowId, int $endRowId ): array {
		return $dbr->selectFieldValues(
			[ 'wb_items_per_site', 'page' ],
			'ips_row_id',
			[
				'ips_row_id >= ' . $startRowId,
				'ips_row_id < ' . $endRowId,
				// The LEFT JOIN found no page row, so nothing backs this entry.
				'page_id IS NULL',
			],
			__METHOD__,
			[],
			[
				'page' => [
					'LEFT JOIN',
					[
						'page_title = ' . $dbr->buildConcat( [
							$dbr->addQuotes( "Q" ),
							"ips_item_id",
						] ),
						'page_namespace' => $itemNamespace,
						'page_is_redirect' => 0,
					],
				],
			]
		);
	}

	/**
	 * Delete the wb_items_per_site rows with the given ips_row_id values.
	 *
	 * @param IDatabase $dbw Connection to write to
	 * @param array $rowsToDelete ips_row_id values of the rows to remove
	 * @return int Number of affected rows reported by the connection after the delete
	 */
	private function deleteRows( IDatabase $dbw, array $rowsToDelete ): int {
		$dbw->delete(
			'wb_items_per_site',
			[
				'ips_row_id' => $rowsToDelete
			],
			__METHOD__
		);

		return $dbw->affectedRows();
	}

}
126
|
|
|
|
127
|
|
|
// Name the maintenance class defined in this file, then include the runner file
// referenced by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = PruneItemsPerSite::class;
require_once RUN_MAINTENANCE_IF_MAIN;
129
|
|
|
|
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.