1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Wikibase\TermStore\MediaWiki\PackagePrivate; |
4
|
|
|
|
5
|
|
|
use Psr\Log\LoggerInterface; |
6
|
|
|
use Psr\Log\NullLogger; |
7
|
|
|
use Wikimedia\Rdbms\DBError; |
8
|
|
|
use Wikimedia\Rdbms\IDatabase; |
9
|
|
|
use Wikimedia\Rdbms\ILoadBalancer; |
10
|
|
|
|
11
|
|
|
/** |
12
|
|
|
* Cleans up the normalized term store after some terms are no longer needed. |
13
|
|
|
* Unused term_in_lang, text_in_lang and text rows are automatically removed. |
14
|
|
|
* (Unused type rows are never cleaned up.) |
15
|
|
|
* |
16
|
|
|
* @license GPL-2.0-or-later |
17
|
|
|
*/ |
18
|
|
|
class DatabaseTermCleaner implements TermCleaner {

	/** @var ILoadBalancer */
	private $lb;

	/** @var IDatabase|null a connection to DB_REPLICA, lazily initialized in cleanTerms() */
	private $dbr = null;

	/** @var IDatabase|null a connection to DB_MASTER, lazily initialized in cleanTerms() */
	private $dbw = null;

	/** @var LoggerInterface */
	private $logger;

	/**
	 * @param ILoadBalancer $lb load balancer used to obtain the replica and master connections
	 * @param LoggerInterface|null $logger optional logger; a NullLogger is used if omitted
	 */
	public function __construct(
		ILoadBalancer $lb,
		LoggerInterface $logger = null
	) {
		$this->lb = $lb;
		// $this->dbr and $this->dbw are lazily initialized in cleanTerms()
		$this->logger = $logger ?: new NullLogger();
	}

	/**
	 * Delete the specified term_in_lang rows from the database,
	 * as well as any text_in_lang and text rows that are now unused.
	 *
	 * It is the caller’s responsibility to ensure
	 * that the term_in_lang rows are no longer referenced anywhere;
	 * on the other hand, this class takes care that text_in_lang and text rows
	 * used by other term_in_lang rows are not removed.
	 *
	 * All changes are made between beginMasterChanges() and commitMasterChanges();
	 * if a DBError occurs, the changes are rolled back, the error is logged
	 * and the exception is rethrown.
	 *
	 * @param int[] $termInLangIds
	 * @throws DBError
	 */
	public function cleanTerms( array $termInLangIds ) {
		// Check each connection on its own: if acquiring the master connection threw
		// on an earlier call, $this->dbr is already set while $this->dbw is still null.
		if ( $this->dbr === null ) {
			$this->dbr = $this->lb->getConnection( ILoadBalancer::DB_REPLICA );
		}
		if ( $this->dbw === null ) {
			$this->dbw = $this->lb->getConnection( ILoadBalancer::DB_MASTER );
		}

		try {
			$this->lb->beginMasterChanges( __METHOD__ );
			$this->cleanTermInLangIds( $termInLangIds );
			$this->lb->commitMasterChanges( __METHOD__ );
		} catch ( DBError $exception ) {
			$this->lb->rollbackMasterChanges( __METHOD__ );
			$this->logger->error(
				'{method}: DBError while cleaning terms {termInLangIds}: {exception}',
				[
					'method' => __METHOD__,
					'termInLangIds' => $termInLangIds,
					'exception' => $exception
				]
			);
			throw $exception;
		}
	}

	/**
	 * Delete the specified term_in_lang rows from the database,
	 * as well as any text_in_lang and text rows that are now unused.
	 *
	 * @param int[] $termInLangIds
	 */
	private function cleanTermInLangIds( array $termInLangIds ) {
		if ( $termInLangIds === [] ) {
			return;
		}

		$this->logger->debug(
			'{method}: deleting {count} term_in_lang rows',
			[
				'method' => __METHOD__,
				'count' => count( $termInLangIds ),
			]
		);

		// Several term_in_lang rows may point at the same text_in_lang row,
		// so the looked-up IDs are de-duplicated before they are used further.
		$potentiallyUnusedTextInLangIds = $this->uniqueValues(
			$this->selectFieldValuesForPrimaryKey(
				'wbt_term_in_lang',
				'wbtl_text_in_lang_id',
				'wbtl_id',
				$termInLangIds,
				__METHOD__
			)
		);

		$this->dbw->delete(
			'wbt_term_in_lang',
			[ 'wbtl_id' => $termInLangIds ],
			__METHOD__
		);

		if ( $potentiallyUnusedTextInLangIds === [] ) {
			// No text_in_lang IDs were found for the given term_in_lang IDs, so there is
			// nothing further to clean up. An empty array must not be used as a query
			// condition: the database layer rejects it with an exception.
			return;
		}

		$stillUsedTextInLangIds = $this->dbw->selectFieldValues(
			'wbt_term_in_lang',
			'wbtl_text_in_lang_id',
			[ 'wbtl_text_in_lang_id' => $potentiallyUnusedTextInLangIds ],
			__METHOD__,
			[
				/**
				 * If we try to clean up a text_in_lang whose last use in a term_in_lang we just
				 * removed, and simultaneously another request adds a new term_in_lang using that
				 * text_in_lang, we want one of the following to happen:
				 *
				 * 1. Our transaction completes first and removes the text_in_lang. The concurrent
				 *    request blocks until we’re done, then sees that the text_in_lang is gone and
				 *    creates it (again) as part of inserting the term_in_lang.
				 * 2. The other transaction completes first and registers another term_in_lang using
				 *    that text_in_lang. We block until that’s done and then notice the text_in_lang
				 *    is still used and don’t clean it up.
				 *
				 * For this to work, we need to use 'FOR UPDATE' when checking whether a
				 * text_in_lang is still used in a term_in_lang, and the other request needs to
				 * ensure during or after insert of the new term_in_lang that the text_in_lang still
				 * exists, or create it otherwise. This way, either our check here or the other
				 * request’s insert will block and wait for the other to complete.
				 */
				'FOR UPDATE',
				// 'DISTINCT', // not supported in combination with FOR UPDATE on some DB types
			]
		);
		$unusedTextInLangIds = array_values( array_diff(
			$potentiallyUnusedTextInLangIds,
			$stillUsedTextInLangIds
		) );

		$this->cleanTextInLangIds( $unusedTextInLangIds );
	}

	/**
	 * Delete the specified text_in_lang rows from the database,
	 * as well as any text rows that are now unused.
	 *
	 * @param int[] $textInLangIds
	 */
	private function cleanTextInLangIds( array $textInLangIds ) {
		if ( $textInLangIds === [] ) {
			return;
		}

		$this->logger->debug(
			'{method}: deleting {count} text_in_lang rows',
			[
				'method' => __METHOD__,
				'count' => count( $textInLangIds ),
			]
		);

		// Several text_in_lang rows (e.g. in different languages) may share one text row,
		// so the looked-up IDs are de-duplicated before they are used further.
		$potentiallyUnusedTextIds = $this->uniqueValues(
			$this->selectFieldValuesForPrimaryKey(
				'wbt_text_in_lang',
				'wbxl_text_id',
				'wbxl_id',
				$textInLangIds,
				__METHOD__
			)
		);

		$this->dbw->delete(
			'wbt_text_in_lang',
			[ 'wbxl_id' => $textInLangIds ],
			__METHOD__
		);

		if ( $potentiallyUnusedTextIds === [] ) {
			// Nothing further to clean up; also avoids an empty array
			// as a query condition, which the database layer rejects.
			return;
		}

		$stillUsedTextIds = $this->dbw->selectFieldValues(
			'wbt_text_in_lang',
			'wbxl_text_id',
			[ 'wbxl_text_id' => $potentiallyUnusedTextIds ],
			__METHOD__,
			[
				'FOR UPDATE', // see comment in cleanTermInLangIds
				// 'DISTINCT', // not supported in combination with FOR UPDATE on some DB types
			]
		);
		$unusedTextIds = array_values( array_diff(
			$potentiallyUnusedTextIds,
			$stillUsedTextIds
		) );

		$this->cleanTextIds( $unusedTextIds );
	}

	/**
	 * Delete the specified text rows from the database.
	 *
	 * @param int[] $textIds
	 */
	private function cleanTextIds( array $textIds ) {
		if ( $textIds === [] ) {
			return;
		}

		$this->logger->debug(
			'{method}: deleting {count} text rows',
			[
				'method' => __METHOD__,
				'count' => count( $textIds ),
			]
		);

		$this->dbw->delete(
			'wbt_text',
			[ 'wbx_id' => $textIds ],
			__METHOD__
		);
	}

	/**
	 * Select the values for a field in rows with the given primary key.
	 * All the rows with these primary keys should exist in the master database,
	 * and the selected values should never change.
	 *
	 * This initially selects from the replica database,
	 * only falling back to the master if the replica did not return
	 * as many rows as there were distinct primary key values specified.
	 *
	 * @param string $table
	 * @param string $selectedVar
	 * @param string $primaryKeyVar
	 * @param int[] $primaryKeyValues
	 * @param string $fname
	 * @return array the selected values, one per row found (may contain duplicates)
	 */
	private function selectFieldValuesForPrimaryKey(
		$table,
		$selectedVar,
		$primaryKeyVar,
		$primaryKeyValues,
		$fname = __METHOD__
	) {
		// A repeated key still matches only one row, so duplicates would make the
		// row-count comparison below always fail and force a needless master read.
		$primaryKeyValues = $this->uniqueValues( $primaryKeyValues );
		if ( $primaryKeyValues === [] ) {
			// No keys means no rows; an empty array is not usable as a query condition.
			return [];
		}

		$values = $this->dbr->selectFieldValues(
			$table,
			$selectedVar,
			[ $primaryKeyVar => $primaryKeyValues ],
			$fname
		);

		if ( count( $values ) < count( $primaryKeyValues ) ) {
			$this->logger->debug(
				"{method}: replica only returned {valuesCount} '{selectedVar}' values " .
					"for {primaryKeyValuesCount} '{primaryKeyVar}' values, " .
					'falling back to read from master.',
				[
					'method' => __METHOD__,
					'callingMethod' => $fname,
					'valuesCount' => count( $values ),
					'selectedVar' => $selectedVar,
					'primaryKeyValuesCount' => count( $primaryKeyValues ),
					'primaryKeyVar' => $primaryKeyVar,
				]
			);
			$values = $this->dbw->selectFieldValues(
				$table,
				$selectedVar,
				[ $primaryKeyVar => $primaryKeyValues ],
				$fname
			);
		}

		return $values;
	}

	/**
	 * Remove duplicate values from a list and reindex it from zero.
	 *
	 * @param array $values
	 * @return array
	 */
	private function uniqueValues( array $values ) {
		return array_values( array_unique( $values ) );
	}

}
277
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.
For example, imagine you have a variable `$accountId` that can hold either an Id object or false (if there is no account id yet). Your code now assigns that value to the `id` property of an instance of the `Account` class. This class holds a proper account, so the id value must no longer be false. Either this assignment is in error, or a type check should be added for that assignment.