This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace Wikibase\Repo\Maintenance; |
||
4 | |||
5 | use ExtensionRegistry; |
||
6 | use Maintenance; |
||
7 | use MWException; |
||
8 | use Onoi\MessageReporter\ObservableMessageReporter; |
||
9 | use Wikibase\DataModel\Services\EntityId\EntityIdPager; |
||
10 | use Wikibase\DataModel\Services\Lookup\EntityLookupException; |
||
11 | use Wikibase\Lib\Reporting\ExceptionHandler; |
||
12 | use Wikibase\Lib\Reporting\ReportingExceptionHandler; |
||
13 | use Wikibase\Lib\WikibaseSettings; |
||
14 | use Wikibase\Repo\Dumpers\DumpGenerator; |
||
15 | use Wikibase\Repo\IO\EntityIdReader; |
||
16 | use Wikibase\Repo\IO\LineReader; |
||
17 | use Wikibase\Repo\Store\Sql\SqlEntityIdPager; |
||
18 | use Wikibase\Repo\Store\Sql\SqlEntityIdPagerFactory; |
||
19 | use Wikibase\Repo\Store\Store; |
||
20 | use Wikibase\Repo\WikibaseRepo; |
||
21 | use Wikimedia\AtEase\AtEase; |
||
22 | |||
// Locate the MediaWiki installation: honour MW_INSTALL_PATH when it is set,
// otherwise fall back to the directory four levels above this script.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}

require_once $basePath . '/maintenance/Maintenance.php';
||
26 | |||
/**
 * Maintenance script for generating a dump of entities in the repository.
 *
 * This base class handles the command line options, log and output files and
 * the stream of entity IDs; the concrete dump generator is supplied by
 * subclasses through createDumper().
 *
 * @license GPL-2.0-or-later
 * @author Daniel Kinzler
 */
abstract class DumpEntities extends Maintenance {

	/**
	 * @var SqlEntityIdPagerFactory Set via setDumpEntitiesServices(); used when no --list-file is given
	 */
	private $sqlEntityIdPagerFactory;

	/**
	 * @var bool|resource Handle opened by openLogFile(), or false while no log file is open
	 */
	private $logFileHandle = false;

	/**
	 * @var string[] Entity types to dump when no --entity-type option is given
	 */
	private $existingEntityTypes = [];

	/**
	 * @var string[] Entity types that are always removed from the set of types to dump
	 */
	private $entityTypesToExcludeFromOutput = [];

	/**
	 * Registers the description and the command line options shared by all entity dump scripts.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Generate a JSON dump from entities in the repository.' );

		$this->addOption( 'list-file', "A file containing one entity ID per line.", false, true );
		$this->addOption(
			'entity-type',
			"Only dump this kind of entity, e.g. `item` or `property`. Can be given multiple times.",
			false,
			true,
			false,
			/* $multiOccurrence */ true
		);
		$this->addOption( 'sharding-factor', "The number of shards (must be >= 1)", false, true );
		$this->addOption( 'shard', "The shard to output (must be less than the sharding-factor)", false, true );
		$this->addOption( 'batch-size', "The number of entities per processing batch", false, true );
		$this->addOption( 'output', "Output file (default is stdout). Will be overwritten.", false, true );
		$this->addOption( 'log', "Log file (default is stderr). Will be appended.", false, true );
		$this->addOption( 'quiet', "Disable progress reporting", false, false );
		$this->addOption( 'limit', "Limit how many entities are dumped.", false, true );
		$this->addOption( 'no-cache', "If this is set, don't try to read from an EntityRevisionCache.", false, false );
		$this->addOption(
			'first-page-id',
			'First page id to dump, use 1 to start with the first page. Use the reported last SqlEntityIdPager position + 1 ' .
				'to continue a previous run. Not compatible with --list-file.',
			false,
			true
		);
		$this->addOption(
			'last-page-id',
			'Page id of the last page to possibly include in the dump. Not compatible with --list-file.',
			false,
			true
		);
		$this->addOption(
			'ignore-missing',
			'Ignore missing IDs, do not report errors on them',
			false,
			false
		);
	}

	/**
	 * Injects the services and entity type lists this script works with.
	 *
	 * @param SqlEntityIdPagerFactory $sqlEntityIdPagerFactory
	 * @param string[] $existingEntityTypes Default for the --entity-type option
	 * @param string[] $entityTypesToExcludeFromOutput Types that are never dumped
	 */
	public function setDumpEntitiesServices(
		SqlEntityIdPagerFactory $sqlEntityIdPagerFactory,
		array $existingEntityTypes,
		array $entityTypesToExcludeFromOutput
	) {
		$this->sqlEntityIdPagerFactory = $sqlEntityIdPagerFactory;
		$this->existingEntityTypes = $existingEntityTypes;
		$this->entityTypesToExcludeFromOutput = $entityTypesToExcludeFromOutput;
	}

	/**
	 * Create concrete dumper instance
	 * @param resource $output
	 * @return DumpGenerator
	 */
	abstract protected function createDumper( $output );

	/**
	 * Writes a message, followed by a newline, to the log file if one is open,
	 * and via the output() method otherwise.
	 *
	 * @see MessageReporter::logMessage()
	 *
	 * @param string $message
	 */
	public function logMessage( $message ) {
		if ( $this->logFileHandle ) {
			fwrite( $this->logFileHandle, "$message\n" );
			// Flush right away so progress is visible while the dump is still running.
			fflush( $this->logFileHandle );
		} else {
			$this->output( "$message\n" );
		}
	}

	/**
	 * Opens the given file for use by logMessage().
	 * Any log file that is currently open is closed first.
	 *
	 * @param string $file use "-" as a shortcut for "php://stdout"
	 *
	 * @throws MWException
	 */
	private function openLogFile( $file ) {
		$this->closeLogFile();

		if ( $file === '-' ) {
			$file = 'php://stdout';
		}

		// wouldn't streams be nice...
		$this->logFileHandle = fopen( $file, 'a' );

		if ( !$this->logFileHandle ) {
			throw new MWException( 'Failed to open log file: ' . $file );
		}
	}

	/**
	 * Closes any currently open file opened with openLogFile().
	 * The STDERR and STDOUT handles themselves are never closed.
	 */
	private function closeLogFile() {
		if ( $this->logFileHandle
			&& $this->logFileHandle !== STDERR
			&& $this->logFileHandle !== STDOUT
		) {
			fclose( $this->logFileHandle );
		}

		$this->logFileHandle = false;
	}

	/**
	 * Do the actual work: open the log and output files, configure the dumper
	 * returned by createDumper() from the command line options and run it over
	 * the stream of entity IDs.
	 *
	 * The output handle, the log file and the ID stream are released even if
	 * generating the dump fails.
	 *
	 * @throws MWException If the log, output or list file cannot be opened
	 */
	public function execute() {
		//TODO: more validation for options
		$shardingFactor = (int)$this->getOption( 'sharding-factor', 1 );
		$shard = (int)$this->getOption( 'shard', 0 );
		$batchSize = (int)$this->getOption( 'batch-size', 100 );
		$limit = (int)$this->getOption( 'limit', 0 );

		//TODO: Allow injection of an OutputStream for logging
		$this->openLogFile( $this->getOption( 'log', 'php://stderr' ) );

		try {
			$outFile = $this->getOption( 'output', 'php://stdout' );

			if ( $outFile === '-' ) {
				$outFile = 'php://stdout';
			}

			$output = fopen( $outFile, 'w' ); //TODO: Allow injection of an OutputStream

			if ( !$output ) {
				throw new MWException( 'Failed to open ' . $outFile . '!' );
			}

			try {
				if ( $this->hasOption( 'list-file' ) ) {
					$this->logMessage( "Dumping entities listed in " . $this->getOption( 'list-file' ) );
				}

				$entityTypes = $this->getEntityTypes();
				if ( empty( $entityTypes ) ) {
					$this->logMessage( "No entity types to dump" );
					return;
				}

				$this->logMessage( 'Dumping entities of type ' . implode( ', ', $entityTypes ) );

				if ( $shardingFactor ) {
					$this->logMessage( "Dumping shard $shard/$shardingFactor" );
				}

				$dumper = $this->createDumper( $output );
				$dumper->setLimit( $limit );

				$progressReporter = new ObservableMessageReporter();
				$progressReporter->registerReporterCallback( [ $this, 'logMessage' ] );
				$dumper->setProgressReporter( $progressReporter );

				$ignored = $this->hasOption( 'ignore-missing' ) ?
					[ EntityLookupException::class ] :
					[];
				$exceptionReporter = new ReportingExceptionHandler( $progressReporter, $ignored );
				$dumper->setExceptionHandler( $exceptionReporter );

				//NOTE: we filter for $entityType twice: filtering in the DB is efficient,
				//      but filtering in the dumper is needed when working from a list file.
				$dumper->setShardingFilter( $shardingFactor, $shard );
				$dumper->setEntityTypesFilter( $entityTypes );
				$dumper->setBatchSize( $batchSize );

				$idStream = $this->makeIdStream( $entityTypes, $exceptionReporter );

				AtEase::suppressWarnings();
				try {
					$dumper->generateDump( $idStream );
				} finally {
					// Always undo the suppression, even if the dump fails half-way.
					AtEase::restoreWarnings();

					if ( $idStream instanceof EntityIdReader ) {
						// close stream / free resources
						$idStream->dispose();
					}
				}
			} finally {
				// Only our own handle is closed here; for php://stdout this is a
				// separate descriptor, not the process' STDOUT itself.
				fclose( $output );
			}
		} finally {
			$this->closeLogFile();
		}
	}

	/**
	 * Unless a DB group was chosen with --dbgroupdefault, registers a
	 * MediaWikiServices hook handler that sets $wgDBDefaultGroup to the repo
	 * setting "dumpDBDefaultGroup" (when that setting is not null).
	 *
	 * @inheritDoc
	 */
	public function finalSetup() {
		global $wgHooks;

		parent::finalSetup();

		if ( $this->hasOption( 'dbgroupdefault' ) ) {
			// A group was set via cli, so no need to set the default here
			return;
		}

		$wgHooks['MediaWikiServices'][] = function() {
			global $wgDBDefaultGroup;
			if ( !ExtensionRegistry::getInstance()->isLoaded( 'WikibaseRepository' ) ) {
				// Something instantiates the MediaWikiServices before Wikibase
				// is loaded, nothing we can do here.
				wfWarn( self::class . ': Can not change default DB group.' );
				return;
			}

			// Don't use WikibaseRepo here as this is run very early on, thus
			// the bootstrapping code is not ready yet (T202452).
			$settings = WikibaseSettings::getRepoSettings();
			$dumpDBDefaultGroup = $settings->getSetting( 'dumpDBDefaultGroup' );

			if ( $dumpDBDefaultGroup !== null ) {
				$wgDBDefaultGroup = $dumpDBDefaultGroup;
			}
		};
	}

	/**
	 * Returns the entity types to dump: the types given via --entity-type
	 * (or all existing types if the option is absent), minus the types that
	 * are excluded from output.
	 *
	 * @return string[] Keys of the input list are preserved by array_diff()
	 */
	private function getEntityTypes() {
		return array_diff(
			$this->getOption( 'entity-type', $this->existingEntityTypes ),
			$this->entityTypesToExcludeFromOutput
		);
	}

	/**
	 * Builds the stream of entity IDs to dump: read from the --list-file if
	 * one was given, queried from the database otherwise.
	 *
	 * @param string[] $entityTypes
	 * @param ExceptionHandler|null $exceptionReporter
	 *
	 * @return EntityIdReader|SqlEntityIdPager a stream of EntityId objects
	 */
	private function makeIdStream( array $entityTypes, ExceptionHandler $exceptionReporter = null ) {
		$listFile = $this->getOption( 'list-file' );

		if ( $listFile !== null ) {
			$stream = $this->makeIdFileStream( $listFile, $exceptionReporter );
		} else {
			$stream = $this->makeIdQueryStream( $entityTypes );
		}

		return $stream;
	}

	/**
	 * Returns EntityIdPager::NO_REDIRECTS.
	 *
	 * @return mixed a EntityIdPager::XXX_REDIRECTS constant
	 */
	protected function getRedirectMode() {
		return EntityIdPager::NO_REDIRECTS;
	}

	/**
	 * Cache flag for use in Store::getEntityRevisionLookup.
	 * Caching is disabled when the --no-cache option is set.
	 *
	 * @return string One of Store::LOOKUP_CACHING_RETRIEVE_ONLY and Store::LOOKUP_CACHING_DISABLED
	 */
	protected function getEntityRevisionLookupCacheMode() {
		if ( $this->getOption( 'no-cache', false ) ) {
			return Store::LOOKUP_CACHING_DISABLED;
		} else {
			return Store::LOOKUP_CACHING_RETRIEVE_ONLY;
		}
	}

	/**
	 * Creates a database backed ID stream, restricted to the page id range
	 * given by --first-page-id and --last-page-id, if any.
	 *
	 * @param string[] $entityTypes
	 *
	 * @return SqlEntityIdPager
	 */
	private function makeIdQueryStream( array $entityTypes ) {
		$sqlEntityIdPager = $this->sqlEntityIdPagerFactory->newSqlEntityIdPager( $entityTypes, $this->getRedirectMode() );

		$firstPageId = $this->getOption( 'first-page-id', null );
		if ( $firstPageId ) {
			// The option names the first page to dump, hence the position is one less.
			$sqlEntityIdPager->setPosition( intval( $firstPageId ) - 1 );
		}
		$lastPageId = $this->getOption( 'last-page-id', null );
		if ( $lastPageId ) {
			$sqlEntityIdPager->setCutoffPosition( intval( $lastPageId ) );
		}

		return $sqlEntityIdPager;
	}

	/**
	 * Creates an ID stream that reads entity IDs line by line from the given file.
	 *
	 * @param string $listFile
	 * @param ExceptionHandler|null $exceptionReporter Installed on the reader only when given
	 *
	 * @throws MWException
	 * @return EntityIdReader
	 */
	private function makeIdFileStream( $listFile, ExceptionHandler $exceptionReporter = null ) {
		$input = fopen( $listFile, 'r' );

		if ( !$input ) {
			throw new MWException( "Failed to open ID file: $listFile" );
		}

		$stream = new EntityIdReader( new LineReader( $input ), WikibaseRepo::getDefaultInstance()->getEntityIdParser() );

		// setExceptionHandler() does not accept null, so keep the reader's
		// own handler when no reporter was passed in.
		if ( $exceptionReporter !== null ) {
			$stream->setExceptionHandler( $exceptionReporter );
		}

		return $stream;
	}

}
||
356 |
It seems that null may be passed for this parameter; however, the function being called does not seem to accept null.
We recommend adding an additional type check (or disallowing null for the parameter):