This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Wikibase\Repo\Maintenance; |
||
| 4 | |||
| 5 | use ExtensionRegistry; |
||
| 6 | use Maintenance; |
||
| 7 | use MWException; |
||
| 8 | use Onoi\MessageReporter\ObservableMessageReporter; |
||
| 9 | use Wikibase\DataModel\Services\EntityId\EntityIdPager; |
||
| 10 | use Wikibase\DataModel\Services\Lookup\EntityLookupException; |
||
| 11 | use Wikibase\Lib\Reporting\ExceptionHandler; |
||
| 12 | use Wikibase\Lib\Reporting\ReportingExceptionHandler; |
||
| 13 | use Wikibase\Lib\WikibaseSettings; |
||
| 14 | use Wikibase\Repo\Dumpers\DumpGenerator; |
||
| 15 | use Wikibase\Repo\IO\EntityIdReader; |
||
| 16 | use Wikibase\Repo\IO\LineReader; |
||
| 17 | use Wikibase\Repo\Store\Sql\SqlEntityIdPager; |
||
| 18 | use Wikibase\Repo\Store\Sql\SqlEntityIdPagerFactory; |
||
| 19 | use Wikibase\Repo\Store\Store; |
||
| 20 | use Wikibase\Repo\WikibaseRepo; |
||
| 21 | use Wikimedia\AtEase\AtEase; |
||
| 22 | |||
// Locate the MediaWiki installation root: prefer the MW_INSTALL_PATH
// environment variable and fall back to the directory four levels above
// this script when the variable is not set.
$basePath = getenv( 'MW_INSTALL_PATH' );
if ( $basePath === false ) {
	$basePath = __DIR__ . '/../../../..';
}

// Pull in the Maintenance base class that DumpEntities extends.
require_once $basePath . '/maintenance/Maintenance.php';
||
| 26 | |||
/**
 * Maintenance script for generating a dump of entities in the repository.
 *
 * This base class handles option parsing, log/output file handling and the
 * construction of the entity ID stream. Subclasses provide the concrete
 * dumper through createDumper() and must inject the required services with
 * setDumpEntitiesServices() before execute() runs.
 *
 * @license GPL-2.0-or-later
 * @author Daniel Kinzler
 */
abstract class DumpEntities extends Maintenance {

	/**
	 * Factory used to build the database-backed ID stream.
	 * Set by setDumpEntitiesServices().
	 *
	 * @var SqlEntityIdPagerFactory
	 */
	private $sqlEntityIdPagerFactory;

	/**
	 * Handle of the currently open log file, or false if none is open.
	 *
	 * @var bool|resource
	 */
	private $logFileHandle = false;

	/**
	 * Entity types dumped when no --entity-type option is given.
	 * Set by setDumpEntitiesServices().
	 *
	 * @var array
	 */
	private $existingEntityTypes = [];

	/**
	 * Entity types that are always removed from the list of types to dump.
	 * Set by setDumpEntitiesServices().
	 *
	 * @var array
	 */
	private $entityTypesToExcludeFromOutput = [];

	/**
	 * Registers the description and all command line options of the script.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Generate a JSON dump from entities in the repository.' );

		$this->addOption( 'list-file', "A file containing one entity ID per line.", false, true );
		$this->addOption(
			'entity-type',
			"Only dump this kind of entity, e.g. `item` or `property`. Can be given multiple times.",
			false,
			true,
			false,
			/* $multiOccurrence */ true
		);
		$this->addOption( 'sharding-factor', "The number of shards (must be >= 1)", false, true );
		$this->addOption( 'shard', "The shard to output (must be less than the sharding-factor)", false, true );
		$this->addOption( 'batch-size', "The number of entities per processing batch", false, true );
		$this->addOption( 'output', "Output file (default is stdout). Will be overwritten.", false, true );
		$this->addOption( 'log', "Log file (default is stderr). Will be appended.", false, true );
		$this->addOption( 'quiet', "Disable progress reporting", false, false );
		$this->addOption( 'limit', "Limit how many entities are dumped.", false, true );
		$this->addOption( 'no-cache', "If this is set, don't try to read from an EntityRevisionCache.", false, false );
		$this->addOption(
			'first-page-id',
			'First page id to dump, use 1 to start with the first page. Use the reported last SqlEntityIdPager position + 1 ' .
				'to continue a previous run. Not compatible with --list-file.',
			false,
			true
		);
		$this->addOption(
			'last-page-id',
			'Page id of the last page to possibly include in the dump. Not compatible with --list-file.',
			false,
			true
		);
		$this->addOption(
			'ignore-missing',
			'Ignore missing IDs, do not report errors on them',
			false,
			false
		);
	}

	/**
	 * Injects the services and entity type lists this script depends on.
	 *
	 * @param SqlEntityIdPagerFactory $sqlEntityIdPagerFactory used to page through entity IDs in the database
	 * @param array $existingEntityTypes entity types to dump when --entity-type is not given
	 * @param array $entityTypesToExcludeFromOutput entity types that are never dumped
	 */
	public function setDumpEntitiesServices(
		SqlEntityIdPagerFactory $sqlEntityIdPagerFactory,
		array $existingEntityTypes,
		array $entityTypesToExcludeFromOutput
	) {
		$this->sqlEntityIdPagerFactory = $sqlEntityIdPagerFactory;
		$this->existingEntityTypes = $existingEntityTypes;
		$this->entityTypesToExcludeFromOutput = $entityTypesToExcludeFromOutput;
	}

	/**
	 * Create concrete dumper instance
	 * @param resource $output
	 * @return DumpGenerator
	 */
	abstract protected function createDumper( $output );

	/**
	 * Writes a message followed by a newline to the log file opened with
	 * openLogFile(), or via the output() method if no log file is open.
	 *
	 * @see MessageReporter::logMessage()
	 *
	 * @param string $message
	 */
	public function logMessage( $message ) {
		if ( $this->logFileHandle ) {
			fwrite( $this->logFileHandle, "$message\n" );
			// Flush right away so progress is visible while the dump is running.
			fflush( $this->logFileHandle );
		} else {
			$this->output( "$message\n" );
		}
	}

	/**
	 * Opens the given file for use by logMessage(). Any previously opened
	 * log file is closed first. The file is opened in append mode.
	 *
	 * @param string $file use "-" as a shortcut for "php://stdout"
	 *
	 * @throws MWException if the file cannot be opened
	 */
	private function openLogFile( $file ) {
		$this->closeLogFile();

		if ( $file === '-' ) {
			$file = 'php://stdout';
		}

		// wouldn't streams be nice...
		$this->logFileHandle = fopen( $file, 'a' );

		if ( !$this->logFileHandle ) {
			throw new MWException( 'Failed to open log file: ' . $file );
		}
	}

	/**
	 * Closes any currently open file opened with openLogFile().
	 * The STDERR and STDOUT handles are never closed.
	 */
	private function closeLogFile() {
		if ( $this->logFileHandle
			&& $this->logFileHandle !== STDERR
			&& $this->logFileHandle !== STDOUT
		) {
			fclose( $this->logFileHandle );
		}

		$this->logFileHandle = false;
	}

	/**
	 * Runs the dump: reads the options, opens the log and output files,
	 * configures the dumper returned by createDumper() and feeds it the
	 * entity ID stream.
	 *
	 * @throws MWException if the log, output or list file cannot be opened
	 */
	public function execute() {
		//TODO: more validation for options
		$shardingFactor = (int)$this->getOption( 'sharding-factor', 1 );
		$shard = (int)$this->getOption( 'shard', 0 );
		$batchSize = (int)$this->getOption( 'batch-size', 100 );
		$limit = (int)$this->getOption( 'limit', 0 );

		//TODO: Allow injection of an OutputStream for logging
		$this->openLogFile( $this->getOption( 'log', 'php://stderr' ) );

		$outFile = $this->getOption( 'output', 'php://stdout' );

		if ( $outFile === '-' ) {
			$outFile = 'php://stdout';
		}

		$output = fopen( $outFile, 'w' ); //TODO: Allow injection of an OutputStream

		if ( !$output ) {
			throw new MWException( 'Failed to open ' . $outFile . '!' );
		}

		if ( $this->hasOption( 'list-file' ) ) {
			$this->logMessage( "Dumping entities listed in " . $this->getOption( 'list-file' ) );
		}

		$entityTypes = $this->getEntityTypes();
		if ( !$entityTypes ) {
			$this->logMessage( "No entity types to dump" );
			$this->closeLogFile();
			return;
		}

		$this->logMessage( 'Dumping entities of type ' . implode( ', ', $entityTypes ) );

		if ( $shardingFactor ) {
			$this->logMessage( "Dumping shard $shard/$shardingFactor" );
		}

		$dumper = $this->createDumper( $output );
		$dumper->setLimit( $limit );

		// Route the dumper's progress messages through logMessage().
		$progressReporter = new ObservableMessageReporter();
		$progressReporter->registerReporterCallback( [ $this, 'logMessage' ] );
		$dumper->setProgressReporter( $progressReporter );

		$ignored = $this->hasOption( 'ignore-missing' ) ?
			[ EntityLookupException::class ] :
			[];
		$exceptionReporter = new ReportingExceptionHandler( $progressReporter, $ignored );
		$dumper->setExceptionHandler( $exceptionReporter );

		//NOTE: we filter for $entityType twice: filtering in the DB is efficient,
		//      but filtering in the dumper is needed when working from a list file.
		$dumper->setShardingFilter( $shardingFactor, $shard );
		$dumper->setEntityTypesFilter( $entityTypes );
		$dumper->setBatchSize( $batchSize );

		$idStream = $this->makeIdStream( $entityTypes, $exceptionReporter );

		AtEase::suppressWarnings();
		try {
			$dumper->generateDump( $idStream );
		} finally {
			// Always undo the warning suppression and release resources,
			// even when the dump is aborted by an exception.
			AtEase::restoreWarnings();

			if ( $idStream instanceof EntityIdReader ) {
				// close stream / free resources
				$idStream->dispose();
			}

			$this->closeLogFile();
		}
	}

	/**
	 * Registers a MediaWikiServices hook handler that sets $wgDBDefaultGroup
	 * to the repo's "dumpDBDefaultGroup" setting, unless a group was already
	 * chosen with the --dbgroupdefault command line option.
	 *
	 * @inheritDoc
	 */
	public function finalSetup() {
		global $wgHooks;

		parent::finalSetup();

		if ( $this->hasOption( 'dbgroupdefault' ) ) {
			// A group was set via cli, so no need to set the default here
			return;
		}

		$wgHooks['MediaWikiServices'][] = function() {
			global $wgDBDefaultGroup;
			if ( !ExtensionRegistry::getInstance()->isLoaded( 'WikibaseRepository' ) ) {
				// Something instantiates the MediaWikiServices before Wikibase
				// is loaded, nothing we can do here.
				wfWarn( self::class . ': Can not change default DB group.' );
				return;
			}

			// Don't use WikibaseRepo here as this is run very early on, thus
			// the bootstrapping code is not ready yet (T202452).
			$settings = WikibaseSettings::getRepoSettings();
			$dumpDBDefaultGroup = $settings->getSetting( 'dumpDBDefaultGroup' );

			if ( $dumpDBDefaultGroup !== null ) {
				$wgDBDefaultGroup = $dumpDBDefaultGroup;
			}
		};
	}

	/**
	 * Returns the entity types to dump: the types given with --entity-type
	 * (or all existing entity types if the option is absent), minus the
	 * types excluded from output.
	 *
	 * @return array
	 */
	private function getEntityTypes() {
		return array_diff(
			$this->getOption( 'entity-type', $this->existingEntityTypes ),
			$this->entityTypesToExcludeFromOutput
		);
	}

	/**
	 * Builds the stream of entity IDs to dump: read from the --list-file
	 * if one was given, queried from the database otherwise.
	 *
	 * @param string[] $entityTypes
	 * @param ExceptionHandler|null $exceptionReporter
	 *
	 * @throws MWException if the list file cannot be opened
	 * @return EntityIdReader|SqlEntityIdPager a stream of EntityId objects
	 */
	private function makeIdStream( array $entityTypes, ExceptionHandler $exceptionReporter = null ) {
		$listFile = $this->getOption( 'list-file' );

		if ( $listFile !== null ) {
			$stream = $this->makeIdFileStream( $listFile, $exceptionReporter );
		} else {
			$stream = $this->makeIdQueryStream( $entityTypes );
		}

		return $stream;
	}

	/**
	 * Returns EntityIdPager::NO_REDIRECTS.
	 *
	 * @return mixed a EntityIdPager::XXX_REDIRECTS constant
	 */
	protected function getRedirectMode() {
		return EntityIdPager::NO_REDIRECTS;
	}

	/**
	 * Cache flag for use in Store::getEntityRevisionLookup.
	 *
	 * @return string One of Store::LOOKUP_CACHING_RETRIEVE_ONLY and Store::LOOKUP_CACHING_DISABLED
	 */
	protected function getEntityRevisionLookupCacheMode() {
		if ( $this->getOption( 'no-cache', false ) ) {
			return Store::LOOKUP_CACHING_DISABLED;
		}

		return Store::LOOKUP_CACHING_RETRIEVE_ONLY;
	}

	/**
	 * Builds a database-backed ID stream, restricted to the page id range
	 * given by --first-page-id and --last-page-id when those are set.
	 *
	 * @param string[] $entityTypes
	 *
	 * @return SqlEntityIdPager
	 */
	private function makeIdQueryStream( array $entityTypes ) {
		$sqlEntityIdPager = $this->sqlEntityIdPagerFactory->newSqlEntityIdPager( $entityTypes, $this->getRedirectMode() );

		$firstPageId = $this->getOption( 'first-page-id', null );
		if ( $firstPageId ) {
			// The pager is positioned one page before the first page to dump.
			$sqlEntityIdPager->setPosition( intval( $firstPageId ) - 1 );
		}
		$lastPageId = $this->getOption( 'last-page-id', null );
		if ( $lastPageId ) {
			$sqlEntityIdPager->setCutoffPosition( intval( $lastPageId ) );
		}

		return $sqlEntityIdPager;
	}

	/**
	 * Builds an ID stream that reads one entity ID per line from a file.
	 *
	 * @param string $listFile
	 * @param ExceptionHandler|null $exceptionReporter
	 *
	 * @throws MWException
	 * @return EntityIdReader
	 */
	private function makeIdFileStream( $listFile, ExceptionHandler $exceptionReporter = null ) {
		$input = fopen( $listFile, 'r' );

		if ( !$input ) {
			throw new MWException( "Failed to open ID file: $listFile" );
		}

		$stream = new EntityIdReader( new LineReader( $input ), WikibaseRepo::getDefaultInstance()->getEntityIdParser() );

		// The reporter is optional here, but setExceptionHandler() does not
		// accept null, so only install it when one was actually provided.
		if ( $exceptionReporter !== null ) {
			$stream->setExceptionHandler( $exceptionReporter );
		}

		return $stream;
	}

}
||
| 356 |
It seems that null may be passed for a parameter, but the function being called does not appear to accept null.
We recommend adding an additional type check (or disallowing null for the parameter):