Completed
Branch master (86dc85)
by
unknown
23:45
created

ForeignAPIRepo::getThumbError()   B

Complexity

Conditions 4
Paths 2

Size

Total Lines 26
Code Lines 20

Duplication

Lines 13
Ratio 50 %
Metric Value
dl 13
loc 26
rs 8.5806
cc 4
eloc 20
nc 2
nop 5
1
<?php
2
/**
3
 * Foreign repository accessible through api.php requests.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup FileRepo
22
 */
23
24
use MediaWiki\Logger\LoggerFactory;
25
26
/**
27
 * A foreign repository with a remote MediaWiki with an API thingy
28
 *
29
 * Example config:
30
 *
31
 * $wgForeignFileRepos[] = array(
32
 *   'class'                  => 'ForeignAPIRepo',
33
 *   'name'                   => 'shared',
34
 *   'apibase'                => 'https://en.wikipedia.org/w/api.php',
35
 *   'fetchDescription'       => true, // Optional
36
 *   'descriptionCacheExpiry' => 3600,
37
 * );
38
 *
39
 * @ingroup FileRepo
40
 */
41
class ForeignAPIRepo extends FileRepo {
42
	/* This version string is used in the user agent for requests and will help
43
	 * server maintainers in identify ForeignAPI usage.
44
	 * Update the version every time you make breaking or significant changes. */
45
	const VERSION = "2.1";
46
47
	/**
48
	 * List of iiprop values for the thumbnail fetch queries.
49
	 * @since 1.23
50
	 */
51
	protected static $imageInfoProps = [
52
		'url',
53
		'timestamp',
54
	];
55
56
	protected $fileFactory = [ 'ForeignAPIFile', 'newFromTitle' ];
57
	/** @var int Check back with Commons after this expiry */
58
	protected $apiThumbCacheExpiry = 86400; // 1 day (24*3600)
59
60
	/** @var int Redownload thumbnail files after this expiry */
61
	protected $fileCacheExpiry = 2592000; // 1 month (30*24*3600)
62
63
	/** @var array */
64
	protected $mFileExists = [];
65
66
	/** @var array */
67
	private $mQueryCache = [];
68
69
	/**
70
	 * @param array|null $info
71
	 */
72
	function __construct( $info ) {
73
		global $wgLocalFileRepo;
74
		parent::__construct( $info );
75
76
		// https://commons.wikimedia.org/w/api.php
77
		$this->mApiBase = isset( $info['apibase'] ) ? $info['apibase'] : null;
0 ignored issues
show
Bug introduced by
The property mApiBase does not exist. Did you maybe forget to declare it?

In PHP it is possible to write to properties without declaring them. For example, the following is perfectly valid PHP code:

class MyClass { }

$x = new MyClass();
$x->foo = true;

Generally, it is a good practice to explictly declare properties to avoid accidental typos and provide IDE auto-completion:

class MyClass {
    public $foo;
}

$x = new MyClass();
$x->foo = true;
Loading history...
78
79
		if ( isset( $info['apiThumbCacheExpiry'] ) ) {
80
			$this->apiThumbCacheExpiry = $info['apiThumbCacheExpiry'];
81
		}
82
		if ( isset( $info['fileCacheExpiry'] ) ) {
83
			$this->fileCacheExpiry = $info['fileCacheExpiry'];
84
		}
85
		if ( !$this->scriptDirUrl ) {
86
			// hack for description fetches
87
			$this->scriptDirUrl = dirname( $this->mApiBase );
88
		}
89
		// If we can cache thumbs we can guess sane defaults for these
90
		if ( $this->canCacheThumbs() && !$this->url ) {
91
			$this->url = $wgLocalFileRepo['url'];
92
		}
93
		if ( $this->canCacheThumbs() && !$this->thumbUrl ) {
94
			$this->thumbUrl = $this->url . '/thumb';
95
		}
96
	}
97
98
	/**
99
	 * @return string
100
	 * @since 1.22
101
	 */
102
	function getApiUrl() {
103
		return $this->mApiBase;
104
	}
105
106
	/**
107
	 * Per docs in FileRepo, this needs to return false if we don't support versioned
108
	 * files. Well, we don't.
109
	 *
110
	 * @param Title $title
111
	 * @param string|bool $time
112
	 * @return File
113
	 */
114
	function newFile( $title, $time = false ) {
115
		if ( $time ) {
116
			return false;
117
		}
118
119
		return parent::newFile( $title, $time );
120
	}
121
122
	/**
123
	 * @param array $files
124
	 * @return array
125
	 */
126
	function fileExistsBatch( array $files ) {
127
		$results = [];
128
		foreach ( $files as $k => $f ) {
129
			if ( isset( $this->mFileExists[$f] ) ) {
130
				$results[$k] = $this->mFileExists[$f];
131
				unset( $files[$k] );
132
			} elseif ( self::isVirtualUrl( $f ) ) {
133
				# @todo FIXME: We need to be able to handle virtual
134
				# URLs better, at least when we know they refer to the
135
				# same repo.
136
				$results[$k] = false;
137
				unset( $files[$k] );
138
			} elseif ( FileBackend::isStoragePath( $f ) ) {
139
				$results[$k] = false;
140
				unset( $files[$k] );
141
				wfWarn( "Got mwstore:// path '$f'." );
142
			}
143
		}
144
145
		$data = $this->fetchImageQuery( [
146
			'titles' => implode( $files, '|' ),
147
			'prop' => 'imageinfo' ]
148
		);
149
150
		if ( isset( $data['query']['pages'] ) ) {
151
			# First, get results from the query. Note we only care whether the image exists,
152
			# not whether it has a description page.
153
			foreach ( $data['query']['pages'] as $p ) {
0 ignored issues
show
Bug introduced by
The expression $data['query']['pages'] of type string is not traversable.
Loading history...
154
				$this->mFileExists[$p['title']] = ( $p['imagerepository'] !== '' );
155
			}
156
			# Second, copy the results to any redirects that were queried
157 View Code Duplication
			if ( isset( $data['query']['redirects'] ) ) {
158
				foreach ( $data['query']['redirects'] as $r ) {
0 ignored issues
show
Bug introduced by
The expression $data['query']['redirects'] of type string is not traversable.
Loading history...
159
					$this->mFileExists[$r['from']] = $this->mFileExists[$r['to']];
160
				}
161
			}
162
			# Third, copy the results to any non-normalized titles that were queried
163 View Code Duplication
			if ( isset( $data['query']['normalized'] ) ) {
164
				foreach ( $data['query']['normalized'] as $n ) {
0 ignored issues
show
Bug introduced by
The expression $data['query']['normalized'] of type string is not traversable.
Loading history...
165
					$this->mFileExists[$n['from']] = $this->mFileExists[$n['to']];
166
				}
167
			}
168
			# Finally, copy the results to the output
169
			foreach ( $files as $key => $file ) {
170
				$results[$key] = $this->mFileExists[$file];
171
			}
172
		}
173
174
		return $results;
175
	}
176
177
	/**
178
	 * @param string $virtualUrl
179
	 * @return bool
180
	 */
181
	function getFileProps( $virtualUrl ) {
182
		return false;
183
	}
184
185
	/**
186
	 * @param array $query
187
	 * @return string
188
	 */
189
	function fetchImageQuery( $query ) {
190
		global $wgLanguageCode;
191
192
		$query = array_merge( $query,
193
			[
194
				'format' => 'json',
195
				'action' => 'query',
196
				'redirects' => 'true'
197
			] );
198
199
		if ( !isset( $query['uselang'] ) ) { // uselang is unset or null
200
			$query['uselang'] = $wgLanguageCode;
201
		}
202
203
		$data = $this->httpGetCached( 'Metadata', $query );
0 ignored issues
show
Bug introduced by
Are you sure the assignment to $data is correct as $this->httpGetCached('Metadata', $query) (which targets ForeignAPIRepo::httpGetCached()) seems to always return null.

This check looks for function or method calls that always return null and whose return value is assigned to a variable.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
$object = $a->getObject();

The method getObject() can return nothing but null, so it makes no sense to assign that value to a variable.

The reason is most likely that a function or method is imcomplete or has been reduced for debug purposes.

Loading history...
204
205
		if ( $data ) {
206
			return FormatJson::decode( $data, true );
207
		} else {
208
			return null;
209
		}
210
	}
211
212
	/**
213
	 * @param array $data
214
	 * @return bool|array
215
	 */
216
	function getImageInfo( $data ) {
217
		if ( $data && isset( $data['query']['pages'] ) ) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $data of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
218
			foreach ( $data['query']['pages'] as $info ) {
219
				if ( isset( $info['imageinfo'][0] ) ) {
220
					$return = $info['imageinfo'][0];
221
					if ( isset( $info['pageid'] ) ) {
222
						$return['pageid'] = $info['pageid'];
223
					}
224
					return $return;
225
				}
226
			}
227
		}
228
229
		return false;
230
	}
231
232
	/**
233
	 * @param string $hash
234
	 * @return array
235
	 */
236
	function findBySha1( $hash ) {
237
		$results = $this->fetchImageQuery( [
238
			'aisha1base36' => $hash,
239
			'aiprop' => ForeignAPIFile::getProps(),
240
			'list' => 'allimages',
241
		] );
242
		$ret = [];
243
		if ( isset( $results['query']['allimages'] ) ) {
244
			foreach ( $results['query']['allimages'] as $img ) {
0 ignored issues
show
Bug introduced by
The expression $results['query']['allimages'] of type string is not traversable.
Loading history...
245
				// 1.14 was broken, doesn't return name attribute
246
				if ( !isset( $img['name'] ) ) {
247
					continue;
248
				}
249
				$ret[] = new ForeignAPIFile( Title::makeTitle( NS_FILE, $img['name'] ), $this, $img );
250
			}
251
		}
252
253
		return $ret;
254
	}
255
256
	/**
257
	 * @param string $name
258
	 * @param int $width
259
	 * @param int $height
260
	 * @param array $result Out parameter that will be changed by the function.
261
	 * @param string $otherParams
262
	 *
263
	 * @return bool
264
	 */
265
	function getThumbUrl( $name, $width = -1, $height = -1, &$result = null, $otherParams = '' ) {
266
		$data = $this->fetchImageQuery( [
267
			'titles' => 'File:' . $name,
268
			'iiprop' => self::getIIProps(),
269
			'iiurlwidth' => $width,
270
			'iiurlheight' => $height,
271
			'iiurlparam' => $otherParams,
272
			'prop' => 'imageinfo' ] );
273
		$info = $this->getImageInfo( $data );
274
275 View Code Duplication
		if ( $data && $info && isset( $info['thumburl'] ) ) {
276
			wfDebug( __METHOD__ . " got remote thumb " . $info['thumburl'] . "\n" );
277
			$result = $info;
278
279
			return $info['thumburl'];
280
		} else {
281
			return false;
282
		}
283
	}
284
285
	/**
286
	 * @param string $name
287
	 * @param int $width
288
	 * @param int $height
289
	 * @param string $otherParams
290
	 * @param string $lang Language code for language of error
291
	 * @return bool|MediaTransformError
292
	 * @since 1.22
293
	 */
294
	function getThumbError( $name, $width = -1, $height = -1, $otherParams = '', $lang = null ) {
295
		$data = $this->fetchImageQuery( [
296
			'titles' => 'File:' . $name,
297
			'iiprop' => self::getIIProps(),
298
			'iiurlwidth' => $width,
299
			'iiurlheight' => $height,
300
			'iiurlparam' => $otherParams,
301
			'prop' => 'imageinfo',
302
			'uselang' => $lang,
303
		] );
304
		$info = $this->getImageInfo( $data );
305
306 View Code Duplication
		if ( $data && $info && isset( $info['thumberror'] ) ) {
307
			wfDebug( __METHOD__ . " got remote thumb error " . $info['thumberror'] . "\n" );
308
309
			return new MediaTransformError(
310
				'thumbnail_error_remote',
311
				$width,
312
				$height,
313
				$this->getDisplayName(),
314
				$info['thumberror'] // already parsed message from foreign repo
315
			);
316
		} else {
317
			return false;
318
		}
319
	}
320
321
	/**
322
	 * Return the imageurl from cache if possible
323
	 *
324
	 * If the url has been requested today, get it from cache
325
	 * Otherwise retrieve remote thumb url, check for local file.
326
	 *
327
	 * @param string $name Is a dbkey form of a title
328
	 * @param int $width
329
	 * @param int $height
330
	 * @param string $params Other rendering parameters (page number, etc)
331
	 *   from handler's makeParamString.
332
	 * @return bool|string
333
	 */
334
	function getThumbUrlFromCache( $name, $width, $height, $params = "" ) {
335
		$cache = ObjectCache::getMainWANInstance();
336
		// We can't check the local cache using FileRepo functions because
337
		// we override fileExistsBatch(). We have to use the FileBackend directly.
338
		$backend = $this->getBackend(); // convenience
339
340
		if ( !$this->canCacheThumbs() ) {
341
			$result = null; // can't pass "null" by reference, but it's ok as default value
342
			return $this->getThumbUrl( $name, $width, $height, $result, $params );
343
		}
344
		$key = $this->getLocalCacheKey( 'ForeignAPIRepo', 'ThumbUrl', $name );
345
		$sizekey = "$width:$height:$params";
346
347
		/* Get the array of urls that we already know */
348
		$knownThumbUrls = $cache->get( $key );
349 View Code Duplication
		if ( !$knownThumbUrls ) {
350
			/* No knownThumbUrls for this file */
351
			$knownThumbUrls = [];
352
		} else {
353
			if ( isset( $knownThumbUrls[$sizekey] ) ) {
354
				wfDebug( __METHOD__ . ': Got thumburl from local cache: ' .
355
					"{$knownThumbUrls[$sizekey]} \n" );
356
357
				return $knownThumbUrls[$sizekey];
358
			}
359
			/* This size is not yet known */
360
		}
361
362
		$metadata = null;
363
		$foreignUrl = $this->getThumbUrl( $name, $width, $height, $metadata, $params );
364
365
		if ( !$foreignUrl ) {
366
			wfDebug( __METHOD__ . " Could not find thumburl\n" );
367
368
			return false;
369
		}
370
371
		// We need the same filename as the remote one :)
372
		$fileName = rawurldecode( pathinfo( $foreignUrl, PATHINFO_BASENAME ) );
373
		if ( !$this->validateFilename( $fileName ) ) {
374
			wfDebug( __METHOD__ . " The deduced filename $fileName is not safe\n" );
375
376
			return false;
377
		}
378
		$localPath = $this->getZonePath( 'thumb' ) . "/" . $this->getHashPath( $name ) . $name;
379
		$localFilename = $localPath . "/" . $fileName;
380
		$localUrl = $this->getZoneUrl( 'thumb' ) . "/" . $this->getHashPath( $name ) .
381
			rawurlencode( $name ) . "/" . rawurlencode( $fileName );
382
383
		if ( $backend->fileExists( [ 'src' => $localFilename ] )
384
			&& isset( $metadata['timestamp'] )
385
		) {
386
			wfDebug( __METHOD__ . " Thumbnail was already downloaded before\n" );
387
			$modified = $backend->getFileTimestamp( [ 'src' => $localFilename ] );
388
			$remoteModified = strtotime( $metadata['timestamp'] );
389
			$current = time();
390
			$diff = abs( $modified - $current );
391
			if ( $remoteModified < $modified && $diff < $this->fileCacheExpiry ) {
392
				/* Use our current and already downloaded thumbnail */
393
				$knownThumbUrls[$sizekey] = $localUrl;
394
				$cache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );
395
396
				return $localUrl;
397
			}
398
			/* There is a new Commons file, or existing thumbnail older than a month */
399
		}
400
		$thumb = self::httpGet( $foreignUrl );
401
		if ( !$thumb ) {
402
			wfDebug( __METHOD__ . " Could not download thumb\n" );
403
404
			return false;
405
		}
406
407
		# @todo FIXME: Delete old thumbs that aren't being used. Maintenance script?
408
		$backend->prepare( [ 'dir' => dirname( $localFilename ) ] );
409
		$params = [ 'dst' => $localFilename, 'content' => $thumb ];
410
		if ( !$backend->quickCreate( $params )->isOK() ) {
411
			wfDebug( __METHOD__ . " could not write to thumb path '$localFilename'\n" );
412
413
			return $foreignUrl;
414
		}
415
		$knownThumbUrls[$sizekey] = $localUrl;
416
		$cache->set( $key, $knownThumbUrls, $this->apiThumbCacheExpiry );
417
		wfDebug( __METHOD__ . " got local thumb $localUrl, saving to cache \n" );
418
419
		return $localUrl;
420
	}
421
422
	/**
423
	 * @see FileRepo::getZoneUrl()
424
	 * @param string $zone
425
	 * @param string|null $ext Optional file extension
426
	 * @return string
427
	 */
428
	function getZoneUrl( $zone, $ext = null ) {
429
		switch ( $zone ) {
430
			case 'public':
431
				return $this->url;
432
			case 'thumb':
433
				return $this->thumbUrl;
434
			default:
435
				return parent::getZoneUrl( $zone, $ext );
436
		}
437
	}
438
439
	/**
440
	 * Get the local directory corresponding to one of the basic zones
441
	 * @param string $zone
442
	 * @return bool|null|string
443
	 */
444
	function getZonePath( $zone ) {
445
		$supported = [ 'public', 'thumb' ];
446
		if ( in_array( $zone, $supported ) ) {
447
			return parent::getZonePath( $zone );
448
		}
449
450
		return false;
451
	}
452
453
	/**
454
	 * Are we locally caching the thumbnails?
455
	 * @return bool
456
	 */
457
	public function canCacheThumbs() {
458
		return ( $this->apiThumbCacheExpiry > 0 );
459
	}
460
461
	/**
462
	 * The user agent the ForeignAPIRepo will use.
463
	 * @return string
464
	 */
465
	public static function getUserAgent() {
466
		return Http::userAgent() . " ForeignAPIRepo/" . self::VERSION;
467
	}
468
469
	/**
470
	 * Get information about the repo - overrides/extends the parent
471
	 * class's information.
472
	 * @return array
473
	 * @since 1.22
474
	 */
475
	function getInfo() {
476
		$info = parent::getInfo();
477
		$info['apiurl'] = $this->getApiUrl();
478
479
		$query = [
480
			'format' => 'json',
481
			'action' => 'query',
482
			'meta' => 'siteinfo',
483
			'siprop' => 'general',
484
		];
485
486
		$data = $this->httpGetCached( 'SiteInfo', $query, 7200 );
0 ignored issues
show
Bug introduced by
Are you sure the assignment to $data is correct as $this->httpGetCached('SiteInfo', $query, 7200) (which targets ForeignAPIRepo::httpGetCached()) seems to always return null.

This check looks for function or method calls that always return null and whose return value is assigned to a variable.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
$object = $a->getObject();

The method getObject() can return nothing but null, so it makes no sense to assign that value to a variable.

The reason is most likely that a function or method is imcomplete or has been reduced for debug purposes.

Loading history...
487
488
		if ( $data ) {
489
			$siteInfo = FormatJson::decode( $data, true );
490
			$general = $siteInfo['query']['general'];
491
492
			$info['articlepath'] = $general['articlepath'];
493
			$info['server'] = $general['server'];
494
495
			if ( isset( $general['favicon'] ) ) {
496
				$info['favicon'] = $general['favicon'];
497
			}
498
		}
499
500
		return $info;
501
	}
502
503
	/**
504
	 * Like a Http:get request, but with custom User-Agent.
505
	 * @see Http::get
506
	 * @param string $url
507
	 * @param string $timeout
508
	 * @param array $options
509
	 * @return bool|string
510
	 */
511
	public static function httpGet( $url, $timeout = 'default', $options = [] ) {
512
		$options['timeout'] = $timeout;
513
		/* Http::get */
514
		$url = wfExpandUrl( $url, PROTO_HTTP );
515
		wfDebug( "ForeignAPIRepo: HTTP GET: $url\n" );
516
		$options['method'] = "GET";
517
518
		if ( !isset( $options['timeout'] ) ) {
519
			$options['timeout'] = 'default';
520
		}
521
522
		$req = MWHttpRequest::factory( $url, $options, __METHOD__ );
0 ignored issues
show
Security Bug introduced by
It seems like $url defined by wfExpandUrl($url, PROTO_HTTP) on line 514 can also be of type false; however, MWHttpRequest::factory() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
523
		$req->setUserAgent( ForeignAPIRepo::getUserAgent() );
524
		$status = $req->execute();
525
526
		if ( $status->isOK() ) {
527
			return $req->getContent();
528
		} else {
529
			$logger = LoggerFactory::getInstance( 'http' );
530
			$logger->warning(
531
				$status->getWikiText( false, false, 'en' ),
532
				[ 'caller' => 'ForeignAPIRepo::httpGet' ]
533
			);
534
			return false;
535
		}
536
	}
537
538
	/**
539
	 * @return string
540
	 * @since 1.23
541
	 */
542
	protected static function getIIProps() {
543
		return implode( '|', self::$imageInfoProps );
544
	}
545
546
	/**
547
	 * HTTP GET request to a mediawiki API (with caching)
548
	 * @param string $target Used in cache key creation, mostly
549
	 * @param array $query The query parameters for the API request
550
	 * @param int $cacheTTL Time to live for the memcached caching
551
	 * @return null
552
	 */
553
	public function httpGetCached( $target, $query, $cacheTTL = 3600 ) {
554
		if ( $this->mApiBase ) {
555
			$url = wfAppendQuery( $this->mApiBase, $query );
556
		} else {
557
			$url = $this->makeUrl( $query, 'api' );
558
		}
559
560
		if ( !isset( $this->mQueryCache[$url] ) ) {
561
			$data = ObjectCache::getMainWANInstance()->getWithSetCallback(
562
				$this->getLocalCacheKey( get_class( $this ), $target, md5( $url ) ),
563
				$cacheTTL,
564
				function () use ( $url ) {
565
					return ForeignAPIRepo::httpGet( $url );
566
				}
567
			);
568
569
			if ( !$data ) {
570
				return null;
571
			}
572
573
			if ( count( $this->mQueryCache ) > 100 ) {
574
				// Keep the cache from growing infinitely
575
				$this->mQueryCache = [];
576
			}
577
578
			$this->mQueryCache[$url] = $data;
579
		}
580
581
		return $this->mQueryCache[$url];
582
	}
583
584
	/**
585
	 * @param callable $callback
586
	 * @throws MWException
587
	 */
588
	function enumFiles( $callback ) {
589
		throw new MWException( 'enumFiles is not supported by ' . get_class( $this ) );
590
	}
591
592
	/**
593
	 * @throws MWException
594
	 */
595
	protected function assertWritableRepo() {
596
		throw new MWException( get_class( $this ) . ': write operations are not supported.' );
597
	}
598
}
599