Completed
Branch master (939199)
by
unknown
39:35
created

includes/media/DjVu.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Handler for DjVu images.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Media
22
 */
23
24
/**
25
 * Handler for DjVu images
26
 *
27
 * @ingroup Media
28
 */
29
class DjVuHandler extends ImageHandler {
30
	/** Byte threshold (10 MiB) that isExpensiveToThumbnail() compares file sizes against. */
	const EXPENSIVE_SIZE_LIMIT = 10485760; // 10MiB
31
32
	/**
	 * Report whether DjVu support is configured.
	 *
	 * The handler counts as enabled when $wgDjvuRenderer is set together with
	 * at least one of $wgDjvuDump or $wgDjvuToXML. A debug line is written
	 * when that is not the case.
	 *
	 * @return bool True when the required globals are set, false otherwise.
	 */
	function isEnabled() {
		global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML;

		$hasDumpOrXml = $wgDjvuDump || $wgDjvuToXML;
		if ( $wgDjvuRenderer && $hasDumpOrXml ) {
			return true;
		}

		wfDebug( "DjVu is disabled, please set \$wgDjvuRenderer and \$wgDjvuDump\n" );

		return false;
	}
45
46
	/**
	 * Whether the file has to be rendered; this handler always answers yes.
	 *
	 * @param File $file Not inspected.
	 * @return bool Always true.
	 */
	public function mustRender( $file ) {
		return true;
	}
53
54
	/**
	 * Tell whether thumbnailing the file should be treated as expensive.
	 *
	 * A file is expensive when its size is strictly greater than
	 * EXPENSIVE_SIZE_LIMIT (resolved with late static binding, so subclasses
	 * may override the constant).
	 *
	 * @param File $file
	 * @return bool
	 */
	public function isExpensiveToThumbnail( $file ) {
		$limit = static::EXPENSIVE_SIZE_LIMIT;

		return $file->getSize() > $limit;
	}
62
63
	/**
	 * Whether the file is a multi-page document; this handler always answers yes.
	 *
	 * @param File $file Not inspected.
	 * @return bool Always true.
	 */
	public function isMultiPage( $file ) {
		return true;
	}
70
71
	/**
	 * Map the 'img_width' and 'img_page' keys to the handler parameter
	 * names 'width' and 'page'.
	 *
	 * @return array Keys 'img_width' and 'img_page', in that order.
	 */
	public function getParamMap() {
		$map = [];
		$map['img_width'] = 'width';
		$map['img_page'] = 'page';

		return $map;
	}
80
81
	/**
	 * Validate a single handler parameter.
	 *
	 * Only 'width', 'height' and 'page' are accepted, and the value must not
	 * be less than or equal to zero. A 'page' value must additionally be a
	 * plain integer once trimmed.
	 *
	 * @param string $name
	 * @param mixed $value
	 * @return bool True when the parameter is acceptable.
	 */
	public function validateParam( $name, $value ) {
		if ( $name === 'page' ) {
			$asInteger = (string)intval( $value );
			if ( trim( $value ) !== $asInteger ) {
				// Extra junk on the end of page, probably actually a caption
				// e.g. [[File:Foo.djvu|thumb|Page 3 of the document shows foo]]
				return false;
			}
		}

		if ( !in_array( $name, [ 'width', 'height', 'page' ] ) ) {
			return false;
		}

		// Written as a negated "<=" so that every value compares exactly as before.
		return !( $value <= 0 );
	}
102
103
	/**
	 * Build the "page<N>-<W>px" parameter string.
	 *
	 * @param array $params Must contain 'width'; 'page' defaults to 1 when unset.
	 * @return bool|string The parameter string, or false when 'width' is unset.
	 */
	public function makeParamString( $params ) {
		if ( !isset( $params['width'] ) ) {
			return false;
		}

		$page = 1;
		if ( isset( $params['page'] ) ) {
			$page = $params['page'];
		}

		return 'page' . $page . '-' . $params['width'] . 'px';
	}
115
116
	/**
	 * Parse a "page<N>-<W>px" parameter string back into its parts.
	 *
	 * @param string $str
	 * @return array|bool [ 'width' => ..., 'page' => ... ] holding the matched
	 *  digit strings, or false when $str does not have the expected shape.
	 */
	public function parseParamString( $str ) {
		$matches = [];
		if ( !preg_match( '/^page(\d+)-(\d+)px$/', $str, $matches ) ) {
			return false;
		}

		list( , $page, $width ) = $matches;

		return [ 'width' => $width, 'page' => $page ];
	}
128
129
	/**
	 * Reduce a parameter array to just its 'width' and 'page' entries.
	 *
	 * @param array $params Must contain 'width' and 'page'.
	 * @return array [ 'width' => ..., 'page' => ... ]
	 */
	function getScriptParams( $params ) {
		$width = $params['width'];
		$page = $params['page'];

		return [ 'width' => $width, 'page' => $page ];
	}
139
140
	/**
	 * Render one page of a DjVu file to a thumbnail by shelling out to
	 * $wgDjvuRenderer, optionally piped through $wgDjvuPostProcessor.
	 *
	 * Steps: normalise the parameters, return early for TRANSFORM_LATER,
	 * create the destination directory, obtain a local copy of the source
	 * (through a pool counter for files of 1e7 bytes or more), run the
	 * command, and discard bad output files.
	 *
	 * @param File $image
	 * @param string $dstPath
	 * @param string $dstUrl
	 * @param array $params
	 * @param int $flags Bitfield; only self::TRANSFORM_LATER is checked here.
	 * @return MediaTransformError|ThumbnailImage|TransformParameterError
	 */
	function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) {
		global $wgDjvuRenderer, $wgDjvuPostProcessor;

		if ( !$this->normaliseParams( $image, $params ) ) {
			return new TransformParameterError( $params );
		}

		$width = $params['width'];
		$height = $params['height'];
		$page = $params['page'];
		// The same reduced parameter set is handed to every ThumbnailImage below.
		$thumbParams = [
			'width' => $width,
			'height' => $height,
			'page' => $page
		];

		if ( $flags & self::TRANSFORM_LATER ) {
			return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
		}

		$dstDir = dirname( $dstPath );
		if ( !wfMkdirParents( $dstDir, null, __METHOD__ ) ) {
			$reason = wfMessage( 'thumbnail_dest_directory' )->text();

			return new MediaTransformError( 'thumbnail_error', $width, $height, $reason );
		}

		// Get local copy source for shell scripts
		// Thumbnail extraction is very inefficient for large files.
		// Provide a way to pool count limit the number of downloaders.
		$fetchLocalCopy = function () use ( $image ) {
			return $image->getLocalRefPath();
		};
		if ( $image->getSize() >= 1e7 ) { // 10MB
			$work = new PoolCounterWorkViaCallback(
				'GetLocalFileCopy',
				sha1( $image->getName() ),
				[ 'doWork' => $fetchLocalCopy ]
			);
			$srcPath = $work->execute();
		} else {
			$srcPath = $fetchLocalCopy();
		}

		if ( $srcPath === false ) { // Failed to get local copy
			$logLine = sprintf(
				'Thumbnail failed on %s: could not get local copy of "%s"',
				wfHostname(),
				$image->getName()
			);
			wfDebugLog( 'thumbnail', $logLine );

			return new MediaTransformError(
				'thumbnail_error',
				$width,
				$height,
				wfMessage( 'filemissing' )->text()
			);
		}

		# Use a subshell (brackets) to aggregate stderr from both pipeline commands
		# before redirecting it to the overall stdout. This works in both Linux and Windows XP.
		$sizeArg = "-size={$params['physicalWidth']}x{$params['physicalHeight']}";
		$pipeline = wfEscapeShellArg(
			$wgDjvuRenderer,
			'-format=ppm',
			"-page={$page}",
			$sizeArg,
			$srcPath
		);
		if ( $wgDjvuPostProcessor ) {
			$pipeline .= " | {$wgDjvuPostProcessor}";
		}
		$cmd = '(' . $pipeline . ' > ' . wfEscapeShellArg( $dstPath ) . ') 2>&1';
		wfDebug( __METHOD__ . ": $cmd\n" );

		$retval = '';
		$err = wfShellExec( $cmd, $retval );

		$removed = $this->removeBadFile( $dstPath, $retval );
		if ( $retval == 0 && !$removed ) {
			return new ThumbnailImage( $image, $dstUrl, $dstPath, $thumbParams );
		}

		$this->logErrorForExternalProcess( $retval, $err, $cmd );

		return new MediaTransformError( 'thumbnail_error', $width, $height, $err );
	}
234
235
	/**
	 * Return a DjVuImage for $path, caching it on $image when one is given.
	 *
	 * The instance is stored in the dynamically declared $image->dejaImage
	 * property. When $image is falsy nothing is cached and a fresh DjVuImage
	 * is returned.
	 *
	 * @param File|FSFile $image
	 * @param string $path
	 * @return DjVuImage
	 */
	function getDjVuImage( $image, $path ) {
		if ( !$image ) {
			return new DjVuImage( $path );
		}

		if ( isset( $image->dejaImage ) ) {
			return $image->dejaImage;
		}

		$deja = new DjVuImage( $path );
		$image->dejaImage = $deja;

		return $deja;
	}
253
254
	/**
	 * Get metadata, unserializing it if necessary.
	 *
	 * Three stored shapes are handled:
	 *  - a raw XML string (old style), returned unchanged;
	 *  - a serialized array with an 'xml' key, whose value is returned;
	 *  - a serialized array with an 'error' key, which yields false.
	 * Anything that does not unserialize to an array is returned unchanged.
	 *
	 * @param File $file The DjVu file in question
	 * @return string|bool XML metadata as a string, or false when the stored
	 *  metadata records an extraction error.
	 * @throws MWException When the unserialized array has neither an 'error'
	 *  nor an 'xml' key.
	 */
	private function getUnserializedMetadata( File $file ) {
		$metadata = $file->getMetadata();
		// Compare the first five bytes: '<?xml' is five characters long, so the
		// previous three-byte slice could never match and this shortcut was dead.
		if ( substr( $metadata, 0, 5 ) === '<?xml' ) {
			// Old style. Not serialized but instead just a raw string of XML.
			return $metadata;
		}

		MediaWiki\suppressWarnings();
		$unser = unserialize( $metadata );
		MediaWiki\restoreWarnings();
		if ( is_array( $unser ) ) {
			if ( isset( $unser['error'] ) ) {
				return false;
			} elseif ( isset( $unser['xml'] ) ) {
				return $unser['xml'];
			} else {
				// Should never ever reach here.
				throw new MWException( "Error unserializing DjVu metadata." );
			}
		}

		// unserialize failed. Guess it wasn't really serialized after all,
		return $metadata;
	}
285
286
	/**
	 * Parse the DjVu XML metadata and cache the resulting trees on $image.
	 *
	 * Two dynamically declared properties of $image serve as the cache:
	 * dejaMetaTree (the 'DjVuXML' child, or the whole document when the root
	 * is not 'mw-djvu') and djvuTextTree (the 'DjVuTxt' child). Both are set
	 * to false before parsing so that a failed parse is not retried.
	 *
	 * @param File $image
	 * @param bool $gettext When true return the text tree (djvuTextTree),
	 *  otherwise the structure tree (dejaMetaTree). (Default: false)
	 * @return bool|SimpleXMLElement The requested tree, or false when the
	 *  metadata is invalid, cannot be parsed, or lacks that tree.
	 */
	public function getMetaTree( $image, $gettext = false ) {
		// Name of the cache property that answers this request.
		$wanted = $gettext ? 'djvuTextTree' : 'dejaMetaTree';
		if ( isset( $image->{$wanted} ) ) {
			return $image->{$wanted};
		}

		$metadata = $this->getUnserializedMetadata( $image );
		if ( !$this->isMetadataValid( $image, $metadata ) ) {
			wfDebug( "DjVu XML metadata is invalid or missing, should have been fixed in upgradeRow\n" );

			return false;
		}

		MediaWiki\suppressWarnings();
		try {
			// Set to false rather than null to avoid further attempts
			$image->dejaMetaTree = false;
			$image->djvuTextTree = false;
			$tree = new SimpleXMLElement( $metadata, LIBXML_PARSEHUGE );
			if ( $tree->getName() == 'mw-djvu' ) {
				/** @var SimpleXMLElement $child */
				foreach ( $tree->children() as $child ) {
					// @todo File::djvuTextTree and File::dejaMetaTree are declared
					// dynamically. Add a public File::$data to facilitate this?
					switch ( $child->getName() ) {
						case 'DjVuTxt':
							$image->djvuTextTree = $child;
							break;
						case 'DjVuXML':
							$image->dejaMetaTree = $child;
							break;
					}
				}
			} else {
				$image->dejaMetaTree = $tree;
			}
		} catch ( Exception $e ) {
			wfDebug( "Bogus multipage XML metadata on '{$image->getName()}'\n" );
		}
		MediaWiki\restoreWarnings();

		return $image->{$wanted};
	}
337
338
	/**
	 * Return whatever DjVuImage::getImageSize() reports for the file.
	 *
	 * @param File|FSFile $image Used as the cache holder by getDjVuImage().
	 * @param string $path
	 * @return mixed Result of DjVuImage::getImageSize().
	 */
	function getImageSize( $image, $path ) {
		$deja = $this->getDjVuImage( $image, $path );

		return $deja->getImageSize();
	}
341
342
	/**
	 * Return the extension and MIME type used for thumbnails.
	 *
	 * The result depends only on $wgDjvuOutputExtension; none of the
	 * arguments are consulted. The MIME lookup is memoised in a static
	 * variable for the lifetime of the request.
	 *
	 * @param string $ext Unused.
	 * @param string $mime Unused.
	 * @param array|null $params Unused.
	 * @return array [ $wgDjvuOutputExtension, MIME type(s) guessed for it ]
	 */
	function getThumbType( $ext, $mime, $params = null ) {
		global $wgDjvuOutputExtension;
		// Named differently from the $mime parameter: the previous "static $mime"
		// redeclared the parameter and silently discarded the caller's argument.
		static $thumbMime;
		if ( !isset( $thumbMime ) ) {
			$magic = MimeMagic::singleton();
			$thumbMime = $magic->guessTypesForExtension( $wgDjvuOutputExtension );
		}

		return [ $wgDjvuOutputExtension, $thumbMime ];
	}
352
353
	/**
	 * Extract the file's metadata and return it as a serialized array.
	 *
	 * @param File|FSFile $image
	 * @param string $path
	 * @return string Serialized [ 'xml' => ... ] on success, or serialized
	 *  [ 'error' => ... ] when retrieveMetaData() returns false.
	 */
	function getMetadata( $image, $path ) {
		wfDebug( "Getting DjVu metadata for $path\n" );

		$xml = $this->getDjVuImage( $image, $path )->retrieveMetaData();

		// The error marker is a special value so that we don't repetitively
		// try and decode a broken file.
		$payload = ( $xml === false )
			? [ 'error' => 'Error extracting metadata' ]
			: [ 'xml' => $xml ];

		return serialize( $payload );
	}
364
365
	/**
	 * Identify the metadata format produced by this handler.
	 *
	 * @param File $image Not inspected.
	 * @return string Always 'djvuxml'.
	 */
	function getMetadataType( $image ) {
		return 'djvuxml';
	}
368
369
	/**
	 * Check that the metadata is neither empty nor a serialized empty array.
	 *
	 * @param File $image Not inspected.
	 * @param string $metadata
	 * @return bool
	 */
	function isMetadataValid( $image, $metadata ) {
		if ( empty( $metadata ) ) {
			return false;
		}

		return $metadata != serialize( [] );
	}
372
373
	/**
	 * Number of pages in the document.
	 *
	 * @param File $image
	 * @return int|bool The page count, or false when no dimension
	 *  information is available.
	 */
	function pageCount( File $image ) {
		$info = $this->getDimensionInfo( $image );
		if ( !$info ) {
			return false;
		}

		return $info['pageCount'];
	}
378
379
	/**
	 * Look up the dimensions recorded for one page.
	 *
	 * @param File $image
	 * @param int $page 1-based page number.
	 * @return array|bool The stored entry for that page (an array with
	 *  'width' and 'height', or false), or false when the page is unknown.
	 */
	function getPageDimensions( File $image, $page ) {
		$index = $page - 1; // MW starts pages at 1

		$info = $this->getDimensionInfo( $image );
		if ( !$info ) {
			return false;
		}
		if ( !isset( $info['dimensionsByPage'][$index] ) ) {
			return false;
		}

		return $info['dimensionsByPage'][$index];
	}
389
390
	/**
	 * Compute, and cache in the main WAN cache, the page count and
	 * per-page dimensions of a file.
	 *
	 * The cache key is built from the file's SHA-1. Pages are counted with
	 * the '//OBJECT' XPath query, and each page's dimensions are read from
	 * the matching BODY[0]->OBJECT entry of the metadata tree.
	 *
	 * @param File $file
	 * @return array|bool [ 'pageCount' => int, 'dimensionsByPage' => array ],
	 *  or false when no metadata tree is available.
	 */
	protected function getDimensionInfo( File $file ) {
		$cache = ObjectCache::getMainWANInstance();
		$key = $cache->makeKey( 'file-djvu', 'dimensions', $file->getSha1() );

		$compute = function () use ( $file ) {
			$tree = $this->getMetaTree( $file );
			if ( !$tree ) {
				return false;
			}

			$pageCount = count( $tree->xpath( '//OBJECT' ) );
			$dimsByPage = [];
			for ( $idx = 0; $idx < $pageCount; $idx++ ) {
				$object = $tree->BODY[0]->OBJECT[$idx];
				$dimsByPage[$idx] = $object
					? [
						'width' => (int)$object['width'],
						'height' => (int)$object['height'],
					]
					: false;
			}

			return [ 'pageCount' => $pageCount, 'dimensionsByPage' => $dimsByPage ];
		};

		return $cache->getWithSetCallback(
			$key,
			$cache::TTL_INDEFINITE,
			$compute,
			[ 'pcTTL' => $cache::TTL_INDEFINITE ]
		);
	}
420
421
	/**
	 * Fetch the text recorded for one page in the text tree.
	 *
	 * @param File $image
	 * @param int $page Page number to get information for (1-based)
	 * @return bool|string The 'value' attribute of the page's PAGE node, or
	 *  false when there is no text tree or no such page.
	 */
	function getPageText( File $image, $page ) {
		$tree = $this->getMetaTree( $image, true );
		if ( !$tree ) {
			return false;
		}

		$pageNode = $tree->BODY[0]->PAGE[$page - 1];

		return $pageNode ? $pageNode['value'] : false;
	}
441
}
442