CompareParsers   A
last analyzed

Complexity

Total Complexity 20

Size/Duplication

Total Lines 148
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Importance

Changes 0
Metric Value
dl 0
loc 148
rs 10
c 0
b 0
f 0
wmc 20
lcom 1
cbo 4

6 Methods

Rating   Name   Duplication   Size   Complexity  
B __construct() 0 33 1
B checkOptions() 0 30 6
A conclusions() 0 6 2
A stripParameters() 0 7 2
B processRevision() 0 51 6
A checkParserLocally() 0 7 3
1
<?php
0 ignored issues
show
Coding Style Compatibility introduced by
For compatibility and reusability of your code, PSR1 recommends that a file should introduce either new symbols (like classes, functions, etc.) or have side-effects (like outputting something, or including other files), but not both at the same time. The first symbol is defined on line 39 and the first side effect is on line 31.

The PSR-1: Basic Coding Standard recommends that a file should either introduce new symbols, that is classes, functions, constants or similar, or have side effects. Side effects are anything that executes logic, like for example printing output, changing ini settings or writing to a file.

The idea behind this recommendation is that merely auto-loading a class should not change the state of an application. It also promotes a cleaner style of programming and makes your code less prone to errors, because the logic is not spread out all over the place.

To learn more about the PSR-1, please see the PHP-FIG site on the PSR-1.

Loading history...
2
/**
3
 * Take page text out of an XML dump file and render basic HTML out to files.
4
 * This is *NOT* suitable for publishing or offline use; it's intended for
5
 * running comparative tests of parsing behavior using real-world data.
6
 *
7
 * Templates etc are pulled from the local wiki database, not from the dump.
8
 *
9
 * Copyright © 2011 Platonides
10
 * https://www.mediawiki.org/
11
 *
12
 * This program is free software; you can redistribute it and/or modify
13
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17
 * This program is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License along
23
 * with this program; if not, write to the Free Software Foundation, Inc.,
24
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25
 * http://www.gnu.org/copyleft/gpl.html
26
 *
27
 * @file
28
 * @ingroup Maintenance
29
 */
30
31
require_once __DIR__ . '/dumpIterator.php';
32
33
/**
 * Maintenance script that parses every revision it is given with two parser
 * classes and reports the revisions whose rendered text differs.
 *
 * The parser class names come from the 'parser1' and 'parser2' options. A
 * parser class that is not loaded yet is looked up as "<ClassName>.php" in
 * the current working directory.
 *
 * @ingroup Maintenance
 */
class CompareParsers extends DumpIterator {

	/**
	 * Revision total printed by conclusions().
	 *
	 * NOTE(review): nothing in this class ever increments this property, and
	 * static analysis reports that the parent class owns a private property
	 * with the same name. If the parent counts revisions in its own private
	 * copy, this one stays at 0 and conclusions() always reports "out of 0".
	 * Fixing that needs a change in the parent class — confirm against
	 * DumpIterator.
	 *
	 * @var int
	 */
	private $count = 0;

	/*
	 * The properties below used to be created dynamically on first write.
	 * They are declared public so that they stay exactly as accessible as the
	 * dynamic properties were.
	 */

	/** @var string|bool Value of the 'save-failed' option, false when it was not given */
	public $saveFailed = false;

	/** @var bool Whether the 'strip-parameters' option was given */
	public $stripParametersEnabled = false;

	/** @var bool Whether the 'show-parsed-output' option was given */
	public $showParsedOutput = false;

	/** @var bool Whether the 'show-diff' option was given */
	public $showDiff = false;

	/** @var ParserOptions|null Options handed to both parsers; set by checkOptions() */
	public $options = null;

	/** @var int Number of revisions whose two renderings differed */
	public $failed = 0;

	/**
	 * Register the description and the command line options of the script.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Run a file or dump with several parsers' );
		$this->addOption( 'parser1', 'The first parser to compare.', true, true );
		$this->addOption( 'parser2', 'The second parser to compare.', true, true );
		$this->addOption( 'tidy', 'Run tidy on the articles.', false, false );
		$this->addOption(
			'save-failed',
			'Folder in which articles which differ will be stored.',
			false,
			true
		);
		$this->addOption( 'show-diff', 'Show a diff of the two renderings.', false, false );
		$this->addOption(
			'diff-bin',
			'Binary to use for diffing (can also be provided by DIFF env var).',
			false,
			false
		);
		$this->addOption(
			'strip-parameters',
			'Remove parameters of html tags to increase readability.',
			false,
			false
		);
		$this->addOption(
			'show-parsed-output',
			'Show the parsed html if both Parsers give the same output.',
			false,
			false
		);
	}

	/**
	 * Copy the command line options into properties, build the ParserOptions
	 * shared by both parsers and reset the failure counter.
	 */
	public function checkOptions() {
		if ( $this->hasOption( 'save-failed' ) ) {
			$this->saveFailed = $this->getOption( 'save-failed' );
		}

		$this->stripParametersEnabled = $this->hasOption( 'strip-parameters' );
		$this->showParsedOutput = $this->hasOption( 'show-parsed-output' );

		$this->showDiff = $this->hasOption( 'show-diff' );
		if ( $this->showDiff ) {
			// The DIFF environment variable is the fallback for 'diff-bin'.
			$bin = $this->getOption( 'diff-bin', getenv( 'DIFF' ) );
			// Loose comparison on purpose: getenv() yields false when the
			// variable is unset, and that must be skipped like ''.
			if ( $bin != '' ) {
				global $wgDiff;
				$wgDiff = $bin;
			}
		}

		$user = new User();
		$this->options = ParserOptions::newFromUser( $user );

		if ( $this->hasOption( 'tidy' ) ) {
			global $wgUseTidy;
			if ( !$wgUseTidy ) {
				$this->error( 'Tidy was requested but $wgUseTidy is not set in LocalSettings.php', true );
			}
			$this->options->setTidy( true );
		}

		$this->failed = 0;
	}

	/**
	 * Print the summary: how many revisions failed and, when the revision
	 * count is non-zero, the failure rate as a percentage.
	 */
	public function conclusions() {
		$this->error( "{$this->failed} failed revisions out of {$this->count}" );
		if ( $this->count > 0 ) {
			// Scale the ratio to a real percentage; the value is printed with a "%" sign.
			$percent = round( 100 * $this->failed / $this->count, 2 );
			$this->output( " (" . $percent . "%)\n" );
		}
	}

	/**
	 * Strip the attributes of <a> tags when 'strip-parameters' is enabled.
	 *
	 * @param string $text Rendered text
	 * @return string The text unchanged when stripping is disabled, otherwise
	 *  the text with every "<a ...>" opening tag reduced to "<a>"
	 */
	public function stripParameters( $text ) {
		if ( !$this->stripParametersEnabled ) {
			return $text;
		}

		return preg_replace( '/(<a) [^>]+>/', '$1>', $text );
	}

	/**
	 * Callback function for each revision, parse with both parsers and compare
	 *
	 * Revisions whose content model is not wikitext are reported and skipped.
	 * A differing revision increments the failure counter, is optionally
	 * saved to the 'save-failed' folder and optionally diffed.
	 *
	 * @param Revision $rev
	 */
	public function processRevision( $rev ) {
		$title = $rev->getTitle();

		$parser1Name = $this->getOption( 'parser1' );
		$parser2Name = $this->getOption( 'parser2' );

		self::checkParserLocally( $parser1Name );
		self::checkParserLocally( $parser2Name );

		$parser1 = new $parser1Name();
		$parser2 = new $parser2Name();

		$content = $rev->getContent();

		if ( $content->getModel() !== CONTENT_MODEL_WIKITEXT ) {
			$this->error( "Page {$title->getPrefixedText()} does not contain wikitext "
				. "but {$content->getModel()}\n" );

			return;
		}

		$text = strval( $content->getNativeData() );

		$output1 = $parser1->parse( $text, $title, $this->options );
		$output2 = $parser2->parse( $text, $title, $this->options );

		$html1 = $output1->getText();
		$html2 = $output2->getText();

		// Strict comparison: with a loose one, two different numeric-looking
		// strings (e.g. "1e3" and "1000") would be treated as equal.
		if ( $html1 !== $html2 ) {
			$this->failed++;
			$this->error( "Parsing for {$title->getPrefixedText()} differs\n" );

			if ( $this->saveFailed ) {
				$path = $this->saveFailed . '/' . rawurlencode( $title->getPrefixedText() ) . ".txt";
				if ( file_put_contents( $path, $text ) === false ) {
					$this->error( "Could not save the text of {$title->getPrefixedText()} to $path\n" );
				}
			}
			if ( $this->showDiff ) {
				// NOTE(review): wfDiff() is reported as deprecated since 1.25 in
				// favour of DiffEngine/UnifiedDiffFormatter. It is kept here
				// because checkOptions() configures $wgDiff, presumably for
				// this call — confirm before replacing it.
				$this->output( wfDiff(
					$this->stripParameters( $html1 ),
					$this->stripParameters( $html2 ),
					''
				) );
			}
		} else {
			$this->output( $title->getPrefixedText() . "\tOK\n" );

			if ( $this->showParsedOutput ) {
				$this->output( $this->stripParameters( $html1 ) );
			}
		}
	}

	/**
	 * Register a parser class for autoloading when it is not loaded yet and a
	 * file named after it exists in the current working directory.
	 *
	 * @param string $parserName Parser class name
	 */
	private static function checkParserLocally( $parserName ) {
		/* Look for the parser in a file appropriately named in the current folder */
		if ( !class_exists( $parserName ) && file_exists( "$parserName.php" ) ) {
			global $wgAutoloadClasses;
			$wgAutoloadClasses[$parserName] = realpath( '.' ) . "/$parserName.php";
		}
	}
}
187
188
// Publish the script class name in $maintClass, then include the file named
// by RUN_MAINTENANCE_IF_MAIN (presumably the runner that reads $maintClass;
// the constant is defined outside this file — confirm).
$maintClass = 'CompareParsers';
require_once RUN_MAINTENANCE_IF_MAIN;
190