Completed
Branch master (02e057)
by
unknown
27:42
created

WordLevelDiff::split()   B

Complexity

Conditions 6
Paths 11

Size

Total Lines 26
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
eloc 18
c 1
b 0
f 0
nc 11
nop 1
dl 0
loc 26
rs 8.439
1
<?php
2
/**
3
 * Copyright © 2000, 2001 Geoffrey T. Dairiki <[email protected]>
4
 * You may copy this code freely under the conditions of the GPL.
5
 *
6
 * This program is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License along
17
 * with this program; if not, write to the Free Software Foundation, Inc.,
18
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
 * http://www.gnu.org/copyleft/gpl.html
20
 *
21
 * @file
22
 * @ingroup DifferenceEngine
23
 * @defgroup DifferenceEngine DifferenceEngine
24
 */
25
26
use MediaWiki\Diff\ComplexityException;
27
use MediaWiki\Diff\WordAccumulator;
28
29
/**
30
 * Performs a word-level diff on several lines
31
 *
32
 * @ingroup DifferenceEngine
33
 */
34
class WordLevelDiff extends \Diff {
	/**
	 * @inheritdoc
	 */
	protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

	/**
	 * Build a word-level diff between two groups of lines.
	 *
	 * Both inputs are tokenised with split(). The whitespace-stripped tokens are
	 * passed to the parent constructor for comparison; afterwards the token arrays
	 * held by each edit are swapped for the corresponding unstripped tokens, so that
	 * orig() and closing() can reproduce the original spacing.
	 *
	 * @param string[] $linesBefore
	 * @param string[] $linesAfter
	 */
	public function __construct( $linesBefore, $linesAfter ) {
		list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
		list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );

		try {
			parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
		} catch ( ComplexityException $ex ) {
			// Too hard to diff, just show whole paragraph(s) as changed.
			// The fallback edit has to carry word tokens, not the raw lines: the
			// loop below sizes every slice with count() of the edit's array, so an
			// edit holding lines would keep only the first count( lines ) words.
			$this->edits = [ new DiffOpChange( $wordsBeforeStripped, $wordsAfterStripped ) ];
		}

		// Walk the edits in order, replacing each stripped token run with the
		// unstripped tokens found at the same offsets.
		$beforeOffset = 0;
		$afterOffset = 0;
		foreach ( $this->edits as $edit ) {
			// Only array values are remapped; anything else is left as it is.
			if ( is_array( $edit->orig ) ) {
				$edit->orig = array_slice( $wordsBefore, $beforeOffset, count( $edit->orig ) );
				$beforeOffset += count( $edit->orig );
			}

			if ( is_array( $edit->closing ) ) {
				$edit->closing = array_slice( $wordsAfter, $afterOffset, count( $edit->closing ) );
				$afterOffset += count( $edit->closing );
			}
		}
	}

	/**
	 * Tokenise lines into two parallel word lists.
	 *
	 * The first list holds each token including one optional trailing whitespace
	 * character; the second holds the same tokens without it. A "\n" entry is added
	 * to both lists between consecutive lines (not before the first line).
	 *
	 * @param string[] $lines
	 *
	 * @return array[] Two-element array: [ words, stripped words ]
	 */
	private function split( $lines ) {
		$words = [];
		$stripped = [];
		$first = true;
		foreach ( $lines as $line ) {
			if ( $first ) {
				$first = false;
			} else {
				$words[] = "\n";
				$stripped[] = "\n";
			}

			// Group 1 is a run of non-newline whitespace, a run of word bytes
			// (digits, underscore, ASCII letters, bytes 0x80-0xff), or any single
			// character. It may be followed by one non-newline whitespace
			// character, which is part of the full match but not of group 1.
			// NOTE(review): under the /x flag "(?!< \n)" is a negative lookahead
			// for "<" followed by a newline; it may have been meant as the
			// lookbehind "(?<!\n)". Left unchanged - confirm before altering.
			$m = [];
			if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
				$line, $m ) ) {
				foreach ( $m[0] as $word ) {
					$words[] = $word;
				}
				foreach ( $m[1] as $stripped_word ) {
					$stripped[] = $stripped_word;
				}
			}
		}

		return [ $words, $stripped ];
	}

	/**
	 * Lines for the "before" side, with non-copied words added under the 'del' tag.
	 *
	 * @return string[]
	 */
	public function orig() {
		return $this->accumulateSide( 'orig', 'del' );
	}

	/**
	 * Lines for the "after" side, with non-copied words added under the 'ins' tag.
	 *
	 * @return string[]
	 */
	public function closing() {
		return $this->accumulateSide( 'closing', 'ins' );
	}

	/**
	 * Feed one side of every edit into a WordAccumulator and return its lines.
	 *
	 * @param string $side Edit property to read: 'orig' or 'closing'
	 * @param string $tag Tag passed to addWords() for words of non-copy edits
	 *
	 * @return string[]
	 */
	private function accumulateSide( $side, $tag ) {
		$accumulator = new WordAccumulator;

		foreach ( $this->edits as $edit ) {
			$sideWords = $edit->$side;
			if ( $edit->type == 'copy' ) {
				$accumulator->addWords( $sideWords );
			} elseif ( $sideWords ) {
				// Skip sides that are empty or absent for this edit.
				$accumulator->addWords( $sideWords, $tag );
			}
		}

		return $accumulator->getLines();
	}

}
143