1 | <?php |
||
9 | class WikiTextStructure { |
||
10 | /** |
||
11 | * @var string |
||
12 | */ |
||
13 | private $openingText; |
||
14 | /** |
||
15 | * @var string |
||
16 | */ |
||
17 | private $allText; |
||
18 | /** |
||
19 | * @var string[] |
||
20 | */ |
||
21 | private $auxText = []; |
||
22 | /** |
||
23 | * @var ParserOutput |
||
24 | */ |
||
25 | private $parserOutput; |
||
26 | |||
27 | /** |
||
28 | * @var string[] selectors to elements that are excluded entirely from search |
||
29 | */ |
||
30 | private $excludedElementSelectors = [ |
||
31 | 'audio', 'video', // "it looks like you don't have javascript enabled..." |
||
32 | // do not need to index |
||
33 | 'sup.reference', // The [1] for references |
||
34 | '.mw-cite-backlink', // The ↑ next to references in the references section |
||
35 | 'h1', 'h2', 'h3', // Headings are already indexed in their own field. |
||
36 | 'h5', 'h6', 'h4', |
||
37 | '.autocollapse', // Collapsed fields are hidden by default so we don't want them |
||
38 | // showing up. |
||
39 | ]; |
||
40 | |||
41 | /** |
||
42 | * @var string[] selectors to elements that are considered auxiliary to article text for search |
||
43 | */ |
||
44 | private $auxiliaryElementSelectors = [ |
||
45 | '.thumbcaption', // Thumbnail captions aren't really part of the text proper |
||
46 | 'table', // Neither are tables |
||
47 | '.rellink', // Common style for "See also:". |
||
48 | '.dablink', // Common style for calling out helpful links at the top |
||
49 | // of the article. |
||
50 | '.searchaux', // New class users can use to mark stuff as auxiliary to searches. |
||
51 | ]; |
||
52 | |||
53 | /** |
||
54 | * WikiTextStructure constructor. |
||
55 | * @param ParserOutput $parserOutput |
||
56 | */ |
||
57 | public function __construct( ParserOutput $parserOutput ) { |
||
60 | |||
/**
 * Get headings on the page.
 *
 * Reference markers are stripped from each heading first. We can't use HTML
 * filtering because the references come back as <sup> tags without a class.
 * To keep from breaking stuff like
 *  ==Applicability of the strict mass–energy equivalence formula, ''E'' = ''mc''<sup>2</sup>==
 * we don't remove the whole <sup> tag. We also don't want to strip the <sup> tag
 * and remove everything that looks like [2] because a heading may legitimately
 * contain such text (say, a band named "Word [2] Foo"). So we only strip things
 * that look like <sup> tags wrapping a reference. And since the data looks like:
 *  Reference in heading <sup>[1]</sup><sup>[2]</sup>
 * we can not really use HtmlFormatter as we have no suitable selector.
 *
 * @return string[] Tag-free, trimmed heading texts, minus the ignored headings
 */
public function headings() {
	$headings = [];
	$ignoredHeadings = $this->getIgnoredHeadings();
	foreach ( $this->parserOutput->getSections() as $section ) {
		$heading = $section[ 'line' ];

		// Some wikis wrap the brackets in a span:
		// http://en.wikipedia.org/wiki/MediaWiki:Cite_reference_link
		$heading = preg_replace( '/<\/?span>/', '', $heading );
		// Normalize entity-encoded brackets to literal [] so the following regexp
		// would work. This is a plain literal substitution, so no regex is needed
		// (a bare "[" used as a PCRE pattern is an unterminated character class).
		$heading = str_replace( [ '&#91;', '&#93;' ], [ '[', ']' ], $heading );
		// Drop only <sup> elements whose whole content is a bracketed number.
		$heading = preg_replace( '/<sup>\s*\[\s*\d+\s*\]\s*<\/sup>/is', '', $heading );

		// Strip tags from the heading or else we'll display them (escaped) in search results
		$heading = trim( Sanitizer::stripAllTags( $heading ) );

		// Note that we don't take the level of the heading into account - all headings are equal.
		// Except the ones we ignore. Strict comparison keeps numeric-looking headings
		// (e.g. "1e3" vs "1000") from being treated as equal.
		if ( !in_array( $heading, $ignoredHeadings, true ) ) {
			$headings[] = $heading;
		}
	}
	return $headings;
}
||
98 | |||
99 | /** |
||
100 | * Parse a message content into an array. This function is generally used to |
||
101 | * parse settings stored as i18n messages (see search-ignored-headings). |
||
102 | * |
||
103 | * @param string $message |
||
104 | * @return string[] |
||
105 | */ |
||
106 | public static function parseSettingsInMessage( $message ) { |
||
113 | |||
114 | /** |
||
115 | * Get list of heading to ignore. |
||
116 | * @return string[] |
||
117 | */ |
||
118 | private function getIgnoredHeadings() { |
||
134 | |||
135 | /** |
||
136 | * Extract parts of the text - opening, main and auxiliary. |
||
137 | */ |
||
138 | private function extractWikitextParts() { |
||
175 | |||
176 | /** |
||
177 | * Get text before first heading. |
||
178 | * @param string $text |
||
179 | * @return string|null |
||
180 | */ |
||
181 | private function extractHeadingBeforeFirstHeading( $text ) { |
||
209 | |||
210 | /** |
||
211 | * Get opening text |
||
212 | * @return string |
||
213 | */ |
||
214 | public function getOpeningText() { |
||
218 | |||
219 | /** |
||
220 | * Get main text |
||
221 | * @return string |
||
222 | */ |
||
223 | public function getMainText() { |
||
227 | |||
228 | /** |
||
229 | * Get auxiliary text |
||
230 | * @return string[] |
||
231 | */ |
||
232 | public function getAuxiliaryText() { |
||
236 | |||
237 | /** |
||
238 | * Get the defaultsort property |
||
239 | * @return string|null |
||
240 | */ |
||
241 | public function getDefaultSort() { |
||
244 | } |
||
245 |
Let’s assume that you have a directory layout like this:
and let’s assume the following content of `Bar.php`:

If both files `OtherDir/Foo.php` and `SomeDir/Foo.php` are loaded in the same runtime, you will see a PHP error such as the following:

    PHP Fatal error: Cannot use SomeDir\Foo as Foo because the name is already in use in OtherDir/Foo.php
However, as `OtherDir/Foo.php` does not necessarily have to be loaded, and the error is only triggered if it is loaded before `OtherDir/Bar.php`, this problem might go unnoticed for a while. In order to prevent this error from surfacing, you must import the namespace with a different alias: