This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace MediaWiki\Site; |
||
4 | |||
5 | use FormatJson; |
||
6 | use Http; |
||
7 | use UtfNormal\Validator; |
||
8 | |||
9 | /** |
||
10 | * Service for normalizing a page name using a MediaWiki api. |
||
11 | * |
||
12 | * This program is free software; you can redistribute it and/or modify |
||
13 | * it under the terms of the GNU General Public License as published by |
||
14 | * the Free Software Foundation; either version 2 of the License, or |
||
15 | * (at your option) any later version. |
||
16 | * |
||
17 | * This program is distributed in the hope that it will be useful, |
||
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
20 | * GNU General Public License for more details. |
||
21 | * |
||
22 | * You should have received a copy of the GNU General Public License along |
||
23 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
24 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
25 | * http://www.gnu.org/copyleft/gpl.html |
||
26 | * |
||
27 | * @since 1.27 |
||
28 | * |
||
29 | * @license GNU GPL v2+ |
||
30 | * @author John Erling Blad < [email protected] > |
||
31 | * @author Daniel Kinzler |
||
32 | * @author Jeroen De Dauw < [email protected] > |
||
33 | * @author Marius Hoch |
||
34 | */ |
||
class MediaWikiPageNameNormalizer {

	/**
	 * HTTP client used to query the remote MediaWiki API.
	 *
	 * @var Http
	 */
	private $http;

	/**
	 * @param Http|null $http HTTP client to use; a new Http instance is
	 *  created when none is given.
	 */
	public function __construct( Http $http = null ) {
		if ( !$http ) {
			$http = new Http();
		}

		$this->http = $http;
	}

	/**
	 * Returns the normalized form of the given page title, using the
	 * normalization rules of the given site. If the given title is a redirect,
	 * the redirect will be resolved and the redirect target is returned.
	 *
	 * @note This actually makes an API request to the remote site, so beware
	 *   that this function is slow and depends on an external service.
	 *
	 * @see Site::normalizePageName
	 *
	 * @since 1.27
	 *
	 * @param string $pageName
	 * @param string $apiUrl
	 *
	 * @return string|bool The normalized page title, or false if the request
	 *  failed, the reply was not usable JSON, or the page is missing, invalid
	 *  or has no title in the reply.
	 * @throws \MWException If $pageName is not a string.
	 */
	public function normalizePageName( $pageName, $apiUrl ) {
		// Only the page name is validated here; $apiUrl is used as given.
		if ( !is_string( $pageName ) ) {
			throw new \MWException( '$pageName must be a string' );
		}

		// Make sure the string is normalized into NFC (due to T42017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://phabricator.wikimedia.org/T42017
		$pageName = Validator::cleanUp( $pageName );

		// Build the args for the specific call
		$args = [
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			// @todo options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		];

		$url = wfAppendQuery( $apiUrl, $args );

		// Go on call the external site
		// @todo we need a good way to specify a timeout here.
		$ret = $this->http->get( $url, [], __METHOD__ );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		// extractPageRecord() is private, so it has to be called through self::.
		// With static:: the call would be resolved against the runtime class and
		// fail with an access error when this method is invoked on a subclass.
		$page = self::extractPageRecord( $data, $pageName );

		// $page may be false here; isset() on false is simply false, so that
		// case ends up in the "did not return a page title" branch below.
		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! "
				. $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! "
				. $ret );
			return false;
		}

		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Get normalization record for a given page title from an API response.
	 *
	 * When the response holds exactly one page, that page is returned directly.
	 * Otherwise the title is followed through the 'normalized', 'converted' and
	 * 'redirects' lists (in that order) and the page whose title matches the
	 * resulting title is returned.
	 *
	 * @param array $externalData A reply from the API on a external server.
	 * @param string $pageTitle Identifies the page at the external site, needing normalization.
	 *
	 * @return array|bool A 'page' structure representing the page identified by $pageTitle,
	 *  or false if no unambiguous page record could be found.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
			return false;
		}

		$query = $externalData['query'];

		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
			$pages = array_values( $query['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// The remainder is only used during internal testing, as it is assumed
		// a more optimal (and lossfree) storage.

		// Loop over the different named structures, that otherwise are similar.
		// The value is the field that is compared against the page title.
		$structs = [
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		];

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exists at all (and is usable).
			if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() preserves keys, so the result is reindexed to make
			// sure a single hit is always found at offset 0.
			$collectedHits = array_values( array_filter(
				$query[$listId],
				function ( $a ) use ( $fieldId, $pageTitle ) {
					return isset( $a[$fieldId] ) && $a[$fieldId] === $pageTitle;
				}
			) );
			$hitCount = count( $collectedHits );

			if ( $fieldId === 'from' ) {
				// Still looping over normalization, conversion or redirects:
				// keep the new page title for later rounds.
				if ( $hitCount > 1 ) {
					// Ambiguous mapping.
					return false;
				}
				if ( $hitCount === 1 ) {
					if ( !isset( $collectedHits[0]['to'] ) ) {
						// A mapping without a target cannot be followed.
						return false;
					}
					$pageTitle = $collectedHits[0]['to'];
				}
			} else {
				// On the pages structure: exactly one match is a result,
				// anything else (none or several) is a failure.
				return $hitCount === 1 ? $collectedHits[0] : false;
			}
		}

		// Reached when the reply has no 'pages' substructure.
		return false;
	}

}
||
213 |
If a method or function can return multiple different values, and unless you are sure that you can only receive a single value in this context, we recommend adding an additional type check:
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.