wikimedia / mediawiki
This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead.
<?php

namespace MediaWiki\Site;

use FormatJson;
use Http;
use UtfNormal\Validator;

/**
 * Service for normalizing a page name using a MediaWiki api.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @since 1.27
 *
 * @license GNU GPL v2+
 * @author John Erling Blad < [email protected] >
 * @author Daniel Kinzler
 * @author Jeroen De Dauw < [email protected] >
 * @author Marius Hoch
 */
class MediaWikiPageNameNormalizer {

	/**
	 * @var Http
	 */
	private $http;

	/**
	 * @param Http|null $http
	 */
	public function __construct( Http $http = null ) {
		if ( !$http ) {
			$http = new Http();
		}

		$this->http = $http;
	}

	/**
	 * Returns the normalized form of the given page title, using the
	 * normalization rules of the given site. If the given title is a redirect,
	 * the redirect will be resolved and the redirect target is returned.
	 *
	 * @note This actually makes an API request to the remote site, so beware
	 * that this function is slow and depends on an external service.
	 *
	 * @see Site::normalizePageName
	 *
	 * @since 1.27
	 *
	 * @param string $pageName
	 * @param string $apiUrl
	 *
	 * @return string
	 * @throws \MWException
	 */
	public function normalizePageName( $pageName, $apiUrl ) {
		// Check if we have strings as arguments.
		if ( !is_string( $pageName ) ) {
			throw new \MWException( '$pageName must be a string' );
		}

		// Go on call the external site

		// Make sure the string is normalized into NFC (due to T42017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://phabricator.wikimedia.org/T42017
		$pageName = Validator::cleanUp( $pageName );

		// Build the args for the specific call
		$args = [
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			// @todo options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		];

		$url = wfAppendQuery( $apiUrl, $args );

		// Go on call the external site
		// @todo we need a good way to specify a timeout here.
		$ret = $this->http->get( $url, [], __METHOD__ );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );
		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		$page = static::extractPageRecord( $data, $pageName );
PHP Analyzer note on the static::extractPageRecord() call above:

Since extractPageRecord() is declared private, calling it with static will lead to errors in possible sub-classes. You can either use self, or increase the visibility of extractPageRecord() to at least protected.

Let's assume you have a class which uses late static binding:

class YourClass
{
	private static function getTemperature() {
		return "3422 °C";
	}

	public static function getSomeVariable()
	{
		return static::getTemperature();
	}
}

The code above will run fine in your PHP runtime. However, if you now create a sub-class like the following and call getSomeVariable() on it, you will get an access error:

class YourSubClass extends YourClass {
	private static function getTemperature() {
		return "-182 °C";
	}
}

print YourSubClass::getSomeVariable(); // Will cause an access error.

In the case above, it makes sense to update YourClass to use self instead:

class YourClass
{
	private static function getTemperature() {
		return "3422 °C";
	}

	public static function getSomeVariable()
	{
		return self::getTemperature();
	}
}
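Applied to the call above, the analyzer's first suggestion would amount to this one-line change. This is only a sketch of the suggested fix; the source shown in this report keeps static:::

// self:: binds the call to this class, so the private helper stays
// accessible even if the class is ever sub-classed.
$page = self::extractPageRecord( $data, $pageName );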
		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! "
				. $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! "
				. $ret );
			return false;
		}

		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Get the normalization record for a given page title from an API response.
	 *
	 * @param array $externalData A reply from the API on an external server.
	 * @param string $pageTitle Identifies the page at the external site, needing normalization.
	 *
	 * @return array|bool A 'page' structure representing the page identified by $pageTitle.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $externalData['query']['pages'] ) ) {
			$pages = array_values( $externalData['query']['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}
		// This is only used during internal testing, as it is assumed
		// to be a more optimal (and lossless) storage.
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
			return false;
		}
		// Loop over the three differently named structures, which otherwise are similar
		$structs = [
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		];
		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exists at all.
			if ( !isset( $externalData['query'][$listId] ) ) {
				continue;
			}
			// Filter the substructure down to what we actually are using.
			$collectedHits = array_filter(
				array_values( $externalData['query'][$listId] ),
				function ( $a ) use ( $fieldId, $pageTitle ) {
					return $a[$fieldId] === $pageTitle;
				}
			);
			// If still looping over normalization, conversion or redirects,
			// then we need to keep the new page title for later rounds.
			if ( $fieldId === 'from' && is_array( $collectedHits ) ) {
				switch ( count( $collectedHits ) ) {
					case 0:
						break;
					case 1:
						$pageTitle = $collectedHits[0]['to'];
						break;
					default:
						return false;
				}
			} elseif ( $fieldId === 'title' && is_array( $collectedHits ) ) {
				// If on the pages structure we should prepare for returning.
				switch ( count( $collectedHits ) ) {
					case 0:
						return false;
					case 1:
						return array_shift( $collectedHits );
					default:
						return false;
				}
			}
		}
		// should never be here
		return false;
	}

}
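For context, here is a minimal usage sketch of the class above. It assumes a MediaWiki runtime (where the Http class and wfAppendQuery() are available); the page name and endpoint URL are illustrative assumptions, not values taken from this report:

use MediaWiki\Site\MediaWikiPageNameNormalizer;

// Illustrative only: normalize a title against a remote wiki's api.php.
$normalizer = new MediaWikiPageNameNormalizer();
$title = $normalizer->normalizePageName(
	'helium',
	'https://en.wikipedia.org/w/api.php'
);

if ( $title === false ) {
	// The remote call failed, or the title is missing or invalid.
} else {
	// $title holds the normalized (possibly redirect-resolved) title.
}

Note that although the method is documented as returning a string, the implementation above also returns false on failure, so callers should check the return type.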
If a method or function can return multiple different values, and you are not sure that you can only receive a single value in this context, we recommend adding an additional type check:
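As a minimal sketch of such a guard, mirroring the check normalizePageName() above already performs on the decoded API response (variable names as in the listing):

$data = FormatJson::decode( $ret, true );

// FormatJson::decode() is not guaranteed to return an array: invalid input
// decodes to null, and valid JSON can be a scalar. Verify the shape before
// any array access.
if ( !is_array( $data ) ) {
	return false;
}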