Completed
Branch master (bbf110)
by
unknown
25:51
created

ApiStashEdit::parseAndStash()   C

Complexity

Conditions 9
Paths 11

Size

Total Lines 63
Code Lines 41

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 9
eloc 41
c 1
b 0
f 0
nc 11
nop 4
dl 0
loc 63
rs 6.6149

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * This program is free software; you can redistribute it and/or modify
4
 * it under the terms of the GNU General Public License as published by
5
 * the Free Software Foundation; either version 2 of the License, or
6
 * (at your option) any later version.
7
 *
8
 * This program is distributed in the hope that it will be useful,
9
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
 * GNU General Public License for more details.
12
 *
13
 * You should have received a copy of the GNU General Public License along
14
 * with this program; if not, write to the Free Software Foundation, Inc.,
15
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
 * http://www.gnu.org/copyleft/gpl.html
17
 *
18
 * @file
19
 * @author Aaron Schulz
20
 */
21
22
use MediaWiki\Logger\LoggerFactory;
23
24
/**
25
 * Prepare an edit in shared cache so that it can be reused on edit
26
 *
27
 * This endpoint can be called via AJAX as the user focuses on the edit
28
 * summary box. By the time of submission, the parse may have already
29
 * finished, and can be immediately used on page save. Certain parser
30
 * functions like {{REVISIONID}} or {{CURRENTTIME}} may cause the cache
31
 * to not be used on edit. Templates and files used are checked for changes
32
 * since the output was generated. The cache TTL is also kept low for sanity.
33
 *
34
 * @ingroup API
35
 * @since 1.25
36
 */
37
class ApiStashEdit extends ApiBase {
38
	// Status returned by parseAndStash() when the parse is stashed (or a fresh stash already exists)
	const ERROR_NONE = 'stashed';
39
	// Status returned by parseAndStash() when no edit info or parser output was produced
	const ERROR_PARSE = 'error_parse';
40
	// Status returned by parseAndStash() when writing the stash value to the cache failed
	const ERROR_CACHE = 'error_cache';
41
	// Status returned by parseAndStash() when buildStashValue() yields no storable value
	const ERROR_UNCACHEABLE = 'uncacheable';
42
	// Status returned by parseAndStash() when the per-key DB lock could not be acquired
	const ERROR_BUSY = 'busy';
43
44
	// Age in seconds below which a stashed parse is reused/accepted without further checks
	const PRESUME_FRESH_TTL_SEC = 30;
45
	// Upper bound in seconds for the TTL of stashed values and of the stashed raw text
	const MAX_CACHE_TTL = 300; // 5 minutes
46
47
	/**
	 * Stash a parse of the submitted edit text so that a later save can reuse it.
	 *
	 * The text is taken from the 'text' parameter or, when 'stashedtexthash' is
	 * given, from the text previously cached under that hash. For an existing page
	 * the text is merged into the requested section at the base revision and, if
	 * the base revision is no longer the current one, three-way merged with the
	 * current revision. Unless the user is rate limited, the resulting content is
	 * parsed and stashed and the raw text is cached under its hash.
	 *
	 * The API result carries 'status' and 'texthash', or only an 'editconflict'
	 * status when no merged content could be produced.
	 */
	public function execute() {
		$user = $this->getUser();
		$params = $this->extractRequestParams();

		if ( $user->isBot() ) { // sanity
			$this->dieUsage( 'This interface is not supported for bots', 'botsnotsupported' );
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$page = $this->getTitleOrPageId( $params );
		$title = $page->getTitle();

		if ( !ContentHandler::getForModelID( $params['contentmodel'] )
			->isSupportedFormat( $params['contentformat'] )
		) {
			$this->dieUsage( 'Unsupported content model/format', 'badmodelformat' );
		}

		// Define both variables on every path. The branches below either assign
		// them or call dieUsage(), which is expected not to return.
		$text = null;
		$textHash = null;
		if ( strlen( $params['stashedtexthash'] ) ) {
			// Load from cache since the client indicates the text is the same as last stash
			$textHash = $params['stashedtexthash'];
			$textKey = $cache->makeKey( 'stashedit', 'text', $textHash );
			$text = $cache->get( $textKey );
			if ( !is_string( $text ) ) {
				$this->dieUsage( 'No stashed text found with the given hash', 'missingtext' );
			}
		} elseif ( $params['text'] !== null ) {
			// Trim and fix newlines so the key SHA1's match (see WebRequest::getText())
			$text = rtrim( str_replace( "\r\n", "\n", $params['text'] ) );
			$textHash = sha1( $text );
		} else {
			$this->dieUsage(
				'The text or stashedtexthash parameter must be given', 'missingtextparam' );
		}

		$textContent = ContentHandler::makeContent(
			$text, $title, $params['contentmodel'], $params['contentformat'] );

		$page = WikiPage::factory( $title );
		if ( $page->exists() ) {
			// Page exists: get the merged content with the proposed change
			$baseRev = Revision::newFromPageId( $page->getId(), $params['baserevid'] );
			if ( !$baseRev ) {
				$this->dieUsage( "No revision ID {$params['baserevid']}", 'missingrev' );
			}
			$currentRev = $page->getRevision();
			if ( !$currentRev ) {
				$this->dieUsage( "No current revision of page ID {$page->getId()}", 'missingrev' );
			}
			// Merge in the new version of the section to get the proposed version
			$editContent = $page->replaceSectionAtRev(
				$params['section'],
				$textContent,
				$params['sectiontitle'],
				$baseRev->getId()
			);
			// Only a Content object may be passed on to merge3(); treat anything
			// else (null, false, or an unexpected scalar) as a failed replacement.
			if ( !( $editContent instanceof Content ) ) {
				$this->dieUsage( 'Could not merge updated section.', 'replacefailed' );
			}
			if ( $currentRev->getId() == $baseRev->getId() ) {
				// Base revision was still the latest; nothing to merge
				$content = $editContent;
			} else {
				// Merge the edit into the current version
				$baseContent = $baseRev->getContent();
				$currentContent = $currentRev->getContent();
				if ( !$baseContent || !$currentContent ) {
					$this->dieUsage( "Missing content for page ID {$page->getId()}", 'missingrev' );
				}
				$handler = ContentHandler::getForModelID( $baseContent->getModel() );
				$content = $handler->merge3( $baseContent, $editContent, $currentContent );
			}
		} else {
			// New pages: use the user-provided content model
			$content = $textContent;
		}

		if ( !$content ) { // merge3() failed
			$this->getResult()->addValue( null,
				$this->getModuleName(), [ 'status' => 'editconflict' ] );
			return;
		}

		// The user will abort the AJAX request by pressing "save", so ignore that
		ignore_user_abort( true );

		if ( $user->pingLimiter( 'stashedit' ) ) {
			$status = 'ratelimited';
		} else {
			$status = self::parseAndStash( $page, $content, $user, $params['summary'] );
			// Keep the raw text around so a follow-up request can refer to it by hash
			$textKey = $cache->makeKey( 'stashedit', 'text', $textHash );
			$cache->set( $textKey, $text, self::MAX_CACHE_TTL );
		}

		$this->getStats()->increment( "editstash.cache_stores.$status" );

		$this->getResult()->addValue(
			null,
			$this->getModuleName(),
			[
				'status' => $status,
				'texthash' => $textHash
			]
		);
	}
152
153
	/**
	 * Parse the given edit content and store the result in the stash cache.
	 *
	 * A DB lock on the stash key de-duplicates concurrent requests. If a stash
	 * entry no older than PRESUME_FRESH_TTL_SEC already exists it is reused
	 * instead of parsing again. The 'ParserOutputStashForEdit' hook runs whenever
	 * parser output is available, whether freshly built or reused.
	 *
	 * @param WikiPage $page
	 * @param Content $content Edit content
	 * @param User $user
	 * @param string $summary Edit summary
	 * @return string ApiStashEdit::ERROR_* constant
	 * @since 1.25
	 */
	public static function parseAndStash( WikiPage $page, Content $content, User $user, $summary ) {
		$cache = ObjectCache::getLocalClusterInstance();
		$logger = LoggerFactory::getInstance( 'StashEdit' );

		$title = $page->getTitle();
		$key = self::getStashKey( $title, self::getContentHash( $content ), $user );

		// Use the master DB for fast blocking locks
		$dbw = wfGetDB( DB_MASTER );
		if ( !$dbw->lock( $key, __METHOD__, 1 ) ) {
			// De-duplicate requests on the same key
			return self::ERROR_BUSY;
		}
		// Never read again on purpose: the variable only keeps the callback object
		// alive until this method returns (presumably ScopedCallback runs the
		// closure on destruction - confirm against its implementation).
		$unlocker = new ScopedCallback( function () use ( $dbw, $key ) {
			$dbw->unlock( $key, __METHOD__ );
		} );

		$freshSince = time() - self::PRESUME_FRESH_TTL_SEC;

		// Reuse any freshly built matching edit stash cache
		$editInfo = $cache->get( $key );
		$alreadyCached = $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $freshSince;
		if ( !$alreadyCached ) {
			$editInfo = $page->prepareContentForEdit(
				$content, null, $user, $content->getDefaultFormat(), false );
		}

		if ( !$editInfo || !$editInfo->output ) {
			return self::ERROR_PARSE;
		}

		// Let extensions add ParserOutput metadata or warm other caches
		Hooks::run( 'ParserOutputStashForEdit',
			[ $page, $content, $editInfo->output, $summary, $user ] );

		if ( $alreadyCached ) {
			$logger->debug( "Already cached parser output for key '$key' ('$title')." );
			return self::ERROR_NONE;
		}

		list( $stashInfo, $ttl, $code ) = self::buildStashValue(
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( !$stashInfo ) {
			$logger->info( "Uncacheable parser output for key '$key' ('$title') [$code]." );
			return self::ERROR_UNCACHEABLE;
		}

		if ( $cache->set( $key, $stashInfo, $ttl ) ) {
			$logger->debug( "Cached parser output for key '$key' ('$title')." );
			return self::ERROR_NONE;
		}

		$logger->error( "Failed to cache parser output for key '$key' ('$title')." );
		return self::ERROR_CACHE;
	}
224
225
	/**
	 * Check that a prepared edit is in cache and still up-to-date
	 *
	 * This method blocks if the prepared edit is already being rendered,
	 * waiting until rendering finishes before doing final validity checks.
	 *
	 * The cache is rejected if template or file changes are detected.
	 * Note that foreign template or file transclusions are not checked.
	 *
	 * The result is a map (pstContent,output,timestamp) with fields
	 * extracted directly from WikiPage::prepareContentForEdit().
	 *
	 * @param Title $title
	 * @param Content $content
	 * @param User $user User to get parser options from
	 * @return stdClass|bool Returns false on cache miss
	 */
	public static function checkCache( Title $title, Content $content, User $user ) {
		if ( $user->isBot() ) {
			return false; // bots never stash - don't pollute stats
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$logger = LoggerFactory::getInstance( 'StashEdit' );
		$stats = RequestContext::getMain()->getStats();

		$key = self::getStashKey( $title, self::getContentHash( $content ), $user );
		$editInfo = $cache->get( $key );
		if ( !is_object( $editInfo ) ) {
			$start = microtime( true );
			// We ignore user aborts and keep parsing. Block on any prior parsing
			// so as to use its results and make use of the time spent parsing.
			// Skip this logic if there is no master connection in case this method
			// is called on an HTTP GET request for some reason.
			// wfGetLB() is deprecated since 1.27; use the service container instead.
			// Fully qualified so that no extra "use" statement is required.
			$lb = \MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancer();
			$dbw = $lb->getAnyOpenConnection( $lb->getWriterIndex() );
			if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) {
				$editInfo = $cache->get( $key );
				$dbw->unlock( $key, __METHOD__ );
			}

			$timeMs = 1000 * max( 0, microtime( true ) - $start );
			$stats->timing( 'editstash.lock_wait_time', $timeMs );
		}

		if ( !is_object( $editInfo ) || !$editInfo->output ) {
			$stats->increment( 'editstash.cache_misses.no_stash' );
			$logger->debug( "Empty cache for key '$key' ('$title'); user '{$user->getName()}'." );
			return false;
		}

		$age = time() - wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
		if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
			// Assume nothing changed in this time
			$stats->increment( 'editstash.cache_hits.presumed_fresh' );
			$logger->debug( "Timestamp-based cache hit for key '$key' (age: $age sec)." );
		} elseif ( isset( $editInfo->edits ) && $editInfo->edits === $user->getEditCount() ) {
			// Logged-in user made no local upload/template edits in the meantime
			$stats->increment( 'editstash.cache_hits.presumed_fresh' );
			$logger->debug( "Edit count based cache hit for key '$key' (age: $age sec)." );
		} elseif ( $user->isAnon()
			&& self::lastEditTime( $user ) < $editInfo->output->getCacheTime()
		) {
			// Logged-out user made no local upload/template edits in the meantime
			$stats->increment( 'editstash.cache_hits.presumed_fresh' );
			$logger->debug( "Edit check based cache hit for key '$key' (age: $age sec)." );
		} else {
			// User may have changed included content
			$editInfo = false;
		}

		if ( !$editInfo ) {
			$stats->increment( 'editstash.cache_misses.proven_stale' );
			$logger->info( "Stale cache for key '$key'; old key with outside edits. (age: $age sec)" );
		} elseif ( $editInfo->output->getFlag( 'vary-revision' ) ) {
			// This can be used for the initial parse, e.g. for filters or doEditContent(),
			// but a second parse will be triggered in doEditUpdates(). This is not optimal.
			$logger->info( "Cache for key '$key' ('$title') has vary_revision." );
		} elseif ( $editInfo->output->getFlag( 'vary-revision-id' ) ) {
			// Similar to the above if we didn't guess the ID correctly.
			$logger->info( "Cache for key '$key' ('$title') has vary_revision_id." );
		}

		return $editInfo;
	}
310
311
	/**
	 * Look up the newest recentchanges timestamp recorded under the user's name.
	 *
	 * @param User $user
	 * @return string|null TS_MW timestamp or null
	 */
	private static function lastEditTime( User $user ) {
		$dbr = wfGetDB( DB_SLAVE );
		$conds = [ 'rc_user_text' => $user->getName() ];
		$latest = $dbr->selectField( 'recentchanges', 'MAX(rc_timestamp)', $conds, __METHOD__ );

		return wfTimestampOrNull( TS_MW, $latest );
	}
325
326
	/**
	 * Get hash of the content, factoring in model/format
	 *
	 * The hash is the SHA-1 of the model ID, the default format and the content
	 * serialized in that format, joined by newlines.
	 *
	 * @param Content $content
	 * @return string
	 */
	private static function getContentHash( Content $content ) {
		$model = $content->getModel();
		$format = $content->getDefaultFormat();
		$serialized = $content->serialize( $content->getDefaultFormat() );

		return sha1( $model . "\n" . $format . "\n" . $serialized );
	}
339
340
	/**
	 * Get the temporary prepared edit stash key for a user
	 *
	 * This key can be used for caching prepared edits provided:
	 *   - a) The $user was used for PST options
	 *   - b) The parser output was made from the PST using canonical matching options
	 *
	 * @param Title $title
	 * @param string $contentHash Result of getContentHash()
	 * @param User $user User to get parser options from
	 * @return string
	 */
	private static function getStashKey( Title $title, $contentHash, User $user ) {
		$cache = ObjectCache::getLocalClusterInstance();
		$titleHash = md5( $title->getPrefixedDBkey() );
		// Account for user name related variables like signatures
		$userHash = md5( $user->getId() . "\n" . $user->getName() );

		// $contentHash accounts for the edit model/text
		return $cache->makeKey( 'prepared-edit', $titleHash, $contentHash, $userHash );
	}
362
363
	/**
	 * Build a value to store in memcached based on the PST content and parser output
	 *
	 * This makes a simple version of WikiPage::prepareContentForEdit() as stash info
	 *
	 * @param Content $pstContent Pre-Save transformed content
	 * @param ParserOutput $parserOutput
	 * @param string $timestamp TS_MW
	 * @param User $user
	 * @return array (stash info object, TTL in seconds, info code) or (null, 0, info code)
	 */
	private static function buildStashValue(
		Content $pstContent, ParserOutput $parserOutput, $timestamp, User $user
	) {
		// The remaining lifetime is the parser output's cache expiry minus its current
		// age, capped at MAX_CACHE_TTL to avoid extreme template/file staleness.
		$outputAge = time() - wfTimestamp( TS_UNIX, $parserOutput->getTimestamp() );
		$ttl = min( $parserOutput->getCacheExpiry() - $outputAge, self::MAX_CACHE_TTL );
		if ( $ttl <= 0 ) {
			return [ null, 0, 'no_ttl' ];
		}

		// Only store what is actually needed
		$stashInfo = new stdClass();
		$stashInfo->pstContent = $pstContent;
		$stashInfo->output = $parserOutput;
		$stashInfo->timestamp = $timestamp;
		$stashInfo->edits = $user->getEditCount();

		return [ $stashInfo, $ttl, 'ok' ];
	}
395
396
	/**
	 * Describe the request parameters accepted by this module.
	 *
	 * 'title', 'contentmodel', 'contentformat' and 'baserevid' are required;
	 * 'text' and 'stashedtexthash' default to null.
	 *
	 * @return array Map of parameter name to ApiBase::PARAM_* settings
	 */
	public function getAllowedParams() {
		$allowed = [];

		$allowed['title'] = [
			ApiBase::PARAM_TYPE => 'string',
			ApiBase::PARAM_REQUIRED => true
		];
		$allowed['section'] = [ ApiBase::PARAM_TYPE => 'string' ];
		$allowed['sectiontitle'] = [ ApiBase::PARAM_TYPE => 'string' ];
		$allowed['text'] = [
			ApiBase::PARAM_TYPE => 'text',
			ApiBase::PARAM_DFLT => null
		];
		$allowed['stashedtexthash'] = [
			ApiBase::PARAM_TYPE => 'string',
			ApiBase::PARAM_DFLT => null
		];
		$allowed['summary'] = [ ApiBase::PARAM_TYPE => 'string' ];
		$allowed['contentmodel'] = [
			ApiBase::PARAM_TYPE => ContentHandler::getContentModels(),
			ApiBase::PARAM_REQUIRED => true
		];
		$allowed['contentformat'] = [
			ApiBase::PARAM_TYPE => ContentHandler::getAllContentFormats(),
			ApiBase::PARAM_REQUIRED => true
		];
		$allowed['baserevid'] = [
			ApiBase::PARAM_TYPE => 'integer',
			ApiBase::PARAM_REQUIRED => true
		];

		return $allowed;
	}
433
434
	/**
	 * @return string Token type this module asks for ('csrf')
	 */
	public function needsToken() {
		return 'csrf';
	}
437
438
	/**
	 * @return bool Always true for this module
	 */
	public function mustBePosted() {
		return true;
	}
441
442
	/**
	 * @return bool Always true for this module
	 */
	public function isWriteMode() {
		return true;
	}
445
446
	/**
	 * @return bool Always true for this module
	 */
	public function isInternal() {
		return true;
	}
449
}
450