Completed
Branch master (098997)
by
unknown
28:44
created

ApiStashEdit   C

Complexity

Total Complexity 49

Size/Duplication

Total Lines 417
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 18

Importance

Changes 0
Metric Value
dl 0
loc 417
rs 5.1442
c 0
b 0
f 0
wmc 49
lcom 1
cbo 18

12 Methods

Rating   Name   Duplication   Size   Complexity  
A lastEditTime() 0 10 1
A getContentHash() 0 7 1
A getStashKey() 0 10 1
A buildStashValue() 0 21 2
B getAllowedParams() 0 37 1
A needsToken() 0 3 1
A mustBePosted() 0 3 1
A isWriteMode() 0 3 1
A isInternal() 0 3 1
F execute() 0 108 15
C parseAndStash() 0 64 9
C checkCache() 0 68 15

How to fix   Complexity   

Complex Class

Complex classes like ApiStashEdit often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use ApiStashEdit, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * This program is free software; you can redistribute it and/or modify
4
 * it under the terms of the GNU General Public License as published by
5
 * the Free Software Foundation; either version 2 of the License, or
6
 * (at your option) any later version.
7
 *
8
 * This program is distributed in the hope that it will be useful,
9
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
 * GNU General Public License for more details.
12
 *
13
 * You should have received a copy of the GNU General Public License along
14
 * with this program; if not, write to the Free Software Foundation, Inc.,
15
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
 * http://www.gnu.org/copyleft/gpl.html
17
 *
18
 * @file
19
 * @author Aaron Schulz
20
 */
21
22
use MediaWiki\Logger\LoggerFactory;
23
use MediaWiki\MediaWikiServices;
24
25
/**
26
 * Prepare an edit in shared cache so that it can be reused on edit
27
 *
28
 * This endpoint can be called via AJAX as the user focuses on the edit
29
 * summary box. By the time of submission, the parse may have already
30
 * finished, and can be immediately used on page save. Certain parser
31
 * functions like {{REVISIONID}} or {{CURRENTTIME}} may cause the cache
32
 * to not be used on edit. Templates and files used are checked for changes
33
 * since the output was generated. The cache TTL is also kept low for sanity.
34
 *
35
 * @ingroup API
36
 * @since 1.25
37
 */
38
class ApiStashEdit extends ApiBase {
39
	// Status returned by parseAndStash() when the prepared edit is (or already was) cached
	const ERROR_NONE = 'stashed';
40
	// Status returned by parseAndStash() when no parser output could be produced
	const ERROR_PARSE = 'error_parse';
41
	// Status returned by parseAndStash() when writing the stash value to cache failed
	const ERROR_CACHE = 'error_cache';
42
	// Status returned by parseAndStash() when buildStashValue() yields no storable value
	const ERROR_UNCACHEABLE = 'uncacheable';
43
	// Status returned by parseAndStash() when the per-key DB lock could not be acquired
	const ERROR_BUSY = 'busy';
44
45
	// Age (in seconds) below which a stashed entry is reused without further checks
	const PRESUME_FRESH_TTL_SEC = 30;
46
	// Upper bound for stash TTLs; also used as the TTL of the stashed raw text
	const MAX_CACHE_TTL = 300; // 5 minutes
47
48
	/**
	 * Stash a prospective edit so that it can be reused when the edit is saved
	 *
	 * Resolves the submitted text (either given directly or looked up in cache
	 * by a previously returned hash), builds the proposed page content (merging
	 * it into the current revision when the base revision is outdated), and then
	 * parses and caches it via parseAndStash().
	 *
	 * The result, added under the module name, has a 'status' key and, unless an
	 * edit conflict was detected, a 'texthash' key. The method dies with a usage
	 * error for bot users, unsupported model/format pairs, missing text, missing
	 * revisions or content, and failed section replacement.
	 */
	public function execute() {
		$user = $this->getUser();
		$params = $this->extractRequestParams();

		if ( $user->isBot() ) { // sanity
			$this->dieUsage( 'This interface is not supported for bots', 'botsnotsupported' );
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$page = $this->getTitleOrPageId( $params );
		$title = $page->getTitle();

		if ( !ContentHandler::getForModelID( $params['contentmodel'] )
			->isSupportedFormat( $params['contentformat'] )
		) {
			$this->dieUsage( 'Unsupported content model/format', 'badmodelformat' );
		}

		$text = null;
		$textHash = null;
		// The parameter defaults to null; cast before testing for emptiness because
		// passing null to strlen() is deprecated as of PHP 8.1.
		if ( (string)$params['stashedtexthash'] !== '' ) {
			// Load from cache since the client indicates the text is the same as last stash
			$textHash = $params['stashedtexthash'];
			$textKey = $cache->makeKey( 'stashedit', 'text', $textHash );
			$text = $cache->get( $textKey );
			if ( !is_string( $text ) ) {
				$this->dieUsage( 'No stashed text found with the given hash', 'missingtext' );
			}
		} elseif ( $params['text'] !== null ) {
			// Trim and fix newlines so the key SHA1's match (see WebRequest::getText())
			$text = rtrim( str_replace( "\r\n", "\n", $params['text'] ) );
			$textHash = sha1( $text );
		} else {
			$this->dieUsage(
				'The text or stashedtexthash parameter must be given', 'missingtextparam' );
		}

		$textContent = ContentHandler::makeContent(
			$text, $title, $params['contentmodel'], $params['contentformat'] );

		$page = WikiPage::factory( $title );
		if ( $page->exists() ) {
			// Page exists: get the merged content with the proposed change.
			// The code below relies on dieUsage() not returning.
			$baseRev = Revision::newFromPageId( $page->getId(), $params['baserevid'] );
			if ( !$baseRev ) {
				$this->dieUsage( "No revision ID {$params['baserevid']}", 'missingrev' );
			}
			$currentRev = $page->getRevision();
			if ( !$currentRev ) {
				$this->dieUsage( "No current revision of page ID {$page->getId()}", 'missingrev' );
			}
			// Merge in the new version of the section to get the proposed version
			$editContent = $page->replaceSectionAtRev(
				$params['section'],
				$textContent,
				$params['sectiontitle'],
				$baseRev->getId()
			);
			if ( !$editContent ) {
				$this->dieUsage( 'Could not merge updated section.', 'replacefailed' );
			}
			if ( $currentRev->getId() == $baseRev->getId() ) {
				// Base revision was still the latest; nothing to merge
				$content = $editContent;
			} else {
				// Merge the edit into the current version
				$baseContent = $baseRev->getContent();
				$currentContent = $currentRev->getContent();
				if ( !$baseContent || !$currentContent ) {
					$this->dieUsage( "Missing content for page ID {$page->getId()}", 'missingrev' );
				}
				$handler = ContentHandler::getForModelID( $baseContent->getModel() );
				$content = $handler->merge3( $baseContent, $editContent, $currentContent );
			}
		} else {
			// New pages: use the user-provided content model
			$content = $textContent;
		}

		if ( !$content ) { // merge3() failed
			$this->getResult()->addValue( null,
				$this->getModuleName(), [ 'status' => 'editconflict' ] );
			return;
		}

		// The user will abort the AJAX request by pressing "save", so ignore that
		ignore_user_abort( true );

		if ( $user->pingLimiter( 'stashedit' ) ) {
			// NOTE(review): the text is not stored in this branch, yet its hash is
			// still returned below; a follow-up request using that hash would hit
			// 'missingtext' unless an earlier request stored it. Confirm intent.
			$status = 'ratelimited';
		} else {
			$status = self::parseAndStash( $page, $content, $user, $params['summary'] );
			// Keep the raw text around so the client can refer to it by hash next time
			$textKey = $cache->makeKey( 'stashedit', 'text', $textHash );
			$cache->set( $textKey, $text, self::MAX_CACHE_TTL );
		}

		$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
		$stats->increment( "editstash.cache_stores.$status" );

		$this->getResult()->addValue(
			null,
			$this->getModuleName(),
			[
				'status' => $status,
				'texthash' => $textHash
			]
		);
	}
156
157
	/**
	 * Parse the given edit content and stash the prepared edit in cache
	 *
	 * A DB lock named after the stash key de-duplicates concurrent requests for
	 * the same edit; if it cannot be acquired, nothing is parsed. A cached entry
	 * whose timestamp is at most PRESUME_FRESH_TTL_SEC old is reused instead of
	 * re-parsing. The 'ParserOutputStashForEdit' hook runs whenever parser output
	 * is available, whether it was freshly made or reused.
	 *
	 * @param WikiPage $page
	 * @param Content $content Edit content
	 * @param User $user
	 * @param string $summary Edit summary
	 * @return string ApiStashEdit::ERROR_* constant
	 * @since 1.25
	 */
	public static function parseAndStash( WikiPage $page, Content $content, User $user, $summary ) {
		$cache = ObjectCache::getLocalClusterInstance();
		$logger = LoggerFactory::getInstance( 'StashEdit' );

		$title = $page->getTitle();
		$key = self::getStashKey( $title, self::getContentHash( $content ), $user );
		// Inside a closure __METHOD__ names the closure rather than this method,
		// so capture it once to pass the same caller name to lock() and unlock().
		$fname = __METHOD__;

		// Use the master DB for fast blocking locks
		$dbw = wfGetDB( DB_MASTER );
		if ( !$dbw->lock( $key, $fname, 1 ) ) {
			// De-duplicate requests on the same key
			return self::ERROR_BUSY;
		}
		// Never read again on purpose: presumably the ScopedCallback releases the
		// lock when this variable goes out of scope, covering every return below.
		/** @noinspection PhpUnusedLocalVariableInspection */
		$unlocker = new ScopedCallback( function () use ( $dbw, $key, $fname ) {
			$dbw->unlock( $key, $fname );
		} );

		$cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;

		// Reuse any freshly built matching edit stash cache
		$editInfo = $cache->get( $key );
		$alreadyCached = $editInfo
			&& wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime;
		if ( !$alreadyCached ) {
			$format = $content->getDefaultFormat();
			$editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
		}

		if ( !$editInfo || !$editInfo->output ) {
			return self::ERROR_PARSE;
		}

		// Let extensions add ParserOutput metadata or warm other caches
		Hooks::run( 'ParserOutputStashForEdit',
			[ $page, $content, $editInfo->output, $summary, $user ] );

		if ( $alreadyCached ) {
			$logger->debug( "Already cached parser output for key '$key' ('$title')." );
			return self::ERROR_NONE;
		}

		list( $stashInfo, $ttl, $code ) = self::buildStashValue(
			$editInfo->pstContent,
			$editInfo->output,
			$editInfo->timestamp,
			$user
		);

		if ( !$stashInfo ) {
			$logger->info( "Uncacheable parser output for key '$key' ('$title') [$code]." );
			return self::ERROR_UNCACHEABLE;
		}

		if ( $cache->set( $key, $stashInfo, $ttl ) ) {
			$logger->debug( "Cached parser output for key '$key' ('$title')." );
			return self::ERROR_NONE;
		}

		$logger->error( "Failed to cache parser output for key '$key' ('$title')." );
		return self::ERROR_CACHE;
	}
229
230
	/**
	 * Look up a prepared edit in cache and decide whether it can still be used
	 *
	 * If nothing is cached yet and a connection to the writer DB is already
	 * open, this blocks on the per-key lock so that the result of a parse that
	 * is still in progress can be picked up; the time spent is reported as
	 * 'editstash.lock_wait_time'.
	 *
	 * A stashed entry is accepted when it is at most PRESUME_FRESH_TTL_SEC old,
	 * when the user's edit count is unchanged since stashing, or when an
	 * anonymous user's latest recent change predates the parser output.
	 * Note that foreign template or file transclusions are not checked.
	 *
	 * The result is a map (pstContent,output,timestamp) with fields
	 * extracted directly from WikiPage::prepareContentForEdit().
	 *
	 * @param Title $title
	 * @param Content $content
	 * @param User $user User to get parser options from
	 * @return stdClass|bool Returns false on cache miss
	 */
	public static function checkCache( Title $title, Content $content, User $user ) {
		if ( $user->isBot() ) {
			return false; // bots never stash - don't pollute stats
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$logger = LoggerFactory::getInstance( 'StashEdit' );
		$stats = MediaWikiServices::getInstance()->getStatsdDataFactory();

		$key = self::getStashKey( $title, self::getContentHash( $content ), $user );
		$stashed = $cache->get( $key );
		if ( !is_object( $stashed ) ) {
			$waitStart = microtime( true );
			// User aborts are ignored while stashing, so a parse may still be
			// running. Wait for it and use its result. This is skipped when no
			// master connection is open, e.g. if called on an HTTP GET request.
			$lb = MediaWikiServices::getInstance()->getDBLoadBalancer();
			$dbw = $lb->getAnyOpenConnection( $lb->getWriterIndex() );
			if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) {
				$stashed = $cache->get( $key );
				$dbw->unlock( $key, __METHOD__ );
			}

			$waitedMs = 1000 * max( 0, microtime( true ) - $waitStart );
			$stats->timing( 'editstash.lock_wait_time', $waitedMs );
		}

		if ( !( is_object( $stashed ) && $stashed->output ) ) {
			$stats->increment( 'editstash.cache_misses.no_stash' );
			$logger->debug( "Empty cache for key '$key' ('$title'); user '{$user->getName()}'." );
			return false;
		}

		$output = $stashed->output;
		$age = time() - wfTimestamp( TS_UNIX, $output->getCacheTime() );

		// Work out why (if at all) the entry may be presumed fresh
		if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
			// Assume nothing changed in this time
			$hitMessage = "Timestamp-based cache hit for key '$key' (age: $age sec).";
		} elseif ( isset( $stashed->edits ) && $stashed->edits === $user->getEditCount() ) {
			// Logged-in user made no local upload/template edits in the meantime
			$hitMessage = "Edit count based cache hit for key '$key' (age: $age sec).";
		} elseif ( $user->isAnon()
			&& self::lastEditTime( $user ) < $output->getCacheTime()
		) {
			// Logged-out user made no local upload/template edits in the meantime
			$hitMessage = "Edit check based cache hit for key '$key' (age: $age sec).";
		} else {
			// User may have changed included content
			$hitMessage = null;
		}

		if ( $hitMessage === null ) {
			$stats->increment( 'editstash.cache_misses.proven_stale' );
			$logger->info( "Stale cache for key '$key'; old key with outside edits. (age: $age sec)" );
			return false;
		}

		$stats->increment( 'editstash.cache_hits.presumed_fresh' );
		$logger->debug( $hitMessage );

		if ( $output->getFlag( 'vary-revision' ) ) {
			// This can be used for the initial parse, e.g. for filters or doEditContent(),
			// but a second parse will be triggered in doEditUpdates(). This is not optimal.
			$logger->info( "Cache for key '$key' ('$title') has vary_revision." );
		} elseif ( $output->getFlag( 'vary-revision-id' ) ) {
			// Similar to the above if we didn't guess the ID correctly.
			$logger->info( "Cache for key '$key' ('$title') has vary_revision_id." );
		}

		return $stashed;
	}
315
316
	/**
	 * Get the newest rc_timestamp in recentchanges for the given user name
	 *
	 * The lookup uses a replica DB connection and matches on rc_user_text.
	 *
	 * @param User $user
	 * @return string|null TS_MW timestamp or null
	 */
	private static function lastEditTime( User $user ) {
		$dbr = wfGetDB( DB_REPLICA );
		$conds = [ 'rc_user_text' => $user->getName() ];
		$latest = $dbr->selectField( 'recentchanges', 'MAX(rc_timestamp)', $conds, __METHOD__ );

		return wfTimestampOrNull( TS_MW, $latest );
	}
330
331
	/**
	 * Compute a SHA-1 digest that identifies the content
	 *
	 * The digest covers the content model, the default format and the content
	 * serialized in that default format, joined by newlines.
	 *
	 * @param Content $content
	 * @return string
	 */
	private static function getContentHash( Content $content ) {
		$parts = [];
		$parts[] = $content->getModel();
		$parts[] = $content->getDefaultFormat();
		$parts[] = $content->serialize( $content->getDefaultFormat() );

		return sha1( implode( "\n", $parts ) );
	}
344
345
	/**
	 * Build the cache key under which a user's prepared edit is stashed
	 *
	 * The key combines a digest of the prefixed title, the content hash and a
	 * digest of the user's ID and name. It can be used for caching prepared
	 * edits provided:
	 *   - a) The $user was used for PST options
	 *   - b) The parser output was made from the PST using canonical matching options
	 *
	 * @param Title $title
	 * @param string $contentHash Result of getContentHash()
	 * @param User $user User to get parser options from
	 * @return string
	 */
	private static function getStashKey( Title $title, $contentHash, User $user ) {
		$cache = ObjectCache::getLocalClusterInstance();
		$titleDigest = md5( $title->getPrefixedDBkey() );
		// Account for user name related variables like signatures
		$userDigest = md5( $user->getId() . "\n" . $user->getName() );

		// $contentHash accounts for the edit model/text
		return $cache->makeKey( 'prepared-edit', $titleDigest, $contentHash, $userDigest );
	}
367
368
	/**
	 * Assemble the object to stash in cache, together with its TTL
	 *
	 * The stash info is a reduced form of what WikiPage::prepareContentForEdit()
	 * returns: the PST content, the parser output, the timestamp and the user's
	 * current edit count.
	 *
	 * @param Content $pstContent Pre-Save transformed content
	 * @param ParserOutput $parserOutput
	 * @param string $timestamp TS_MW
	 * @param User $user
	 * @return array (stash info array, TTL in seconds, info code) or (null, 0, info code)
	 */
	private static function buildStashValue(
		Content $pstContent, ParserOutput $parserOutput, $timestamp, User $user
	) {
		// The output may already be some seconds old; honour what is left of the
		// expiry it asks for, but never keep it longer than MAX_CACHE_TTL.
		$elapsed = time() - wfTimestamp( TS_UNIX, $parserOutput->getTimestamp() );
		$remaining = $parserOutput->getCacheExpiry() - $elapsed;
		$ttl = min( $remaining, self::MAX_CACHE_TTL );
		if ( $ttl <= 0 ) {
			return [ null, 0, 'no_ttl' ];
		}

		// Only store what is actually needed
		$stashInfo = new stdClass();
		$stashInfo->pstContent = $pstContent;
		$stashInfo->output = $parserOutput;
		$stashInfo->timestamp = $timestamp;
		$stashInfo->edits = $user->getEditCount();

		return [ $stashInfo, $ttl, 'ok' ];
	}
400
401
	/**
	 * Describe the request parameters accepted by this module
	 *
	 * 'title', 'contentmodel', 'contentformat' and 'baserevid' are required;
	 * 'text' and 'stashedtexthash' default to null.
	 *
	 * @return array Map of parameter name to its ApiBase::PARAM_* settings
	 */
	public function getAllowedParams() {
		$params = [];

		$params['title'] = [
			ApiBase::PARAM_TYPE => 'string',
			ApiBase::PARAM_REQUIRED => true
		];
		$params['section'] = [ ApiBase::PARAM_TYPE => 'string' ];
		$params['sectiontitle'] = [ ApiBase::PARAM_TYPE => 'string' ];
		$params['text'] = [
			ApiBase::PARAM_TYPE => 'text',
			ApiBase::PARAM_DFLT => null
		];
		$params['stashedtexthash'] = [
			ApiBase::PARAM_TYPE => 'string',
			ApiBase::PARAM_DFLT => null
		];
		$params['summary'] = [ ApiBase::PARAM_TYPE => 'string' ];
		$params['contentmodel'] = [
			ApiBase::PARAM_TYPE => ContentHandler::getContentModels(),
			ApiBase::PARAM_REQUIRED => true
		];
		$params['contentformat'] = [
			ApiBase::PARAM_TYPE => ContentHandler::getAllContentFormats(),
			ApiBase::PARAM_REQUIRED => true
		];
		$params['baserevid'] = [
			ApiBase::PARAM_TYPE => 'integer',
			ApiBase::PARAM_REQUIRED => true
		];

		return $params;
	}
438
439
	/**
	 * Report the token type this module asks for
	 *
	 * NOTE(review): presumably read by the API framework to require a CSRF
	 * token on requests - confirm against ApiBase.
	 *
	 * @return string Always 'csrf'
	 */
	public function needsToken() {
440
		return 'csrf';
441
	}
442
443
	/**
	 * Report whether this module must be called via POST
	 *
	 * @return bool Always true
	 */
	public function mustBePosted() {
444
		return true;
445
	}
446
447
	/**
	 * Report whether this module counts as a write-mode module
	 *
	 * @return bool Always true
	 */
	public function isWriteMode() {
448
		return true;
449
	}
450
451
	/**
	 * Report whether this module is flagged as internal
	 *
	 * @return bool Always true
	 */
	public function isInternal() {
452
		return true;
453
	}
454
}
455