|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare( strict_types = 1 ); |
|
4
|
|
|
|
|
5
|
|
|
namespace WMDE\Fundraising\Frontend\ApplicationContext\DataAccess; |
|
6
|
|
|
|
|
7
|
|
|
use Mediawiki\Api\ApiUser; |
|
8
|
|
|
use Mediawiki\Api\MediawikiApi; |
|
9
|
|
|
use Mediawiki\Api\SimpleRequest; |
|
10
|
|
|
use Mediawiki\Api\UsageException; |
|
11
|
|
|
use Psr\Log\LoggerInterface; |
|
12
|
|
|
use WMDE\Fundraising\Frontend\ApplicationContext\Infrastructure\PageRetriever; |
|
13
|
|
|
|
|
14
|
|
|
/**
 * Page retriever that fetches wiki pages through the MediaWiki API, either as
 * rendered HTML (API action "parse") or as raw wikitext (API action "query").
 *
 * @licence GNU GPL v2+
 * @author Kai Nissen
 * @author Gabriel Birke < [email protected] >
 * @author Jeroen De Dauw < [email protected] >
 */
class ApiBasedPageRetriever implements PageRetriever {

	/**
	 * Regular expressions (keys) and their replacements (values) that are applied to
	 * rendered page HTML in order to strip the matching HTML comments from it.
	 */
	const MW_COMMENT_PATTERNS = [
		'/<!--\s*NewPP limit report.*?-->/s' => '',
		'/<!--\s*Transclusion expansion time report.*?-->/s' => '',
		'/<!--\s*Saved in parser cache with key.*?-->/s' => ''
	];

	/**
	 * @var MediawikiApi
	 */
	private $api;

	/**
	 * @var ApiUser
	 */
	private $apiUser;

	/**
	 * @var LoggerInterface
	 */
	private $logger;

	/**
	 * @var string
	 */
	private $pageTitlePrefix;

	/**
	 * @param MediawikiApi $api API client used for all requests
	 * @param ApiUser $apiUser user to log in with when the API client is not logged in yet
	 * @param LoggerInterface $logger receives debug messages about requested pages and failures
	 * @param string $pageTitlePrefix string that is prepended to every requested page title
	 */
	public function __construct( MediawikiApi $api, ApiUser $apiUser, LoggerInterface $logger, string $pageTitlePrefix ) {
		$this->api = $api;
		$this->apiUser = $apiUser;
		$this->logger = $logger;
		$this->pageTitlePrefix = $pageTitlePrefix;
	}

	/**
	 * Fetches the content of a page, logging in first if the API client is not logged in.
	 *
	 * @param string $pageTitle page title without the configured prefix
	 * @param string $action 'render' for cleaned-up HTML, 'raw' for wikitext
	 * @throws \RuntimeException if the value of $action is not supported
	 * @return string page content, or an empty string if the content could not be retrieved
	 */
	public function fetchPage( string $pageTitle, string $action = PageRetriever::MODE_RENDERED ): string {
		$normalizedPageName = $this->normalizePageName( $this->getPrefixedPageTitle( $pageTitle ) );

		$this->logger->debug( __METHOD__ . ': pageTitle', [ $normalizedPageName ] );

		if ( !$this->api->isLoggedin() ) {
			$this->doLogin();
		}

		$content = $this->retrieveContent( $normalizedPageName, $action );

		// The retrieval helpers signal failure with false (API error) or null (no content)
		if ( $content === false || $content === null ) {
			$this->logger->debug( __METHOD__ . ': fail, got non-value', [ $content ] );
			return '';
		}

		return $content;
	}

	/**
	 * Logs the configured API user in on the API client.
	 */
	private function doLogin() {
		$this->api->login( $this->apiUser );
	}

	/**
	 * Dispatches to the retrieval method that matches the requested action.
	 *
	 * @param string $pageTitle normalized, prefixed page title
	 * @param string $action 'raw' or 'render'
	 * @throws \RuntimeException if the value of $action is not supported
	 * @return string|bool|null retrieved content, false on API error, null if there is no content
	 */
	private function retrieveContent( string $pageTitle, string $action ) {
		switch ( $action ) {
			case 'raw':
				return $this->retrieveWikiText( $pageTitle );
			case 'render':
				return $this->retrieveRenderedPage( $pageTitle );
			default:
				throw new \RuntimeException( 'Action "' . $action . '" not supported' );
		}
	}

	/**
	 * Requests the parsed text of a page and strips the comments matched by
	 * MW_COMMENT_PATTERNS from it.
	 *
	 * @param string $pageTitle normalized, prefixed page title
	 * @return string|bool|null cleaned HTML, false on API usage error, null if the response holds no text
	 */
	private function retrieveRenderedPage( string $pageTitle ) {
		$params = [
			'page' => $pageTitle,
			'prop' => 'text'
		];

		try {
			$response = $this->api->postRequest( new SimpleRequest( 'parse', $params ) );
		} catch ( UsageException $e ) {
			return false;
		}

		if ( !empty( $response['parse']['text']['*'] ) ) {
			return $this->cleanupWikiHtml( $response['parse']['text']['*'] );
		}

		return null;
	}

	/**
	 * Requests the revision content of a page and returns the content of the first
	 * revision of the first page in the response.
	 *
	 * @param string $pageTitle normalized, prefixed page title
	 * @return string|bool|null wikitext, false on API usage error or when the response
	 *         contains no page list, null if the page has no revision content
	 */
	private function retrieveWikiText( string $pageTitle ) {
		$params = [
			'titles' => $pageTitle,
			'prop' => 'revisions',
			'rvprop' => 'content'
		];

		try {
			$response = $this->api->postRequest( new SimpleRequest( 'query', $params ) );
		} catch ( UsageException $e ) {
			return false;
		}

		// Null coalescing avoids undefined index notices on unexpected response shapes
		$pages = $response['query']['pages'] ?? null;

		if ( !is_array( $pages ) ) {
			return false;
		}

		$page = reset( $pages );

		// Yields null (without notices) when the page list is empty or the page has no revisions
		return $page['revisions'][0]['*'] ?? null;
	}

	/**
	 * Removes the comments matched by MW_COMMENT_PATTERNS and trailing whitespace.
	 *
	 * @param string $text rendered page HTML
	 * @return string
	 */
	private function cleanupWikiHtml( string $text ): string {
		$cleanedText = preg_replace(
			array_keys( self::MW_COMMENT_PATTERNS ),
			array_values( self::MW_COMMENT_PATTERNS ),
			$text
		);

		// preg_replace() returns null on a PCRE error; passing that to rtrim() would raise
		// a TypeError under strict_types, so fall back to the unmodified HTML instead
		if ( $cleanedText === null ) {
			$cleanedText = $text;
		}

		return rtrim( $cleanedText );
	}

	/**
	 * Trims the title, replaces spaces with underscores and upper-cases the first character.
	 *
	 * @param string $title
	 * @return string
	 */
	private function normalizePageName( string $title ): string {
		return ucfirst( str_replace( ' ', '_', trim( $title ) ) );
	}

	/**
	 * Prepends the configured page title prefix to the given title.
	 *
	 * @param string $pageTitle
	 * @return string
	 */
	private function getPrefixedPageTitle( string $pageTitle ): string {
		return $this->pageTitlePrefix . $pageTitle;
	}

}
|
144
|
|
|
|
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes.
In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.