1 | <?php |
||||
2 | |||||
3 | namespace SilverStripe\TextExtraction\Rest; |
||||
4 | |||||
5 | use GuzzleHttp\Client; |
||||
6 | use GuzzleHttp\Exception\RequestException; |
||||
7 | use GuzzleHttp\Psr7\Response; |
||||
8 | use Psr\Log\LoggerInterface; |
||||
9 | use SilverStripe\Core\Convert; |
||||
10 | use SilverStripe\Core\Environment; |
||||
11 | use SilverStripe\Core\Injector\Injector; |
||||
12 | |||||
/**
 * Guzzle-based HTTP client for talking to an Apache Tika REST server.
 *
 * Optional basic-auth credentials are read from the SS_TIKA_USERNAME and
 * SS_TIKA_PASSWORD environment variables when the client is constructed.
 */
class TikaRestClient extends Client
{
    /**
     * Authentication options to be sent to the Tika server
     *
     * @var array
     */
    protected $options = ['username' => null, 'password' => null];

    /**
     * Cache of the decoded "mime-types" response, filled by getSupportedMimes()
     *
     * @var array
     */
    protected $mimes = [];

    /**
     * Stores credentials from the environment (if a password is set) and
     * configures the parent Guzzle client with the given base URI.
     *
     * @param string $baseUrl Used as Guzzle's "base_uri" option (overrides any value in $config)
     * @param array $config Additional Guzzle client configuration
     */
    public function __construct($baseUrl = '', $config = [])
    {
        $password = Environment::getEnv('SS_TIKA_PASSWORD');

        // Credentials are only recorded when a password has been configured
        if (!empty($password)) {
            $this->options = [
                'username' => Environment::getEnv('SS_TIKA_USERNAME'),
                'password' => $password,
            ];
        }

        $config['base_uri'] = $baseUrl;

        parent::__construct($config);
    }

    /**
     * Detect if the service is available
     *
     * Issues a GET request for "/" and reports whether it answered with HTTP 200.
     * Request failures are logged at info level and reported as "not available".
     *
     * @return bool
     */
    public function isAvailable()
    {
        try {
            /** @var Response $result */
            $result = $this->get('/', $this->getGuzzleOptions());

            // Always yield a boolean: any non-200 status means "not available"
            return (int) $result->getStatusCode() === 200;
        } catch (RequestException $ex) {
            $msg = sprintf("Tika unavailable - %s", $ex->getMessage());
            Injector::inst()->get(LoggerInterface::class)->info($msg);

            return false;
        }
    }

    /**
     * Get version code
     *
     * Requests "version" and extracts the number following "Apache Tika" from
     * the response body.
     *
     * @return string The matched version, or "0" if the response was not a 200
     *                or did not contain a recognisable version string
     */
    public function getVersion()
    {
        /** @var Response $response */
        $response = $this->get('version', $this->getGuzzleOptions());
        $version = 0;

        // Parse output
        if ((int) $response->getStatusCode() === 200
            && preg_match('/Apache Tika (?<version>[\.\d]+)/', (string) $response->getBody(), $matches)
        ) {
            $version = $matches['version'];
        }

        return (string) $version;
    }

    /**
     * Gets supported mime data. May include aliased mime types.
     *
     * The decoded response is cached on the instance, so a non-empty result is
     * only requested once.
     *
     * @return array Decoded JSON from the "mime-types" endpoint, or an empty
     *               array if the body did not decode to an array
     */
    public function getSupportedMimes()
    {
        if (!empty($this->mimes)) {
            return $this->mimes;
        }

        /** @var Response $response */
        $response = $this->get(
            'mime-types',
            $this->getGuzzleOptions([
                'headers' => [
                    'Accept' => 'application/json',
                ],
            ])
        );

        // json_decode() returns null for malformed JSON and scalars for scalar
        // documents; keep $mimes an array in every case.
        $decoded = json_decode((string) $response->getBody(), true);
        $this->mimes = is_array($decoded) ? $decoded : [];

        return $this->mimes;
    }

    /**
     * Extract text content from a given file.
     * Logs an info-level message if the request for the document fails.
     *
     * @param string $file Full filesystem path to a file to post
     * @return string Content of the file extracted as plain text, or an empty
     *                string if the request failed
     */
    public function tika($file)
    {
        $text = null;
        try {
            /** @var Response $response */
            $response = $this->put(
                'tika',
                $this->getGuzzleOptions([
                    'headers' => [
                        'Accept' => 'text/plain',
                    ],
                    'body' => file_get_contents($file),
                ])
            );
            $text = $response->getBody();
        } catch (RequestException $e) {
            $errorResponse = $e->getResponse();

            if ($errorResponse !== null) {
                $msg = sprintf(
                    'TikaRestClient was not able to process %s. Response: %s %s.',
                    $file,
                    $errorResponse->getStatusCode(),
                    $errorResponse->getReasonPhrase()
                );
                // Only available if tika-server was started with --includeStack
                $body = (string) $errorResponse->getBody();
                if ($body !== '') {
                    $msg .= ' Body: ' . $body;
                }
            } else {
                // The exception carries no response object, so report its own message instead
                $msg = sprintf(
                    'TikaRestClient was not able to process %s. No response received: %s',
                    $file,
                    $e->getMessage()
                );
            }

            Injector::inst()->get(LoggerInterface::class)->info($msg);
        }

        return (string) $text;
    }

    /**
     * Assembles an array of request options to pass to Guzzle
     *
     * The "auth" option is only added when both a username and a password are set.
     *
     * @param array $options Authentication (etc) will be merged into this array and returned
     * @return array
     */
    protected function getGuzzleOptions($options = [])
    {
        if (!empty($this->options['username']) && !empty($this->options['password'])) {
            $options['auth'] = [
                $this->options['username'],
                $this->options['password'],
            ];
        }

        return $options;
    }
}
||||
172 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using empty(...) or !empty(...) instead.