1 | <?php |
||
2 | |||
3 | namespace SilverStripe\TextExtraction\Extractor; |
||
4 | |||
5 | use SilverStripe\Assets\File; |
||
6 | use SilverStripe\Core\Environment; |
||
7 | use SilverStripe\Core\Injector\Injector; |
||
8 | use SilverStripe\TextExtraction\Rest\TikaRestClient; |
||
9 | |||
/**
 * Enables text extraction of file content via the Tika Rest Server
 *
 * {@link http://tika.apache.org/1.7/gettingstarted.html}
 */
class TikaServerTextExtractor extends FileTextExtractor
{
    /**
     * Tika server is pretty efficient so use it immediately if available
     *
     * @var int
     * @config
     */
    private static $priority = 80;

    /**
     * Server endpoint. Only used when the SS_TIKA_ENDPOINT environment
     * variable is not set (see {@see getServerEndpoint()}).
     *
     * @var string
     * @config
     */
    private static $server_endpoint;

    /**
     * Lazily created REST client; null until {@see getClient()} is first called.
     *
     * @var TikaRestClient
     */
    protected $client = null;

    /**
     * Cache of supported mime types, as returned by the REST client.
     * Keys are looked up as mime type strings; values may carry an
     * 'alias' entry listing alternative names for the same type.
     *
     * @var array
     */
    protected $supportedMimes = [];

    /**
     * Get the REST client, creating it through the Injector on first use
     * with the resolved server endpoint as its only constructor argument.
     *
     * @return TikaRestClient
     */
    public function getClient()
    {
        if (!$this->client) {
            $this->client = Injector::inst()->createWithArgs(
                TikaRestClient::class,
                [$this->getServerEndpoint()]
            );
        }

        return $this->client;
    }

    /**
     * Resolve the Tika server endpoint.
     *
     * The SS_TIKA_ENDPOINT environment variable takes precedence; when it is
     * unset or empty the `server_endpoint` config value is returned instead.
     *
     * @return string
     */
    public function getServerEndpoint()
    {
        $endpoint = Environment::getEnv('SS_TIKA_ENDPOINT');
        if ($endpoint) {
            return $endpoint;
        }

        // Default to configured endpoint
        return $this->config()->get('server_endpoint');
    }

    /**
     * Get the version of Tika installed, or 0 if not installed
     *
     * The value is passed through unchanged from the REST client.
     * NOTE(review): the exact return type (float vs. string) depends on
     * TikaRestClient::getVersion() — confirm against that class.
     *
     * @return float version of Tika
     */
    public function getVersion()
    {
        return $this->getClient()->getVersion();
    }

    /**
     * Check that an endpoint is configured, the server responds, and it
     * reports at least version 1.7.
     *
     * The checks short-circuit in that order, so no request is made when no
     * endpoint is configured.
     *
     * @return bool
     */
    public function isAvailable()
    {
        if (!$this->getServerEndpoint()) {
            return false;
        }

        if (!$this->getClient()->isAvailable()) {
            return false;
        }

        // version_compare() works on strings; cast explicitly so a numeric
        // version from the client is handled predictably.
        return version_compare((string) $this->getVersion(), '1.7', '>=');
    }

    /**
     * @param string $extension
     * @return bool Always false: support is decided by mime type only
     */
    public function supportsExtension($extension)
    {
        // Determine support via mime type only
        return false;
    }

    /**
     * Check whether the Tika server reports support for a mime type, either
     * directly or through one of its aliases.
     *
     * @param string $mime
     * @return bool
     */
    public function supportsMime($mime)
    {
        // Populate the cache on first use. An empty result is not cached, so
        // the server is queried again on the next call.
        if (empty($this->supportedMimes)) {
            $this->supportedMimes = (array) $this->getClient()->getSupportedMimes();
        }

        // Check if supported (most common / quickest lookup)
        if (isset($this->supportedMimes[$mime])) {
            return true;
        }

        // Check aliases
        foreach ($this->supportedMimes as $info) {
            // Cast guards against a single alias supplied as a plain string;
            // strict comparison avoids loose type-juggling matches.
            if (isset($info['alias']) && in_array($mime, (array) $info['alias'], true)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Extract the content of a file by sending it to the Tika server.
     *
     * When given a File, a local path is obtained via getPathFromFile() and
     * removed again afterwards (it is treated as a temporary copy). A plain
     * path is passed to the client as-is and never deleted.
     *
     * @param File|string $file File object, or a path to a file on disk
     * @return mixed Result of TikaRestClient::tika() — presumably the
     *               extracted text; confirm against the client
     */
    public function getContent($file)
    {
        $isFileObject = $file instanceof File;
        $path = $isFileObject ? $this->getPathFromFile($file) : $file;

        try {
            return $this->getClient()->tika($path);
        } finally {
            // Cleanup temp file, even when the request throws. Only paths we
            // obtained ourselves are removed, and only if they still exist.
            if ($isFileObject && file_exists($path)) {
                unlink($path);
            }
        }
    }
}
||
138 |
This check flags implicit conversions of arrays to boolean values in a comparison. In PHP an empty array is considered equal (but not identical) to false, but this is not always apparent to the reader.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.