Issues (27)

src/Extractor/TikaServerTextExtractor.php (1 issue)

1
<?php
2
3
namespace SilverStripe\TextExtraction\Extractor;
4
5
use SilverStripe\Assets\File;
6
use SilverStripe\Core\Environment;
7
use SilverStripe\Core\Injector\Injector;
8
use SilverStripe\TextExtraction\Rest\TikaRestClient;
9
10
/**
11
 * Enables text extraction of file content via the Tika Rest Server
12
 *
13
 * {@link http://tika.apache.org/1.7/gettingstarted.html}
14
 */
15
class TikaServerTextExtractor extends FileTextExtractor
16
{
17
    /**
18
     * Tika server is pretty efficient so use it immediately if available
19
     *
20
     * @var integer
21
     * @config
22
     */
23
    private static $priority = 80;
24
25
    /**
26
     * Server endpoint
27
     *
28
     * @var string
29
     * @config
30
     */
31
    private static $server_endpoint;
32
33
    /**
34
     * @var TikaRestClient
35
     */
36
    protected $client = null;
37
38
    /**
39
     * Cache of supported mime types
40
     *
41
     * @var array
42
     */
43
    protected $supportedMimes = [];
44
45
    /**
46
     * @return TikaRestClient
47
     */
48
    public function getClient()
    {
        // Reuse the client created by an earlier call, if there is one
        if ($this->client) {
            return $this->client;
        }

        // First use: ask the injector for a client built with the resolved endpoint
        $injector = Injector::inst();
        $this->client = $injector->createWithArgs(TikaRestClient::class, [$this->getServerEndpoint()]);

        return $this->client;
    }
58
59
    /**
60
     * @return string
61
     */
62
    public function getServerEndpoint()
    {
        // A non-empty SS_TIKA_ENDPOINT environment value takes precedence
        $fromEnvironment = Environment::getEnv('SS_TIKA_ENDPOINT');

        // Otherwise fall back to the configured server_endpoint value
        return $fromEnvironment ?: $this->config()->get('server_endpoint');
    }
71
72
    /**
73
     * Get the version of Tika installed, or 0 if not installed
74
     *
75
     * @return float version of Tika
76
     */
77
    public function getVersion()
    {
        // The version lookup is delegated to the REST client
        $client = $this->getClient();

        return $client->getVersion();
    }
81
82
    /**
83
     * @return boolean
84
     */
85
    public function isAvailable()
    {
        // Without an endpoint there is nothing to talk to
        if (!$this->getServerEndpoint()) {
            return false;
        }

        // The client must report the server as available
        if (!$this->getClient()->isAvailable()) {
            return false;
        }

        // Require a server version of at least 1.7
        return version_compare($this->getVersion(), '1.7') >= 0;
    }
91
92
    /**
93
     * @param  string $extension
94
     * @return boolean
95
     */
96
    public function supportsExtension($extension)
    {
        // File extensions are never matched by this extractor;
        // support is decided by mime type alone (see supportsMime())
        return false;
    }
101
102
    /**
103
     * @param  string $mime
104
     * @return boolean
105
     */
106
    public function supportsMime($mime)
    {
        // Populate the cache on first use. The emptiness check is explicit rather
        // than relying on implicit array-to-boolean conversion. An empty result
        // from the client leaves the cache empty, so it is requested again on
        // the next call.
        if (empty($this->supportedMimes)) {
            $this->supportedMimes = (array) $this->getClient()->getSupportedMimes();
        }

        // Check if supported (most common / quickest lookup): mime types are the cache keys
        if (isset($this->supportedMimes[$mime])) {
            return true;
        }

        // Check aliases listed under each entry's 'alias' key
        foreach ($this->supportedMimes as $info) {
            if (!isset($info['alias'])) {
                continue;
            }

            // Cast so a single (non-array) alias value cannot make in_array() fail,
            // and compare strictly to avoid loose type-juggling matches
            if (in_array($mime, (array) $info['alias'], true)) {
                return true;
            }
        }

        return false;
    }
126
127
    /**
     * Extract content for a file by passing its path to the Tika client.
     *
     * @param  File|string $file A File record, which is resolved to a temporary path via
     *                           getPathFromFile() and removed afterwards, or a value that is
     *                           handed to the client unchanged
     * @return mixed Whatever TikaRestClient::tika() returns (presumably the extracted
     *               text - confirm against the client)
     */
    public function getContent($file)
    {
        $isFileRecord = $file instanceof File;
        $path = $isFileRecord ? $this->getPathFromFile($file) : $file;

        try {
            return $this->getClient()->tika($path);
        } finally {
            // Cleanup temp file even when extraction throws, so it is never left behind.
            // Only paths created here from a File record are removed; the existence
            // check avoids an unlink() warning if the file is already gone.
            if ($isFileRecord && is_string($path) && file_exists($path)) {
                unlink($path);
            }
        }
    }
137
}
138