Passed
Push — master ( 7c3b34...ddfab5 )
by David
01:27
created

CLIClient::check()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 17
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 7
nc 4
nop 0
dl 0
loc 17
rs 9.6111
c 0
b 0
f 0
1
<?php
2
3
namespace Vaites\ApacheTika\Clients;
4
5
use Exception;
6
7
use Vaites\ApacheTika\Client;
8
use Vaites\ApacheTika\Metadata\Metadata;
9
10
/**
11
 * Apache Tika command line interface client
12
 *
13
 * @author  David Martínez <[email protected]>
14
 * @link    http://wiki.apache.org/tika/TikaJAXRS
15
 * @link    https://tika.apache.org/1.12/formats.html
16
 */
17
class CLIClient extends Client
18
{
19
    const MODE = 'cli';
20
21
    /**
22
     * Apache Tika app path
23
     *
24
     * @var null|string
25
     */
26
    protected $path = null;
27
28
    /**
29
     * Java binary path
30
     *
31
     * @var null|string
32
     */
33
    protected $java = null;
34
35
    /**
     * Configure client
     *
     * Stores the Apache Tika app path and the Java binary path when they are
     * given, then runs check() if the static $check flag is enabled.
     *
     * @param   null|string  $path   Apache Tika app path (ignored when empty)
     * @param   null|string  $java   Java binary path (ignored when empty)
     *
     * @throws  Exception   if the check is enabled and fails
     */
    public function __construct($path = null, $java = null)
    {
        if($path)
        {
            $this->setPath($path);
        }

        if($java)
        {
            $this->setJava($java);
        }

        // strict comparison: only a boolean true enables the check
        if(self::$check === true)
        {
            $this->check();
        }
    }
60
61
    /**
     * Get the configured Apache Tika app path
     *
     * @return  null|string  the stored path, or null when none has been set
     */
    public function getPath()
    {
        return $this->path;
    }
70
71
    /**
     * Set the Apache Tika app path
     *
     * @param   string  $path
     * @return  $this   the same instance, so calls can be chained
     */
    public function setPath($path)
    {
        $this->path = $path;

        return $this;
    }
83
84
    /**
     * Get the Java binary path
     *
     * @return  null|string  the stored path, or null when none has been set
     */
    public function getJava()
    {
        return $this->java;
    }
93
94
    /**
     * Set the Java binary path
     *
     * @param   string  $java
     * @return  $this   the same instance, so calls can be chained
     */
    public function setJava($java)
    {
        $this->java = $java;

        return $this;
    }
106
107
    /**
     * Check the Java binary and the Apache Tika app JAR path
     *
     * A successful check is remembered in the static $checked flag, so it is
     * not repeated on later calls.
     *
     * @return  void
     * @throws  \Exception  if the Java command fails or the JAR file does not exist
     */
    public function check()
    {
        // nothing to do when a previous check already passed
        if(self::$checked === true)
        {
            return;
        }

        // Java command must not return an error; its output is not needed, and
        // stderr is redirected with "2>&1" because "/dev/null" is not available
        // on Windows
        exec(($this->java ?: 'java') . ' -version 2>&1', $output, $return);
        if($return !== 0)
        {
            throw new Exception('Java command not found');
        }

        // JAR path must exist (an unset path is rejected before calling file_exists)
        if($this->path === null || file_exists($this->path) === false)
        {
            throw new Exception('Apache Tika app JAR not found');
        }

        self::$checked = true;
    }
132
133
    /**
     * Configure and make a request and return its results
     *
     * Builds the "java -jar" command for the given request type, runs it and
     * returns the output. A "meta" response is converted with Metadata::make(),
     * and "lang" and "meta" results are cached per file and type.
     *
     * @param   string       $type
     * @param   null|string  $file
     * @return  string|Metadata
     * @throws  \Exception
     */
    public function request($type, $file = null)
    {
        // check if not checked
        $this->check();

        // the cache key is computed once from the file as received: $file is
        // reassigned from checkRequest() below, and the lookup and the store
        // must use the same key
        $key = sha1((string) $file);

        // check if is cached
        if(isset($this->cache[$key][$type]))
        {
            return $this->cache[$key][$type];
        }

        // command arguments
        $arguments = $this->getArguments($type, $file);

        // check the request
        $file = parent::checkRequest($type, $file);

        // add last argument
        if($file)
        {
            $arguments[] = escapeshellarg($file);
        }

        // build command
        $jar = escapeshellarg($this->path);
        $command = ($this->java ?: 'java') . " -jar $jar " . implode(' ', $arguments);

        // run command
        $response = $this->exec($command);

        // metadata response
        if($type === 'meta')
        {
            // fix for invalid? json returned only with images
            $response = str_replace(basename($file) . '"}{', '", ', $response);

            // on Windows, response comes in another charset
            // NOTE(review): utf8_encode() is deprecated as of PHP 8.2 and will
            // need a replacement when newer PHP versions are targeted
            if(defined('PHP_WINDOWS_VERSION_MAJOR'))
            {
                $response = utf8_encode($response);
            }

            $response = Metadata::make($response, $file);
        }

        // cache certain responses
        if(in_array($type, ['lang', 'meta'], true))
        {
            $this->cache[$key][$type] = $response;
        }

        return $response;
    }
194
195
    /**
     * Run the command and return its results
     *
     * Standard output is read in chunks of up to $this->chunkSize bytes; each
     * chunk is passed to the configured callback (if any) and appended to
     * $this->response. Standard error is appended to "tika-error.log" in the
     * system temporary directory.
     *
     * @param   string  $command
     * @return  string  the trimmed output of the command
     * @throws  \Exception  if the process can't be started or exits with a value greater than zero
     */
    public function exec($command)
    {
        // run command
        $logfile = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'tika-error.log';
        $descriptors = [['pipe', 'r'], ['pipe', 'w'], ['file', $logfile, 'a']];
        $process = proc_open($command, $descriptors, $pipes);
        $callback = $this->callback;

        // without a process there is nothing to read; fail instead of
        // returning the response left over from a previous call
        if(!is_resource($process))
        {
            throw new Exception("Unable to run command $command");
        }

        // get output if command runs ok
        fclose($pipes[0]);
        $this->response = '';

        // stop on false or an empty string only: a plain truthiness test would
        // also stop on a chunk equal to "0" and truncate the output
        while(($chunk = stream_get_line($pipes[1], $this->chunkSize)) !== false && $chunk !== '')
        {
            if($callback !== null)
            {
                $callback($chunk);
            }

            $this->response .= $chunk;
        }

        fclose($pipes[1]);
        $exit = proc_close($process);

        // exception if exit value is greater than zero (negative values
        // returned by proc_close() are not treated as command errors)
        if($exit > 0)
        {
            throw new Exception("Unexpected exit value ($exit) for command $command");
        }

        return trim($this->response);
    }
237
238
    /**
     * Get the arguments to run the command
     *
     * Maps a request type to the matching command line option.
     *
     * @param   string  $type
     * @param   string  $file   accepted for call compatibility, not used here
     * @return  array
     * @throws  Exception   if the type is unknown
     */
    protected function getArguments($type, $file = null)
    {
        // command line option for each known request type; a string-keyed
        // lookup avoids the loose comparison of switch, which would let
        // non-string values such as true select the first case
        $options = [
            'html'          => '--html',
            'lang'          => '--language',
            'mime'          => '--detect',
            'meta'          => '--metadata --json',
            'text'          => '--text',
            'text-main'     => '--text-main',
            'mime-types'    => '--list-supported-types',
            'detectors'     => '--list-detectors',
            'parsers'       => '--list-parsers',
            'version'       => '--version',
        ];

        if(!is_string($type) || !isset($options[$type]))
        {
            throw new Exception("Unknown type $type");
        }

        return [$options[$type]];
    }
298
}
299