Passed
Push — master ( 2807aa...ab76ce )
by David
01:23
created

Client::downloadFile()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 31
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 15
nc 4
nop 1
dl 0
loc 31
rs 9.7666
c 0
b 0
f 0
1
<?php
2
3
namespace Vaites\ApacheTika;
4
5
use Closure;
6
use Exception;
7
8
use Vaites\ApacheTika\Clients\CLIClient;
9
use Vaites\ApacheTika\Clients\WebClient;
10
11
/**
12
 * Apache Tika client interface
13
 *
14
 * @author  David Martínez <[email protected]>
15
 * @link    http://wiki.apache.org/tika/TikaJAXRS
16
 * @link    https://tika.apache.org/1.10/formats.html
17
 */
18
abstract class Client
{
    /**
     * List of supported Apache Tika versions
     *
     * @var array
     */
    protected static $supportedVersions =
    [
        '1.7', '1.8', '1.9', '1.10', '1.11', '1.12', '1.13', '1.14',
        '1.15', '1.16', '1.17', '1.18', '1.19', '1.19.1', '1.20'
    ];

    /**
     * Verify JAR or server connection on constructor
     *
     * @var bool
     */
    protected static $check = true;

    /**
     * Checked flag
     *
     * @var bool
     */
    protected static $checked = false;

    /**
     * Response using callbacks
     *
     * @var string|null
     */
    protected $response = null;

    /**
     * Cached responses to avoid multiple requests for the same file.
     *
     * @var array
     */
    protected $cache = [];

    /**
     * Callback called on sequential read
     *
     * @var \Closure|null
     */
    protected $callback = null;

    /**
     * Size of chunks for callback (defaults to 1 MiB)
     *
     * @var int
     */
    protected $chunkSize = 1048576;

    /**
     * Remote download flag
     *
     * @var bool
     */
    protected $downloadRemote = false;

    /**
     * Get a class instance throwing an exception if check fails
     *
     * A first parameter ending in ".jar" selects the command line client, anything else
     * (including no parameter at all) selects the web client.
     *
     * @param   string      $param1     path or host
     * @param   string|int  $param2     Java binary path or port for web client
     * @param   array       $options    options for cURL request
     * @return  \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
     * @throws  \Exception
     */
    public static function make($param1 = null, $param2 = null, $options = [])
    {
        // read the named parameter: func_get_arg(0) fails when make() is called without arguments
        if($param1 !== null && preg_match('/\.jar$/', $param1))
        {
            return new CLIClient($param1, $param2);
        }

        return new WebClient($param1, $param2, $options);
    }

    /**
     * Get a class instance delaying the check
     *
     * Note: the check flag is static and is not restored by this method.
     *
     * @param   string      $param1     path or host
     * @param   string|int  $param2     Java binary path or port for web client
     * @param   array       $options    options for cURL request
     * @return  \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
     * @throws  \Exception
     */
    public static function prepare($param1 = null, $param2 = null, $options = [])
    {
        self::$check = false;

        return self::make($param1, $param2, $options);
    }

    /**
     * Get the callback
     *
     * @return  \Closure|null
     */
    public function getCallback()
    {
        return $this->callback;
    }

    /**
     * Set the callback (callable or closure) to call on sequential read
     *
     * Closures are stored as given; any other callable is wrapped in a closure
     * that forwards the chunk to it.
     *
     * @param   mixed   $callback
     * @return  $this
     * @throws  \Exception  if the value is neither a closure nor a callable
     */
    public function setCallback($callback)
    {
        if($callback instanceof Closure)
        {
            $this->callback = $callback;
        }
        elseif(is_callable($callback))
        {
            $this->callback = function($chunk) use($callback)
            {
                return call_user_func($callback, $chunk);
            };
        }
        else
        {
            throw new Exception('Invalid callback');
        }

        return $this;
    }

    /**
     * Get the chunk size
     *
     * @return  int
     */
    public function getChunkSize()
    {
        return $this->chunkSize;
    }

    /**
     * Set the chunk size for sequential read
     *
     * Relies on a MODE class constant that is not declared in this class;
     * presumably each concrete client defines it as 'cli' or 'web' (confirm in the subclasses).
     *
     * @param   int     $size
     * @return  $this
     * @throws  \Exception  on web mode or when the size is not numeric
     */
    public function setChunkSize($size)
    {
        if(static::MODE === 'web')
        {
            throw new Exception('Chunk size is not supported on web mode');
        }

        if(static::MODE !== 'cli' || !is_numeric($size))
        {
            throw new Exception("$size is not a valid chunk size");
        }

        $this->chunkSize = (int) $size;

        return $this;
    }

    /**
     * Get the remote download flag
     *
     * @return  bool
     */
    public function getDownloadRemote()
    {
        return $this->downloadRemote;
    }

    /**
     * Set the remote download flag
     *
     * @param   bool    $download
     * @return  $this
     */
    public function setDownloadRemote($download)
    {
        $this->downloadRemote = (bool) $download;

        return $this;
    }

    /**
     * Gets file metadata
     *
     * @param   string  $file
     * @return  \Vaites\ApacheTika\Metadata\Metadata
     * @throws  \Exception
     */
    public function getMetadata($file)
    {
        return $this->request('meta', $file);
    }

    /**
     * Detect language
     *
     * @param   string  $file
     * @return  string
     * @throws  \Exception
     */
    public function getLanguage($file)
    {
        return $this->request('lang', $file);
    }

    /**
     * Detect MIME type
     *
     * @param   string  $file
     * @return  string
     * @throws  \Exception
     */
    public function getMIME($file)
    {
        return $this->request('mime', $file);
    }

    /**
     * Extracts HTML
     *
     * @param   string  $file
     * @param   mixed   $callback   optional callback, registered through setCallback()
     * @return  string
     * @throws  \Exception
     */
    public function getHTML($file, $callback = null)
    {
        if(!is_null($callback))
        {
            $this->setCallback($callback);
        }

        return $this->request('html', $file);
    }

    /**
     * Extracts text
     *
     * @param   string  $file
     * @param   mixed   $callback   optional callback, registered through setCallback()
     * @return  string
     * @throws  \Exception
     */
    public function getText($file, $callback = null)
    {
        if(!is_null($callback))
        {
            $this->setCallback($callback);
        }

        return $this->request('text', $file);
    }

    /**
     * Extracts main text
     *
     * @param   string  $file
     * @param   mixed   $callback   optional callback, registered through setCallback()
     * @return  string
     * @throws  \Exception
     */
    public function getMainText($file, $callback = null)
    {
        if(!is_null($callback))
        {
            $this->setCallback($callback);
        }

        return $this->request('text-main', $file);
    }

    /**
     * Returns the supported MIME types
     *
     * @return  string
     * @throws  \Exception
     */
    public function getSupportedMIMETypes()
    {
        return $this->request('mime-types');
    }

    /**
     * Returns the available detectors
     *
     * @return  string
     * @throws  \Exception
     */
    public function getAvailableDetectors()
    {
        return $this->request('detectors');
    }

    /**
     * Returns the available parsers
     *
     * @return  string
     * @throws  \Exception
     */
    public function getAvailableParsers()
    {
        return $this->request('parsers');
    }

    /**
     * Returns current Tika version
     *
     * @return  string
     * @throws  \Exception
     */
    public function getVersion()
    {
        return $this->request('version');
    }

    /**
     * Return the list of Apache Tika supported versions
     *
     * @return  array
     */
    public static function getSupportedVersions()
    {
        return self::$supportedVersions;
    }

    /**
     * Sets the checked flag
     *
     * @param   bool    $checked
     */
    public static function setChecked($checked)
    {
        self::$checked = (bool) $checked;
    }

    /**
     * Checks if instance is checked
     *
     * @return  bool
     */
    public static function isChecked()
    {
        return self::$checked;
    }

    /**
     * Checks if a specific version is supported
     *
     * @param   string  $version
     * @return  bool
     */
    public static function isVersionSupported($version)
    {
        // strict comparison: a loose one compares numeric strings as floats, so '1.1' would match '1.10'
        return in_array((string) $version, self::getSupportedVersions(), true);
    }

    /**
     * Check the request before executing
     *
     * Request types that need no file are not checked. Local files must exist and remote
     * files must answer with a 200 status; remote files are downloaded to a temporary
     * file when the remote download flag is set.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  string  the file to use for the request (the temporary path when downloaded)
     * @throws  \Exception
     */
    public function checkRequest($type, $file)
    {
        // no checks for getters
        if(in_array($type, ['detectors', 'mime-types', 'parsers', 'version'], true))
        {
            return $file;
        }

        $remote = (bool) preg_match('/^http/', (string) $file);

        if(!$remote)
        {
            // invalid local file
            if(!file_exists((string) $file))
            {
                throw new Exception("File $file can't be opened");
            }

            return $file;
        }

        // invalid remote file: get_headers() returns false when the request itself fails
        $headers = get_headers($file);

        if(empty($headers) || !preg_match('/200/', $headers[0]))
        {
            throw new Exception("File $file can't be opened", 2);
        }

        // download remote file if required only for integrated downloader
        if($this->downloadRemote)
        {
            $file = $this->downloadFile($file);
        }

        return $file;
    }

    /**
     * Download file to a temporary folder
     *
     * The transfer is limited to 5 seconds. The file handle and the cURL handle are always
     * released, and the temporary file is removed when the download fails.
     *
     * @link    https://wiki.apache.org/tika/TikaJAXRS#Specifying_a_URL_Instead_of_Putting_Bytes
     * @param   string  $file
     * @return  string  path of the temporary file
     * @throws  \Exception
     */
    protected function downloadFile($file)
    {
        $dest = tempnam(sys_get_temp_dir(), 'TIKA');

        if($dest === false)
        {
            throw new Exception("Temporary file can't be created");
        }

        $fp = fopen($dest, 'w+');

        if($fp === false)
        {
            if(is_file($dest))
            {
                unlink($dest);
            }

            throw new Exception("$dest can't be opened");
        }

        $ch = curl_init($file);

        if($ch === false)
        {
            fclose($fp);
            unlink($dest);

            throw new Exception("$file can't be downloaded");
        }

        curl_setopt($ch, CURLOPT_FILE, $fp);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_exec($ch);

        // collect the outcome before releasing the handles
        $errno = curl_errno($ch);
        $error = curl_error($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        curl_close($ch);
        fclose($fp);

        $failure = null;

        if($errno)
        {
            $failure = new Exception($error);
        }
        elseif($code != 200)
        {
            $failure = new Exception("$file can't be downloaded", $code);
        }

        if($failure !== null)
        {
            // do not leave a partial download behind
            if(is_file($dest))
            {
                unlink($dest);
            }

            throw $failure;
        }

        return $dest;
    }

    /**
     * Check Java binary, JAR path or server connection
     *
     * @return  void
     */
    abstract public function check();

    /**
     * Configure and make a request and return its results.
     *
     * The file is optional because the getters (version, parsers, detectors and MIME types)
     * request information that is not tied to a file.
     *
     * @param   string      $type
     * @param   string|null $file
     * @return  mixed       string for most request types; getMetadata() documents a Metadata object
     * @throws  \Exception
     */
    abstract public function request($type, $file = null);
}
480