Passed
Branch master (2ad8bc)
by David
01:55 queued 32s
created

Client::cacheResponse()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 3
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace Vaites\ApacheTika;
4
5
use Closure;
6
use Exception;
7
8
use Vaites\ApacheTika\Clients\CLIClient;
9
use Vaites\ApacheTika\Clients\WebClient;
10
use Vaites\ApacheTika\Metadata\Metadata;
11
12
/**
13
 * Apache Tika client interface
14
 *
15
 * @author  David Martínez <[email protected]>
16
 * @link    https://tika.apache.org/1.21/formats.html
17
 */
18
abstract class Client
19
{
20
    /**
21
     * List of supported Apache Tika versions
22
     *
23
     * @var array
24
     */
25
    protected static $supportedVersions =
26
    [
27
        '1.7', '1.8', '1.9', '1.10', '1.11', '1.12', '1.13', '1.14',
28
        '1.15', '1.16', '1.17', '1.18', '1.19', '1.19.1', '1.20', '1.21'
29
    ];
30
31
    /**
32
     * Checked flag
33
     *
34
     * @var bool
35
     */
36
    protected $checked = false;
37
38
    /**
39
     * Response using callbacks
40
     *
41
     * @var string
42
     */
43
    protected $response = null;
44
45
    /**
46
     * Platform (unix or win)
47
     *
48
     * @var string
49
     */
50
    protected $platform = null;
51
52
    /**
53
     * Cached responses to avoid multiple requests for the same file.
54
     *
55
     * @var array
56
     */
57
    protected $cache = [];
58
59
    /**
60
     * Callback called on sequential read
61
     *
62
     * @var \Closure
63
     */
64
    protected $callback = null;
65
66
    /**
67
     * Size of chunks for callback
68
     *
69
     * @var int
70
     */
71
    protected $chunkSize = 1048576;
72
73
    /**
74
     * Remote download flag
75
     *
76
     * @var bool
77
     */
78
    protected $downloadRemote = false;
79
80
    /**
     * Initialise the client by detecting the platform
     *
     * The platform is stored as 'win' when the PHP_WINDOWS_VERSION_MAJOR
     * constant is defined and as 'unix' otherwise.
     */
    public function __construct()
    {
        $isWindows = defined('PHP_WINDOWS_VERSION_MAJOR');

        $this->platform = $isWindows ? 'win' : 'unix';
    }
87
88
    /**
     * Get a class instance throwing an exception if check fails
     *
     * A first parameter ending in ".jar" selects the command line client;
     * any other value (including no value at all) selects the web client.
     *
     * @param   string  $param1     path or host
     * @param   int     $param2     Java binary path or port for web client
     * @param   array   $options    options for cURL request (web client only)
     * @param   bool    $check      flag forwarded to the client constructor; prepare() passes false to delay the check
     * @return  \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
     * @throws  \Exception
     */
    public static function make($param1 = null, $param2 = null, $options = [], $check = true)
    {
        // read the declared parameter instead of func_get_arg(0), which fails
        // when make() is called without any argument
        $isJar = $param1 !== null && preg_match('/\.jar$/', (string) $param1);

        if($isJar)
        {
            return new CLIClient($param1, $param2, $check);
        }

        return new WebClient($param1, $param2, $options, $check);
    }
108
109
    /**
     * Build a client instance whose check is delayed
     *
     * Forwards every parameter to make() with the check flag set to false.
     *
     * @param   string  $param1     path or host
     * @param   int     $param2     Java binary path or port for web client
     * @param   array   $options    options for cURL request
     * @return  \Vaites\ApacheTika\Clients\CLIClient|\Vaites\ApacheTika\Clients\WebClient
     * @throws  \Exception
     */
    public static function prepare($param1 = null, $param2 = null, $options = [])
    {
        $check = false;

        return self::make($param1, $param2, $options, $check);
    }
122
123
    /**
     * Return the callback currently registered for sequential read
     *
     * @return  \Closure|null   null while no callback has been set
     */
    public function getCallback()
    {
        $callback = $this->callback;

        return $callback;
    }
132
133
    /**
     * Register the callback (callable or closure) to call on sequential read
     *
     * A closure is stored as given; any other callable is wrapped in a
     * closure that forwards the received chunk to it.
     *
     * @param   mixed   $callback
     * @return  $this
     * @throws  \Exception  when the value is neither a closure nor a callable
     */
    public function setCallback($callback)
    {
        $isClosure = $callback instanceof Closure;

        // reject anything that can't be invoked
        if(!$isClosure && !is_callable($callback))
        {
            throw new Exception('Invalid callback');
        }

        if($isClosure)
        {
            $this->callback = $callback;

            return $this;
        }

        $this->callback = function($chunk) use($callback)
        {
            return call_user_func_array($callback, [$chunk]);
        };

        return $this;
    }
160
161
    /**
     * Return the size of the chunks passed to the callback
     *
     * @return  int
     */
    public function getChunkSize()
    {
        $size = $this->chunkSize;

        return $size;
    }
170
171
    /**
     * Set the chunk size for sequential read
     *
     * Only accepted when the concrete client's MODE constant is 'cli'.
     * MODE is not declared in this class and is resolved through late
     * static binding, so it is presumably provided by each subclass.
     *
     * @param   int     $size       numeric value greater than zero
     * @return  $this
     * @throws  \Exception  on web mode, or when the size is not a positive number
     */
    public function setChunkSize($size)
    {
        if(static::MODE == 'web')
        {
            throw new Exception('Chunk size is not supported on web mode');
        }

        // zero and negative values are numeric but can't be used as a chunk length
        if(static::MODE != 'cli' || !is_numeric($size) || (int) $size < 1)
        {
            throw new Exception("$size is not a valid chunk size");
        }

        $this->chunkSize = (int) $size;

        return $this;
    }
195
196
    /**
     * Tell whether remote files are downloaded by this client
     *
     * @return  bool
     */
    public function getDownloadRemote()
    {
        $flag = $this->downloadRemote;

        return $flag;
    }
205
206
    /**
     * Enable or disable the remote download flag
     *
     * The given value is converted to a boolean before being stored.
     *
     * @param   bool    $download
     * @return  $this
     */
    public function setDownloadRemote($download)
    {
        $this->downloadRemote = $download ? true : false;

        return $this;
    }
218
219
    /**
     * Gets file metadata using recursive if specified
     *
     * Without a recursive type a "meta" request is made; with one, the
     * request type is "rmeta/" followed by the given type.
     *
     * @link    https://wiki.apache.org/tika/TikaJAXRS#Recursive_Metadata_and_Content
     * @param   string  $file
     * @param   string  $recursive  one of text, html or ignore (null for non recursive metadata)
     * @return  \Vaites\ApacheTika\Metadata\Metadata
     * @throws  \Exception  when the recursive type is unknown
     */
    public function getMetadata($file, $recursive = null)
    {
        if(is_null($recursive))
        {
            $response = $this->request('meta', $file);
        }
        // strict match: a loose comparison would also accept values such as true
        elseif(in_array($recursive, ['text', 'html', 'ignore'], true))
        {
            $response = $this->request("rmeta/$recursive", $file);
        }
        else
        {
            throw new Exception("Unknown recursive type (must be text, html, ignore or null)");
        }

        // request() is documented as returning string or Metadata: only a string needs to be parsed
        if($response instanceof Metadata)
        {
            return $response;
        }

        return Metadata::make($response, $file);
    }
245
246
    /**
     * Gets recursive file metadata (alias for getMetadata with a mandatory recursive type)
     *
     * @param   string  $file
     * @param   string  $recursive
     * @return  \Vaites\ApacheTika\Metadata\Metadata
     * @throws  \Exception
     */
    public function getRecursiveMetadata($file, $recursive)
    {
        $metadata = $this->getMetadata($file, $recursive);

        return $metadata;
    }
258
259
    /**
     * Detect the language of a file by issuing a "lang" request
     *
     * @param   string  $file
     * @return  string
     * @throws  \Exception
     */
    public function getLanguage($file)
    {
        $language = $this->request('lang', $file);

        return $language;
    }
270
271
    /**
     * Detect the MIME type of a file by issuing a "mime" request
     *
     * @param   string  $file
     * @return  string
     * @throws  \Exception
     */
    public function getMIME($file)
    {
        $mime = $this->request('mime', $file);

        return $mime;
    }
282
283
    /**
     * Extracts HTML by issuing an "html" request
     *
     * When a callback is given it is registered through setCallback()
     * before the request is made.
     *
     * @param   string  $file
     * @param   mixed   $callback
     * @return  string
     * @throws  \Exception
     */
    public function getHTML($file, $callback = null)
    {
        if($callback !== null)
        {
            $this->setCallback($callback);
        }

        $html = $this->request('html', $file);

        return $html;
    }
300
301
    /**
     * Extracts text by issuing a "text" request
     *
     * When a callback is given it is registered through setCallback()
     * before the request is made.
     *
     * @param   string  $file
     * @param   mixed   $callback
     * @return  string
     * @throws  \Exception
     */
    public function getText($file, $callback = null)
    {
        if($callback !== null)
        {
            $this->setCallback($callback);
        }

        $text = $this->request('text', $file);

        return $text;
    }
318
319
    /**
     * Extracts main text by issuing a "text-main" request
     *
     * When a callback is given it is registered through setCallback()
     * before the request is made.
     *
     * @param   string  $file
     * @param   mixed   $callback
     * @return  string
     * @throws  \Exception
     */
    public function getMainText($file, $callback = null)
    {
        if($callback !== null)
        {
            $this->setCallback($callback);
        }

        $text = $this->request('text-main', $file);

        return $text;
    }
336
337
    /**
     * Returns the supported MIME types by issuing a "mime-types" request
     *
     * @return  string
     * @throws  \Exception
     */
    public function getSupportedMIMETypes()
    {
        // request() declares $file as a required parameter: pass null explicitly, no file is involved
        return $this->request('mime-types', null);
    }
347
348
    /**
     * Returns the available detectors by issuing a "detectors" request
     *
     * @return  string
     * @throws  \Exception
     */
    public function getAvailableDetectors()
    {
        // request() declares $file as a required parameter: pass null explicitly, no file is involved
        return $this->request('detectors', null);
    }
358
359
    /**
     * Returns the available parsers by issuing a "parsers" request
     *
     * @return  string
     * @throws  \Exception
     */
    public function getAvailableParsers()
    {
        // request() declares $file as a required parameter: pass null explicitly, no file is involved
        return $this->request('parsers', null);
    }
369
370
    /**
     * Returns current Tika version by issuing a "version" request
     *
     * @return  string
     * @throws  \Exception
     */
    public function getVersion()
    {
        // request() declares $file as a required parameter: pass null explicitly, no file is involved
        return $this->request('version', null);
    }
380
381
    /**
     * Return the list of Apache Tika versions declared as supported by this class
     *
     * @return array
     */
    public static function getSupportedVersions()
    {
        $versions = self::$supportedVersions;

        return $versions;
    }
390
391
    /**
     * Sets the checked flag
     *
     * The given value is converted to a boolean before being stored.
     *
     * @param   bool    $checked
     */
    public function setChecked($checked)
    {
        $this->checked = $checked ? true : false;
    }
400
401
    /**
     * Tell whether the instance is flagged as checked
     *
     * @return  bool
     */
    public function isChecked()
    {
        $checked = $this->checked;

        return $checked;
    }
410
411
    /**
     * Check if a response is cached
     *
     * Cache entries are grouped by the SHA-1 hash of the file and then by request type.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  bool
     */
    protected function isCached($type, $file)
    {
        $key = sha1($file);

        return isset($this->cache[$key][$type]);
    }
422
423
    /**
     * Get a cached response
     *
     * @param   string  $type
     * @param   string  $file
     * @return  mixed   the stored response, or null when nothing is cached for that file and type
     */
    protected function getCachedResponse($type, $file)
    {
        $key = sha1($file);

        if(isset($this->cache[$key][$type]))
        {
            return $this->cache[$key][$type];
        }

        return null;
    }
434
435
    /**
     * Check if a request type must be cached
     *
     * Only the "lang" and "meta" request types are cacheable.
     *
     * @param   string  $type
     * @return  bool
     */
    protected function isCacheable($type)
    {
        // strict match: a loose comparison would also accept values such as true
        return in_array($type, ['lang', 'meta'], true);
    }
445
446
    /**
     * Store a response in the cache
     *
     * The response is saved under the SHA-1 hash of the file and the request type,
     * replacing any previous entry for the same pair.
     *
     * @param   string  $type
     * @param   mixed   $response
     * @param   string  $file
     * @return  bool    always true
     */
    protected function cacheResponse($type, $response, $file)
    {
        $key = sha1($file);

        $this->cache[$key][$type] = $response;

        return true;
    }
460
461
    /**
     * Checks if a specific version is supported
     *
     * Versions are compared as exact strings. A loose comparison treats them
     * as numbers, so '1.1' would match '1.10' and '1.2' would match '1.20'.
     *
     * @param   string  $version
     * @return  bool
     */
    public static function isVersionSupported($version)
    {
        return in_array((string) $version, self::getSupportedVersions(), true);
    }
471
472
    /**
     * Check the request before executing
     *
     * Request types that are not tied to a file (detectors, mime-types, parsers
     * and version) are not checked. Local files must exist, remote files (names
     * starting with "http") must answer with a 200 status line and are downloaded
     * to a temporary file when the remote download flag is enabled.
     *
     * @param   string  $type
     * @param   string  $file
     * @return  string  the given file, or the path of the downloaded copy
     * @throws  \Exception  when the file can't be opened (code 2 for remote files) or downloaded
     */
    public function checkRequest($type, $file)
    {
        // no checks for getters
        if(in_array($type, ['detectors', 'mime-types', 'parsers', 'version'], true))
        {
            return $file;
        }

        $isRemote = (bool) preg_match('/^http/', $file);

        // invalid local file
        if(!$isRemote)
        {
            if(!file_exists($file))
            {
                throw new Exception("File $file can't be opened");
            }

            return $file;
        }

        // invalid remote file: get_headers() returns false when the request fails,
        // so the result must be checked before reading the status line
        $headers = get_headers($file);

        if($headers === false || !isset($headers[0]) || !preg_match('/200/', $headers[0]))
        {
            throw new Exception("File $file can't be opened", 2);
        }

        // download remote file if required only for integrated downloader
        if($this->downloadRemote)
        {
            $file = $this->downloadFile($file);
        }

        return $file;
    }
505
506
    /**
     * Download file to a temporary folder
     *
     * The file is fetched with cURL (5 seconds timeout) into a temporary file
     * created in the system temporary directory. Both the cURL handle and the
     * file handle are always released, and the temporary file is removed when
     * the download fails.
     *
     * @link    https://wiki.apache.org/tika/TikaJAXRS#Specifying_a_URL_Instead_of_Putting_Bytes
     * @param   string  $file
     * @return  string  path of the temporary file
     * @throws  \Exception  when the temporary file can't be created or opened, on cURL errors
     *                      or when the HTTP status is not 200 (status used as exception code)
     */
    protected function downloadFile($file)
    {
        $dest = tempnam(sys_get_temp_dir(), 'TIKA');

        if($dest === false)
        {
            throw new Exception("Temporary file can't be created");
        }

        $fp = fopen($dest, 'w+');

        if($fp === false)
        {
            // tempnam() already created the file
            is_file($dest) && unlink($dest);

            throw new Exception("$dest can't be opened");
        }

        $ch = curl_init($file);

        // curl_init() returns false on failure and the handle can't be used then
        if($ch === false)
        {
            fclose($fp);
            is_file($dest) && unlink($dest);

            throw new Exception("$file can't be downloaded");
        }

        curl_setopt($ch, CURLOPT_FILE, $fp);
        curl_setopt($ch, CURLOPT_TIMEOUT, 5);
        curl_exec($ch);

        // collect the outcome first so both handles can be released on every path
        $errno = curl_errno($ch);
        $error = curl_error($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        curl_close($ch);
        fclose($fp);

        if($errno)
        {
            is_file($dest) && unlink($dest);

            throw new Exception($error);
        }

        if($code != 200)
        {
            is_file($dest) && unlink($dest);

            throw new Exception("$file can't be downloaded", $code);
        }

        return $dest;
    }
546
547
    /**
     * Check Java binary, JAR path or server connection
     *
     * Abstract: this class provides no implementation, each concrete client
     * must supply its own check.
     *
     * @return  void
     */
552
    abstract public function check();
553
554
    /**
     * Configure and make a request and return its results.
     *
     * Abstract: this class provides no implementation. Every public getter of
     * this class delegates to this method, using the request types meta,
     * rmeta/text, rmeta/html, rmeta/ignore, lang, mime, html, text, text-main,
     * mime-types, detectors, parsers and version.
     *
     * @param   string  $type   request type
     * @param   string  $file   file to process; the mime-types, detectors, parsers
     *                          and version requests are not tied to a file
     * @return  string|\Vaites\ApacheTika\Metadata\Metadata
     * @throws  \Exception
     */
562
    abstract public function request($type, $file);
563
}
564