TikaTextExtractor::runShell()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 27
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 15
nc 2
nop 4
dl 0
loc 27
rs 9.7666
c 0
b 0
f 0
1
<?php
2
3
namespace SilverStripe\TextExtraction\Extractor;
4
5
use SilverStripe\Assets\File;
6
7
/**
8
 * Enables text extraction of file content via the Tika CLI
9
 *
10
 * {@link http://tika.apache.org/1.7/gettingstarted.html}
11
 */
12
class TikaTextExtractor extends FileTextExtractor
{
    /**
     * Text extraction mode. Defaults to -t (plain text)
     *
     * The value is read in getContent() and placed on the command line without
     * escaping, so it must only ever hold a trusted tika CLI flag.
     *
     * @var string
     * @config
     */
    private static $output_mode = '-t';

    /**
     * Get the version of tika installed, or 0 if not installed
     *
     * Runs `tika --version` and parses the "Apache Tika x.y" banner from its
     * standard output.
     *
     * @return string|int The dotted version string exactly as printed by tika
     *                    (e.g. "1.7"), or integer 0 if the command failed or
     *                    the banner could not be parsed
     */
    public function getVersion()
    {
        $code = $this->runShell('tika --version', $stdout);

        // Parse output
        if (!$code && preg_match('/Apache Tika (?<version>[\.\d]+)/', $stdout, $matches)) {
            return $matches['version'];
        }

        return 0;
    }

    /**
     * Runs a shell command and captures its output
     *
     * The command string is handed to proc_open() verbatim: no escaping is
     * performed here, so callers must escape every argument themselves
     * (e.g. with escapeshellarg()).
     *
     * @param  string $command Full command including arguments, already escaped
     * @param  string &$stdout Standard output
     * @param  string &$stderr Standard error
     * @param  string $input   Content to pass via standard input
     * @return int Exit code.  0 is success, 255 if the process could not be started
     */
    protected function runShell($command, &$stdout = '', &$stderr = '', $input = '')
    {
        $descriptorSpecs = [
            0 => ["pipe", "r"],
            1 => ["pipe", "w"],
            2 => ["pipe", "w"]
        ];
        // Invoke command
        $pipes = [];
        $proc = proc_open($command, $descriptorSpecs, $pipes);

        if (!is_resource($proc)) {
            return 255;
        }

        // Send content as input
        fwrite($pipes[0], $input);
        fclose($pipes[0]);

        // Drain stdout and stderr together. Reading one pipe to EOF before
        // touching the other can deadlock: the child blocks once the unread
        // pipe's buffer is full while we are still waiting on the first pipe.
        $open = [1 => $pipes[1], 2 => $pipes[2]];
        $buffers = [1 => '', 2 => ''];

        while ($open) {
            $read = array_values($open);
            $write = null;
            $except = null;

            if (stream_select($read, $write, $except, null) === false) {
                // select failed: fall back to plain sequential blocking reads
                foreach ($open as $id => $stream) {
                    $buffers[$id] .= stream_get_contents($stream);
                }
                break;
            }

            foreach ($read as $stream) {
                $id = array_search($stream, $open, true);
                if ($id === false) {
                    continue;
                }

                $chunk = fread($stream, 8192);
                if ($chunk === false || ($chunk === '' && feof($stream))) {
                    // End of stream (or read error): stop watching this pipe
                    fclose($stream);
                    unset($open[$id]);
                    continue;
                }

                $buffers[$id] .= $chunk;
            }
        }

        // Only non-empty when the fallback path above was taken
        foreach ($open as $stream) {
            fclose($stream);
        }

        $stdout = $buffers[1];
        $stderr = $buffers[2];

        // Get result
        return proc_close($proc);
    }

    /**
     * Extracts the content of a file by running it through the tika CLI
     *
     * @param  File|string $file A File record, or a filesystem path
     * @return string|null The command's standard output when tika exits with
     *                     code 0, otherwise null
     */
    public function getContent($file)
    {
        $mode = $this->config()->get('output_mode');
        // For File records the path comes from getPathFromFile(), which is defined
        // outside this class (presumably a temporary copy, given the cleanup below)
        $path = $file instanceof File ? $this->getPathFromFile($file) : $file;
        $command = sprintf('tika %s %s', $mode, escapeshellarg($path));
        $code = $this->runShell($command, $output);
        //Cleanup temp file
        if ($file instanceof File) {
            unlink($path);
        }

        if ($code == 0) {
            return $output;
        }

        // Extraction failed; made explicit rather than falling off the end
        return null;
    }

    /**
     * Whether a usable tika binary was found, judged by getVersion()
     *
     * @return bool
     */
    public function isAvailable()
    {
        return $this->getVersion() > 0;
    }

    /**
     * Always false: this extractor never claims a file by extension
     *
     * @param  string $extension
     * @return bool
     */
    public function supportsExtension($extension)
    {
        // Determine support via mime type only
        return false;
    }


    /**
     * Checks whether tika lists the given mime type as supported
     *
     * @param  string $mime
     * @return bool
     */
    public function supportsMime($mime)
    {
        // An empty needle would yield the pattern /\b()\b/, which matches at any
        // word boundary and would wrongly report every empty mime type as supported
        if (!is_string($mime) || $mime === '') {
            return false;
        }

        // Get list of supported mime types
        $code = $this->runShell('tika --list-supported-types', $supportedTypes);

        if ($code) {
            // Error case
            return false;
        }

        // Check if the mime type is inside the result
        $pattern = sprintf('/\b(%s)\b/', preg_quote($mime, '/'));

        return (bool)preg_match($pattern, $supportedTypes);
    }
}
131