Reader::searchForTokenStart()   A
last analyzed

Complexity

Conditions 4
Paths 4

Size

Total Lines 14
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 4.074

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 14
ccs 10
cts 12
cp 0.8333
rs 9.2
cc 4
eloc 10
nc 4
nop 1
crap 4.074
1
<?php
2
3
/*
4
 * This file is part of the kaloa/xmp package.
5
 *
6
 * For full copyright and license information, please view the LICENSE file
7
 * that was distributed with this source code.
8
 */
9
10
namespace Kaloa\Xmp;
11
12
use DOMDocument;
13
use ErrorException;
14
use Exception;
15
use Kaloa\Xmp\Document as XmpDocument;
16
use Kaloa\Xmp\ReaderException;
17
18
/**
19
 * Extracts an XMP document from a data stream.
20
 *
21
 * The current algorithm ignores specific features and requirements of file
22
 * formats. It simply looks for the first occurrences of $tokenStart and
23
 * $tokenEnd and returns the content in between. This is a flexible approach but
24
 * it is not a correct one. There are cases in which the algorithm won't
25
 * succeed. Both false positives and false negatives are possible.
26
 */
27
class Reader
{
    /**
     * Start token of XMP data.
     *
     * @var string
     */
    private $tokenStart = '<x:xmpmeta';

    /**
     * End token of XMP data.
     *
     * @var string
     */
    private $tokenEnd = '</x:xmpmeta>';

    /**
     * Size (in bytes) of data chunks read from the stream.
     *
     * @var int
     */
    private $chunkSize = 1024;

    /**
     * Buffer to construct XMP data in.
     *
     * While scanning it holds everything that follows $tokenStart (the start
     * token itself is prepended once the end token has been found).
     *
     * @var string
     */
    private $buffer;

    /**
     * True if $tokenStart has been found.
     *
     * @var bool
     */
    private $started;

    /**
     * True if $started and $tokenEnd has been found.
     *
     * @var bool
     */
    private $ended;

    /**
     * Counts how many characters of the token that is currently searched for
     * have been found.
     *
     * This is reset whenever a character that doesn't equal the next one from
     * the token is found. If $delimPos reaches token length, the token has been
     * found.
     *
     * This variable is needed because a token might be split over two chunks of
     * input data so that functions such as strpos aren't sufficient.
     *
     * @var int
     */
    private $delimPos;

    /**
     * Length (in bytes) of $tokenStart.
     *
     * @var int
     */
    private $tokenStartLen;

    /**
     * Length (in bytes) of $tokenEnd.
     *
     * @var int
     */
    private $tokenEndLen;

    /**
     * Initializes the instance.
     */
    public function __construct()
    {
        $this->reset();
    }

    /**
     * Resets instance data to clean starting state.
     *
     * Clears the buffer and both progress flags, recomputes the token lengths
     * and rewinds the match counter.
     */
    private function reset()
    {
        $this->buffer = '';
        $this->started = false;
        $this->ended = false;
        $this->tokenStartLen = strlen($this->tokenStart);
        $this->tokenEndLen = strlen($this->tokenEnd);
        $this->delimPos = 0;
    }

    /**
     * Searches incoming data for $tokenStart adapting internal state if found.
     *
     * Once the whole token has been seen, $started is set and $delimPos is
     * rewound so that it can be reused for the end token search.
     *
     * @param string $char A single byte
     */
    private function searchForTokenStart($char)
    {
        if ($char === $this->tokenStart[$this->delimPos]) {
            $this->delimPos++;
            if ($this->delimPos === $this->tokenStartLen) {
                $this->delimPos = 0;
                $this->started = true;
            }
        } elseif ($char === $this->tokenStart[0]) {
            // The mismatching byte may itself open a new match.
            $this->delimPos = 1;
        } else {
            $this->delimPos = 0;
        }
    }

    /**
     * Searches incoming data for $tokenEnd adapting internal state if found.
     *
     * Every byte is appended to $buffer, so the buffer includes the end token
     * once $ended has been set.
     *
     * @param string $char A single byte
     */
    private function searchForTokenEnd($char)
    {
        $this->buffer .= $char;
        if ($char === $this->tokenEnd[$this->delimPos]) {
            $this->delimPos++;
            if ($this->delimPos === $this->tokenEndLen) {
                $this->ended = true;
            }
        } elseif ($char === $this->tokenEnd[0]) {
            // The mismatching byte may itself open a new match.
            $this->delimPos = 1;
        } else {
            $this->delimPos = 0;
        }
    }

    /**
     * Extracts the first found XMP document from the stream.
     *
     * The stream is read in chunks and processed byte by byte in an
     * automaton-like fashion.
     *
     * After the execution of this method, instance variables $buffer, $started,
     * and $ended will contain meaningful values. $buffer holds the complete
     * XMP packet (start token through end token) or an empty string if no
     * complete packet was found.
     *
     * @param resource $stream A stream resource
     */
    private function getXmpData($stream)
    {
        while (!feof($stream)) {
            $chunk = fread($stream, $this->chunkSize);

            // A failed read yields false; stop instead of spinning on a
            // stream that might never report EOF.
            if ($chunk === false) {
                break;
            }

            // Walk the chunk by offset. Unlike str_split(), this never feeds
            // an empty pseudo-byte into the matcher when the read returned an
            // empty string, so a token split over two reads keeps its
            // partial-match state.
            $length = strlen($chunk);
            for ($i = 0; $i < $length; $i++) {
                if ($this->started) {
                    $this->searchForTokenEnd($chunk[$i]);
                } else {
                    $this->searchForTokenStart($chunk[$i]);
                }

                if ($this->ended) {
                    break 2;
                }
            }
        }

        if ($this->started && $this->ended) {
            // The start token was consumed by the matcher, not buffered.
            $this->buffer = $this->tokenStart . $this->buffer;
        } else {
            $this->buffer = '';
        }
    }

    /**
     * Returns a Kaloa\Xmp\Document of the first occurrence of XMP data in the
     * stream.
     *
     * The instance is reset to its clean starting state before this method
     * returns or throws after scanning, so the reader can be reused.
     *
     * @todo Converting PHP errors to exceptions through a temporary
     *       set_error_handler() is a crude way of detecting parse failures.
     *
     * @param resource $stream A stream resource
     * @return XmpDocument
     * @throws ReaderException If $stream is not a stream resource, if no XMP
     *                         data is found, or if the data cannot be parsed
     */
    public function getXmpDocument($stream)
    {
        if (!is_resource($stream) || get_resource_type($stream) !== 'stream') {
            throw new ReaderException('$stream is not a valid stream resource');
        }

        $this->getXmpData($stream);

        if ($this->buffer === '') {
            $this->reset();
            throw new ReaderException('No XMP document found in stream');
        }

        // Turn PHP errors raised while parsing into exceptions so that they
        // can be reported as a ReaderException below.
        set_error_handler(function ($errno, $errstr, $errfile, $errline) {
            throw new ErrorException($errstr, $errno, 0, $errfile, $errline);
        });

        $dom = null;
        $failure = null;

        try {
            // NOTE(review): the buffer originates from an arbitrary data
            // stream; consider passing LIBXML_NONET to loadXML() so that a
            // crafted packet cannot trigger network access - confirm against
            // the libxml versions that need to be supported.
            $dom = new DOMDocument();

            // Added to make testErroneousXmpDataThrowsException work with hhvm
            if (false === $dom->loadXML($this->buffer)) {
                $failure = 'loadXML returned false.';
            }
        } catch (Exception $e) {
            $failure = $e->getMessage();
        }

        // Single cleanup path for both outcomes (stands in for "finally").
        restore_error_handler();
        $this->reset();

        if ($failure !== null) {
            throw new ReaderException($failure);
        }

        return new XmpDocument($dom);
    }
}
251