Passed
Push — master ( 5e666c...805eaf )
by Zaahid
03:42
created

MessageParser::readBoundaryLine()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 12
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4.128

Importance

Changes 0
Metric Value
cc 4
eloc 9
nc 4
nop 2
dl 0
loc 12
ccs 8
cts 10
cp 0.8
crap 4.128
rs 9.9666
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of the ZBateson\MailMimeParser project.
4
 *
5
 * @license http://opensource.org/licenses/bsd-license.php BSD
6
 */
7
namespace ZBateson\MailMimeParser\Message;
8
9
use Psr\Http\Message\StreamInterface;
10
use ZBateson\MailMimeParser\Message\Part\PartBuilder;
11
use ZBateson\MailMimeParser\Message\Part\Factory\PartBuilderFactory;
12
use ZBateson\MailMimeParser\Message\Part\Factory\PartFactoryService;
13
use GuzzleHttp\Psr7\StreamWrapper;
14
15
/**
 * Parses a mail mime message into its component parts.  To invoke, call
 * MailMimeParser::parse.
 *
 * The parser reads the message line by line from a resource handle and
 * records start/end stream positions for each part on a PartBuilder, rather
 * than copying content.
 *
 * @author Zaahid Bateson
 */
class MessageParser
{
    /**
     * @var PartFactoryService service instance used to create MimePartFactory
     *      objects.
     */
    protected $partFactoryService;

    /**
     * @var PartBuilderFactory used to create PartBuilders
     */
    protected $partBuilderFactory;

    /**
     * @var int maintains the character length of the last line separator,
     *      typically 2 for CRLF, to keep track of the correct 'end' position
     *      for a part because the CRLF before a boundary is considered part of
     *      the boundary.
     */
    private $lastLineSeparatorLength = 0;

    /**
     * Sets up the parser with its dependencies.
     *
     * @param PartFactoryService $pfs
     * @param PartBuilderFactory $pbf
     */
    public function __construct(
        PartFactoryService $pfs,
        PartBuilderFactory $pbf
    ) {
        $this->partFactoryService = $pfs;
        $this->partBuilderFactory = $pbf;
    }

    /**
     * Parses the passed stream into a ZBateson\MailMimeParser\Message object
     * and returns it.
     *
     * @param StreamInterface $stream the stream to parse the message from
     * @return \ZBateson\MailMimeParser\Message
     */
    public function parse(StreamInterface $stream)
    {
        $partBuilder = $this->read($stream);
        return $partBuilder->createMessagePart($stream);
    }

    /**
     * Ensures the header isn't empty and contains a colon separator character,
     * then splits it on the first colon and calls $partBuilder->addHeader with
     * the name and the trimmed value.
     *
     * Headers without a colon are silently ignored.
     *
     * @param string $header the raw (possibly folded) header line
     * @param PartBuilder $partBuilder
     */
    private function addRawHeaderToPart($header, PartBuilder $partBuilder)
    {
        if ($header !== '' && strpos($header, ':') !== false) {
            // limit of 2 keeps any further colons inside the header's value
            $a = explode(':', $header, 2);
            $partBuilder->addHeader($a[0], trim($a[1]));
        }
    }

    /**
     * Reads a line of up to 4095 characters (the most fgets returns for a
     * length of 4096).  If the line is larger than that, the remaining
     * characters in the line are read and discarded, and only the first 4095
     * characters are returned.
     *
     * The returned line includes its trailing line separator, if one was read.
     * An empty string is returned if nothing could be read (EOF or a read
     * error), so callers always receive a string.
     *
     * @param resource $handle
     * @return string
     */
    private function readLine($handle)
    {
        $size = 4096;
        $first = fgets($handle, $size);
        if ($first === false) {
            // fgets signals EOF/error with false; normalize to the documented
            // string return type
            return '';
        }
        $chunk = $first;
        // a full-size chunk not ending in a newline means the line continues:
        // consume (and drop) the remainder so the next read starts on a new line
        while ($chunk !== false && strlen($chunk) === $size - 1 && substr($chunk, -1) !== "\n") {
            $chunk = fgets($handle, $size);
        }
        return $first;
    }

    /**
     * Reads a line that may be a boundary, of up to 2047 characters (the most
     * fgets returns for a length of 2048), and returns it without its
     * trailing line separator characters.
     *
     * If the line is longer than that, the remaining characters in the line
     * are read and discarded and an empty string is returned, as such a line
     * is not treated as a boundary candidate by the caller.
     *
     * In addition to reading the line, the number of trailing "\r" and "\n"
     * characters ending the line (e.g. 2 for "\r\n" or 1 for "\n") is written
     * to $lineSeparatorLength.  It is set to 0 if nothing could be read.
     *
     * @param resource $handle
     * @param int $lineSeparatorLength receives the length of the line's
     *        trailing separator
     * @return string the line without trailing "\r"/"\n", or an empty string
     *         if the line was too long or nothing could be read
     */
    private function readBoundaryLine($handle, &$lineSeparatorLength = 0)
    {
        $size = 2048;
        $isCut = false;
        $line = fgets($handle, $size);
        // keep reading while the chunk is full and the line hasn't ended
        while ($line !== false && strlen($line) === $size - 1 && substr($line, -1) !== "\n") {
            $line = fgets($handle, $size);
            $isCut = true;
        }
        if ($line === false) {
            // EOF or read error: nothing read, so no separator either
            $lineSeparatorLength = 0;
            return '';
        }
        $ret = rtrim($line, "\r\n");
        // for a cut line this is measured on the final chunk, which holds the
        // line's actual ending
        $lineSeparatorLength = strlen($line) - strlen($ret);
        return ($isCut) ? '' : $ret;
    }

    /**
     * Reads header lines up to an empty line, adding them to the passed
     * $partBuilder.
     *
     * Lines starting with a tab or space are treated as continuations of the
     * current header and appended to it, separated by "\r\n".  Reading stops
     * at the first empty line, or when nothing more can be read.
     *
     * @param resource $handle the resource handle to read from
     * @param PartBuilder $partBuilder the current part to add headers to
     */
    protected function readHeaders($handle, PartBuilder $partBuilder)
    {
        $header = '';
        do {
            $line = $this->readLine($handle);
            if (empty($line) || $line[0] !== "\t" && $line[0] !== ' ') {
                // not a continuation line: the header collected so far is
                // complete, so flush it and start a new one
                $this->addRawHeaderToPart($header, $partBuilder);
                $header = '';
            } else {
                // continuation line: keep the fold inside the header's value
                $line = "\r\n" . $line;
            }
            $header .= rtrim($line, "\r\n");
        } while ($header !== '');
    }

    /**
     * Reads lines from the passed $handle, calling
     * $partBuilder->setEndBoundaryFound with the passed line until it returns
     * true or the stream is at EOF.
     *
     * setEndBoundaryFound returns true if the passed line matches a boundary
     * for the $partBuilder itself or any of its parents.
     *
     * Once a boundary is found, setStreamPartAndContentEndPos is called with
     * the passed $handle's read pos before the boundary and its line separator
     * were read.  If EOF is reached without finding one, the end position is
     * set to the current read position and $partBuilder->setEof is called.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    private function findContentBoundary($handle, PartBuilder $partBuilder)
    {
        // last separator before a boundary belongs to the boundary, and is not
        // part of the current part
        while (!feof($handle)) {
            $endPos = ftell($handle) - $this->lastLineSeparatorLength;
            $line = $this->readBoundaryLine($handle, $this->lastLineSeparatorLength);
            if ($line !== '' && $partBuilder->setEndBoundaryFound($line)) {
                $partBuilder->setStreamPartAndContentEndPos($endPos);
                return;
            }
        }
        $partBuilder->setStreamPartAndContentEndPos(ftell($handle));
        $partBuilder->setEof();
    }

    /**
     * Reads content for a non-mime message.  If there are uuencoded attachment
     * parts in the message (denoted by 'begin' lines), those parts are read and
     * added to the passed $partBuilder as children.
     *
     * Each line read extends the end position of the most recently started
     * part, which is $partBuilder itself until the first 'begin' line is
     * found.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    protected function readUUEncodedOrPlainTextMessage($handle, PartBuilder $partBuilder)
    {
        $partBuilder->setStreamContentStartPos(ftell($handle));
        $part = $partBuilder;
        while (!feof($handle)) {
            $start = ftell($handle);
            $line = trim($this->readLine($handle));
            if (preg_match('/^begin ([0-7]{3}) (.*)$/', $line, $matches)) {
                $part = $this->partBuilderFactory->newPartBuilder(
                    $this->partFactoryService->getUUEncodedPartFactory()
                );
                $part->setStreamPartStartPos($start);
                // 'begin' line is part of the content
                $part->setStreamContentStartPos($start);
                $part->setProperty('mode', $matches[1]);
                $part->setProperty('filename', $matches[2]);
                $partBuilder->addChild($part);
            }
            $part->setStreamPartAndContentEndPos(ftell($handle));
        }
        $partBuilder->setStreamPartEndPos(ftell($handle));
    }

    /**
     * Reads content for a single part of a MIME message.
     *
     * If the part being read is in turn a multipart part, readPart is called
     * recursively for each of its children, to read their headers and
     * content, until a parent boundary is found.
     *
     * The start/end positions of the part's content are set on the passed
     * $partBuilder, which in turn sets the end position of the part and its
     * parents.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    private function readPartContent($handle, PartBuilder $partBuilder)
    {
        $partBuilder->setStreamContentStartPos(ftell($handle));
        $this->findContentBoundary($handle, $partBuilder);
        if ($partBuilder->isMultiPart()) {
            while (!$partBuilder->isParentBoundaryFound()) {
                $child = $this->partBuilderFactory->newPartBuilder(
                    $this->partFactoryService->getMimePartFactory()
                );
                $partBuilder->addChild($child);
                $this->readPart($handle, $child);
            }
        }
    }

    /**
     * Reads a part and any of its children, into the passed $partBuilder,
     * either by calling readUUEncodedOrPlainTextMessage or readPartContent
     * after reading headers.
     *
     * readUUEncodedOrPlainTextMessage is used only for a top-level part (one
     * without a parent) that isn't a MIME message.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    protected function readPart($handle, PartBuilder $partBuilder)
    {
        $partBuilder->setStreamPartStartPos(ftell($handle));

        if ($partBuilder->canHaveHeaders()) {
            $this->readHeaders($handle, $partBuilder);
            // the line ending the headers is not part of a following boundary
            $this->lastLineSeparatorLength = 0;
        }
        if ($partBuilder->getParent() === null && !$partBuilder->isMime()) {
            $this->readUUEncodedOrPlainTextMessage($handle, $partBuilder);
        } else {
            $this->readPartContent($handle, $partBuilder);
        }
    }

    /**
     * Reads the message from the passed stream and returns a PartBuilder
     * representing it.
     *
     * @param StreamInterface $stream
     * @return PartBuilder
     */
    protected function read(StreamInterface $stream)
    {
        $partBuilder = $this->partBuilderFactory->newPartBuilder(
            $this->partFactoryService->getMessageFactory()
        );
        // the remaining parts use a resource handle for better performance...
        // it seems fgets does much better than Psr7\readline (not specifically
        // measured, but difference in running tests is big)
        $this->readPart(StreamWrapper::getResource($stream), $partBuilder);
        return $partBuilder;
    }
}
284