Passed
Push — 1.3.x ( c5ab82...057911 )
by Zaahid
03:06
created

MessageParser::addRawHeaderToPart()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 3

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 5
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 3
nc 2
nop 2
crap 3
1
<?php
2
/**
3
 * This file is part of the ZBateson\MailMimeParser project.
4
 *
5
 * @license http://opensource.org/licenses/bsd-license.php BSD
6
 */
7
namespace ZBateson\MailMimeParser\Message;
8
9
use Psr\Http\Message\StreamInterface;
10
use ZBateson\MailMimeParser\Message\Part\PartBuilder;
11
use ZBateson\MailMimeParser\Message\Part\Factory\PartBuilderFactory;
12
use ZBateson\MailMimeParser\Message\Part\Factory\PartFactoryService;
13
use GuzzleHttp\Psr7\StreamWrapper;
14
15
/**
16
 * Parses a mail mime message into its component parts.  To invoke, call
17
 * MailMimeParser::parse.
18
 *
19
 * @author Zaahid Bateson
20
 */
21
class MessageParser
{
    /**
     * @var PartFactoryService service instance used to obtain the part
     *      factories (message, mime part, uuencoded part) passed to newly
     *      created PartBuilders.
     */
    protected $partFactoryService;

    /**
     * @var PartBuilderFactory used to create PartBuilders
     */
    protected $partBuilderFactory;

    /**
     * @var int maintains the character length of the last line separator,
     *      typically 2 for CRLF, to keep track of the correct 'end' position
     *      for a part because the CRLF before a boundary is considered part of
     *      the boundary.
     */
    private $lastLineSeparatorLength = 0;

    /**
     * Sets up the parser with its dependencies.
     *
     * @param PartFactoryService $pfs
     * @param PartBuilderFactory $pbf
     */
    public function __construct(
        PartFactoryService $pfs,
        PartBuilderFactory $pbf
    ) {
        $this->partFactoryService = $pfs;
        $this->partBuilderFactory = $pbf;
    }

    /**
     * Parses the passed stream into a ZBateson\MailMimeParser\Message object
     * and returns it.
     *
     * After parsing, every extension whose isSupported() accepts the message
     * may replace it through getExtendedMessage(), in the order given.
     *
     * @param StreamInterface $stream the stream to parse the message from
     * @param Extension[] $extensions array of extensions
     * @return \ZBateson\MailMimeParser\Message
     */
    public function parse(StreamInterface $stream, array $extensions = [])
    {
        $partBuilder = $this->read($stream);
        $message = $partBuilder->createMessagePart($stream);
        foreach ($extensions as $ext) {
            if ($ext->isSupported($message)) {
                $message = $ext->getExtendedMessage($message, $this);
            }
        }
        return $message;
    }

    /**
     * Splits a raw header on its first colon and passes the name and the
     * trimmed value to $partBuilder->addHeader.
     *
     * Headers without a colon (including the empty string) are ignored.
     *
     * @param string $header
     * @param PartBuilder $partBuilder
     */
    private function addRawHeaderToPart($header, PartBuilder $partBuilder)
    {
        if (strpos($header, ':') === false) {
            return;
        }
        // limit of 2 keeps any further colons inside the value
        list($name, $value) = explode(':', $header, 2);
        $partBuilder->addHeader($name, trim($value));
    }

    /**
     * Reads a single chunk with fgets, normalising fgets' false return value
     * (EOF or read failure) to an empty string so callers always get a string.
     *
     * @param resource $handle
     * @param int $size length argument passed to fgets (at most $size - 1
     *        bytes are returned)
     * @return string
     */
    private function readChunk($handle, $size)
    {
        $chunk = fgets($handle, $size);
        return ($chunk === false) ? '' : $chunk;
    }

    /**
     * Reads a line of up to 4095 bytes (fgets returns at most one byte less
     * than the 4096 length passed to it).  If the line is longer than that,
     * the remaining characters in the line are read and discarded, and only
     * the first chunk is returned.
     *
     * The line's separator, if it was read as part of the first chunk, is
     * included in the returned string.  An empty string is returned when
     * nothing could be read.
     *
     * @param resource $handle
     * @return string
     */
    private function readLine($handle)
    {
        $size = 4096;
        $first = $chunk = $this->readChunk($handle, $size);
        // a completely filled chunk that doesn't end in a new line means the
        // line continues: consume and drop the rest of it
        while (strlen($chunk) === $size - 1 && substr($chunk, -1) !== "\n") {
            $chunk = $this->readChunk($handle, $size);
        }
        return $first;
    }

    /**
     * Reads a line of up to 2047 bytes (fgets returns at most one byte less
     * than the 2048 length passed to it) and returns it without its trailing
     * line separator.
     *
     * If the line is longer than that, the remaining characters in the line
     * are read and discarded, and an empty string is returned instead of a
     * partial line.
     *
     * The number of characters making up the line's new line characters
     * (e.g. 2 for "\r\n" or 1 for "\n") is assigned to $lineSeparatorLength.
     * For an over-long line this is measured on the final chunk read.
     *
     * @param resource $handle
     * @param int $lineSeparatorLength
     * @return string
     */
    private function readBoundaryLine($handle, &$lineSeparatorLength = 0)
    {
        $size = 2048;
        $isCut = false;
        $line = $this->readChunk($handle, $size);
        while (strlen($line) === $size - 1 && substr($line, -1) !== "\n") {
            $line = $this->readChunk($handle, $size);
            $isCut = true;
        }
        $ret = rtrim($line, "\r\n");
        $lineSeparatorLength = strlen($line) - strlen($ret);
        return ($isCut) ? '' : $ret;
    }

    /**
     * Reads header lines up to an empty line, adding them to the passed
     * $partBuilder.
     *
     * A line starting with a space or tab is treated as a continuation of the
     * header being collected and is appended to it after a CRLF.  Any other
     * line first flushes the collected header to the $partBuilder and then
     * starts a new one.  Reading stops once a blank line is read or nothing
     * more can be read.
     *
     * @param resource $handle the resource handle to read from
     * @param PartBuilder $partBuilder the current part to add headers to
     */
    protected function readHeaders($handle, PartBuilder $partBuilder)
    {
        $header = '';
        do {
            $line = $this->readLine($handle);
            if ($line === '' || ($line[0] !== "\t" && $line[0] !== ' ')) {
                $this->addRawHeaderToPart($header, $partBuilder);
                $header = '';
            } else {
                $line = "\r\n" . $line;
            }
            $header .= rtrim($line, "\r\n");
        } while ($header !== '');
    }

    /**
     * Reads lines from the passed $handle, calling
     * $partBuilder->setEndBoundaryFound with the passed line until it returns
     * true or the stream is at EOF.
     *
     * setEndBoundaryFound returns true if the passed line matches a boundary
     * for the $partBuilder itself or any of its parents.
     *
     * Once a boundary is found, setStreamPartAndContentEndPos is called with
     * the passed $handle's read pos before the boundary and its line separator
     * were read.  If EOF is reached first, the current read pos is used
     * instead and $partBuilder->setEof is called.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    private function findContentBoundary($handle, PartBuilder $partBuilder)
    {
        // last separator before a boundary belongs to the boundary, and is not
        // part of the current part
        while (!feof($handle)) {
            $endPos = ftell($handle) - $this->lastLineSeparatorLength;
            $line = $this->readBoundaryLine($handle, $this->lastLineSeparatorLength);
            if ($line !== '' && $partBuilder->setEndBoundaryFound($line)) {
                $partBuilder->setStreamPartAndContentEndPos($endPos);
                return;
            }
        }
        $partBuilder->setStreamPartAndContentEndPos(ftell($handle));
        $partBuilder->setEof();
    }

    /**
     * Reads content for a non-mime message.  If there are uuencoded attachment
     * parts in the message (denoted by 'begin' lines), those parts are read and
     * added to the passed $partBuilder as children.
     *
     * Every line read moves the end position of the part currently being
     * read: the passed $partBuilder until the first 'begin' line, and the most
     * recently created uuencoded child afterwards.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    protected function readUUEncodedOrPlainTextMessage($handle, PartBuilder $partBuilder)
    {
        $partBuilder->setStreamContentStartPos(ftell($handle));
        $part = $partBuilder;
        while (!feof($handle)) {
            $start = ftell($handle);
            $line = trim($this->readLine($handle));
            if (preg_match('/^begin ([0-7]{3}) (.*)$/', $line, $matches)) {
                $part = $this->partBuilderFactory->newPartBuilder(
                    $this->partFactoryService->getUUEncodedPartFactory()
                );
                $part->setStreamPartStartPos($start);
                // 'begin' line is part of the content
                $part->setStreamContentStartPos($start);
                $part->setProperty('mode', $matches[1]);
                $part->setProperty('filename', $matches[2]);
                $partBuilder->addChild($part);
            }
            $part->setStreamPartAndContentEndPos(ftell($handle));
        }
        $partBuilder->setStreamPartEndPos(ftell($handle));
    }

    /**
     * Reads content for a single part of a MIME message.
     *
     * The part's own content is read up to the next boundary.  If the part is
     * a multipart part, child PartBuilders are then created, added to it and
     * read with readPart (which may recurse back into this method) until
     * $partBuilder->isParentBoundaryFound returns true.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    private function readPartContent($handle, PartBuilder $partBuilder)
    {
        $partBuilder->setStreamContentStartPos(ftell($handle));
        $this->findContentBoundary($handle, $partBuilder);
        if ($partBuilder->isMultiPart()) {
            while (!$partBuilder->isParentBoundaryFound()) {
                $child = $this->partBuilderFactory->newPartBuilder(
                    $this->partFactoryService->getMimePartFactory()
                );
                $partBuilder->addChild($child);
                $this->readPart($handle, $child);
            }
        }
    }

    /**
     * Reads a part and any of its children, into the passed $partBuilder,
     * either by calling readUUEncodedOrPlainTextMessage or readPartContent
     * after reading headers.
     *
     * readUUEncodedOrPlainTextMessage is only used for a part without a
     * parent that isn't a mime part.
     *
     * @param resource $handle
     * @param PartBuilder $partBuilder
     */
    protected function readPart($handle, PartBuilder $partBuilder)
    {
        $partBuilder->setStreamPartStartPos(ftell($handle));

        if ($partBuilder->canHaveHeaders()) {
            $this->readHeaders($handle, $partBuilder);
            // the separator ending the headers is not part of a boundary
            $this->lastLineSeparatorLength = 0;
        }
        if ($partBuilder->getParent() === null && !$partBuilder->isMime()) {
            $this->readUUEncodedOrPlainTextMessage($handle, $partBuilder);
        } else {
            $this->readPartContent($handle, $partBuilder);
        }
    }

    /**
     * Reads the message from the passed stream and returns a PartBuilder
     * representing it.
     *
     * @param StreamInterface $stream
     * @return PartBuilder
     */
    protected function read(StreamInterface $stream)
    {
        $partBuilder = $this->partBuilderFactory->newPartBuilder(
            $this->partFactoryService->getMessageFactory()
        );
        // the remaining parts use a resource handle for better performance...
        // it seems fgets does much better than Psr7\readline (not specifically
        // measured, but difference in running tests is big)
        $this->readPart(StreamWrapper::getResource($stream), $partBuilder);
        return $partBuilder;
    }
}
291