Passed
Branch master (e17e02)
by Jacques
02:12
created

Parser::convertSgmlToXml()   A

Complexity

Conditions 6
Paths 4

Size

Total Lines 29
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 6

Importance

Changes 6
Bugs 0 Features 0
Metric Value
eloc 14
c 6
b 0
f 0
dl 0
loc 29
ccs 14
cts 14
cp 1
rs 9.2222
cc 6
nc 4
nop 1
crap 6
1
<?php
2
3
namespace OfxParser;
4
5
use SimpleXMLElement;
6
7
/**
8
 * An OFX parser library
9
 *
10
 * Heavily refactored from Guillaume Bailleul's grimfor/ofxparser
11
 *
12
 * @author Guillaume BAILLEUL <[email protected]>
13
 * @author James Titcumb <[email protected]>
14
 * @author Oliver Lowe <[email protected]>
15
 */
16
class Parser
{
    /**
     * Factory to extend support for OFX document structures.
     *
     * Kept protected so a subclass can return a different Ofx implementation.
     *
     * @param SimpleXMLElement $xml The parsed OFX document
     * @return Ofx
     */
    protected function createOfx(SimpleXMLElement $xml)
    {
        return new Ofx($xml);
    }

    /**
     * Load an OFX file into this parser by way of a filename
     *
     * @param string $ofxFile A path that can be loaded with file_get_contents
     * @return Ofx
     * @throws \InvalidArgumentException If the file does not exist or cannot be read
     */
    public function loadFromFile($ofxFile)
    {
        if (!file_exists($ofxFile)) {
            throw new \InvalidArgumentException("File '{$ofxFile}' could not be found");
        }

        // file_get_contents() signals failure with false; do not hand that to the parser.
        $ofxContent = file_get_contents($ofxFile);
        if ($ofxContent === false) {
            throw new \InvalidArgumentException("File '{$ofxFile}' could not be read");
        }

        return $this->loadFromString($ofxContent);
    }

    /**
     * Load an OFX by directly using the text content
     *
     * The text before the first "<OFX>" tag is parsed as the header; the rest
     * is treated as the body. A body whose header starts with "<?xml" is
     * loaded as-is, anything else is converted from SGML to XML first.
     *
     * @param string $ofxContent
     * @return Ofx
     * @throws \RuntimeException If the resulting XML cannot be parsed
     */
    public function loadFromString($ofxContent)
    {
        // Normalise all line endings to "\n" so the line-based steps below work.
        $ofxContent = str_replace(["\r\n", "\r"], "\n", $ofxContent);
        $ofxContent = $this->latin1ToUtf8($ofxContent);

        // stripos() yields false when there is no <OFX> tag; the cast makes the
        // resulting "empty header, whole content is the body" behaviour explicit.
        $sgmlStart = (int) stripos($ofxContent, '<OFX>');
        $ofxHeader = trim(substr($ofxContent, 0, $sgmlStart));
        $header = $this->parseHeader($ofxHeader);

        $ofxSgml = trim(substr($ofxContent, $sgmlStart));
        if (stripos($ofxHeader, '<?xml') === 0) {
            $ofxXml = $ofxSgml;
        } else {
            $ofxSgml = $this->conditionallyAddNewlines($ofxSgml);
            $ofxXml = $this->convertSgmlToXml($ofxSgml);
        }

        $xml = $this->xmlLoadString($ofxXml);

        $ofx = $this->createOfx($xml);
        $ofx->buildHeader($header);

        return $ofx;
    }

    /**
     * Convert ISO-8859-1 text to UTF-8.
     *
     * Uses mb_convert_encoding() when the mbstring extension is loaded, which
     * is the documented replacement for the deprecated utf8_encode(); falls
     * back to utf8_encode() otherwise.
     *
     * @param string $content
     * @return string
     */
    private function latin1ToUtf8($content)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($content, 'UTF-8', 'ISO-8859-1');
        }

        return utf8_encode($content);
    }

    /**
     * Detect if the OFX file is on one line. If it is, add newlines automatically.
     *
     * The opening and closing OFX tags are matched case-insensitively, in line
     * with how loadFromString() locates the opening tag.
     *
     * @param string $ofxContent
     * @return string
     */
    private function conditionallyAddNewlines($ofxContent)
    {
        // "." does not match "\n", so this only matches when both tags share a line.
        if (preg_match('/<OFX>.*<\/OFX>/i', $ofxContent) === 1) {
            return str_replace('<', "\n<", $ofxContent); // add line breaks to allow XML to parse
        }

        return $ofxContent;
    }

    /**
     * Load an XML string without PHP errors - throws exception instead
     *
     * The caller's libxml error-handling mode is restored before returning or
     * throwing, and the libxml error buffer is left empty.
     *
     * @param string $xmlString
     * @throws \RuntimeException If libxml reports errors or no document is produced
     * @return \SimpleXMLElement
     */
    private function xmlLoadString($xmlString)
    {
        $previousMode = libxml_use_internal_errors(true);
        libxml_clear_errors();

        $xml = simplexml_load_string($xmlString);
        $errors = libxml_get_errors();

        libxml_clear_errors();
        libxml_use_internal_errors($previousMode);

        // simplexml_load_string() may return false without recording an error.
        if ($errors || $xml === false) {
            throw new \RuntimeException('Failed to parse OFX: ' . var_export($errors, true));
        }

        return $xml;
    }

    /**
     * Detect any unclosed XML tags - if they exist, close them
     *
     * @param string $line A single line of the SGML body
     * @return string The trimmed line, with a closing tag appended when needed
     */
    private function closeUnclosedXmlTags($line)
    {
        // Special case discovered where empty content tag wasn't closed
        $line = trim($line);
        if (preg_match('/<MEMO>$/', $line) === 1) {
            return '<MEMO></MEMO>';
        }

        // Matches: <SOMETHING>blah
        // Does not match: <SOMETHING>
        // Does not match: <SOMETHING>blah</SOMETHING>
        // NOTE: intentionally no "u" modifier - the character class is evaluated
        // byte-wise, and adding "u" would change which characters it accepts.
        $matched = preg_match(
            "/<([A-Za-z0-9.]+)>([\wà-úÀ-Ú0-9\.\-\_\+\, ;:\[\]\'\&\/\\\*\(\)\+\{\|\}\!\£\$\?=@€£#%±§~`\"]+)$/",
            $line,
            $matches
        );
        if ($matched === 1) {
            return "<{$matches[1]}>{$matches[2]}</{$matches[1]}>";
        }

        return $line;
    }

    /**
     * Parse the SGML Header to an Array
     *
     * Two layouts are handled: an XML-style header (starts with "<?xml"),
     * whose "<?OFX ...?>" instruction holds whitespace-separated KEY="VALUE"
     * pairs, and a plain header with one KEY:VALUE pair per line. Entries
     * without a separator are skipped, and only the first separator splits an
     * entry, so values may themselves contain ":" or "=".
     *
     * @param string $ofxHeader
     * @return array Map of header key to header value
     */
    private function parseHeader($ofxHeader)
    {
        $header = [];

        $ofxHeader = trim($ofxHeader);
        // Remove empty new lines.
        $ofxHeader = preg_replace('/^\n+/m', '', $ofxHeader);

        // Check if it's an XML file (OFXv2)
        if (preg_match('/^<\?xml/', $ofxHeader) === 1) {
            // Only parse OFX headers and not XML headers.
            $ofxHeader = preg_replace('/<\?xml .*?\?>\n?/', '', $ofxHeader);
            $ofxHeader = preg_replace(['/"/', '/\?>/', '/<\?OFX/i'], '', $ofxHeader);
            // Split on any whitespace run so doubled spaces yield no empty tokens.
            $entries = preg_split('/\s+/', trim($ofxHeader), -1, PREG_SPLIT_NO_EMPTY);
            $separator = '=';
        } else {
            $entries = explode("\n", $ofxHeader);
            $separator = ':';
        }

        foreach ($entries as $entry) {
            $tag = explode($separator, $entry, 2);
            if (count($tag) < 2) {
                // No separator on this entry (e.g. an empty header): nothing to record.
                continue;
            }
            $header[$tag[0]] = $tag[1];
        }

        return $header;
    }

    /**
     * Convert an SGML to an XML string
     *
     * Bare ampersands are escaped, value-carrying tags are closed line by
     * line, and any opening tag still unclosed when an enclosing closing tag
     * is reached is rewritten as a self-closing tag.
     *
     * @param string $sgml
     * @return string
     */
    private function convertSgmlToXml($sgml)
    {
        // Escape "&" unless it already starts an entity such as "&amp;" or "&#38;".
        $sgml = preg_replace('/&(?!#?[a-z0-9]+;)/', '&amp;', $sgml);

        $lines = explode("\n", $sgml);
        // Stack of [line index, tag name] for opening tags not yet closed.
        $openTags = [];

        // Iterate by value (over a snapshot) and write back by index; this avoids
        // leaving a dangling reference to the last element after the loop.
        foreach ($lines as $i => $rawLine) {
            $line = trim($this->closeUnclosedXmlTags($rawLine));
            $lines[$i] = $line;

            // Matches tags like <SOMETHING> or </SOMETHING>
            if (!preg_match('/^<(\/?[A-Za-z0-9.]+)>$/', $line, $matches)) {
                continue;
            }

            if ($matches[1][0] !== '/') {
                $openTags[] = [$i, $matches[1]];
                continue;
            }

            // If matches </SOMETHING>, looks back and replaces all tags like
            // <OTHERTHING> to <OTHERTHING/> until finds the opening tag <SOMETHING>
            $tag = substr($matches[1], 1);
            while (($last = array_pop($openTags)) && $last[1] !== $tag) {
                $lines[$last[0]] = "<{$last[1]}/>";
            }
        }

        return implode("\n", $lines);
    }
}
214