1 | <?php |
||
2 | |||
3 | namespace Endeken\OFX; |
||
4 | |||
5 | use RuntimeException; |
||
6 | use SimpleXMLElement; |
||
7 | |||
/**
 * Helpers that turn raw OFX content (SGML-style OFX 1.x or XML-style OFX 2.x)
 * into a parsed XML document.
 */
class OFXUtils
{
    /**
     * Normalises raw OFX content and parses it as XML.
     *
     * Steps: line endings are unified to "\n", the content is converted from
     * ISO-8859-1 to UTF-8, everything before the first `<OFX>` tag is treated
     * as the header, and the remainder is either parsed as-is (when the header
     * starts with an XML declaration) or first converted from SGML to XML.
     *
     * @param string $ofxContent Raw OFX document including its header. The bytes are
     *                           always interpreted as ISO-8859-1; content that is already
     *                           UTF-8 is re-encoded as well.
     *
     * @return string|false|SimpleXMLElement The parsed document, or false when
     *                                       simplexml_load_string() fails without reporting
     *                                       a libxml error. `string` stays in the declared
     *                                       type for backward compatibility only; this
     *                                       method does not return a string.
     *
     * @throws RuntimeException When libxml reports any error while parsing.
     */
    public static function normalizeOfx(string $ofxContent): string|false|SimpleXMLElement
    {
        // Double quotes are required here: in single quotes '\r\n' is four literal
        // characters and real CR/LF sequences would never be replaced.
        $ofxContent = str_replace(["\r\n", "\r"], "\n", $ofxContent);
        $ofxContent = mb_convert_encoding($ofxContent, 'UTF-8', 'ISO-8859-1');

        // Without an <OFX> tag the header is empty and the whole content is the body.
        $sgmlStart = stripos($ofxContent, '<OFX>');
        if ($sgmlStart === false) {
            $sgmlStart = 0;
        }

        $ofxHeader = trim(substr($ofxContent, 0, $sgmlStart));
        $ofxSgml = trim(substr($ofxContent, $sgmlStart));

        if (stripos($ofxHeader, '<?xml') === 0) {
            // OFX 2.x: the body is already XML.
            $ofxXml = $ofxSgml;
        } else {
            // The SGML converter works line by line, so a document delivered on a
            // single line is first split to one tag per line.
            if (preg_match('/<OFX>.*<\/OFX>/', $ofxSgml) === 1) {
                $ofxSgml = str_replace('<', "\n<", $ofxSgml);
            }

            $ofxXml = self::convertSgmlToXml($ofxSgml);
        }

        // Collect libxml errors internally, and put the caller's setting back afterwards
        // so this call does not change global libxml behaviour.
        $previousSetting = libxml_use_internal_errors(true);
        libxml_clear_errors();

        try {
            $xml = simplexml_load_string($ofxXml);
            $errors = libxml_get_errors();

            if ($errors) {
                throw new RuntimeException('Failed to parse OFX: ' . var_export($errors, true));
            }

            return $xml;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
    }

    /**
     * Parses the OFX header into a key/value map.
     *
     * Handles both header styles: `KEY:VALUE` lines, and the `<?OFX KEY="VALUE" ...?>`
     * processing instruction that follows an XML declaration. Entries without a
     * separator or with an empty key are skipped.
     *
     * normalizeOfx() does not consume header values, so this helper is not on the
     * parsing path.
     *
     * @param string $ofxHeader Everything that precedes the `<OFX>` tag.
     *
     * @return array<string, string> Header names mapped to their values.
     */
    private static function parseHeader(string $ofxHeader): array
    {
        $header = [];

        // Remove empty lines.
        $ofxHeader = preg_replace('/^\n+/m', '', trim($ofxHeader)) ?? '';

        if (preg_match('/^<\?xml/', $ofxHeader) === 1) {
            // Only the OFX processing instruction is of interest, not the XML declaration.
            $ofxHeader = preg_replace('/<\?xml .*?\?>\n?/', '', $ofxHeader) ?? '';
            $ofxHeader = preg_replace(['/"/', '/\?>/', '/<\?OFX/i'], '', $ofxHeader) ?? '';
            $pairs = preg_split('/\s+/', trim($ofxHeader), -1, PREG_SPLIT_NO_EMPTY) ?: [];
            $separator = '=';
        } else {
            $pairs = explode("\n", $ofxHeader);
            $separator = ':';
        }

        foreach ($pairs as $pair) {
            // Limit of 2 keeps separators that occur inside the value.
            $parts = explode($separator, $pair, 2);
            if (count($parts) !== 2) {
                continue;
            }

            $key = trim($parts[0]);
            if ($key === '') {
                continue;
            }

            $header[$key] = trim($parts[1]);
        }

        return $header;
    }

    /**
     * Converts line-oriented OFX SGML into XML.
     *
     * Bare ampersands are escaped, leaf elements written as `<TAG>value` get a closing
     * tag, and opening tags that are still open when an enclosing element closes are
     * rewritten as self-closing `<TAG/>`.
     *
     * @param string $sgml SGML body with one tag per line.
     *
     * @return string The XML text, one trimmed line per input line.
     */
    private static function convertSgmlToXml(string $sgml): string
    {
        // Escape ampersands that are not already the start of an entity.
        $sgml = preg_replace('/&(?!#?[a-z0-9]+;)/', '&amp;', $sgml) ?? $sgml;

        $lines = explode("\n", $sgml);
        $openTags = [];        // stack of [line index, tag name] for bare opening tags
        $autoClosedTag = null; // tag closed by closeUnclosedXmlTags() on the previous non-blank line

        foreach ($lines as $index => $rawLine) {
            $rawLine = trim($rawLine);
            $line = trim(self::closeUnclosedXmlTags($rawLine));
            $lines[$index] = $line;

            if ($line === '') {
                continue;
            }

            $previousAutoClosedTag = $autoClosedTag;
            $autoClosedTag = null;

            if ($line !== $rawLine) {
                // The helper rewrote the line, i.e. it appended a closing tag.
                if (preg_match('/^<([A-Za-z0-9.]+)>/', $line, $closed) === 1) {
                    $autoClosedTag = $closed[1];
                }
                continue;
            }

            // Only bare tags (<SOMETHING> or </SOMETHING>) affect the stack.
            if (preg_match('/^<(\/?)([A-Za-z0-9.]+)>$/', $line, $matches) !== 1) {
                continue;
            }

            [, $slash, $name] = $matches;

            if ($slash === '') {
                $openTags[] = [$index, $name];
                continue;
            }

            if (!in_array($name, array_column($openTags, 1), true)) {
                // No matching opener on the stack. If the element was just closed
                // automatically, this closer is redundant and is dropped; otherwise the
                // line is left alone instead of unwinding every enclosing element.
                if ($name === $previousAutoClosedTag) {
                    $lines[$index] = '';
                }
                continue;
            }

            // Every tag opened after the matching opener was never closed: self-close it.
            while (($last = array_pop($openTags)) !== null && $last[1] !== $name) {
                $lines[$last[0]] = "<{$last[1]}/>";
            }
        }

        return implode("\n", $lines);
    }

    /**
     * Appends the missing closing tag to a `<TAG>value` line.
     *
     * Lines that end in a bare `<MEMO>` become `<MEMO></MEMO>`; any other line is
     * returned trimmed and otherwise unchanged when it does not match `<TAG>value`.
     *
     * @param string $line A single line of SGML.
     *
     * @return string The line with its closing tag added where needed.
     */
    private static function closeUnclosedXmlTags(string $line): string
    {
        $line = trim($line);

        // Special case discovered where empty content tag wasn't closed.
        if (preg_match('/<MEMO>$/', $line) === 1) {
            return '<MEMO></MEMO>';
        }

        // Matches: <SOMETHING>blah
        // Does not match: <SOMETHING>
        // Does not match: <SOMETHING>blah</SOMETHING>
        $leafPattern = "/<([A-Za-z0-9.]+)>([\wà-úÀ-Ú0-9.\-_+, ;:\[\]'&\/\\\*()+{|}!£\$?=@€£#%±§~`\"]+)$/";

        if (preg_match($leafPattern, $line, $matches) === 1) {
            return sprintf('<%1$s>%2$s</%1$s>', $matches[1], $matches[2]);
        }

        return $line;
    }
}
||
122 |