1 | <?php |
||||
2 | |||||
3 | namespace Endeken\OFX; |
||||
4 | |||||
5 | use RuntimeException; |
||||
6 | use SimpleXMLElement; |
||||
7 | |||||
/**
 * Helpers that turn raw OFX payloads (SGML-style OFX 1.x or XML-style OFX 2.x)
 * into a SimpleXMLElement tree.
 */
class OFXUtils
{
    /**
     * Normalizes raw OFX content and parses it as XML.
     *
     * Steps: normalise line endings, re-encode to UTF-8, split the header from the
     * <OFX> body, convert an SGML body to well-formed XML when the header is not an
     * XML declaration, then load the result with SimpleXML.
     *
     * NOTE(review): the input is unconditionally treated as ISO-8859-1. Content that
     * is already UTF-8 will be double-encoded — confirm whether callers rely on this.
     *
     * @param string $ofxContent Raw OFX file content.
     *
     * @return string|false|SimpleXMLElement The declared union is kept for backward
     *                                       compatibility; this implementation always
     *                                       returns a SimpleXMLElement or throws.
     *
     * @throws RuntimeException When no <OFX> element is present or the XML cannot be parsed.
     */
    public static function normalizeOfx(string $ofxContent): string|false|SimpleXMLElement
    {
        // Double quotes are required here: in single quotes '\r\n' is a literal
        // backslash sequence and real CR/LF characters would never be replaced.
        $ofxContent = str_replace(["\r\n", "\r"], "\n", $ofxContent);
        $ofxContent = mb_convert_encoding($ofxContent, 'UTF-8', 'ISO-8859-1');

        $sgmlStart = stripos($ofxContent, '<OFX>');
        if ($sgmlStart === false) {
            // Without this guard `false` is silently used as offset 0.
            throw new RuntimeException('Failed to parse OFX: no <OFX> element found');
        }

        $ofxHeader = trim(substr($ofxContent, 0, $sgmlStart));
        $ofxSgml = trim(substr($ofxContent, $sgmlStart));

        // The parsed header is not consumed by this method; the call is kept so
        // header handling stays exercised in one place.
        self::parseHeader($ofxHeader);

        if (stripos($ofxHeader, '<?xml') === 0) {
            // OFX 2.x: the body is already XML.
            $ofxXml = $ofxSgml;
        } else {
            if (preg_match('/<OFX>.*<\/OFX>/', $ofxSgml) === 1) {
                // Whole document on one line: put every tag on its own line so the
                // line-based SGML converter below can process it.
                $ofxSgml = str_replace('<', "\n<", $ofxSgml);
            }
            $ofxXml = self::convertSgmlToXml($ofxSgml);
        }

        return self::loadXml($ofxXml);
    }

    /**
     * Parses an XML string with SimpleXML, converting libxml errors into an exception.
     *
     * The previous libxml internal-error setting is restored before returning so the
     * process-wide libxml state is not altered for other code.
     *
     * @throws RuntimeException When libxml reports any error or no document is produced.
     */
    private static function loadXml(string $ofxXml): SimpleXMLElement
    {
        $previousSetting = libxml_use_internal_errors(true);
        libxml_clear_errors();

        try {
            $xml = simplexml_load_string($ofxXml);
            $errors = libxml_get_errors();
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        if ($errors !== [] || $xml === false) {
            throw new RuntimeException('Failed to parse OFX: ' . var_export($errors, true));
        }

        return $xml;
    }

    /**
     * Parses the text preceding the <OFX> element into a key/value map.
     *
     * Two header forms are handled: an XML-style header (`<?xml ...?>` followed by
     * `<?OFX KEY="VALUE" ...?>`) and a plain `KEY:VALUE`-per-line header. Tokens
     * without a separator or with an empty key are skipped.
     *
     * @return array<string, string>
     */
    private static function parseHeader(string $ofxHeader): array
    {
        $ofxHeader = trim($ofxHeader);
        // Remove empty new lines.
        $ofxHeader = preg_replace('/^\n+/m', '', $ofxHeader) ?? $ofxHeader;

        // Check if it's an XML file (OFXv2)
        if (preg_match('/^<\?xml/', $ofxHeader) === 1) {
            // Only parse OFX headers and not XML headers.
            $ofxHeader = preg_replace('/<\?xml .*?\?>\n?/', '', $ofxHeader) ?? '';
            $ofxHeader = preg_replace(['/"/', '/\?>/', '/<\?OFX/i'], '', $ofxHeader) ?? '';
            // Split on any whitespace run so repeated spaces or line breaks between
            // attributes do not yield empty tokens.
            $tokens = preg_split('/\s+/', trim($ofxHeader), -1, PREG_SPLIT_NO_EMPTY) ?: [];
            $separator = '=';
        } else {
            $tokens = explode("\n", $ofxHeader);
            $separator = ':';
        }

        $header = [];
        foreach ($tokens as $token) {
            // Limit of 2 keeps values that themselves contain the separator intact.
            $parts = explode($separator, $token, 2);
            if (count($parts) !== 2) {
                continue;
            }

            $key = trim($parts[0]);
            if ($key === '') {
                continue;
            }

            $header[$key] = trim($parts[1]);
        }

        return $header;
    }

    /**
     * Converts line-oriented OFX SGML into well-formed XML.
     *
     * Bare ampersands are escaped, value tags left open (`<TAG>value`) are closed,
     * and container tags that never receive a closing tag are rewritten as
     * self-closing elements.
     */
    private static function convertSgmlToXml(string $sgml): string
    {
        // Escape ampersands that do not already start an entity reference.
        $sgml = preg_replace('/&(?!#?[a-z0-9]+;)/', '&amp;', $sgml) ?? $sgml;

        $lines = explode("\n", $sgml);
        $openTags = []; // stack of [line index, tag name] for tags seen alone on a line

        foreach ($lines as $index => $rawLine) {
            $line = trim(self::closeUnclosedXmlTags($rawLine));
            $lines[$index] = $line;

            // Matches tags like <SOMETHING> or </SOMETHING>
            if (preg_match('/^<(\/?[A-Za-z0-9.]+)>$/', $line, $matches) !== 1) {
                continue;
            }

            $name = $matches[1];
            if ($name[0] !== '/') {
                $openTags[] = [$index, $name];
                continue;
            }

            // On </SOMETHING>, look back and rewrite every pending <OTHERTHING>
            // as <OTHERTHING/> until the opening <SOMETHING> is found.
            $closing = substr($name, 1);
            while (($last = array_pop($openTags)) !== null && $last[1] !== $closing) {
                $lines[$last[0]] = "<{$last[1]}/>";
            }
        }

        return implode("\n", $lines);
    }

    /**
     * Adds the missing closing tag to a single `<TAG>value` line.
     *
     * Lines that are a bare tag, or that already carry a closing tag, are returned
     * trimmed but otherwise unchanged.
     */
    private static function closeUnclosedXmlTags(string $line): string
    {
        // Special case discovered where empty content tag wasn't closed
        $line = trim($line);
        if (preg_match('/<MEMO>$/', $line) === 1) {
            return '<MEMO></MEMO>';
        }

        // Matches: <SOMETHING>blah
        // Does not match: <SOMETHING>
        // Does not match: <SOMETHING>blah</SOMETHING>
        $unclosedValueTag = "/<([A-Za-z0-9.]+)>([\wà-úÀ-Ú0-9.\-_+, ;:\[\]'&\/\\\*()+{|}!£\$?=@€£#%±§~`\"]+)$/";
        if (preg_match($unclosedValueTag, $line, $matches) === 1) {
            return "<{$matches[1]}>{$matches[2]}</{$matches[1]}>";
        }

        return $line;
    }
}
||||
122 |