endeken-com /
ofx-php-parser
| 1 | <?php |
||||
| 2 | |||||
| 3 | namespace Endeken\OFX; |
||||
| 4 | |||||
| 5 | use RuntimeException; |
||||
| 6 | use SimpleXMLElement; |
||||
| 7 | |||||
/**
 * Static helpers that turn a raw OFX payload into a parsed XML document.
 *
 * Two input flavours are handled: documents whose header starts with an XML
 * declaration (passed to the XML parser as-is) and SGML-style documents whose
 * leaf elements have no closing tag (rewritten into well-formed XML first).
 */
class OFXUtils
{
    /**
     * Normalises raw OFX content and parses it with SimpleXML.
     *
     * Steps: unify line endings, make sure the text is UTF-8, split the header
     * from the <OFX> body, convert an SGML body to XML when the header is not
     * an XML declaration, then parse the result.
     *
     * @param string $ofxContent Raw OFX document including its header section.
     *
     * @return string|false|SimpleXMLElement The parsed document, or false when
     *         SimpleXML produced no document without reporting a libxml error.
     *         This implementation never returns a string.
     *
     * @throws RuntimeException When the content has no <OFX> element, cannot be
     *         converted to UTF-8, or libxml reports parse errors.
     */
    public static function normalizeOfx(string $ofxContent): string|false|SimpleXMLElement
    {
        // Escape sequences must be double-quoted: '\r\n' in single quotes is the
        // four literal characters backslash-r-backslash-n, not a line break.
        $ofxContent = str_replace(["\r\n", "\r"], "\n", $ofxContent);

        // Only transcode when the bytes are not already valid UTF-8; converting
        // UTF-8 input "from ISO-8859-1" would double-encode every non-ASCII character.
        if (!mb_check_encoding($ofxContent, 'UTF-8')) {
            $converted = mb_convert_encoding($ofxContent, 'UTF-8', 'ISO-8859-1');
            if (!is_string($converted)) {
                throw new RuntimeException('Failed to parse OFX: unable to convert content to UTF-8');
            }
            $ofxContent = $converted;
        }

        $sgmlStart = stripos($ofxContent, '<OFX>');
        if ($sgmlStart === false) {
            // Without this guard the false offset is coerced to 0 and the whole
            // input is treated as the document body.
            throw new RuntimeException('Failed to parse OFX: no <OFX> element found');
        }

        $ofxHeader = trim(substr($ofxContent, 0, $sgmlStart));
        $ofxSgml = trim(substr($ofxContent, $sgmlStart));

        if (stripos($ofxHeader, '<?xml') === 0) {
            // Header is an XML declaration: the body is already XML.
            $ofxXml = $ofxSgml;
        } else {
            if (preg_match('/<OFX>.*<\/OFX>/', $ofxSgml) === 1) {
                // The whole document sits on one line; add line breaks so the
                // line-based SGML conversion below can work tag by tag.
                $ofxSgml = str_replace('<', "\n<", $ofxSgml);
            }
            $ofxXml = self::convertSgmlToXml($ofxSgml);
        }

        // Collect libxml errors internally for this parse only, then put the
        // process-wide setting back the way the caller had it.
        $previousSetting = libxml_use_internal_errors(true);
        libxml_clear_errors();
        try {
            $xml = simplexml_load_string($ofxXml);
            $errors = libxml_get_errors();
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        if ($errors) {
            throw new RuntimeException('Failed to parse OFX: ' . var_export($errors, true));
        }

        return $xml;
    }

    /**
     * Parses the header section that precedes the <OFX> element into key/value pairs.
     *
     * A header starting with an XML declaration is read as space-separated
     * KEY="VALUE" attributes of the <?OFX ... ?> processing instruction; any
     * other header is read as one KEY:VALUE pair per line. Entries without a
     * separator are skipped.
     *
     * Not currently invoked by normalizeOfx(), which returns only the parsed body.
     *
     * @param string $ofxHeader Text found before the <OFX> element.
     *
     * @return array<string, string> Header names mapped to their values.
     */
    private static function parseHeader(string $ofxHeader): array
    {
        $ofxHeader = trim($ofxHeader);
        // Remove empty new lines.
        $ofxHeader = preg_replace('/^\n+/m', '', $ofxHeader) ?? $ofxHeader;

        if (preg_match('/^<\?xml/i', $ofxHeader) === 1) {
            // Only parse OFX headers and not XML headers.
            $ofxHeader = preg_replace('/<\?xml .*?\?>\n?/i', '', $ofxHeader) ?? $ofxHeader;
            $ofxHeader = preg_replace(['/"/', '/\?>/', '/<\?OFX/i'], '', $ofxHeader) ?? $ofxHeader;
            $entries = preg_split('/\s+/', trim($ofxHeader), -1, PREG_SPLIT_NO_EMPTY) ?: [];
            $separator = '=';
        } else {
            $entries = explode("\n", $ofxHeader);
            $separator = ':';
        }

        $header = [];
        foreach ($entries as $entry) {
            // Limit of 2 keeps separators that appear inside the value.
            $parts = explode($separator, $entry, 2);
            if (count($parts) !== 2) {
                // Blank or malformed entry: nothing to record.
                continue;
            }
            $header[trim($parts[0])] = trim($parts[1]);
        }

        return $header;
    }

    /**
     * Rewrites a line-oriented SGML OFX body into well-formed XML.
     *
     * Bare ampersands are escaped, leaf elements written as "<TAG>value" get
     * their closing tag, and container tags that are opened but never closed
     * before their parent closes are turned into self-closing elements.
     *
     * @param string $sgml SGML body with one tag per line.
     *
     * @return string The XML text, one element per line.
     */
    private static function convertSgmlToXml(string $sgml): string
    {
        // Escape every "&" that does not already start an entity reference.
        $sgml = preg_replace('/&(?!#?[a-z0-9]+;)/', '&amp;', $sgml) ?? $sgml;

        $lines = explode("\n", $sgml);
        // Stack of [line index, tag name] for tags opened on a line of their own.
        $openTags = [];

        foreach ($lines as $index => $rawLine) {
            $line = self::closeUnclosedXmlTags($rawLine);
            $lines[$index] = $line;

            // Matches tags like <SOMETHING> or </SOMETHING>
            if (preg_match('/^<(\/?[A-Za-z0-9.]+)>$/', $line, $matches) !== 1) {
                continue;
            }

            if ($matches[1][0] !== '/') {
                $openTags[] = [$index, $matches[1]];
                continue;
            }

            // Closing tag: walk back through the open tags and self-close each
            // one until the matching opening tag is reached.
            $closingName = substr($matches[1], 1);
            while (($last = array_pop($openTags)) !== null && $last[1] !== $closingName) {
                $lines[$last[0]] = "<{$last[1]}/>";
            }
        }

        return implode("\n", $lines);
    }

    /**
     * Adds the missing closing tag to a single "<TAG>value" line.
     *
     * Lines that are a bare tag, already closed, or contain characters outside
     * the accepted value set are returned trimmed but otherwise unchanged.
     *
     * @param string $line One line of the SGML body.
     *
     * @return string The trimmed line, closed when it was an unclosed leaf element.
     */
    private static function closeUnclosedXmlTags(string $line): string
    {
        // Special case discovered where empty content tag wasn't closed
        $line = trim($line);
        if (preg_match('/<MEMO>$/', $line) === 1) {
            return '<MEMO></MEMO>';
        }

        // Matches: <SOMETHING>blah
        // Does not match: <SOMETHING>
        // Does not match: <SOMETHING>blah</SOMETHING>
        // The pattern works on bytes (no "u" flag); \x80-\xFF accepts every byte
        // of a multi-byte UTF-8 character so non-ASCII values are closed too.
        $unclosedLeaf = '/<([A-Za-z0-9.]+)>([\w\x80-\xFF.\-+, ;:\[\]\'&\/\\\\*(){|}!$?=@#%~`"]+)$/';
        if (preg_match($unclosedLeaf, $line, $matches) === 1) {
            return "<{$matches[1]}>{$matches[2]}</{$matches[1]}>";
        }

        return $line;
    }
}
||||
| 122 |