endeken-com /
ofx-php-parser
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Endeken\OFX; |
||
| 4 | |||
| 5 | use RuntimeException; |
||
| 6 | use SimpleXMLElement; |
||
| 7 | |||
/**
 * Helpers that turn a raw OFX payload (SGML-style OFX 1.x or XML-style OFX 2.x)
 * into a SimpleXMLElement.
 */
class OFXUtils
{
    /**
     * Normalises an OFX document and parses it as XML.
     *
     * Steps: unify line endings, convert the bytes from ISO-8859-1 to UTF-8,
     * split the payload at the first "<OFX>" marker into header and body, convert
     * an SGML body to well-formed XML (an XML body is used as is) and parse it.
     *
     * @param string $ofxContent Raw contents of the OFX file.
     *
     * @return string|false|SimpleXMLElement The parsed document, or false when
     *         simplexml_load_string() fails without reporting any libxml error.
     *         No code path returns a string; the union is kept for compatibility.
     *
     * @throws RuntimeException When libxml reports any error while parsing.
     */
    public static function normalizeOfx(string $ofxContent): string|false|SimpleXMLElement
    {
        // Double quotes are required here: in single quotes '\r\n' is four literal
        // characters and real CR/LF bytes would never be matched.
        $ofxContent = str_replace(["\r\n", "\r"], "\n", $ofxContent);

        // NOTE(review): the input is always treated as ISO-8859-1, so a file that is
        // already UTF-8 gets double-encoded. The value pattern in
        // closeUnclosedXmlTags() is byte-oriented and relies on this conversion, so
        // both would have to change together - confirm against real-world inputs.
        $ofxContent = mb_convert_encoding($ofxContent, 'UTF-8', 'ISO-8859-1');

        // Without an "<OFX>" marker the whole payload is treated as the body.
        $sgmlStart = stripos($ofxContent, '<OFX>');
        if ($sgmlStart === false) {
            $sgmlStart = 0;
        }

        $ofxHeader = trim(substr($ofxContent, 0, $sgmlStart));
        $ofxSgml = trim(substr($ofxContent, $sgmlStart));

        if (stripos($ofxHeader, '<?xml') === 0) {
            // OFX 2.x: the body is already XML.
            $ofxXml = $ofxSgml;
        } else {
            // "." does not match "\n", so this only fires when the opening and
            // closing OFX tags share one line; every tag is then moved onto its
            // own line because the SGML converter works line by line.
            if (preg_match('/<OFX>.*<\/OFX>/', $ofxSgml) === 1) {
                $ofxSgml = str_replace('<', "\n<", $ofxSgml);
            }
            $ofxXml = self::convertSgmlToXml($ofxSgml);
        }

        // Collect libxml errors locally and always hand the process-wide libxml
        // error mode back to the caller in the state it was found.
        $previousErrorMode = libxml_use_internal_errors(true);
        libxml_clear_errors();
        try {
            $xml = simplexml_load_string($ofxXml);
            $errors = libxml_get_errors();
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }

        if ($errors) {
            throw new RuntimeException('Failed to parse OFX: ' . var_export($errors, true));
        }

        return $xml;
    }

    /**
     * Parses the OFX header block into a key => value map.
     *
     * Two layouts are handled: an XML prolog followed by an "<?OFX KEY="VALUE" ...?>"
     * processing instruction, and the plain "KEY:VALUE" lines of SGML files.
     * Entries without a separator are skipped. normalizeOfx() does not currently
     * consume the result of this method.
     *
     * @param string $ofxHeader Everything that precedes the "<OFX>" marker.
     *
     * @return array<string, string> Header names mapped to their values.
     */
    private static function parseHeader(string $ofxHeader): array
    {
        // Remove empty new lines.
        $ofxHeader = preg_replace('/^\n+/m', '', trim($ofxHeader)) ?? '';

        // Check if it's an XML file (OFXv2)
        if (preg_match('/^<\?xml/', $ofxHeader) === 1) {
            // Only parse OFX headers and not XML headers.
            $ofxHeader = preg_replace('/<\?xml .*?\?>\n?/', '', $ofxHeader) ?? '';
            $ofxHeader = preg_replace(['/"/', '/\?>/', '/<\?OFX/i'], '', $ofxHeader) ?? '';

            // Split on any whitespace run so repeated spaces or line breaks between
            // attributes do not produce empty entries.
            $attributes = preg_split('/\s+/', trim($ofxHeader), -1, PREG_SPLIT_NO_EMPTY) ?: [];

            return self::splitHeaderPairs($attributes, '=');
        }

        return self::splitHeaderPairs(explode("\n", $ofxHeader), ':');
    }

    /**
     * Splits "KEY<separator>VALUE" entries into a map, ignoring malformed entries.
     *
     * @param array<int, string> $entries   Raw header entries.
     * @param string             $separator Character between key and value.
     *
     * @return array<string, string>
     */
    private static function splitHeaderPairs(array $entries, string $separator): array
    {
        $header = [];

        foreach ($entries as $entry) {
            // Limit of 2 keeps values that themselves contain the separator intact.
            $parts = explode($separator, trim($entry), 2);
            if (count($parts) !== 2 || $parts[0] === '') {
                continue;
            }
            $header[$parts[0]] = $parts[1];
        }

        return $header;
    }

    /**
     * Converts a line-oriented SGML OFX body into XML text.
     *
     * Bare ampersands are escaped, value lines get their closing tag via
     * closeUnclosedXmlTags(), and an opening tag that is never closed before its
     * parent closes is rewritten as a self-closing element.
     *
     * @param string $sgml SGML body with one tag per line.
     *
     * @return string The converted lines, trimmed and joined with "\n".
     */
    private static function convertSgmlToXml(string $sgml): string
    {
        // Escape every "&" that does not already start an entity reference.
        $sgml = preg_replace('/&(?!#?[a-z0-9]+;)/', '&amp;', $sgml);

        $lines = explode("\n", $sgml);
        // Stack of [line index, tag name] for opening tags that are still open.
        $tags = [];

        foreach ($lines as $i => $rawLine) {
            $line = trim(self::closeUnclosedXmlTags($rawLine));
            $lines[$i] = $line;

            // Matches tags like <SOMETHING> or </SOMETHING>
            if (!preg_match('/^<(\/?[A-Za-z0-9.]+)>$/', $line, $matches)) {
                continue;
            }

            if ($matches[1][0] !== '/') {
                $tags[] = [$i, $matches[1]];
                continue;
            }

            // If matches </SOMETHING>, looks back and replaces all tags like
            // <OTHERTHING> to <OTHERTHING/> until finds the opening tag <SOMETHING>
            $tag = substr($matches[1], 1);
            while (($last = array_pop($tags)) && $last[1] !== $tag) {
                $lines[$last[0]] = "<{$last[1]}/>";
            }
        }

        return implode("\n", $lines);
    }

    /**
     * Adds the missing closing tag to a single "<TAG>value" line.
     *
     * Lines that are a lone tag, or that already carry a closing tag, are returned
     * trimmed but otherwise untouched.
     *
     * @param string $line One line of the SGML body.
     *
     * @return string The line, closed when it held an unclosed value.
     */
    private static function closeUnclosedXmlTags(string $line): string
    {
        // Special case discovered where empty content tag wasn't closed
        $line = trim($line);
        if (preg_match('/<MEMO>$/', $line) === 1) {
            return '<MEMO></MEMO>';
        }

        // Matches: <SOMETHING>blah
        // Does not match: <SOMETHING>
        // Does not match: <SOMETHING>blah</SOMETHING>
        if (preg_match(
            "/<([A-Za-z0-9.]+)>([\wà-úÀ-Ú0-9.\-_+, ;:\[\]'&\/\\\*()+{|}!£\$?=@€£#%±§~`\"]+)$/",
            $line,
            $matches
        )) {
            return "<{$matches[1]}>{$matches[2]}</{$matches[1]}>";
        }

        return $line;
    }
}
||
| 122 |