beccha /
ofxparser
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace Beccha\OfxParser\Service; |
||
| 6 | |||
| 7 | use Beccha\OfxParser\Exception\FileNotFoundException; |
||
| 8 | use Beccha\OfxParser\Exception\OfxTagNotFoundException; |
||
| 9 | use Beccha\OfxParser\Exception\XmlContentNotFoundException; |
||
| 10 | use SimpleXMLElement; |
||
| 11 | |||
/**
 * Converts an OFX file written as SGML (tags without closing counterparts) into XML.
 *
 * Content that already parses as XML is loaded as-is. Otherwise everything before
 * the <OFX> tag is dropped, unclosed tags are closed line by line and the result
 * is parsed with SimpleXML.
 */
class SgmlToXml
{
    /**
     * Load the file at the given path and return its content as a SimpleXMLElement.
     *
     * @param string $sgmlFilePath Path of the OFX file (SGML or XML flavour)
     *
     * @throws FileNotFoundException       When the file cannot be read or converted to UTF-8
     * @throws OfxTagNotFoundException     When the content has no <OFX> tag
     * @throws XmlContentNotFoundException When the resulting XML cannot be parsed
     */
    public function parse(string $sgmlFilePath): SimpleXMLElement
    {
        $fileContent = $this->loadFile($sgmlFilePath);

        // Nothing to repair when the file is already well-formed XML
        if ($this->isXml($fileContent)) {
            return $this->getXmlContent($fileContent);
        }

        $contentWithoutSgmlHeader = $this->removeSgmlHeader($fileContent);

        // Fix unclosed tags
        $sgmlLines = $this->sgmlContentToArrayOfLines($contentWithoutSgmlHeader);
        $tagsWithoutContent = $this->listTagsWithoutContent($sgmlLines);
        $tagsToClose = $this->filterTagsThatHaveAClosingCounterpart($sgmlLines, $tagsWithoutContent);
        $tagsFixedForXml = $this->closeTags($sgmlLines, $tagsToClose);
        $xmlFileContent = $this->buildXmlFileContent($tagsFixedForXml);

        return $this->getXmlContent($xmlFileContent);
    }

    /**
     * Read the file and return its content converted to UTF-8.
     *
     * Every failure (missing file, unreadable or empty content, encoding that
     * cannot be detected or converted) is reported with FileNotFoundException,
     * which is the exception this method has always raised.
     *
     * @throws FileNotFoundException
     */
    private function loadFile(string $sgmlFilePath): string
    {
        if (!is_file($sgmlFilePath)) {
            throw new FileNotFoundException($sgmlFilePath);
        }

        $fileContent = file_get_contents($sgmlFilePath);

        // Compare explicitly: a truthiness test would also reject readable content such as "0"
        if ($fileContent === false || $fileContent === '') {
            throw new FileNotFoundException($sgmlFilePath);
        }

        $detectedEncoding = mb_detect_encoding($fileContent);
        if ($detectedEncoding === false) {
            throw new FileNotFoundException($sgmlFilePath);
        }

        // mb_convert_encoding() may return a non-string on failure; never let that
        // escape through the string return type
        $utf8Content = mb_convert_encoding($fileContent, 'UTF-8', $detectedEncoding);
        if (!is_string($utf8Content)) {
            throw new FileNotFoundException($sgmlFilePath);
        }

        return $utf8Content;
    }

    /**
     * Remove the SGML header, i.e. everything that precedes the <OFX> tag.
     *
     * @throws OfxTagNotFoundException When the content has no <OFX> tag
     */
    private function removeSgmlHeader(string $sgmlFileContent): string
    {
        // stripos() is already case-insensitive, so search the original content:
        // the offset is then guaranteed to be valid for substr() on that same string
        $ofxStart = stripos($sgmlFileContent, '<OFX>');

        // Offset 0 is a valid match (file without header), only false means "not found"
        if ($ofxStart === false) {
            throw new OfxTagNotFoundException('OFX');
        }

        return trim(substr($sgmlFileContent, $ofxStart));
    }

    /**
     * Split the content on line feeds and trim every line.
     *
     * @return array<string>
     */
    private function sgmlContentToArrayOfLines(string $sgmlFileContent): array
    {
        return array_map('trim', explode("\n", $sgmlFileContent));
    }

    /**
     * List the lines that consist of a single opening tag without any content
     *
     * @param array<string> $linesFromSgml
     * @return array<string>
     */
    private function listTagsWithoutContent(array $linesFromSgml): array
    {
        $tagsWithoutContent = [];
        foreach ($linesFromSgml as $line) {
            $trimmedLine = trim($line);
            // "." is accepted so that dotted tag names such as <INTU.BID> are handled too
            if (preg_match('/^<[a-z0-9\-_.]*>$/i', $trimmedLine)) {
                $tagsWithoutContent[] = $trimmedLine;
            }
        }

        return $tagsWithoutContent;
    }

    /**
     * Filter out the tags that have a closing counterpart on a line of its own
     *
     * @param array<string> $linesFromSgml
     * @param array<string> $tagsWithoutContent
     * @return array<string> Tags for which no closing tag exists in the file
     */
    private function filterTagsThatHaveAClosingCounterpart(array $linesFromSgml, array $tagsWithoutContent): array
    {
        // Lines become keys so that each lookup is a hash access instead of a full scan
        $existingLines = array_flip($linesFromSgml);

        $tagsToClose = [];
        foreach ($tagsWithoutContent as $tag) {
            $closingTag = str_replace('<', '</', $tag);
            if (!isset($existingLines[$closingTag])) {
                $tagsToClose[] = $tag;
            }
        }

        return $tagsToClose;
    }

    /**
     * Close the tags given in $emptyTagsToClose as well as every tag that carries
     * data but has no closing counterpart. Keys of $linesFromSgml are preserved.
     *
     * @param array<string> $linesFromSgml
     * @param array<string> $emptyTagsToClose
     * @return array<string>
     */
    public function closeTags(array $linesFromSgml, array $emptyTagsToClose): array
    {
        // Lines become keys so that each lookup is a hash access instead of a full scan
        $existingLines = array_flip($linesFromSgml);
        $emptyTags = array_flip($emptyTagsToClose);

        $updatedLines = [];

        foreach ($linesFromSgml as $id => $line) {
            $updatedLines[$id] = $line;

            // Close empty tags that have no closing counterpart and no data
            if (isset($emptyTags[$line])) {
                $updatedLines[$id] = $line . str_replace('<', '</', $line);
            }

            // Close tags that have no closing counterpart but have data
            if (preg_match('/^(<[a-z0-9\-_.]*>)(.+)$/i', $line, $matches)) {
                $openingTag = $matches[1];
                $content = $matches[2];
                $closingTag = str_replace('<', '</', $openingTag);

                // A line such as <NAME>foo</NAME> is complete: appending another
                // closing tag would produce invalid XML
                $closedInline = substr($content, -strlen($closingTag)) === $closingTag;

                // Only close the tag if no closing tag is found in the file
                if (!$closedInline && !isset($existingLines[$closingTag])) {
                    $updatedLines[$id] = $openingTag . $this->cleanUpContent($content) . $closingTag;
                }
            }
        }

        return $updatedLines;
    }

    /**
     * Escape bare ampersands so that the value is valid XML character data.
     *
     * Ampersands that already start a predefined entity (&amp; &lt; &gt; &quot; &apos;)
     * or a numeric character reference are left alone to avoid double escaping.
     */
    private function cleanUpContent(string $content): string
    {
        $escaped = preg_replace(
            '/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/',
            '&amp;',
            $content
        );

        // preg_replace() returns null on a PCRE error; fall back to the raw value
        return $escaped ?? $content;
    }

    /**
     * Prepend the XML declaration and join the lines with line feeds.
     *
     * @param array<string> $linesFromSgml
     */
    private function buildXmlFileContent(array $linesFromSgml): string
    {
        array_unshift($linesFromSgml, '<?xml version="1.0" encoding="UTF-8"?>');

        return implode("\n", $linesFromSgml);
    }

    /**
     * Parse the content as XML.
     *
     * @throws XmlContentNotFoundException When the content cannot be parsed
     */
    private function getXmlContent(string $fileContent): SimpleXMLElement
    {
        $xmlContent = $this->loadXml($fileContent);

        // Deliberately a truthiness test, as before: besides a failed parse (false),
        // SimpleXML also evaluates an element without children or attributes as
        // false, so an empty root is reported as missing content too
        if ($xmlContent) {
            return $xmlContent;
        }

        throw new XmlContentNotFoundException();
    }

    /**
     * Tell whether the content can be parsed as XML.
     */
    private function isXml(string $fileContent): bool
    {
        return $this->loadXml($fileContent) !== false;
    }

    /**
     * Parse the content with SimpleXML without emitting warnings and without
     * leaving the global libxml error handling altered.
     *
     * @return SimpleXMLElement|false
     */
    private function loadXml(string $fileContent)
    {
        $previousSetting = libxml_use_internal_errors(true);

        try {
            return simplexml_load_string($fileContent);
        } finally {
            // Drop the collected errors and give the caller's setting back
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }
    }
}
||
| 174 |