Issues (2)

src/Service/SgmlToXml.php (1 issue)

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Beccha\OfxParser\Service;
6
7
use Beccha\OfxParser\Exception\FileNotFoundException;
8
use Beccha\OfxParser\Exception\OfxTagNotFoundException;
9
use Beccha\OfxParser\Exception\XmlContentNotFoundException;
10
use SimpleXMLElement;
11
12
/**
 * Converts an OFX file written as SGML (optional header, unclosed tags) into a
 * SimpleXMLElement. Content that is already well-formed XML is parsed as-is.
 */
class SgmlToXml
{
    /** A line holding a single opening tag and nothing else, e.g. "<STMTTRN>". */
    private const TAG_ONLY_PATTERN = '/^<[a-z0-9\-_.]+>$/i';

    /** A line holding an opening tag followed by data, e.g. "<CODE>0". */
    private const TAG_WITH_DATA_PATTERN = '/^(<[a-z0-9\-_.]+>)(.+)$/i';

    /** An ampersand that does not start a predefined or numeric XML entity. */
    private const BARE_AMPERSAND_PATTERN = '/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9a-fA-F]+);)/';

    /**
     * Load the file at the given path and return its content as XML.
     *
     * @throws FileNotFoundException       when the file cannot be read, is empty, or its
     *                                     encoding cannot be detected/converted
     * @throws OfxTagNotFoundException     when the SGML content has no <OFX> tag
     * @throws XmlContentNotFoundException when the resulting markup cannot be loaded
     */
    public function parse(string $sgmlFilePath): SimpleXMLElement
    {
        $fileContent = $this->loadFile($sgmlFilePath);

        // Nothing to convert when the file already is XML.
        if ($this->isXml($fileContent)) {
            return $this->getXmlContent($fileContent);
        }

        $contentWithoutSgmlHeader = $this->removeSgmlHeader($fileContent);

        // Add the closing tags SGML leaves out.
        $sgmlLines = $this->sgmlContentToArrayOfLines($contentWithoutSgmlHeader);
        $tagsWithoutContent = $this->listTagsWithoutContent($sgmlLines);
        $tagsToClose = $this->filterTagsThatHaveAClosingCouterpart($sgmlLines, $tagsWithoutContent);
        $tagsFixedForXml = $this->closeTags($sgmlLines, $tagsToClose);

        return $this->getXmlContent($this->buildXmlFileContent($tagsFixedForXml));
    }

    /**
     * Read the file and return its content converted to UTF-8.
     *
     * @throws FileNotFoundException when the path is not a readable regular file, the
     *                               content is empty, or the encoding cannot be
     *                               detected or converted
     */
    private function loadFile(string $sgmlFilePath): string
    {
        // is_file() rejects directories, which file_exists() would accept.
        if (!is_file($sgmlFilePath) || !is_readable($sgmlFilePath)) {
            throw new FileNotFoundException($sgmlFilePath);
        }

        $fileContent = file_get_contents($sgmlFilePath);
        if ($fileContent === false || $fileContent === '') {
            throw new FileNotFoundException($sgmlFilePath);
        }

        $detectedEncoding = mb_detect_encoding($fileContent);
        if ($detectedEncoding === false) {
            // NOTE(review): the file exists here; a dedicated exception would describe
            // this failure better, but the exception type is kept for existing callers.
            throw new FileNotFoundException($sgmlFilePath);
        }

        // mb_convert_encoding() is not guaranteed to return a string; never let a
        // non-string escape through the string return type.
        $utf8Content = mb_convert_encoding($fileContent, 'UTF-8', $detectedEncoding);
        if (!is_string($utf8Content)) {
            throw new FileNotFoundException($sgmlFilePath);
        }

        return $utf8Content;
    }

    /**
     * Drop everything that precedes the first <OFX> tag (matched case-insensitively).
     *
     * @throws OfxTagNotFoundException when the content has no <OFX> tag
     */
    private function removeSgmlHeader(string $sgmlFileContent): string
    {
        // Search the original string: stripos() already ignores case, and an offset
        // computed on a case-converted copy may not line up with the original bytes.
        $ofxStart = stripos($sgmlFileContent, '<OFX>');

        // Compare against false explicitly: offset 0 is a valid match.
        if ($ofxStart === false) {
            throw new OfxTagNotFoundException('OFX');
        }

        return trim(substr($sgmlFileContent, $ofxStart));
    }

    /**
     * Split the content on "\n" and trim every resulting line.
     *
     * @return array<string>
     */
    private function sgmlContentToArrayOfLines(string $sgmlFileContent): array
    {
        return array_map('trim', explode("\n", $sgmlFileContent));
    }

    /**
     * List the lines that consist of a single opening tag without any data.
     *
     * @param array<string> $linesFromSgml
     * @return array<string>
     */
    private function listTagsWithoutContent(array $linesFromSgml): array
    {
        $tagsWithoutContent = [];
        foreach ($linesFromSgml as $line) {
            $trimmedLine = trim($line);
            if (preg_match(self::TAG_ONLY_PATTERN, $trimmedLine) === 1) {
                $tagsWithoutContent[] = $trimmedLine;
            }
        }

        return $tagsWithoutContent;
    }

    /**
     * Keep only the tags whose closing counterpart is not one of the given lines.
     *
     * @param array<string> $linesFromSgml
     * @param array<string> $tagsWithoutContent
     * @return array<string>
     */
    private function filterTagsThatHaveAClosingCouterpart(array $linesFromSgml, array $tagsWithoutContent): array
    {
        $knownLines = $this->indexLines($linesFromSgml);

        $tagsToClose = [];
        foreach ($tagsWithoutContent as $tag) {
            $closingTag = str_replace('<', '</', $tag);
            if (!isset($knownLines[$closingTag])) {
                $tagsToClose[] = $tag;
            }
        }

        return $tagsToClose;
    }

    /**
     * Append the missing closing tags, keeping the keys of $linesFromSgml.
     *
     * - A line listed in $emptyTagsToClose is immediately followed by its closing tag.
     * - A "<TAG>data" line is closed unless "</TAG>" exists as its own line or the
     *   line already ends with "</TAG>"; bare ampersands in its data are escaped.
     *
     * @param array<string> $linesFromSgml
     * @param array<string> $emptyTagsToClose
     * @return array<string>
     */
    public function closeTags(array $linesFromSgml, array $emptyTagsToClose): array
    {
        // Hash lookups instead of scanning every line for each line.
        $knownLines = $this->indexLines($linesFromSgml);
        $emptyTags = $this->indexLines($emptyTagsToClose);

        $updatedLines = [];
        foreach ($linesFromSgml as $id => $line) {
            $updatedLines[$id] = $line;

            // Tag without data and without closing counterpart.
            if (isset($emptyTags[$line])) {
                $updatedLines[$id] = $line . str_replace('<', '</', $line);
            }

            // Tag followed by data.
            if (preg_match(self::TAG_WITH_DATA_PATTERN, $line, $matches) !== 1) {
                continue;
            }

            $openingTag = $matches[1];
            $content = $matches[2];
            $closingTag = str_replace('<', '</', $openingTag);

            // The closing tag lives on a line of its own somewhere in the file.
            if (isset($knownLines[$closingTag])) {
                continue;
            }

            // The line is already closed inline; closing it again would break the XML.
            if (substr($content, -strlen($closingTag)) === $closingTag) {
                continue;
            }

            $updatedLines[$id] = $openingTag . $this->cleanUpContent($content) . $closingTag;
        }

        return $updatedLines;
    }

    /**
     * Escape ampersands, leaving predefined (&amp; &lt; &gt; &quot; &apos;) and
     * numeric entities untouched so they are not escaped twice.
     */
    private function cleanUpContent(string $content): string
    {
        $escaped = preg_replace(self::BARE_AMPERSAND_PATTERN, '&amp;', $content);

        // preg_replace() yields null on a PCRE error; fall back to the plain replacement.
        return $escaped ?? str_replace('&', '&amp;', $content);
    }

    /**
     * Prepend the XML declaration and join the lines with "\n".
     *
     * @param array<string> $linesFromSgml
     */
    private function buildXmlFileContent(array $linesFromSgml): string
    {
        array_unshift($linesFromSgml, '<?xml version="1.0" encoding="UTF-8"?>');

        return implode("\n", $linesFromSgml);
    }

    /**
     * Parse the content as XML.
     *
     * @throws XmlContentNotFoundException when the content cannot be parsed or the
     *                                     parsed element evaluates to false
     */
    private function getXmlContent(string $fileContent): SimpleXMLElement
    {
        $xmlContent = $this->loadXmlQuietly($fileContent);

        // Truthiness check kept on purpose: a SimpleXMLElement can itself evaluate to
        // false, and callers have always received the exception in that case.
        if ($xmlContent) {
            return $xmlContent;
        }

        throw new XmlContentNotFoundException();
    }

    /**
     * Tell whether the content can be parsed as XML.
     */
    private function isXml(string $fileContent): bool
    {
        return $this->loadXmlQuietly($fileContent) !== false;
    }

    /**
     * Parse a string with libxml errors collected internally instead of emitted,
     * then restore the libxml error mode that was active before the call.
     *
     * @return SimpleXMLElement|false
     */
    private function loadXmlQuietly(string $xml)
    {
        $previousMode = libxml_use_internal_errors(true);
        $element = simplexml_load_string($xml);

        // Only clear the buffer when this method enabled it; otherwise the caller
        // is collecting errors and must keep them.
        if (!$previousMode) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousMode);

        return $element;
    }

    /**
     * Build a lookup table keyed by line for constant-time membership tests.
     *
     * @param array<string> $lines
     * @return array<string, bool>
     */
    private function indexLines(array $lines): array
    {
        $index = [];
        foreach ($lines as $line) {
            $index[$line] = true;
        }

        return $index;
    }
}
174