EncodingFactory::detect() - Code Metrics - Inspection of "Update for PHP 8.0" - fisharebest/webtrees - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — dev ( 824cd4...d410ef )

by Greg

created 2022-02-22 10:13 UTC

EncodingFactory::detect() C

↳ Parent: EncodingFactory

Complexity

Conditions	13
Paths	75

Size

Total Lines	97
Code Lines	67

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
cc	13
eloc	67
c	1
b	0
f	1
nc	75
nop	1
dl	0
loc	97
rs	6.0133

How to fix Long Method Complexity

<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Factories;

use DomainException;
use Fisharebest\Webtrees\Contracts\EncodingFactoryInterface;
use Fisharebest\Webtrees\Encodings\ANSEL;
use Fisharebest\Webtrees\Encodings\ASCII;
use Fisharebest\Webtrees\Encodings\CP437;
use Fisharebest\Webtrees\Encodings\CP850;
use Fisharebest\Webtrees\Encodings\EncodingInterface;
use Fisharebest\Webtrees\Encodings\ISO88591;
use Fisharebest\Webtrees\Encodings\ISO88592;
use Fisharebest\Webtrees\Encodings\MacRoman;
use Fisharebest\Webtrees\Encodings\UTF16BE;
use Fisharebest\Webtrees\Encodings\UTF16LE;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Encodings\Windows1250;
use Fisharebest\Webtrees\Encodings\Windows1251;
use Fisharebest\Webtrees\Encodings\Windows1252;
use Fisharebest\Webtrees\Exceptions\InvalidGedcomEncodingException;

use function explode;
use function ltrim;
use function preg_match;
use function preg_quote;
use function str_contains;
use function str_starts_with;
use function strlen;
use function strstr;
use function strtr;

/**
 * Create an encoding object.
 *
 * Detects the encoding of a GEDCOM file from the start of its header, and
 * creates encoding objects from their names.
 */
class EncodingFactory implements EncodingFactoryInterface
{
    /**
     * Labels used in "1 CHAR" header lines, and the encoding that each one denotes.
     *
     * A key of the form "CHAR/VERS" only matches when the "1 CHAR" line is
     * immediately followed by the given "2 VERS" line.
     *
     * Labels are matched as case-insensitive prefixes of the declared value, and
     * the longest matching label wins, so the order of this list does not matter.
     *
     * Some of these come from Tamura Jones, the rest from webtrees users.
     */
    private const CHARACTER_SETS = [
        'ASCII'             => ASCII::NAME,
        'ANSEL'             => ANSEL::NAME,
        'UTF-8'             => UTF8::NAME,
        'UNICODE'           => UTF8::NAME, // If the null byte test failed, this can't be UTF16
        'ASCII/MacOS Roman' => MacRoman::NAME, // GEDitCOM
        'ASCII/MACINTOSH'   => MacRoman::NAME, // MacFamilyTree < 8.3.5
        'MACINTOSH'         => MacRoman::NAME, // MacFamilyTree >= 8.3.5
        'CP437'             => CP437::NAME,
        'IBMPC'             => CP437::NAME,
        'IBM'               => CP437::NAME, // Reunion
        'IBM-PC'            => CP437::NAME, // CumberlandFamilyTree
        'OEM'               => CP437::NAME, // Généatique
        'CP850'             => CP850::NAME,
        'MSDOS'             => CP850::NAME,
        'IBM-DOS'           => CP850::NAME, // Reunion, EasyTree
        'MS-DOS'            => CP850::NAME, // AbrEdit FTM for Windows
        'ANSI'              => CP850::NAME,
        'WINDOWS'           => CP850::NAME, // Parentele
        'IBM WINDOWS'       => CP850::NAME, // EasyTree, Généalogie, Reunion, TribalPages
        'IBM_WINDOWS'       => CP850::NAME, // EasyTree
        'CP1250'            => Windows1250::NAME,
        'windows-1250'      => Windows1250::NAME, // GenoPro, Rodokmen Pro
        'CP1251'            => Windows1251::NAME,
        'WINDOWS-1251'      => Windows1251::NAME, // Rodovid
        'CP1252'            => Windows1252::NAME, // Lifelines
        'ISO-8859-1'        => ISO88591::NAME, // Cumberland Family Tree, Lifelines
        'ISO8859-1'         => ISO88591::NAME, // Scion Genealogist
        'ISO8859'           => ISO88591::NAME, // Genealogica Grafica
        'LATIN-1'           => ISO88591::NAME,
        'LATIN1'            => ISO88591::NAME, // GenealogyJ
        'ISO-8859-2'        => ISO88592::NAME,
        'ISO8859-2'         => ISO88592::NAME,
        'LATIN-2'           => ISO88592::NAME,
        'LATIN2'            => ISO88592::NAME,
    ];

    /**
     * Detect an encoding from a GEDCOM header record.
     *
     * The leading bytes are checked first (byte-order marks, then UTF-16 null
     * bytes).  Otherwise the encoding is taken from the "1 CHAR" line of the
     * header record.
     *
     * @param string $header The start of a GEDCOM file.
     *
     * @return EncodingInterface|null The detected encoding, or null when $header
     *                                does not yet contain a complete header record.
     * @throws InvalidGedcomEncodingException When the header is complete, but does
     *                                        not declare a recognised character set.
     */
    public function detect(string $header): ?EncodingInterface
    {
        $encoding = $this->detectByteSignature($header);

        if ($encoding !== null) {
            return $this->make($encoding);
        }

        $header = $this->extractHeaderRecord($header);

        if ($header === null) {
            return null;
        }

        $encoding = $this->detectCharacterSet($header);

        if ($encoding !== null) {
            return $this->make($encoding);
        }

        // Report the label that we did not recognise.  Use the same line anchor,
        // spelling (CHAR/CHARACTER) and case-insensitivity as the detection above.
        if (preg_match('/^1 CHAR(?:ACTER)? (.+)/mi', $header, $match) === 1) {
            $charset = $match[1];
        } else {
            $charset = '???';
        }

        throw new InvalidGedcomEncodingException($charset);
    }

    /**
     * Create a named encoding.
     *
     * @param string $name One of the NAME constants of the supported encodings.
     *
     * @return EncodingInterface
     * @throws DomainException When $name is not a supported encoding.
     */
    public function make(string $name): EncodingInterface
    {
        switch ($name) {
            case UTF8::NAME:
                return new UTF8();

            case UTF16BE::NAME:
                return new UTF16BE();

            case UTF16LE::NAME:
                return new UTF16LE();

            case ANSEL::NAME:
                return new ANSEL();

            case ASCII::NAME:
                return new ASCII();

            case CP437::NAME:
                return new CP437();

            case CP850::NAME:
                return new CP850();

            case Windows1250::NAME:
                return new Windows1250();

            case Windows1251::NAME:
                return new Windows1251();

            case Windows1252::NAME:
                return new Windows1252();

            case MacRoman::NAME:
                return new MacRoman();

            case ISO88591::NAME:
                return new ISO88591();

            case ISO88592::NAME:
                return new ISO88592();

            default:
                throw new DomainException('Invalid encoding: ' . $name);
        }
    }

    /**
     * A list of supported encodings and their names.
     *
     * @return array<string,string> Display names, keyed by encoding name.
     */
    public function list(): array
    {
        return [
            UTF8::NAME        => 'UTF-8',
            UTF16BE::NAME     => 'UTF-16BE',
            UTF16LE::NAME     => 'UTF-16LE',
            ANSEL::NAME       => 'ANSEL',
            ASCII::NAME       => 'ASCII',
            ISO88591::NAME    => 'ISO-8859-1',
            ISO88592::NAME    => 'ISO-8859-2',
            Windows1250::NAME => 'Windows 1250',
            Windows1251::NAME => 'Windows 1251',
            Windows1252::NAME => 'Windows 1252',
            CP437::NAME       => 'CP437',
            CP850::NAME       => 'CP850',
            MacRoman::NAME    => 'MacOS Roman',
        ];
    }

    /**
     * Identify a unicode encoding from the first bytes of the data.
     *
     * @param string $header
     *
     * @return string|null The name of the encoding, or null if there is no known signature.
     */
    private function detectByteSignature(string $header): ?string
    {
        // Checked in order.  Byte-order marks take priority.
        $signatures = [
            [UTF8::BYTE_ORDER_MARK, UTF8::NAME],
            [UTF16BE::BYTE_ORDER_MARK, UTF16BE::NAME],
            [UTF16LE::BYTE_ORDER_MARK, UTF16LE::NAME],
            // No byte-order mark: a null byte before/after a leading "0" indicates UTF-16.
            ["\x000", UTF16BE::NAME],
            ["0\x00", UTF16LE::NAME],
        ];

        foreach ($signatures as [$prefix, $encoding]) {
            if (str_starts_with($header, $prefix)) {
                return $encoding;
            }
        }

        return null;
    }

    /**
     * Normalise the whitespace, and extract the first record.
     *
     * @param string $header
     *
     * @return string|null Everything before the first subsequent level 0 line,
     *                     or null if there is no such line (the record is incomplete).
     */
    private function extractHeaderRecord(string $header): ?string
    {
        // Standardize line endings to simplify matching.
        $header = strtr(ltrim($header), ["\r\n" => "\n", "\n\r" => "\n", "\r" => "\n"]);

        // Remove leading/trailing spaces from each line and collapse runs of spaces.
        while (str_contains($header, "\n ") || str_contains($header, " \n") || str_contains($header, '  ')) {
            $header = strtr($header, ["\n " => "\n", " \n" => "\n", '  ' => ' ']);
        }

        // We need a complete header record
        $record = strstr($header, "\n0", true);

        return $record === false ? null : $record;
    }

    /**
     * Find the encoding denoted by the "1 CHAR" line of a header record.
     *
     * @param string $header A header record, as returned by extractHeaderRecord().
     *
     * @return string|null The name of the encoding, or null if no known label matches.
     */
    private function detectCharacterSet(string $header): ?string
    {
        $best_encoding = null;
        $best_length   = 0;

        foreach (self::CHARACTER_SETS as $pattern => $encoding) {
            if (str_contains($pattern, '/')) {
                [$char, $vers] = explode('/', $pattern);
                $regex = "\n1 CHAR " . preg_quote($char, '/') . "\n2 VERS " . preg_quote($vers, '/');
            } else {
                $regex = "\n1 CHAR(?:ACTER)? " . preg_quote($pattern, '/');
            }

            // Labels are matched as prefixes, so "IBM" also matches "IBM-DOS" and
            // "ASCII" also matches ASCII with a VERS qualifier.  Prefer the longest
            // (most specific) label, rather than the first one in the list.
            if (strlen($pattern) > $best_length && preg_match('/' . $regex . '/i', $header) === 1) {
                $best_encoding = $encoding;
                $best_length   = strlen($pattern);
            }
        }

        return $best_encoding;
    }
}


1			<?php
2
3			/**
4			* webtrees: online genealogy
5			* Copyright (C) 2021 webtrees development team
6			* This program is free software: you can redistribute it and/or modify
7			* it under the terms of the GNU General Public License as published by
8			* the Free Software Foundation, either version 3 of the License, or
9			* (at your option) any later version.
10			* This program is distributed in the hope that it will be useful,
11			* but WITHOUT ANY WARRANTY; without even the implied warranty of
12			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13			* GNU General Public License for more details.
14			* You should have received a copy of the GNU General Public License
15			* along with this program. If not, see <https://www.gnu.org/licenses/>.
16			*/
17
18			declare(strict_types=1);
19
20			namespace Fisharebest\Webtrees\Factories;
21
22			use DomainException;
23			use Fisharebest\Webtrees\Contracts\EncodingFactoryInterface;
24			use Fisharebest\Webtrees\Encodings\ANSEL;
25			use Fisharebest\Webtrees\Encodings\ASCII;
26			use Fisharebest\Webtrees\Encodings\CP437;
27			use Fisharebest\Webtrees\Encodings\CP850;
28			use Fisharebest\Webtrees\Encodings\EncodingInterface;
29			use Fisharebest\Webtrees\Encodings\ISO88591;
30			use Fisharebest\Webtrees\Encodings\ISO88592;
31			use Fisharebest\Webtrees\Encodings\MacRoman;
32			use Fisharebest\Webtrees\Encodings\UTF16BE;
33			use Fisharebest\Webtrees\Encodings\UTF16LE;
34			use Fisharebest\Webtrees\Encodings\UTF8;
35			use Fisharebest\Webtrees\Encodings\Windows1250;
36			use Fisharebest\Webtrees\Encodings\Windows1251;
37			use Fisharebest\Webtrees\Encodings\Windows1252;
38			use Fisharebest\Webtrees\Exceptions\InvalidGedcomEncodingException;
39
40			use function explode;
41			use function ltrim;
42			use function preg_match;
43			use function str_contains;
44			use function str_starts_with;
45			use function strstr;
46
47			/**
48			* Create an encoding object.
49			*/
50			class EncodingFactory implements EncodingFactoryInterface
51			{
52			/**
53			* Detect an encoding from a GEDCOM header record.
54			*
55			* @param string $header
56			*
57			* @return EncodingInterface|null
58			* @throws InvalidGedcomEncodingException
59			*/
60			public function detect(string $header): ?EncodingInterface
61			{
62			$utf_bom = [
63			'/^' . UTF8::BYTE_ORDER_MARK . '/' => UTF8::NAME,
64			'/^' . UTF16BE::BYTE_ORDER_MARK . '/' => UTF16BE::NAME,
65			'/^' . UTF16LE::BYTE_ORDER_MARK . '/' => UTF16LE::NAME,
66			];
67
68			foreach ($utf_bom as $regex => $encoding) {
69			if (preg_match($regex, $header) === 1) {
70			return $this->make($encoding);
71			}
72			}
73
74			$utf16 = [
75			"\x000" => UTF16BE::NAME,
76			"0\x00" => UTF16LE::NAME,
77			];
78
79			foreach ($utf16 as $start => $encoding) {
80			if (str_starts_with($header, $start)) {
81			return $this->make($encoding);
82			}
83			}
84
85			// Standardize whitespace to simplify matching.
86			$header = strtr(ltrim($header), ["\r\n" => "\n", "\n\r" => "\n", "\r" => "\n"]);
87
88			while (str_contains($header, "\n ") || str_contains($header, " \n") || str_contains($header, '  ')) {
89			$header = strtr($header, ["\n " => "\n", " \n" => "\n", '  ' => ' ']);
90			}
91
92			// We need a complete header record
93			$header = strstr($header, "\n0", true);
94
95			if ($header === false) {
96			return null;
97			}
98
99			// Some of these come from Tamura Jones, the rest from webtrees users.
100			$character_sets = [
101			'ASCII' => ASCII::NAME,
102			'ANSEL' => ANSEL::NAME,
103			'UTF-8' => UTF8::NAME,
104			'UNICODE' => UTF8::NAME, // If the null byte test failed, this can't be UTF16
105			'ASCII/MacOS Roman' => MacRoman::NAME, // GEDitCOM
106			'ASCII/MACINTOSH' => MacRoman::NAME, // MacFamilyTree < 8.3.5
107			'MACINTOSH' => MacRoman::NAME, // MacFamilyTree >= 8.3.5
108			'CP437' => CP437::NAME,
109			'IBMPC' => CP437::NAME,
110			'IBM' => CP437::NAME, // Reunion
111			'IBM-PC' => CP437::NAME, // CumberlandFamilyTree
112			'OEM' => CP437::NAME, // Généatique
113			'CP850' => CP850::NAME,
114			'MSDOS' => CP850::NAME,
115			'IBM-DOS' => CP850::NAME, // Reunion, EasyTree
116			'MS-DOS' => CP850::NAME, // AbrEdit FTM for Windows
117			'ANSI' => CP850::NAME,
118			'WINDOWS' => CP850::NAME, // Parentele
119			'IBM WINDOWS' => CP850::NAME, // EasyTree, Généalogie, Reunion, TribalPages
120			'IBM_WINDOWS' => CP850::NAME, // EasyTree
121			'CP1250' => Windows1250::NAME,
122			'windows-1250' => Windows1250::NAME, // GenoPro, Rodokmen Pro
123			'CP1251' => Windows1251::NAME,
124			'WINDOWS-1251' => Windows1251::NAME, // Rodovid
125			'CP1252' => Windows1252::NAME, // Lifelines
126			'ISO-8859-1' => ISO88591::NAME, // Cumberland Family Tree, Lifelines
127			'ISO8859-1' => ISO88591::NAME, // Scion Genealogist
128			'ISO8859' => ISO88591::NAME, // Genealogica Grafica
129			'LATIN-1' => ISO88591::NAME,
130			'LATIN1' => ISO88591::NAME, // GenealogyJ
131			'ISO-8859-2' => ISO88592::NAME,
132			'ISO8859-2' => ISO88592::NAME,
133			'LATIN-2' => ISO88592::NAME,
134			'LATIN2' => ISO88592::NAME,
135			];
136
137			foreach ($character_sets as $pattern => $encoding) {
138			if (str_contains($pattern, '/')) {
139			[$char, $vers] = explode('/', $pattern);
140			$regex = "\n1 CHAR " . $char . "\n2 VERS " . $vers;
141			} else {
142			$regex = "\n1 CHAR(?:ACTER)? " . $pattern;
143			}
144
145			if (preg_match("/" . $regex . "/i", $header) === 1) {
146			return $this->make($encoding);
147			}
148			}
149
150			if (preg_match('/1 CHAR (.+)/', $header, $match) === 1) {
151			$charset = $match[1];
152			} else {
153			$charset = '???';
154			}
155
156			throw new InvalidGedcomEncodingException($charset);
157			}
158
159			/**
160			* Create a named encoding.
161			*
162			* @param string $name
163			*
164			* @return EncodingInterface
165			* @throws DomainException
166			*/
167			public function make(string $name): EncodingInterface
168			{
169			switch ($name) {
170			case UTF8::NAME:
171			return new UTF8();
172
173			case UTF16BE::NAME:
174			return new UTF16BE();
175
176			case UTF16LE::NAME:
177			return new UTF16LE();
178
179			case ANSEL::NAME:
180			return new ANSEL();
181
182			case ASCII::NAME:
183			return new ASCII();
184
185			case CP437::NAME:
186			return new CP437();
187
188			case CP850::NAME:
189			return new CP850();
190
191			case Windows1250::NAME:
192			return new Windows1250();
193
194			case Windows1251::NAME:
195			return new Windows1251();
196
197			case Windows1252::NAME:
198			return new Windows1252();
199
200			case MacRoman::NAME:
201			return new MacRoman();
202
203			case ISO88591::NAME:
204			return new ISO88591();
205
206			case ISO88592::NAME:
207			return new ISO88592();
208
209			default:
210			throw new DomainException('Invalid encoding: ' . $name);
211			}
212			}
213
214			/**
215			* A list of supported encodings and their names.
216			*
217			* @return array<string,string>
218			*/
219			public function list(): array
220			{
221			return [
222			UTF8::NAME => 'UTF-8',
223			UTF16BE::NAME => 'UTF-16BE',
224			UTF16LE::NAME => 'UTF-16LE',
225			ANSEL::NAME => 'ANSEL',
226			ASCII::NAME => 'ASCII',
227			ISO88591::NAME => 'ISO-8859-1',
228			ISO88592::NAME => 'ISO-8859-2',
229			Windows1250::NAME => 'Windows 1250',
230			Windows1251::NAME => 'Windows 1251',
231			Windows1252::NAME => 'Windows 1252',
232			CP437::NAME => 'CP437',
233			CP850::NAME => 'CP850',
234			MacRoman::NAME => 'MacOS Roman',
235			];
236			}
237			}
238

fisharebest / webtrees

Push — dev ( 824cd4...d410ef )

EncodingFactory::detect() C

Complexity

Size

Duplication

Importance

How to fix Long Method Complexity

Long Method

Duplication Side-by-Side

Filter issues like