XmlTokenizer - Code Metrics - heiglandreas/Org_Heigl_Hyphenator - Measure and Improve Code Quality continuously with Scrutinizer

XmlTokenizer A
last analyzed 2021-04-29 19:35 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	70
Duplicated Lines	38.57 %

Coupling/Cohesion

Components	0
Dependencies	3

Importance

Changes

Metric	Value
wmc	10
lcom	0
cbo	3
dl	27
loc	70
rs	10
c	0
b	0
f	0

2 Methods

Rating	Name	Duplication	Size	Complexity
B	run()	27	27	6
A	tokenize()	0	18	4

How to fix Duplicated Code

<?php
/**
 * Copyright (c) 2008-2011 Andreas Heigl<[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * @category   Hyphenation
 * @package    Org_Heigl_Hyphenator
 * @subpackage Tokenizer
 * @author     Andreas Heigl <[email protected]>
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
 * @version    2.0.1
 * @link       http://github.com/heiglandreas/Hyphenator
 * @since      11.11.2011
 */

namespace Org\Heigl\Hyphenator\Tokenizer;

/**
 * Use XML/HTML tags to split any input into tokens
 *
 * Every tag found in the input becomes a NonWordToken, the text between the
 * tags becomes a WordToken.
 *
 * @category   Hyphenation
 * @package    Org_Heigl_Hyphenator
 * @subpackage Tokenizer
 * @author     Andreas Heigl <[email protected]>
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
 * @version    2.0.1
 * @link       http://github.com/heiglandreas/Hyphenator
 * @since      04.11.2011
 */
class XmlTokenizer implements Tokenizer
{
    /**
     * Split the given input into tokens using XML/HTML tags as splitter
     *
     * The input can be a string or a TokenRegistry. A string is tokenized
     * into a newly created TokenRegistry. For a TokenRegistry every contained
     * WordToken is tokenized again and handed to the registry's replace()
     * method together with the resulting tokens; all other tokens are left
     * untouched. In that case the very same registry instance is returned.
     *
     * @param string|TokenRegistry $input The
     * input to be tokenized
     *
     * @return TokenRegistry
     */
    public function run($input)
    {
        if ($input instanceof TokenRegistry) {
            // Tokenize a TokenRegistry
            foreach ($input as $token) {
                if (! $token instanceof WordToken) {
                    continue;
                }
                $newTokens = $this->tokenize($token->get());
                // The loose comparison is intentional: tokenize() always
                // creates fresh objects, so an identity check (===) could
                // never detect a token that was left unchanged.
                if ($newTokens == array($token)) {
                    continue;
                }
                $input->replace($token, $newTokens);
            }

            return $input;
        }

        // Tokenize a simple string.
        $registry = new TokenRegistry();
        foreach ($this->tokenize($input) as $item) {
            $registry->add($item);
        }

        return $registry;
    }

    /**
     * Split the given string into tokens using XML/HTML tags as delimiters.
     *
     * Each tag (anything matching "<...>") is placed in a NonWordToken and
     * everything between the tags is placed in a WordToken. Zero-length
     * fragments are dropped, any other fragment - including the text "0" -
     * is kept.
     *
     * @param string $input The String to tokenize
     *
     * @return Token[]
     */
    private function tokenize($input)
    {
        // PREG_SPLIT_NO_EMPTY removes only zero-length fragments. A plain
        // truthiness check would also throw away the text "0", as PHP treats
        // that string as false.
        $splits = preg_split(
            "/(<\/?[^>]+\/?>)/u",
            $input,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
        );

        if (false === $splits) {
            // preg_split() failed (e.g. the input is not valid UTF-8, which
            // the "u" modifier requires). Keep the input as one single word
            // instead of silently losing it.
            return array(new WordToken($input));
        }

        $tokens = array();
        foreach ($splits as $split) {
            // Fragments starting with "<" are treated as markup.
            if (0 === mb_strpos($split, '<')) {
                $tokens[] = new NonWordToken($split);
                continue;
            }
            $tokens[] = new WordToken($split);
        }

        return $tokens;
    }
}


1		<?php
2		/**
3		* Copyright (c) 2008-2011 Andreas Heigl<[email protected]>
4		*
5		* Permission is hereby granted, free of charge, to any person obtaining a copy
6		* of this software and associated documentation files (the "Software"), to deal
7		* in the Software without restriction, including without limitation the rights
8		* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9		* copies of the Software, and to permit persons to whom the Software is
10		* furnished to do so, subject to the following conditions:
11		*
12		* The above copyright notice and this permission notice shall be included in
13		* all copies or substantial portions of the Software.
14		*
15		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18		* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19		* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20		* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21		* THE SOFTWARE.
22		*
23		* @category Hyphenation
24		* @package Org_Heigl_Hyphenator
25		* @subpackage Tokenizer
26		* @author Andreas Heigl <[email protected]>
27		* @copyright 2008-2011 Andreas Heigl<[email protected]>
28		* @license http://www.opensource.org/licenses/mit-license.php MIT-License
29		* @version 2.0.1
30		* @link http://github.com/heiglandreas/Hyphenator
31		* @since 11.11.2011
32		*/
33
34		namespace Org\Heigl\Hyphenator\Tokenizer;
35
36		/**
37		* Use XML/HTML tags to split any input into tokens
38		*
39		* @category Hyphenation
40		* @package Org_Heigl_Hyphenator
41		* @subpackage Tokenizer
42		* @author Andreas Heigl <[email protected]>
43		* @copyright 2008-2011 Andreas Heigl<[email protected]>
44		* @license http://www.opensource.org/licenses/mit-license.php MIT-License
45		* @version 2.0.1
46		* @link http://github.com/heiglandreas/Hyphenator
47		* @since 04.11.2011
48		*/
49		class XmlTokenizer implements Tokenizer
50		{
51		/**
52		* Split the given input into tokens using Html-Elements as splitter
53		*
54		* The input can be a string or a tokenRegistry. If the input is a
55		* TokenRegistry, each item will be tokenized.
56		*
57		* @param string\|TokenRegistry $input The
58		* input to be tokenized
59		*
60		* @return TokenRegistry
61		*/
62	View Code Duplication	public function run($input)
63		{
64		if ($input instanceof TokenRegistry) {
65		// Tokenize a TokenRegistry
66		foreach ($input as $token) {
67		if (! $token instanceof WordToken) {
68		continue;
69		}
70		$newTokens = $this->tokenize($token->get());
71		if ($newTokens == array($token)) {
72		continue;
73		}
74		$input->replace($token, $newTokens);
75		}
76
77		return $input ;
78		}
79
80		// Tokenize a simple string.
81		$array = $this->tokenize($input);
82		$registry = new TokenRegistry();
83		foreach ($array as $item) {
84		$registry->add($item);
85		}
86
87		return $registry;
88		}
89
90		/**
91		* Split the given string into tokens using XML/HTML tags as delimiters.
92		*
93		* Each tag is placed in a NonWordToken and everything else is
94		* placed in a WordToken-Object
95		*
96		* @param string $input The String to tokenize
97		*
98		* @return Token[]
99		*/
100		private function tokenize($input)
101		{
102		$tokens = array();
103		$splits = preg_split("/(<\/?[^>]+\/?>)/u", $input, -1, PREG_SPLIT_DELIM_CAPTURE);
104
105		foreach ($splits as $split) {
106		if (! $split) {
107		continue;
108		}
109		if (0 === mb_strpos($split, '<')) {
110		$tokens[] = new NonWordToken($split);
111		continue;
112		}
113		$tokens[] = new WordToken($split);
114		}
115
116		return $tokens;
117		}
118		}
119

heiglandreas / Org_Heigl_Hyphenator

XmlTokenizer A last analyzed 2021-04-29 19:35 UTC

Complexity

Size/Duplication

Coupling/Cohesion

Importance

2 Methods

How to fix Duplicated Code

Duplicated Code

Duplication Side-by-Side

Filter issues like

XmlTokenizer A
last analyzed 2021-04-29 19:35 UTC