XmlTokenizer::tokenize() - Code Metrics - Inspection of "Set PHP 7.2 as default version" - heiglandreas/Org_Heigl_Hyphenator - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#37)

by Andreas

created 2018-02-09 17:09 UTC

XmlTokenizer::tokenize() A

↳ Parent: XmlTokenizer

Complexity

Conditions	4
Paths	4

Size

Total Lines	18
Code Lines	11

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
c	0
b	0
f	0
dl	0
loc	18
rs	9.2
cc	4
eloc	11
nc	4
nop	1

<?php
/**
 * Copyright (c) 2008-2011 Andreas Heigl<[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * @category   Hyphenation
 * @package    Org_Heigl_Hyphenator
 * @subpackage Tokenizer
 * @author     Andreas Heigl <[email protected]>
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
 * @version    2.0.1
 * @link       http://github.com/heiglandreas/Hyphenator
 * @since      11.11.2011
 */

namespace Org\Heigl\Hyphenator\Tokenizer;

/**
 * Use XML/HTML tags to split any input into tokens
 *
 * @category   Hyphenation
 * @package    Org_Heigl_Hyphenator
 * @subpackage Tokenizer
 * @author     Andreas Heigl <[email protected]>
 * @copyright  2008-2011 Andreas Heigl<[email protected]>
 * @license    http://www.opensource.org/licenses/mit-license.php MIT-License
 * @version    2.0.1
 * @link       http://github.com/heiglandreas/Hyphenator
 * @since      04.11.2011
 */
class XmlTokenizer implements Tokenizer
{
    /**
     * Split the given input into tokens using XML/HTML tags as splitter
     *
     * The input can be a string or a TokenRegistry. If the input is a
     * TokenRegistry, each WordToken it contains is tokenized again and
     * replaced in place by the resulting tokens; all other tokens are left
     * untouched and the same registry instance is returned.
     *
     * @param string|\Org\Heigl\Hyphenator\Tokenizer\TokenRegistry $input The
     * input to be tokenized
     *
     * @return \Org\Heigl\Hyphenator\Tokenizer\TokenRegistry
     */
    public function run($input)
    {
        if ($input instanceof TokenRegistry) {
            foreach ($input as $token) {
                // Anything that is not a WordToken is skipped.
                if (! $token instanceof WordToken) {
                    continue;
                }
                $newTokens = $this->tokenize($token->get());
                // Loose comparison on purpose: "==" compares objects by
                // class and property values, so an equal result means the
                // token contained no tag and nothing has to be replaced.
                if ($newTokens == array($token)) {
                    continue;
                }
                $input->replace($token, $newTokens);
            }

            return $input;
        }

        // Tokenize a simple string into a fresh registry.
        $registry = new TokenRegistry();
        foreach ($this->tokenize($input) as $item) {
            $registry->add($item);
        }

        return $registry;
    }

    /**
     * Split the given string into tokens using XML/HTML tags.
     *
     * Every fragment starting with "<" is placed in a NonWordToken and
     * everything else is placed in a WordToken. Empty fragments are dropped.
     *
     * @param string $input The String to tokenize
     *
     * @return \Org\Heigl\Hyphenator\Tokenizer\Token[]
     */
    private function tokenize($input)
    {
        $splits = preg_split("/(<\/?[^>]+\/?>)/u", $input, -1, PREG_SPLIT_DELIM_CAPTURE);

        if (! is_array($splits)) {
            // preg_split() fails (e.g. on malformed UTF-8, because of the
            // "u" modifier). Keep the text as one word instead of losing it.
            if (is_string($input) && '' !== $input) {
                return array(new WordToken($input));
            }

            return array();
        }

        $tokens = array();
        foreach ($splits as $split) {
            // Compare against the empty string explicitly: a plain
            // truthiness check would also discard the text "0".
            if ('' === $split) {
                continue;
            }
            if (0 === mb_strpos($split, '<')) {
                $tokens[] = new NonWordToken($split);
                continue;
            }
            $tokens[] = new WordToken($split);
        }

        return $tokens;
    }
}


1			<?php
2			/**
3			* Copyright (c) 2008-2011 Andreas Heigl<[email protected]>
4			*
5			* Permission is hereby granted, free of charge, to any person obtaining a copy
6			* of this software and associated documentation files (the "Software"), to deal
7			* in the Software without restriction, including without limitation the rights
8			* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9			* copies of the Software, and to permit persons to whom the Software is
10			* furnished to do so, subject to the following conditions:
11			*
12			* The above copyright notice and this permission notice shall be included in
13			* all copies or substantial portions of the Software.
14			*
15			* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16			* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17			* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18			* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19			* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20			* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21			* THE SOFTWARE.
22			*
23			* @category Hyphenation
24			* @package Org_Heigl_Hyphenator
25			* @subpackage Tokenizer
26			* @author Andreas Heigl <[email protected]>
27			* @copyright 2008-2011 Andreas Heigl<[email protected]>
28			* @license http://www.opensource.org/licenses/mit-license.php MIT-License
29			* @version 2.0.1
30			* @link http://github.com/heiglandreas/Hyphenator
31			* @since 11.11.2011
32			*/
33
34			namespace Org\Heigl\Hyphenator\Tokenizer;
35
36			/**
37			* Use Whitespace to split any input into tokens
38			*
39			* @category Hyphenation
40			* @package Org_Heigl_Hyphenator
41			* @subpackage Tokenizer
42			* @author Andreas Heigl <[email protected]>
43			* @copyright 2008-2011 Andreas Heigl<[email protected]>
44			* @license http://www.opensource.org/licenses/mit-license.php MIT-License
45			* @version 2.0.1
46			* @link http://github.com/heiglandreas/Hyphenator
47			* @since 04.11.2011
48			*/
49			class XmlTokenizer implements Tokenizer
50			{
51			/**
52			* Split the given input into tokens using Html-Elements as splitter
53			*
54			* The input can be a string or a tokenRegistry. If the input is a
55			* TokenRegistry, each item will be tokenized.
56			*
57			* @param string\|\Org\Heigl\Hyphenator\Tokenizer\TokenRegistry $input The
58			* input to be tokenized
59			*
60			* @return \Org\Heigl\Hyphenator\Tokenizer\TokenRegistry
61			*/
62			public function run($input)
63			{
64			if ($input instanceof TokenRegistry) {
65			// Tokenize a TokenRegistry
66			foreach ($input as $token) {
67			if (! $token instanceof WordToken) {
68			continue;
69			}
70			$newTokens = $this->tokenize($token->get());
71			if ($newTokens == array($token)) {
72			continue;
73			}
74			$input->replace($token, $newTokens);
75			}
76
77			return $input ;
78			}
79
80			// Tokenize a simple string.
81			$array = $this->tokenize($input);
82			$registry = new TokenRegistry();
83			foreach ($array as $item) {
84			$registry->add($item);
85			}
86
87			return $registry;
88			}
89
90			/**
91			* Split the given string into tokens using whitespace.
92			*
93			* Each whitespace is placed in a WhitespaceToken and everything else is
94			* placed in a WordToken-Object
95			*
96			* @param string $input The String to tokenize
97			*
98			* @return Token
99			*/
100			private function tokenize($input)
101			{
102			$tokens = array();
103			$splits = preg_split("/(<\/?[^>]+\/?>)/u", $input, -1, PREG_SPLIT_DELIM_CAPTURE);
104
105			foreach ($splits as $split) {
106			if (! $split) {
107			continue;
108			}
109			if (0 === mb_strpos($split, '<')) {
110			$tokens[] = new NonWordToken($split);
111			continue;
112			}
113			$tokens[] = new WordToken($split);
114			}
115
116			return $tokens;
117			}
118			}
119

heiglandreas / Org_Heigl_Hyphenator

Pull Request — master (#37)

XmlTokenizer::tokenize() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like