Wrap_URLs_Fix::__construct() - Code Metrics - mundschenk-at/php-typography - Measure and Improve Code Quality continuously with Scrutinizer

Wrap_URLs_Fix::__construct() A
last analyzed 2020-04-05 12:19 UTC

↳ Parent: Wrap_URLs_Fix

Complexity

Conditions	1
Paths	1

Size

Total Lines	17
Code Lines	4

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	5
CRAP Score	1

Importance

Changes

Metric	Value
eloc	4
dl	0
loc	17
rs	10
c	0
b	0
f	0
ccs	5
cts	5
cp	1
cc	1
nc	1
nop	2
crap	1

<?php
/**
 *  This file is part of PHP-Typography.
 *
 *  Copyright 2014-2017 Peter Putzer.
 *  Copyright 2009-2011 KINGdesk, LLC.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 *  ***
 *
 *  @package mundschenk-at/php-typography
 *  @license http://www.gnu.org/licenses/gpl-2.0.html
 */

namespace PHP_Typography\Fixes\Token_Fixes;

use PHP_Typography\Fixes\Token_Fix;
use PHP_Typography\Hyphenator\Cache;
use PHP_Typography\RE;
use PHP_Typography\Settings;
use PHP_Typography\Text_Parser;
use PHP_Typography\Text_Parser\Token;
use PHP_Typography\U;

/**
 * Wraps URL parts with zero-width spaces (if enabled).
 *
 * Tokens whose complete value looks like a URL (optional scheme, domain,
 * optional port and path) get zero-width spaces inserted so that long URLs
 * can be broken across lines.
 *
 * @author Peter Putzer <[email protected]>
 *
 * @since 5.0.0
 */
class Wrap_URLs_Fix extends Hyphenate_Fix {
	/**
	 * Valid URL schemes (regular expression fragment).
	 *
	 * @var string
	 */
	const URL_SCHEME = '(?:https?|ftps?|file|nfs|feed|itms|itpc)';

	/**
	 * Pattern used to split a domain at hyphens and dots. The delimiter is
	 * captured so that it can be kept when splitting with PREG_SPLIT_DELIM_CAPTURE.
	 *
	 * @var string
	 */
	const WRAP_URLS_DOMAIN_PARTS = '#(\-|\.)#';

	/**
	 * The URL matching regular expression.
	 *
	 * @var string
	 */
	protected $url_pattern;

	/**
	 * Creates a new fix instance.
	 *
	 * Builds the combined URL pattern from the URL_SCHEME constant and the
	 * top-level-domain pattern provided by RE::top_level_domains().
	 *
	 * @param Cache|null $cache           Optional. Default null.
	 * @param bool       $feed_compatible Optional. Default false.
	 */
	public function __construct( Cache $cache = null, $feed_compatible = false ) {
		parent::__construct( $cache, Token_Fix::OTHER, $feed_compatible );

		// Combined URL pattern.
		$this->url_pattern = '`(?:
			\A
			(?<schema>' . self::URL_SCHEME . ':\/\/)?	        # Subpattern 1: contains _http://_ if it exists
			(?<domain>											# Subpattern 2: contains subdomains.domain.tld
				(?:
					[a-z0-9]									# first chr of (sub)domain can not be a hyphen
					[a-z0-9\-]{0,61}							# middle chrs of (sub)domain may be a hyphen;
																# limit qty of middle chrs so total domain does not exceed 63 chrs
					[a-z0-9]									# last chr of (sub)domain can not be a hyphen
					\.											# dot separator
				)+
				(?:
					' . RE::top_level_domains() . '             # validates top level domain
				)
				(?:												# optional port numbers
					:
					(?:
						[1-5]?[0-9]{1,4} | 6[0-4][0-9]{3} | 65[0-4][0-9]{2} | 655[0-2][0-9] | 6553[0-5]
					)
				)?
			)
			(?<path>											# Subpattern 3: contains path following domain
				(?:
					\/											# marks nested directory
					[a-z0-9\"\$\-_\.\+!\*\'\(\),;\?:@=&\#]+		# valid characters within directory structure
				)*
				[\/]?											# trailing slash if any
			)
			\Z
		)`xi'; // required modifiers: x (multiline pattern) i (case insensitive).
	}

	/**
	 * Apply the tweak to a given textnode.
	 *
	 * Returns the tokens untouched unless both the URL_WRAP and the
	 * URL_MIN_AFTER_WRAP settings are non-empty. Otherwise every token whose
	 * whole value matches the URL pattern is replaced by a copy with
	 * zero-width spaces inserted after the scheme, before each domain
	 * delimiter ("-" or ".") and between the characters of the path.
	 *
	 * @param Token[]       $tokens   Required.
	 * @param Settings      $settings Required.
	 * @param bool          $is_title Optional. Default false.
	 * @param \DOMText|null $textnode Optional. Default null.
	 *
	 * @return Token[] An array of tokens.
	 */
	public function apply( array $tokens, Settings $settings, $is_title = false, \DOMText $textnode = null ) {
		if ( empty( $settings[ Settings::URL_WRAP ] ) || empty( $settings[ Settings::URL_MIN_AFTER_WRAP ] ) ) {
			return $tokens;
		}

		// The setting does not change while we iterate, so read it only once.
		$min_after_wrap = $settings[ Settings::URL_MIN_AFTER_WRAP ];

		// Test for and parse urls.
		foreach ( $tokens as $token_index => $text_token ) {
			if ( ! \preg_match( $this->url_pattern, $text_token->value, $url_match ) ) {
				continue;
			}

			// $url_match['schema'] holds "http://" (or an empty string if there is no scheme).
			// $url_match['domain'] holds "subdomains.domain.tld".
			// $url_match['path']   holds the path after the domain.
			$http = empty( $url_match['schema'] ) ? '' : $url_match['schema'] . U::ZERO_WIDTH_SPACE;

			// Because of PREG_SPLIT_DELIM_CAPTURE, the result alternates between
			// domain segments (even indices) and delimiters (odd indices).
			$domain_parts = \preg_split( self::WRAP_URLS_DOMAIN_PARTS, $url_match['domain'], -1, PREG_SPLIT_DELIM_CAPTURE );
			if ( false === $domain_parts ) {
				// Should not happen.
				continue;  // @codeCoverageIgnore
			}

			// This is a hack, but it works.
			// First, we hyphenate each part, we need it formatted like a group of words.
			$parsed_words_like = [];
			foreach ( $domain_parts as $key => $part ) {
				$parsed_words_like[ $key ] = new Token( $part, Token::OTHER );
			}

			// Do the hyphenation (with a zero-width space passed in place of a visible hyphen).
			$parsed_words_like = $this->do_hyphenate( $parsed_words_like, $settings, U::ZERO_WIDTH_SPACE );

			// Restore format: allow a line break before every delimiter. Delimiters
			// are identified by their (odd) position, not by their length, so that
			// one-character domain segments (e.g. the "b" in "a-b.com") are not
			// mistaken for delimiters. Like the mapping back into $domain_parts,
			// this relies on do_hyphenate() keeping the array keys intact.
			foreach ( $parsed_words_like as $key => $parsed_word ) {
				if ( 1 === $key % 2 ) {
					$domain_parts[ $key ] = U::ZERO_WIDTH_SPACE . $parsed_word->value;
				} else {
					$domain_parts[ $key ] = $parsed_word->value;
				}
			}

			// Lastly let's recombine.
			$domain = \implode( '', $domain_parts );

			// Break up the URL path to individual characters. The URL pattern only
			// admits single-byte characters in the path, so a byte-wise split is safe.
			$path_parts = \str_split( $url_match['path'], 1 );
			$path_count = \count( $path_parts );
			$path       = '';
			foreach ( $path_parts as $index => $path_part ) {
				// No break before the first character and none inside the
				// trailing characters protected by URL_MIN_AFTER_WRAP.
				if ( 0 === $index || $path_count - $index < $min_after_wrap ) {
					$path .= $path_part;
				} else {
					$path .= U::ZERO_WIDTH_SPACE . $path_part;
				}
			}

			$tokens[ $token_index ] = $text_token->with_value( $http . $domain . $path );
		}

		return $tokens;
	}
}


1		<?php
2		/**
3		* This file is part of PHP-Typography.
4		*
5		* Copyright 2014-2017 Peter Putzer.
6		* Copyright 2009-2011 KINGdesk, LLC.
7		*
8		* This program is free software; you can redistribute it and/or modify
9		* it under the terms of the GNU General Public License as published by
10		* the Free Software Foundation; either version 2 of the License, or
11		* (at your option) any later version.
12		*
13		* This program is distributed in the hope that it will be useful,
14		* but WITHOUT ANY WARRANTY; without even the implied warranty of
15		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16		* GNU General Public License for more details.
17		*
18		* You should have received a copy of the GNU General Public License along
19		* with this program; if not, write to the Free Software Foundation, Inc.,
20		* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21		*
22		* ***
23		*
24		* @package mundschenk-at/php-typography
25		* @license http://www.gnu.org/licenses/gpl-2.0.html
26		*/
27
28		namespace PHP_Typography\Fixes\Token_Fixes;
29
30		use PHP_Typography\Fixes\Token_Fix;
31		use PHP_Typography\Hyphenator\Cache;
32		use PHP_Typography\RE;
33		use PHP_Typography\Settings;
34		use PHP_Typography\Text_Parser;
35		use PHP_Typography\Text_Parser\Token;
36		use PHP_Typography\U;
37
38		/**
39		* Wraps URL parts zero-width spaces (if enabled).
40		*
41		* @author Peter Putzer <[email protected]>
42		*
43		* @since 5.0.0
44		*/
45		class Wrap_URLs_Fix extends Hyphenate_Fix {
46		// Valid URL schemes.
47		const URL_SCHEME = '(?:https?|ftps?|file|nfs|feed|itms|itpc)';
48
49		const WRAP_URLS_DOMAIN_PARTS = '#(\-|\.)#';
50
51		/**
52		* The URL matching regular expression.
53		*
54		* @var string
55		*/
56		protected $url_pattern;
57
58		/**
59		* Creates a new fix instance.
60		*
61		* @param Cache|null $cache Optional. Default null.
62		* @param bool $feed_compatible Optional. Default false.
63		*/
64	1	public function __construct( Cache $cache = null, $feed_compatible = false ) {
65	1	parent::__construct( $cache, Token_Fix::OTHER, $feed_compatible );
66
67		// Combined URL pattern.
68	1	$this->url_pattern = '`(?:
69		\A
70	1	(?<schema>' . self::URL_SCHEME . ':\/\/)? # Subpattern 1: contains _http://_ if it exists
71		(?<domain> # Subpattern 2: contains subdomains.domain.tld
72		(?:
73		[a-z0-9] # first chr of (sub)domain can not be a hyphen
74		[a-z0-9\-]{0,61} # middle chrs of (sub)domain may be a hyphen;
75		# limit qty of middle chrs so total domain does not exceed 63 chrs
76		[a-z0-9] # last chr of (sub)domain can not be a hyphen
77		\. # dot separator
78		)+
79		(?:
80	1	' . RE::top_level_domains() . ' # validates top level domain
81		)
82		(?: # optional port numbers
83		:
84		(?:
85		[1-5]?[0-9]{1,4} | 6[0-4][0-9]{3} | 65[0-4][0-9]{2} | 655[0-2][0-9] | 6553[0-5]
86		)
87		)?
88		)
89		(?<path> # Subpattern 3: contains path following domain
90		(?:
91		\/ # marks nested directory
92		[a-z0-9\"\$\-_\.\+!\*\'\(\),;\?:@=&\#]+ # valid characters within directory structure
93		)*
94		[\/]? # trailing slash if any
95		)
96		\Z
97		)`xi'; // required modifiers: x (multiline pattern) i (case insensitive).
98	1	}
99
100		/**
101		* Apply the tweak to a given textnode.
102		*
103		* @param Token[] $tokens Required.
104		* @param Settings $settings Required.
105		* @param bool $is_title Optional. Default false.
106		* @param \DOMText|null $textnode Optional. Default null.
107		*
108		* @return Token[] An array of tokens.
109		*/
110	10	public function apply( array $tokens, Settings $settings, $is_title = false, \DOMText $textnode = null ) {
111	10	if ( empty( $settings[ Settings::URL_WRAP ] ) || empty( $settings[ Settings::URL_MIN_AFTER_WRAP ] ) ) {
112	5	return $tokens;
113		}
114
115		// Test for and parse urls.
116	5	foreach ( $tokens as $token_index => $text_token ) {
117	5	if ( \preg_match( $this->url_pattern, $text_token->value, $url_match ) ) {
118
119		// $url_match['schema'] holds "http://".
120		// $url_match['domain'] holds "subdomains.domain.tld".
121		// $url_match['path'] holds the path after the domain.
122	5	$http = ( $url_match['schema'] ) ? $url_match[1] . U::ZERO_WIDTH_SPACE : '';
123
124	5	$domain_parts = \preg_split( self::WRAP_URLS_DOMAIN_PARTS, $url_match['domain'], -1, PREG_SPLIT_DELIM_CAPTURE );
125	5	if ( false === $domain_parts ) {
126		// Should not happen.
127		continue; // @codeCoverageIgnore
128		}
129
130		// This is a hack, but it works.
131		// First, we hyphenate each part, we need it formated like a group of words.
132	5	$parsed_words_like = [];
133	5	foreach ( $domain_parts as $key => $part ) {
134	5	$parsed_words_like[ $key ] = new Text_Parser\Token( $part, Text_Parser\Token::OTHER );
135		}
136
137		// Do the hyphenation.
138	5	$parsed_words_like = $this->do_hyphenate( $parsed_words_like, $settings, U::ZERO_WIDTH_SPACE );
139
140		// Restore format.
141	5	foreach ( $parsed_words_like as $key => $parsed_word ) {
142	5	$value = $parsed_word->value;
143
144	5	if ( $key > 0 && 1 === \strlen( $value ) ) {
145	5	$domain_parts[ $key ] = U::ZERO_WIDTH_SPACE . $value;
146		} else {
147	5	$domain_parts[ $key ] = $value;
148		}
149		}
150
151		// Lastly let's recombine.
152	5	$domain = \implode( '', $domain_parts );
153
154		// Break up the URL path to individual characters.
155	5	$path_parts = \str_split( $url_match['path'], 1 );
156	5	$path_count = \count( $path_parts );
157	5	$path = '';
158	5	foreach ( $path_parts as $index => $path_part ) {
159	5	if ( 0 === $index || $path_count - $index < $settings[ Settings::URL_MIN_AFTER_WRAP ] ) {
160	5	$path .= $path_part;
161		} else {
162	1	$path .= U::ZERO_WIDTH_SPACE . $path_part;
163		}
164		}
165
166	5	$tokens[ $token_index ] = $text_token->with_value( $http . $domain . $path );
167		}
168		}
169
170	5	return $tokens;
171		}
172		}
173

Scrutinizer GitHub App not installed

GitHub Access Token became invalid

Wrap_URLs_Fix::__construct() A
last analyzed 2020-04-05 12:19 UTC

Complexity

Size

Duplication

Code Coverage

Importance

mundschenk-at / php-typography

Scrutinizer GitHub App not installed

GitHub Access Token became invalid

Wrap_URLs_Fix::__construct() A last analyzed 2020-04-05 12:19 UTC

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like

Wrap_URLs_Fix::__construct() A
last analyzed 2020-04-05 12:19 UTC