MarkdownRemover - Code Metrics - tigitz/php-spellchecker - Measure and Improve Code Quality continuously with Scrutinizer

MarkdownRemover A
last analyzed 2024-05-04 15:42 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	54
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
eloc	21
c	1
b	0
f	0
dl	0
loc	54
ccs	21
cts	21
cp	1
rs	10
wmc	1

1 Method

Rating	Name	Duplication	Size	Complexity
A	process()	0	52	1

<?php

declare(strict_types=1);

namespace PhpSpellcheck\TextProcessor;

use PhpSpellcheck\TextInterface;

/**
 * @experimental
 *
 * Removes markdown while trying to keep the original lines and offset positions of
 * characters in order to make spellchecking relevant.
 */
class MarkdownRemover implements TextProcessorInterface
{
    /**
     * Strips Markdown syntax from the content of the given text.
     *
     * The content is passed through a fixed sequence of regular-expression
     * replacements (GitHub Flavored Markdown rules first, then common Markdown
     * rules) and the result is handed to TextInterface::replaceContent().
     *
     * The order of the replacements is significant: later rules run on the
     * output of earlier ones (e.g. horizontal rules are handled before list
     * items, images before inline links).
     *
     * NOTE(review): \Safe\preg_replace presumably throws on PCRE failure
     * instead of returning null — confirm against the Safe library.
     */
    public function process(TextInterface $text): TextInterface
    {
        // Horizontal rules (stripListHeaders conflict with this rule, which is why it has been moved to the top).
        // The whole rule line is replaced by a line break followed by the trailing whitespace it captured.
        $output = \Safe\preg_replace('/^(-\s*?|\*\s*?|_\s*?){3,}(\s*)$/m', PHP_EOL . '$2', $text->getContent());

        // Github Flavored Markdown
        // Header underlines made of "=": drop the "=" run but keep the line break.
        // The replacement must be double-quoted: a single-quoted '\n' is the two
        // characters backslash + "n", which preg_replace would insert literally.
        $output = \Safe\preg_replace('/\n={2,}/', "\n", $output);
        /**
         * Fenced codeblocks: removes each "~~~" fence line, from the tildes through the line feed.
         *
         *@TODO parse programming language comments from codeblock instead of removing whole block
         */
        $output = \Safe\preg_replace('/~{3}.*\n/', '', $output);
        // Strikethrough: only the "~~" markers are removed, the struck text stays.
        $output = \Safe\preg_replace('/~~/', '', $output);

        // Common Markdown
        // Remove HTML tags
        $output = \Safe\preg_replace('/<[^>]*>/', '', $output);
        // Remove setext-style headers
        // NOTE(review): no "m" modifier here, so "^" and "$" anchor to the whole content, not to each line.
        $output = \Safe\preg_replace('/^[=\-]{2,}\s*$/', '', $output);
        // Remove footnotes?
        // NOTE(review): "$" without the "m" modifier only matches at the end of the content.
        $output = \Safe\preg_replace('/\[\^.+?\](\: .*?$)?/', '', $output);
        $output = \Safe\preg_replace('/\s{0,2}\[.*?\]: .*?$/', '', $output);
        // Remove images, keeping the alt text. Must run before the inline-link rule,
        // which would otherwise match the "[alt](src)" part and leave the "!" behind.
        $output = \Safe\preg_replace('/\!\[(.*?)\][\[\(].*?[\]\)]/', '$1', $output);
        // Remove inline links, keeping the link text
        $output = \Safe\preg_replace('/\[(.*?)\][\[\(].*?[\]\)]/', '$1', $output);
        // Remove blockquotes
        // NOTE(review): no "m" modifier, so only a marker at the very start of the content is removed.
        $output = \Safe\preg_replace('/^\s{0,3}>\s?/', '', $output);
        // Remove reference-style links?
        // NOTE(review): no "m" modifier, so this only matches when the definition is the whole content.
        $output = \Safe\preg_replace('/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/', '', $output);
        /**
         * Remove atx-style headers.
         *
         *@TODO find a way to merge the two regex below
         * remove ## Heading ##
         */
        $output = \Safe\preg_replace('/^#{1,6}\s+(.*)(\s+#{1,6})$/m', '$1', $output);
        // remove ## Heading
        $output = \Safe\preg_replace('/^#{1,6}\s+(.*)$/m', '$1', $output);
        // Remove emphasis (repeat the line to remove double emphasis)
        $output = \Safe\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output);
        $output = \Safe\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output);
        // Remove list items: drops the "*" bullet and the whitespace after it, keeps the indentation
        $output = \Safe\preg_replace('/^([^\S\r\n]*)\*\s/m', '$1', $output);
        // Remove code blocks: blanks out each backtick fence line, the line break itself is kept
        $output = \Safe\preg_replace('/^`{3,}(.*)*$/m', '', $output);
        // Remove inline code: drops the surrounding backticks, keeps the code text
        $output = \Safe\preg_replace('/`(.+?)`/', '$1', $output);

        return $text->replaceContent($output);
    }
}


1		<?php
2
3		declare(strict_types=1);
4
5		namespace PhpSpellcheck\TextProcessor;
6
7		use PhpSpellcheck\TextInterface;
8
9		/**
10		* @experimental
11		*
12		* Removes markdown while trying to keep original lines and offset position of
13		* characters in order to make spellchecking relevant.
14		*/
15		class MarkdownRemover implements TextProcessorInterface
16	25	{
17		public function process(TextInterface $text): TextInterface
18		{
19	25	// Horizontal rules (stripListHeaders conflict with this rule, which is why it has been moved to the top)
20		$output = \Safe\preg_replace('/^(-\s*?|\*\s*?|_\s*?){3,}(\s*)$/m', PHP_EOL . '$2', $text->getContent());
21
22		// Github Flavored Markdown
23	25	// Header
24		$output = \Safe\preg_replace('/\n={2,}/', '\n', $output);
25		/**
26	25	* Fenced codeblocks.
27		*
28	25	*@TODO parse programming language comments from codeblock instead of removing whole block
29		*/
30		$output = \Safe\preg_replace('/~{3}.*\n/', '', $output);
31	25	// Strikethrough
32		$output = \Safe\preg_replace('/~~/', '', $output);
33	25	// Common Markdown
34		// Remove HTML tags
35	25	$output = \Safe\preg_replace('/<[^>]*>/', '', $output);
36	25	// Remove setext-style headers
37		$output = \Safe\preg_replace('/^[=\-]{2,}\s*$/', '', $output);
38	25	// Remove footnotes?
39		$output = \Safe\preg_replace('/\[\^.+?\](\: .*?$)?/', '', $output);
40	25	$output = \Safe\preg_replace('/\s{0,2}\[.*?\]: .*?$/', '', $output);
41		// Remove images
42	25	$output = \Safe\preg_replace('/\!\[(.*?)\][\[\(].*?[\]\)]/', '$1', $output);
43		// Remove inline links
44	25	$output = \Safe\preg_replace('/\[(.*?)\][\[\(].*?[\]\)]/', '$1', $output);
45		// Remove blockquotes
46		$output = \Safe\preg_replace('/^\s{0,3}>\s?/', '', $output);
47		// Remove reference-style links?
48	25	$output = \Safe\preg_replace('/^\s{1,2}\[(.*?)\]: (\S+)( ".*?")?\s*$/', '', $output);
49		/**
50	25	* Remove atx-style headers.
51		*
52	25	*@TODO find a way to merge the two regex below
53	25	* remove ## Heading ##
54		*/
55	25	$output = \Safe\preg_replace('/^#{1,6}\s+(.*)(\s+#{1,6})$/m', '$1', $output);
56		// remove ## Heading
57	25	$output = \Safe\preg_replace('/^#{1,6}\s+(.*)$/m', '$1', $output);
58		// Remove emphasis (repeat the line to remove double emphasis)
59	25	$output = \Safe\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output);
60		$output = \Safe\preg_replace('/([\*_]{1,3})(\S.*?\S{0,1})\1/', '$2', $output);
61	25	// Remove list items
62		$output = \Safe\preg_replace('/^([^\S\r\n]*)\*\s/m', '$1', $output);
63		// Remove code blocks
64		$output = \Safe\preg_replace('/^`{3,}(.*)*$/m', '', $output);
65		// Remove inline code
66		$output = \Safe\preg_replace('/`(.+?)`/', '$1', $output);
67
68		return $text->replaceContent($output);
69		}
70		}
71

tigitz / php-spellchecker

MarkdownRemover A last analyzed 2024-05-04 15:42 UTC

Complexity

Size/Duplication

Test Coverage

Importance

1 Method

Duplication Side-by-Side

Filter issues like

MarkdownRemover A
last analyzed 2024-05-04 15:42 UTC