1
|
|
|
<?php |
2
|
|
|
namespace NirjharLo\Cgss\Lib\Analysis\Lib; |
3
|
|
|
|
4
|
|
|
// Stop executing this file unless the ABSPATH constant has already been defined.
if ( ! defined( 'ABSPATH' ) ) {
  exit;
}
5
|
|
|
|
6
|
|
|
|
7
|
|
|
use \DomXPath; |
|
|
|
|
8
|
|
|
|
9
|
|
|
/** |
10
|
|
|
* An object for treating text by using methods of xpath and generating array of words along with |
11
|
|
|
* text to html ratio in kb. |
12
|
|
|
* |
13
|
|
|
* 2 properties. |
14
|
|
|
* @property obj $dom document object model |
15
|
|
|
* @property string $body_size Size of complete HTML |
16
|
|
|
*/ |
17
|
|
|
class Text {

  /**
   * Document the text is extracted from. It is handed to DomXPath, so it has
   * to be set by the caller before execute() or xpath() runs.
   *
   * @var \DOMDocument
   */
  public $dom;

  /**
   * Size of the complete HTML, used as the denominator of the text-to-HTML
   * ratio. NOTE(review): presumably a byte count, since it is compared with
   * the byte length of the extracted text - confirm against the caller.
   *
   * @var int|float|string
   */
  public $body_size;

  /**
   * Cached result of xpath(): array with the keys 'content', 'ratio', 'size'.
   * Declared explicitly because creating it on the fly is a dynamic property,
   * which PHP 8.2 deprecates.
   *
   * @var array|null
   */
  public $xpath;

  /**
   * Cached result of text(), filled by execute().
   *
   * @var string|null
   */
  public $text;

  /**
   * Run the xPath extraction once and cache its results, so that words(),
   * ratio(), text() and size() do not have to repeat it.
   */
  public function execute() {

    $this->xpath = $this->xpath();
    $this->text  = $this->text();
  }

  /**
   * Split the cached text into individual words.
   *
   * @return array List of non-empty words, in document order.
   */
  public function words() {

    // Punctuation and symbols act as word separators.
    $separators = array( '.', ',', ':', '\'', '"', ')', '(', ']', '[', '}', '{', ';', '+', '-', '_', '*', '&', '^', '%', '$', '#', '@', '!', '~', '?', '>', '<', '/', '\\', '|' );
    $text       = str_replace( $separators, ' ', (string) $this->text );

    // Remove ASCII control characters (below 0x20). This is the only effect the
    // deprecated FILTER_SANITIZE_STRING + FILTER_FLAG_STRIP_LOW call still had
    // here, because quotes and angle brackets are already replaced above.
    $pure_text = (string) preg_replace( '/[\x00-\x1F]/', '', $text );

    $words = array();
    foreach ( explode( ' ', $pure_text ) as $token ) {
      $token = trim( $token );
      // Compare with '' explicitly: a truthiness test would discard the word "0".
      if ( '' !== $token ) {
        $words[] = $token;
      }
    }
    return $words;
  }

  /**
   * Text-to-HTML ratio in percent, as computed by xpath().
   *
   * @return float|null Null when execute() has not been called yet.
   */
  public function ratio() {

    return isset( $this->xpath['ratio'] ) ? $this->xpath['ratio'] : null;
  }

  /**
   * Text content from the cached xPath result with whitespace collapsed.
   *
   * @return string Empty string when no content has been extracted.
   */
  public function text() {

    $content = isset( $this->xpath['content'] ) ? $this->xpath['content'] : '';

    // Three passes, in the original order: blank-line boundaries first, then
    // runs of spaces/tabs, then runs of spaces/newlines.
    $content = preg_replace( '/\s*$^\s*/m', ' ', $content );
    $content = preg_replace( '/[ \t]+/', ' ', $content );
    return preg_replace( '/[ \n]+/', ' ', $content );
  }

  /**
   * Size of the extracted text in kilobytes, as computed by xpath().
   *
   * @return float|null Null when execute() has not been called yet.
   */
  public function size() {

    return isset( $this->xpath['size'] ) ? $this->xpath['size'] : null;
  }

  /**
   * Extract the visible text of <body> and compute its size and its ratio to
   * the size of the whole HTML.
   *
   * Side effect: every <script> and <style> element is removed from $this->dom.
   *
   * @return array {
   *   'content' => string lower-cased, trimmed body text,
   *   'ratio'   => float  text size / body_size in percent (0.0 if body_size is not a positive number),
   *   'size'    => float  text size in kilobytes, one decimal,
   * }
   */
  public function xpath() {

    $xpath = new DomXPath( $this->dom );

    // Scripts and styles are not readable text; drop them before reading the body.
    foreach ( array( '//script', '//style' ) as $expression ) {
      foreach ( $xpath->query( $expression ) as $node ) {
        $node->parentNode->removeChild( $node );
      }
    }

    // NOTE(review): the [text()] predicate only matches a <body> that has a
    // direct text-node child, so a body made up purely of elements yields an
    // empty result. Kept as-is; confirm whether '//body' was intended.
    $body      = $xpath->query( '//body[text()]' )->item( 0 );
    $only_text = $body ? strtolower( trim( $body->nodeValue ) ) : '';

    // '8bit' makes mb_strlen count bytes rather than characters.
    $text_size = mb_strlen( $only_text, '8bit' );

    // Guard the division: an unset, zero or non-numeric body size would
    // otherwise raise DivisionByZeroError / TypeError on PHP 8.
    $html_size = is_numeric( $this->body_size ) ? (float) $this->body_size : 0.0;
    $ht_ratio  = $html_size > 0 ? round( ( $text_size / $html_size ) * 100, 1 ) : 0.0;

    return array(
      'content' => $only_text,
      'ratio'   => $ht_ratio,
      'size'    => round( ( $text_size / 1024 ), 1 ),
    );
  }
}
109
|
|
|
?> |
|
|
|
|
110
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. `excluded_paths: ["lib/*"]`, you can move it to the dependency path list instead. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths