Passed
Push — master ( d4b2aa...fef639 )
by Burak
01:41
created

Analyze::NofollowTag()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 16
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 16
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 8
nc 3
nop 0
1
<?php
2
namespace SEOCheckup;
3
4
/**
5
 * @package seo-checkup
6
 * @author  Burak <[email protected]>
7
 */
8
9
use DOMDocument;
10
use DOMXPath;
11
12
class Analyze extends PreRequirements
13
{
14
15
    /**
16
     * @var array $data
17
     */
18
    private $data;
19
20
21
    /**
22
     * @var Helpers $helpers
23
     */
24
    private $helpers;
25
26
    /**
27
     * @var DOMDocument $dom
28
     */
29
    private $dom;
30
31
    /**
     * Requests the given URL and stores the response for the analysers.
     *
     * The page is fetched through $this->Request() (not defined in this class;
     * presumably inherited from PreRequirements). The URL, its parse_url()
     * breakdown, the HTTP status code, the response headers and the body are
     * kept in $this->data, and a Helpers instance is built from that data.
     *
     * @param string $url Address of the page to analyse
     * @return $this
     */
    public function __construct($url)
    {
        $response = $this->Request($url);

        $snapshot               = array();
        $snapshot['url']        = $url;
        $snapshot['parsed_url'] = parse_url($url);
        $snapshot['status']     = $response->getStatusCode();
        $snapshot['headers']    = $response->getHeaders();
        $snapshot['content']    = $response->getBody()->getContents();

        $this->data    = $snapshot;
        $this->helpers = new Helpers($this->data);

        return $this;
    }
53
54
    /**
     * Creates a fresh DOMDocument and remembers it on the instance.
     *
     * libxml is switched to internal error handling first, so markup problems
     * are collected by libxml instead of being raised as PHP warnings.
     *
     * @return DOMDocument The newly created, still empty document
     */
    private function DOMDocument()
    {
        libxml_use_internal_errors(true);

        $document  = new DOMDocument();
        $this->dom = $document;

        return $document;
    }
67
68
    /**
     * Builds an XPath evaluator for the document currently held in $this->dom.
     *
     * @return DOMXPath
     */
    private function DOMXPath()
    {
        $xpath = new DOMXPath($this->dom);

        return $xpath;
    }
77
78
    /**
     * Wraps an analyser result in the common response envelope.
     *
     * The service label is derived from the method name by putting a space in
     * front of every upper-case ASCII letter (so "BrokenLinks" becomes
     * " Broken Links", including the leading space).
     *
     * @param mixed  $return  Analyser-specific payload
     * @param string $service Name of the analyser method that produced it
     * @return array Envelope with url, status, headers, service, time and data
     */
    private function Output($return, $service)
    {
        $label = preg_replace("([A-Z])", " $0", $service);

        $envelope              = array();
        $envelope['url']       = $this->data['url'];
        $envelope['status']    = $this->data['status'];
        $envelope['headers']   = $this->data['headers'];
        $envelope['service']   = $label;
        $envelope['time']      = time();
        $envelope['data']      = $return;

        return $envelope;
    }
96
97
    /**
     * Requests the links found on the page and groups them by HTTP status.
     *
     * Links come from Helpers::GetLinks(). A status whose first digit is above
     * 3 is filed under "errors", except 999, which is filed under "passed"
     * together with every other status.
     *
     * @return array Envelope whose data holds 'links' and 'scanned'
     */
    public function BrokenLinks()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $links   = $this->helpers->GetLinks($document);
        $report  = ['errors' => [], 'passed' => []];
        $visited = 0;

        foreach ($links as $link) {
            // The counter is bumped before the check, so the loop stops when it
            // reaches the 25th link: at most 24 links are actually requested.
            if (++$visited >= 25) {
                break;
            }

            $status = $this->Request($link)->getStatusCode();
            $bucket = (substr($status, 0, 1) > 3 && $status != 999) ? 'errors' : 'passed';

            $report[$bucket]["HTTP {$status}"][] = $link;
        }

        $payload = [
            'links'   => $links,
            'scanned' => $report
        ];

        return $this->Output($payload, __FUNCTION__);
    }
130
131
    /**
     * Looks for hints of caching in the response headers and HTML comments.
     *
     * Only header values (not header names) are searched for the word "cache",
     * case-insensitively. HTML comments containing "cache" are returned
     * re-wrapped as '<!-- ... //-->' with their text trimmed.
     *
     * @return array Envelope whose data holds 'headers' and 'html' lists
     */
    public function Cache()
    {
        $found = ['headers' => [], 'html' => []];

        foreach ($this->data['headers'] as $values) {
            foreach ($values as $value) {
                if (strpos(mb_strtolower($value), 'cache') === false) {
                    continue;
                }

                $found['headers'][] = $value;
            }
        }

        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);
        $comments = $this->DOMXPath()->query('//comment()');

        foreach ($comments as $comment) {
            $text = $comment->textContent;

            if (strpos(mb_strtolower($text), 'cache') !== false) {
                $found['html'][] = '<!-- '.trim($text).' //-->';
            }
        }

        return $this->Output($found, __FUNCTION__);
    }
164
165
    /**
     * Reports whether the page declares a canonical link.
     *
     * The values returned by Helpers::GetAttributes($dom, 'link', 'rel') are
     * scanned (presumably the rel attribute of each <link> tag; confirm
     * against Helpers) and one 'canonical' entry is emitted per match.
     *
     * @return array Envelope whose data is a list of 'canonical' strings
     */
    public function CanonicalTag()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $matches = array();

        foreach ($this->helpers->GetAttributes($document, 'link', 'rel') as $rel) {
            if ($rel != 'canonical') {
                continue;
            }

            $matches[] = $rel;
        }

        return $this->Output($matches, __FUNCTION__);
    }
187
188
    /**
     * Determines the character set announced in the Content-Type header.
     *
     * The header name is matched case-insensitively and the charset parameter
     * is extracted with a regular expression, so a Content-Type without a
     * charset (e.g. "text/html") yields an empty string instead of an
     * undefined-offset error. Surrounding double quotes are stripped.
     *
     * @return array Envelope whose data is the charset name, or '' if none
     */
    public function CharacterSet()
    {
        $output = '';

        foreach ($this->data['headers'] as $name => $values) {
            if (strcasecmp((string) $name, 'Content-Type') !== 0) {
                continue;
            }

            foreach ((array) $values as $value) {
                // Matches `; charset=utf-8` as well as `; charset="utf-8"`.
                if (preg_match('/;\s*charset\s*=\s*"?([^";\s]+)/i', (string) $value, $match) === 1) {
                    $output = $match[1];
                    break 2;
                }
            }
        }

        return $this->Output($output, __FUNCTION__);
    }
207
208
    /**
     * Calculates the ratio of visible text to total page size.
     *
     * Sizes are character counts. <script> elements are removed before the
     * remaining markup is stripped of tags. An empty response body is
     * reported as all zeros instead of dividing by zero.
     *
     * @return array Envelope whose data holds page_size, code_size,
     *               content_size, content and percentage
     */
    public function CodeContent()
    {
        $page_size = mb_strlen($this->data['content'], 'utf8');

        if ($page_size === 0) {
            // Nothing to parse: loadHTML() rejects empty input and the ratio
            // below would divide by zero.
            return $this->Output(array(
                'page_size'     => 0,
                'code_size'     => 0,
                'content_size'  => 0,
                'content'       => '',
                'percentage'    => '0%'
            ), __FUNCTION__);
        }

        $dom = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        // Copy the live node list first; removing nodes while iterating the
        // list itself would skip elements.
        $scripts = iterator_to_array($dom->getElementsByTagName('script'), false);

        foreach ($scripts as $script) {
            $script->parentNode->removeChild($script);
        }

        $text         = strip_tags($dom->saveHTML());
        $content_size = mb_strlen($text, 'utf8');
        $rate         = round($content_size / $page_size * 100);
        $output       = array(
            'page_size'     => $page_size,
            'code_size'     => ($page_size - $content_size),
            'content_size'  => $content_size,
            'content'       => $this->helpers->Whitespace($text),
            'percentage'    => "$rate%"
        );

        return $this->Output($output, __FUNCTION__);
    }
245
246
    /**
     * Counts occurrences of deprecated HTML elements on the page.
     *
     * Only tags that occur at least once appear in the result.
     *
     * @return array Envelope whose data maps tag name => number of occurrences
     */
    public function DeprecatedHTML()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $deprecated = [
            'acronym', 'applet', 'basefont', 'big', 'center',
            'dir', 'font', 'frame', 'frameset', 'isindex',
            'noframes', 's', 'strike', 'tt', 'u'
        ];

        $counts = [];

        foreach ($deprecated as $name) {
            $occurrences = $document->getElementsByTagName($name)->length;

            if ($occurrences > 0) {
                $counts[$name] = $occurrences;
            }
        }

        return $this->Output($counts, __FUNCTION__);
    }
288
289
    /**
     * Measures the length of the host name without its last label.
     *
     * The host is split on dots, the final label (e.g. "com") is dropped and
     * the remaining labels are joined again before the length is taken, so
     * "www.example.com" is measured as "www.example".
     *
     * @return array Envelope whose data is the length in bytes
     */
    public function DomainLength()
    {
        $labels      = explode('.', $this->data['parsed_url']['host']);
        $withoutLast = array_slice($labels, 0, -1);
        $length      = strlen(implode('.', $withoutLast));

        return $this->Output($length, __FUNCTION__);
    }
304
305
    /**
     * Looks for a favicon.
     *
     * First probes /favicon.ico on the analysed host. If that does not answer
     * with HTTP 200, the first <link rel="icon"> / <link rel="shortcut icon">
     * with a non-empty href is resolved and probed instead.
     *
     * Relative hrefs are tried against the site root and then against the
     * analysed URL itself. The analysed URL replaces the former
     * $_GET['value'] fallback, which tied this class to a specific request
     * parameter. When the page declares no icon at all, no further requests
     * are made and an empty string is returned.
     *
     * @return array Envelope whose data is the favicon URL, or '' if none
     */
    public function Favicon()
    {
        $origin = "{$this->data['parsed_url']['scheme']}://{$this->data['parsed_url']['host']}";
        $ico    = $origin . '/favicon.ico';

        if ($this->Request($ico)->getStatusCode() === 200) {
            return $this->Output($ico, __FUNCTION__);
        }

        $dom = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $fav = '';

        foreach ($dom->getElementsByTagName('link') as $tag) {
            $rel  = strtolower(trim($tag->getAttribute('rel')));
            $href = trim($tag->getAttribute('href'));

            if (($rel === 'shortcut icon' || $rel === 'icon') && $href !== '') {
                $fav = $href;
                break;
            }
        }

        if ($fav === '') {
            // Without a declared icon there is nothing left to probe; requesting
            // "<origin>/" here would wrongly report the home page as the icon.
            return $this->Output('', __FUNCTION__);
        }

        if (filter_var($fav, FILTER_VALIDATE_URL) !== false) {
            $candidates = array($fav);
        } elseif (strpos($fav, '//') === 0) {
            // Protocol-relative href: reuse the scheme of the analysed page.
            $candidates = array($this->data['parsed_url']['scheme'] . ':' . $fav);
        } else {
            $relative   = ltrim($fav, '/');
            $candidates = array(
                $origin . '/' . $relative,
                rtrim($this->data['url'], '/') . '/' . $relative
            );
        }

        foreach (array_unique($candidates) as $candidate) {
            if ($this->Request($candidate)->getStatusCode() == 200) {
                return $this->Output($candidate, __FUNCTION__);
            }
        }

        return $this->Output('', __FUNCTION__);
    }
352
353
    /**
     * Counts the <frameset> and <frame> elements on the page.
     *
     * @return array Envelope whose data holds the 'frameset' and 'frame' counts
     */
    public function Frameset()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $framesets = $document->getElementsByTagName('frameset')->length;
        $frames    = $document->getElementsByTagName('frame')->length;

        return $this->Output([
            'frameset' => $framesets,
            'frame'    => $frames
        ], __FUNCTION__);
    }
381
382
    /**
     * Finds a Google Analytics (Universal Analytics) tracking ID.
     *
     * The text of every inline <script> and the downloaded body of every
     * external <script src> are concatenated and searched for the first
     * "UA-<5+ digits>-<digits>" token. External script URLs are made absolute
     * against the analysed page before being requested.
     *
     * @return array Envelope whose data is the tracking ID, or null when the
     *               page contains none
     */
    public function GoogleAnalytics()
    {
        $dom = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $page   = $this->data['parsed_url'];
        $script = '';

        foreach ($dom->getElementsByTagName('script') as $tag) {
            $src = $tag->getAttribute('src');

            if (!$src) {
                $script .= $tag->nodeValue;
                continue;
            }

            if (0 === strpos($src, '//')) {
                // Protocol-relative URL: borrow the page's scheme.
                $href = $page['scheme'] . ':' . $src;
            } elseif (0 !== strpos($src, 'http')) {
                // Relative URL: resolve against the site root, keeping any
                // credentials and port from the analysed URL.
                $href = $page['scheme'] . '://';

                if (isset($page['user']) && isset($page['pass'])) {
                    $href .= $page['user'] . ':' . $page['pass'] . '@';
                }

                $href .= $page['host'];

                if (isset($page['port'])) {
                    $href .= ':' . $page['port'];
                }

                $href .= '/' . ltrim($src, '/');
            } else {
                $href = $src;
            }

            $script .= $this->Request($href)->getBody()->getContents();
        }

        // Only the first ID is reported; guard against "no match" so a page
        // without Analytics yields null instead of an undefined-offset error.
        $ua_id = null;

        if (preg_match("/UA-[0-9]{5,}-[0-9]{1,}/", $script, $match) === 1) {
            $ua_id = $match[0];
        }

        return $this->Output($ua_id, __FUNCTION__);
    }
435
436
    /**
     * Collects the text of every <h1> element on the page.
     *
     * @return array Envelope whose data is the list of h1 texts in document order
     */
    public function Header1()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $headings = iterator_to_array($document->getElementsByTagName('h1'), false);
        $texts    = array_map(function ($heading) {
            return $heading->nodeValue;
        }, $headings);

        return $this->Output($texts, __FUNCTION__);
    }
455
456
    /**
     * Collects the text of every <h2> element on the page.
     *
     * @return array Envelope whose data is the list of h2 texts in document order
     */
    public function Header2()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $nodes = $document->getElementsByTagName('h2');
        $texts = [];

        for ($index = 0; $index < $nodes->length; $index++) {
            $texts[] = $nodes->item($index)->nodeValue;
        }

        return $this->Output($texts, __FUNCTION__);
    }
475
476
    /**
     * Reports whether the analysed URL uses the https scheme.
     *
     * @return array Envelope whose data is true for https, false otherwise
     */
    public function Https()
    {
        $scheme   = $this->data['parsed_url']['scheme'];
        $isSecure = ($scheme === 'https');

        return $this->Output($isSecure, __FUNCTION__);
    }
487
488
    /**
     * Lists the page's images and flags those without alternative text.
     *
     * An image counts as "without alt" when its alt attribute is missing or
     * empty; in that case its src is added to the 'without_alt' list.
     *
     * @return array Envelope whose data holds 'images' (src/alt pairs) and
     *               'without_alt' (src values)
     */
    public function ImageAlt()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $all     = [];
        $missing = [];

        foreach ($document->getElementsByTagName('img') as $image) {
            $entry = [
                'src' => $image->getAttribute('src'),
                'alt' => $image->getAttribute('alt')
            ];

            $all[] = $entry;

            if ($entry['alt'] === '') {
                $missing[] = $entry['src'];
            }
        }

        return $this->Output([
            'images'        => $all,
            'without_alt'   => $missing
        ], __FUNCTION__);
    }
527
528
    /**
     * Collects the page's links that point at the analysed host.
     *
     * Every <a href> is normalised to an absolute "scheme://host/path?query"
     * URL, borrowing the scheme and host of the analysed page when the href
     * omits them. The following are skipped: empty hrefs, pure fragment links
     * ("#..."), hrefs parse_url() cannot parse, non-HTTP(S) schemes such as
     * mailto:, tel: or javascript:, and links to other hosts.
     *
     * Relative paths are anchored at the site root (not resolved against the
     * current directory); ports and fragments are not carried over.
     *
     * @return array Envelope whose data is a sequentially indexed list of URLs
     */
    public function InboundLinks()
    {
        $dom = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $page   = $this->data['parsed_url'];
        $output = array();

        foreach ($dom->getElementsByTagName('a') as $item) {
            $href = trim($item->getAttribute('href'));

            if ($href === '' || strpos($href, '#') === 0) {
                continue;
            }

            $link = parse_url($href);

            if ($link === false) {
                // Seriously malformed URL; nothing sensible can be built from it.
                continue;
            }

            if (isset($link['scheme']) && !in_array(strtolower($link['scheme']), array('http', 'https'), true)) {
                continue;
            }

            // Host names are case-insensitive.
            if (isset($link['host']) && strcasecmp($link['host'], $page['host']) !== 0) {
                continue;
            }

            $scheme = isset($link['scheme']) ? $link['scheme'] : $page['scheme'];
            $host   = isset($link['host']) ? $link['host'] : $page['host'];
            $path   = isset($link['path']) ? $link['path'] : '';
            $query  = isset($link['query']) ? '?' . $link['query'] : '';

            // Any path lacking a leading slash needs one, including relative
            // paths like "a/b" that already contain a slash further in.
            if ($path !== '' && strpos($path, '/') !== 0) {
                $path = '/' . $path;
            }

            $output[] = $scheme . '://' . $host . $path . $query;
        }

        return $this->Output($output, __FUNCTION__);
    }
590
591
    /**
     * Collects the contents of the page's <style> elements.
     *
     * Each block's text is passed through Helpers::Whitespace() before being
     * returned (presumably to collapse whitespace; confirm against Helpers).
     *
     * @return array Envelope whose data is the list of processed style texts
     */
    public function InlineCss()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $styles = [];

        foreach ($document->getElementsByTagName('style') as $style) {
            $css      = $style->textContent;
            $styles[] = $this->helpers->Whitespace($css);
        }

        return $this->Output($styles, __FUNCTION__);
    }
611
612
    /**
     * Reads the page's meta description.
     *
     * The name attribute is matched case-insensitively. If several non-empty
     * description tags exist, the last one in document order wins.
     *
     * @return array Envelope whose data is the description, or '' if none
     */
    public function MetaDescription()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $description = '';

        foreach ($document->getElementsByTagName('meta') as $meta) {
            if (strtolower($meta->getAttribute('name')) != 'description') {
                continue;
            }

            $content = $meta->getAttribute('content');

            if ($content !== '') {
                $description = $content;
            }
        }

        return $this->Output($description, __FUNCTION__);
    }
634
635
    /**
     * Reads the page title.
     *
     * Only the first <title> element is considered; later ones are ignored
     * even when the first one is empty.
     *
     * @return array Envelope whose data is the title text, or '' if none
     */
    public function MetaTitle()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $first = $document->getElementsByTagName('title')->item(0);
        $title = '';

        if ($first !== null && isset($first->nodeValue) && strlen($first->nodeValue) > 0) {
            $title = $first->nodeValue;
        }

        return $this->Output($title, __FUNCTION__);
    }
658
659
    /**
     * Checks whether a robots meta tag tells crawlers not to follow links.
     *
     * The content attribute is a comma-separated, case-insensitive list of
     * directives (e.g. "noindex, nofollow"), so each directive is compared
     * individually rather than requiring the whole value to equal "nofollow".
     * "none" is treated as a match because it is shorthand for
     * "noindex, nofollow".
     *
     * @return array Envelope whose data is true when nofollow is in effect
     */
    public function NofollowTag()
    {
        $dom = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $nofollow = false;

        foreach ($dom->getElementsByTagName('meta') as $tag) {
            if (strtolower(trim($tag->getAttribute('name'))) !== 'robots') {
                continue;
            }

            $directives = array_map(
                'trim',
                explode(',', strtolower($tag->getAttribute('content')))
            );

            if (in_array('nofollow', $directives, true) || in_array('none', $directives, true)) {
                $nofollow = true;
                break;
            }
        }

        return $this->Output($nofollow, __FUNCTION__);
    }
681
682
    /**
     * Estimates how much the HTML body would shrink when compressed.
     *
     * Sizes are reported in kilobytes, rounded to two decimals. 'possible' is
     * the size after gzcompress() at level 9, 'percentage' is the compressed
     * size relative to the original, and 'difference' is the saving. When the
     * body is empty or rounds to 0.00 KB the percentage is reported as 0
     * instead of dividing by zero.
     *
     * @return array Envelope whose data holds actual, possible, percentage
     *               and difference
     */
    public function PageCompression()
    {
        $content  = $this->data['content'];
        $actual   = round(strlen($content) / 1024, 2);
        $possible = round(strlen(gzcompress($content, 9)) / 1024, 2);

        $output               = array();
        $output['actual']     = $actual;
        $output['possible']   = $possible;
        $output['percentage'] = ($actual > 0) ? round(($possible * 100) / $actual, 2) : 0;
        $output['difference'] = round($actual - $possible, 2);

        return $this->Output($output, __FUNCTION__);
    }
699
}