Passed
Push — master ( c2213b...3aaf27 )
by Burak
01:42
created

Analyze::RobotsFile()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 6
nc 2
nop 0
1
<?php
2
namespace SEOCheckup;
3
4
/**
5
 * @package seo-checkup
6
 * @author  Burak <[email protected]>
7
 */
8
9
use DOMDocument;
10
use DOMXPath;
11
12
class Analyze extends PreRequirements
13
{
14
15
    /**
     * Snapshot of the fetched page built by the constructor, holding the keys
     * url, parsed_url, status, headers, page_speed and content.
     *
     * @var array $data
     */
18
    private $data;
19
20
21
    /**
     * Helper object created by the constructor from the $data snapshot.
     *
     * @var Helpers $helpers
     */
24
    private $helpers;
25
26
    /**
     * Document most recently created by the DOMDocument() method.
     *
     * @var DOMDocument $dom
     */
29
    private $dom;
30
31
    /**
     * Requests the given URL and keeps a snapshot of the response.
     *
     * The snapshot (URL, parse_url() result, status code, headers, elapsed
     * time formatted to four decimals, and body) is stored in $this->data and
     * handed to a new Helpers instance.
     *
     * @param string $url Address of the page to analyze
     * @return $this Only observable on an explicit __construct() call
     */
    public function __construct($url)
    {
        $requestStart = microtime(true);
        $reply        = $this->Request($url);

        // Filled in this exact order so the timing is taken before the body is read
        $snapshot               = [];
        $snapshot['url']        = $url;
        $snapshot['parsed_url'] = parse_url($url);
        $snapshot['status']     = $reply->getStatusCode();
        $snapshot['headers']    = $reply->getHeaders();
        $snapshot['page_speed'] = number_format(microtime(true) - $requestStart, 4);
        $snapshot['content']    = $reply->getBody()->getContents();

        $this->data    = $snapshot;
        $this->helpers = new Helpers($snapshot);

        return $this;
    }
55
56
    /**
     * Creates a fresh DOMDocument and remembers it in $this->dom
     *
     * @return DOMDocument The newly created, still empty document
     */
    private function DOMDocument()
    {
        // Keep libxml parse errors internal instead of raising PHP warnings
        libxml_use_internal_errors(true);

        $document  = new DOMDocument();
        $this->dom = $document;

        return $document;
    }
69
70
    /**
     * Builds an XPath query object bound to the document held in $this->dom
     *
     * @return DOMXPath
     */
    private function DOMXPath()
    {
        $xpath = new DOMXPath($this->dom);

        return $xpath;
    }
79
80
    /**
     * Wraps a check result in the common response envelope
     *
     * The service label is the given name with a space inserted before every
     * uppercase letter, e.g. "BrokenLinks" becomes " Broken Links" (the
     * leading space is part of the current output).
     *
     * @param mixed $return   Result produced by the calling check
     * @param string $service Name of the calling check, usually __FUNCTION__
     * @return array Keys: url, status, headers, service, time, data
     */
    private function Output($return, $service)
    {
        $label = preg_replace("([A-Z])", " $0", $service);

        $envelope            = [];
        $envelope['url']     = $this->data['url'];
        $envelope['status']  = $this->data['status'];
        $envelope['headers'] = $this->data['headers'];
        $envelope['service'] = $label;
        $envelope['time']    = time();
        $envelope['data']    = $return;

        return $envelope;
    }
98
99
    /**
     * Requests the links found in the page and groups them by HTTP status
     *
     * Links whose status code starts with a digit above 3 (and is not 999)
     * are filed under "errors", all others under "passed". Scanning stops
     * when the 25th link is reached, so at most 24 links are requested.
     *
     * @return array
     */
    public function BrokenLinks()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $links   = $this->helpers->GetLinks($document);
        $report  = ['errors' => [], 'passed' => []];
        $visited = 0;

        foreach ($links as $link) {
            if (++$visited >= 25) {
                break;
            }

            $status = $this->Request($link)->getStatusCode();
            $failed = substr($status, 0, 1) > 3 && $status != 999;
            $bucket = $failed ? 'errors' : 'passed';

            $report[$bucket]["HTTP {$status}"][] = $link;
        }

        $result = [
            'links'   => $links,
            'scanned' => $report
        ];

        return $this->Output($result, __FUNCTION__);
    }
132
133
    /**
     * Collects cache hints from the response headers and the HTML comments
     *
     * A header value or comment counts as a hint when its lower-cased text
     * contains "cache". Only header values are inspected, not header names.
     *
     * @return array
     */
    public function Cache()
    {
        $found = ['headers' => [], 'html' => []];

        foreach ($this->data['headers'] as $values) {
            foreach ($values as $value) {
                if (strpos(mb_strtolower($value), 'cache') === false) {
                    continue;
                }

                $found['headers'][] = $value;
            }
        }

        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);
        $comments = $this->DOMXPath()->query('//comment()');

        foreach ($comments as $node) {
            $text = $node->textContent;

            if (strpos(mb_strtolower($text), 'cache') !== false) {
                // Re-wrap the trimmed comment text in comment markers
                $found['html'][] = '<!-- '.trim($text).' //-->';
            }
        }

        return $this->Output($found, __FUNCTION__);
    }
166
167
    /**
     * Looks for link elements declared as canonical
     *
     * The result holds one "canonical" entry per matching value returned by
     * the helper; it does not contain the link target.
     *
     * @return array
     */
    public function CanonicalTag()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        // NOTE(review): presumably the rel values of all link elements - helper is not visible here
        $rels    = $this->helpers->GetAttributes($document, 'link', 'rel');
        $matches = array();

        foreach ($rels as $rel) {
            if ($rel != 'canonical') {
                continue;
            }

            $matches[] = $rel;
        }

        return $this->Output($matches, __FUNCTION__);
    }
189
190
    /**
     * Determines character set from the Content-Type response header
     *
     * The header name is matched case-insensitively and the charset parameter
     * is extracted with a regular expression, so a Content-Type without a
     * charset (e.g. "text/html") yields an empty string instead of an
     * undefined-offset error, and surrounding quotes are not returned.
     *
     * @return array
     */
    public function CharacterSet()
    {
        $output = '';

        foreach ($this->data['headers'] as $key => $header)
        {
            // HTTP header names are case-insensitive ("content-type" is common)
            if(strcasecmp($key, 'Content-Type') !== 0 || !isset($header[0]))
            {
                continue;
            }

            // Accepts charset=UTF-8, charset="UTF-8" and extra whitespace
            if(preg_match('/charset\s*=\s*["\']?([^"\';\s]+)/i', $header[0], $match))
            {
                $output = $match[1];
            }
        }

        return $this->Output($output, __FUNCTION__);
    }
209
210
    /**
     * Calculates code / content percentage
     *
     * Script elements are removed, the remaining markup is stripped of tags
     * and the length of the leftover text is compared with the length of the
     * original body. An empty body short-circuits to an all-zero result,
     * because both the HTML parser and the percentage division would
     * otherwise fail on it.
     *
     * @return array
     */
    public function CodeContent()
    {
        $page_size = mb_strlen($this->data['content'], 'utf8');

        if(!$page_size)
        {
            return $this->Output(array(
                'page_size'     => 0,
                'code_size'     => 0,
                'content_size'  => 0,
                'content'       => '',
                'percentage'    => '0%'
            ), __FUNCTION__);
        }

        $dom       = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        // Copy the live node list first; removing while iterating it skips nodes
        $scripts   = iterator_to_array($dom->getElementsByTagName('script'));

        foreach ($scripts as $item)
        {
            $item->parentNode->removeChild($item);
        }

        // Strip the tags once and reuse the text for both size and content
        $text         = strip_tags($dom->saveHTML());
        $content_size = mb_strlen($text, 'utf8');
        $rate         = round($content_size / $page_size * 100);
        $output       = array(
            'page_size'     => $page_size,
            'code_size'     => ($page_size - $content_size),
            'content_size'  => $content_size,
            'content'       => $this->helpers->Whitespace($text),
            'percentage'    => "$rate%"
        );

        return $this->Output($output, __FUNCTION__);
    }
247
248
    /**
     * Counts occurrences of deprecated HTML elements
     *
     * @return array Element name => number of occurrences, used elements only
     */
    public function DeprecatedHTML()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $deprecated = array(
            'acronym', 'applet', 'basefont', 'big', 'center',
            'dir', 'font', 'frame', 'frameset', 'isindex',
            'noframes', 's', 'strike', 'tt', 'u'
        );

        $usage = array();

        foreach ($deprecated as $name) {
            $count = $document->getElementsByTagName($name)->length;

            if ($count > 0) {
                $usage[$name] = $count;
            }
        }

        return $this->Output($usage, __FUNCTION__);
    }
290
291
    /**
     * Measures the host name without its last dot-separated label
     *
     * For "www.example.com" the measured part is "www.example".
     *
     * @return array
     */
    public function DomainLength()
    {
        $labels     = explode('.', $this->data['parsed_url']['host']);
        $withoutTld = implode('.', array_slice($labels, 0, -1));

        return $this->Output(strlen($withoutTld), __FUNCTION__);
    }
306
307
    /**
     * Looks for a favicon
     *
     * First tries /favicon.ico on the analyzed host. If that does not answer
     * with HTTP 200, the first link element whose rel is "icon" or
     * "shortcut icon" is resolved to candidate URLs, and the first candidate
     * that answers with HTTP 200 is returned. An empty string means no
     * favicon was found.
     *
     * @return array
     */
    public function Favicon()
    {
        $parsed = $this->data['parsed_url'];
        $root   = "{$parsed['scheme']}://{$parsed['host']}";
        $ico    = "{$root}/favicon.ico";

        if($this->Request($ico)->getStatusCode() === 200)
        {
            return $this->Output($ico, __FUNCTION__);
        }

        $dom    = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $fav    = '';

        foreach ($dom->getElementsByTagName('link') as $tag)
        {
            // rel values are case-insensitive in HTML
            $rel = strtolower(trim($tag->getAttribute('rel')));

            if($rel === 'shortcut icon' || $rel === 'icon')
            {
                $fav = trim($tag->getAttribute('href'));
                break;
            }
        }

        $link   = '';

        // Without a declared icon there is nothing to probe; requesting
        // "<root>/" would wrongly report the home page as the favicon
        if($fav !== '')
        {
            if(filter_var($fav, FILTER_VALIDATE_URL) !== false)
            {
                $candidates = [$fav];
            }
            else if(strpos($fav, '//') === 0)
            {
                // Protocol-relative reference: reuse the scheme of the page
                $candidates = ["{$parsed['scheme']}:{$fav}"];
            }
            else
            {
                // Relative reference: try the host root, then the analyzed URL
                // (replaces the former dependency on $_GET['value'])
                $candidates = [
                    $root . '/' . ltrim($fav, '/'),
                    rtrim($this->data['url'], '/') . '/' . ltrim($fav, '/')
                ];
            }

            foreach ($candidates as $candidate)
            {
                if($this->Request($candidate)->getStatusCode() == 200)
                {
                    $link = $candidate;
                    break;
                }
            }
        }

        return $this->Output($link, __FUNCTION__);
    }
354
355
    /**
     * Counts frameset and frame elements in the page
     *
     * @return array
     */
    public function Frameset()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $counts = [
            'frameset' => $document->getElementsByTagName('frameset')->length,
            'frame'    => $document->getElementsByTagName('frame')->length
        ];

        return $this->Output($counts, __FUNCTION__);
    }
383
384
    /**
     * Finds Google Analytics code
     *
     * Concatenates the text of all inline scripts with the downloaded bodies
     * of all external scripts and searches the result for the first
     * "UA-<digits>-<digits>" tracking ID. The data is null when no ID is
     * present (previously this raised an undefined-offset error).
     *
     * @return array
     */
    public function GoogleAnalytics()
    {
        $dom    = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $parsed = $this->data['parsed_url'];
        $script = '';

        foreach ($dom->getElementsByTagName('script') as $tag)
        {
            $src = $tag->getAttribute('src');

            if($src === '')
            {
                // Inline script: use its text directly
                $script .= $tag->nodeValue;
                continue;
            }

            if(strpos($src, '//') === 0)
            {
                // Protocol-relative reference: reuse the scheme of the page
                $href = $parsed['scheme'] . ':' . $src;
            }
            else if(preg_match('#^https?://#i', $src))
            {
                // Already absolute; a bare "http" prefix test would also
                // match relative paths such as "httpdocs/app.js"
                $href = $src;
            }
            else
            {
                // Relative reference: rebuild it on top of the page's origin
                $href = $parsed['scheme'] . '://';

                if(isset($parsed['user']) && isset($parsed['pass']))
                {
                    $href .= $parsed['user'] . ':' . $parsed['pass'] . '@';
                }

                $href .= $parsed['host'];

                if(isset($parsed['port']))
                {
                    $href .= ':' . $parsed['port'];
                }

                $href .= '/' . ltrim($src, '/');
            }

            $script .= $this->Request($href)->getBody()->getContents();
        }

        $ua_regex = "/UA-[0-9]{5,}-[0-9]{1,}/";
        $ua_id    = preg_match($ua_regex, $script, $match) ? $match[0] : null;

        return $this->Output($ua_id, __FUNCTION__);
    }
437
438
    /**
     * Collects the text of every h1 element in the page
     *
     * @return array
     */
    public function Header1()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $headings = array();

        foreach ($document->getElementsByTagName('h1') as $heading) {
            array_push($headings, $heading->nodeValue);
        }

        return $this->Output($headings, __FUNCTION__);
    }
457
458
    /**
     * Collects the text of every h2 element in the page
     *
     * @return array
     */
    public function Header2()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $headings = array();

        foreach ($document->getElementsByTagName('h2') as $heading) {
            array_push($headings, $heading->nodeValue);
        }

        return $this->Output($headings, __FUNCTION__);
    }
477
478
    /**
     * Reports whether the analyzed URL uses the https scheme
     *
     * @return array
     */
    public function Https()
    {
        $scheme   = $this->data['parsed_url']['scheme'];
        $isSecure = ($scheme === 'https');

        return $this->Output($isSecure, __FUNCTION__);
    }
489
490
    /**
     * Lists all images and the sources of those with an empty alt text
     *
     * @return array
     */
    public function ImageAlt()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $images  = array();
        $missing = array();

        foreach ($document->getElementsByTagName('img') as $image) {
            $entry = array(
                'src' => $image->getAttribute('src'),
                'alt' => $image->getAttribute('alt')
            );

            $images[] = $entry;

            // A missing alt attribute reads as an empty string as well
            if ($entry['alt'] == '') {
                $missing[] = $entry['src'];
            }
        }

        $result = array(
            'images'        => $images,
            'without_alt'   => $missing
        );

        return $this->Output($result, __FUNCTION__);
    }
529
530
    /**
     * Gets inbound links
     *
     * Every anchor href is normalized to an absolute URL on top of the
     * analyzed page's scheme and host, and only links pointing to that same
     * host are kept. Empty hrefs, pure fragments ("#..."), unparsable hrefs
     * and non-HTTP schemes (mailto:, tel:, javascript:, ...) are skipped.
     * The result is a sequentially indexed list.
     *
     * @return array
     */
    public function InboundLinks()
    {
        $dom    = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $site   = $this->data['parsed_url'];
        $output = array();

        foreach ($dom->getElementsByTagName('a') as $item)
        {
            $href = $item->getAttribute('href');

            if($href == '' || strpos($href, '#') === 0)
            {
                continue;
            }

            $link = parse_url($href);

            // parse_url() returns false for seriously malformed URLs
            if($link === false)
            {
                continue;
            }

            if(!isset($link['scheme']))
            {
                $link['scheme'] = $site['scheme'];
            }
            else if(!in_array(strtolower($link['scheme']), ['http', 'https'], true))
            {
                // mailto:, tel:, javascript: and similar are not page links
                continue;
            }

            if(!isset($link['host']))
            {
                $link['host'] = $site['host'];
            }
            else if(strcasecmp($link['host'], $site['host']) !== 0)
            {
                // Points to another host, so it is not an inbound link
                continue;
            }

            $path = '';

            if(isset($link['path']))
            {
                // Ensure a leading slash; checking for "any slash" turned
                // "a/b.html" into "<host>a/b.html" and lost the link
                $path = (strpos($link['path'], '/') === 0) ? $link['path'] : '/'.$link['path'];
            }

            $query    = isset($link['query']) ? '?'.$link['query'] : '';

            $output[] = $link['scheme'].'://'.$link['host'].$path.$query;
        }

        return $this->Output($output, __FUNCTION__);
    }
592
593
    /**
     * Gets the contents of all style elements in the page
     *
     * Each stylesheet text is passed through the Whitespace helper.
     *
     * @return array
     */
    public function InlineCss()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $sheets = array();

        foreach ($document->getElementsByTagName('style') as $style) {
            $css      = $style->textContent;
            $sheets[] = $this->helpers->Whitespace($css);
        }

        return $this->Output($sheets, __FUNCTION__);
    }
613
614
    /**
     * Reads the description meta tag
     *
     * When several non-empty description tags exist, the last one wins.
     *
     * @return array
     */
    public function MetaDescription()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $description = '';

        foreach ($document->getElementsByTagName('meta') as $meta) {
            if (strtolower($meta->getAttribute('name')) != 'description') {
                continue;
            }

            $content = $meta->getAttribute('content');

            if (strlen($content) > 0) {
                $description = $content;
            }
        }

        return $this->Output($description, __FUNCTION__);
    }
636
637
    /**
     * Reads the page title
     *
     * Only the first title element is considered; an empty or missing title
     * yields an empty string.
     *
     * @return array
     */
    public function MetaTitle()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $title = '';
        $first = $document->getElementsByTagName('title')->item(0);

        if ($first !== null && isset($first->nodeValue) && strlen($first->nodeValue) > 0) {
            $title = $first->nodeValue;
        }

        return $this->Output($title, __FUNCTION__);
    }
660
661
    /**
     * Gets no-follow tag
     *
     * Reports true when any robots meta tag carries the "nofollow" directive.
     * The content attribute is treated as a comma-separated, case-insensitive
     * list, so "noindex, nofollow" is recognized; "none" is accepted too,
     * as it stands for "noindex, nofollow".
     *
     * @return array
     */
    public function NofollowTag()
    {
        $dom    = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $found  = false;

        foreach ($dom->getElementsByTagName('meta') as $tag)
        {
            if(strcasecmp($tag->getAttribute('name'), 'robots') !== 0)
            {
                continue;
            }

            foreach (explode(',', $tag->getAttribute('content')) as $directive)
            {
                $directive = strtolower(trim($directive));

                if($directive === 'nofollow' || $directive === 'none')
                {
                    $found = true;
                    break 2;
                }
            }
        }

        return $this->Output($found, __FUNCTION__);
    }
683
684
    /**
     * Gets no-index tag
     *
     * Reports true when any robots meta tag carries the "noindex" directive.
     * The content attribute is treated as a comma-separated, case-insensitive
     * list, so "noindex, nofollow" is recognized; "none" is accepted too,
     * as it stands for "noindex, nofollow".
     *
     * @return array
     */
    public function NoindexTag()
    {
        $dom    = $this->DOMDocument();
        $dom->loadHTML($this->data['content']);

        $found  = false;

        foreach ($dom->getElementsByTagName('meta') as $tag)
        {
            if(strcasecmp($tag->getAttribute('name'), 'robots') !== 0)
            {
                continue;
            }

            foreach (explode(',', $tag->getAttribute('content')) as $directive)
            {
                $directive = strtolower(trim($directive));

                if($directive === 'noindex' || $directive === 'none')
                {
                    $found = true;
                    break 2;
                }
            }
        }

        return $this->Output($found, __FUNCTION__);
    }
706
707
    /**
     * Lists the distinct stylesheets, scripts and images referenced by the page
     *
     * @return array Keys: css, script, img - each a de-duplicated list of URLs
     */
    public function ObjectCount()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        $result = array(
            'css'    => array(),
            'script' => array(),
            'img'    => array()
        );

        // Stylesheets: link elements flagged as CSS by type or by rel
        $stylesheets = array();

        foreach ($document->getElementsByTagName('link') as $node) {
            $isCss = $node->getAttribute('type') == 'text/css'
                || $node->getAttribute('rel') == 'stylesheet';

            if ($isCss) {
                $stylesheets[] = $node->getAttribute('href');
            }
        }

        $result['css'] = array_unique($stylesheets);

        // Scripts and images: every element that carries a non-empty src
        foreach (array('script', 'img') as $name) {
            $sources = array();

            foreach ($document->getElementsByTagName($name) as $node) {
                $src = $node->getAttribute('src');

                if ($src != '') {
                    $sources[] = $src;
                }
            }

            $result[$name] = array_unique($sources);
        }

        return $this->Output($result, __FUNCTION__);
    }
755
756
    /**
     * Reports the request duration recorded by the constructor
     *
     * @return array
     */
    public function PageSpeed()
    {
        $elapsed = $this->data['page_speed'];

        return $this->Output($elapsed, __FUNCTION__);
    }
765
766
    /**
     * Finds e-mail addresses that appear as plain text in the page
     *
     * Script and style elements are removed first, the remaining markup is
     * reduced to whitespace-separated words, and every word that validates
     * as an e-mail address is reported once.
     *
     * @return array
     */
    public function PlaintextEmail()
    {
        $document = $this->DOMDocument();
        $document->loadHTML($this->data['content']);

        foreach (array('script', 'style') as $name) {
            // Collect first, then remove: the node list is live
            $doomed = array();

            foreach ($document->getElementsByTagName($name) as $node) {
                $doomed[] = $node;
            }

            foreach ($doomed as $node) {
                $node->parentNode->removeChild($node);
            }
        }

        $text  = trim(preg_replace('/<[^>]*>/', ' ', $document->saveHTML()));
        $words = explode(' ', preg_replace('/\s+/', ' ', $text));

        $emails = array();

        foreach ($words as $word) {
            $word = trim($word);

            if ($word == '' || strpos($word, '@') === false) {
                continue;
            }

            if (filter_var($word, FILTER_VALIDATE_EMAIL) !== false) {
                $emails[] = $word;
            }
        }

        return $this->Output(array_unique($emails), __FUNCTION__);
    }
823
824
    /**
     * Checks HTML page compression
     *
     * Compares the size of the response body with its size after zlib
     * compression at the highest level. Sizes are reported in kilobytes
     * rounded to two decimals. The percentage is derived from the exact byte
     * counts and is 0 for an empty body, which avoids dividing by a size that
     * rounds to zero.
     *
     * @return array Keys: actual, possible, percentage, difference
     */
    public function PageCompression()
    {
        $raw_bytes  = strlen($this->data['content']);
        $compressed = gzcompress($this->data['content'], 9);

        // If compression fails, report the uncompressed size (no saving)
        $gz_bytes   = ($compressed === false) ? $raw_bytes : strlen($compressed);

        $output               = array();

        $output['actual']     = round($raw_bytes / 1024, 2);
        $output['possible']   = round($gz_bytes / 1024, 2);
        $output['percentage'] = ($raw_bytes > 0) ? round(($gz_bytes * 100) / $raw_bytes, 2) : 0.0;
        $output['difference'] = round($output['actual'] - $output['possible'], 2);

        return $this->Output($output, __FUNCTION__);
    }
841
842
    /**
     * Fetches robots.txt from the root of the analyzed host
     *
     * The data is the file contents when the request answers with HTTP 200,
     * otherwise false.
     *
     * @return array
     */
    public function RobotsFile()
    {
        $parsed   = $this->data['parsed_url'];
        $response = $this->Request("{$parsed['scheme']}://{$parsed['host']}/robots.txt");

        $robots = false;

        if ($response->getStatusCode() === 200) {
            $robots = $response->getBody()->getContents();
        }

        return $this->Output($robots, __FUNCTION__);
    }
860
}