Passed
Push — master ( e7d759...857996 )
by Marcel
02:28
created

RegexService::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 5
rs 10
1
<?php
2
/**
3
 * Data Analytics
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the LICENSE.md file.
7
 *
8
 * @author Marcel Scherello <[email protected]>
9
 * @copyright 2019 Marcel Scherello
10
 */
11
12
namespace OCA\Analytics\Datasource;
13
14
use OCP\ILogger;
15
16
/**
 * Datasource that downloads a document and extracts rows from it with a
 * user supplied regular expression.
 *
 * The expression is expected to contain the named groups "dimension" and
 * "value"; every match becomes one row of the result.
 */
class RegexService
{
    /** @var ILogger receives the debug and error messages of this service */
    private $logger;

    /**
     * @param ILogger $logger
     */
    public function __construct(ILogger $logger)
    {
        $this->logger = $logger;
    }

    /**
     * Grab data from external html via regex
     *
     * Reads $option['url'], applies $option['regex'] to the content and
     * returns one row per match. $option['limit'] (optional) caps the number
     * of rows; 0, a negative or a missing value means "no limit".
     *
     * The 'error' entry of the result is 0 on success and non-zero when the
     * options are incomplete, the content cannot be loaded, the expression
     * cannot be applied or it lacks the "dimension" group. In all of those
     * cases 'data' is an empty array and the reason is written to the log.
     *
     * NOTE(review): the url is handed to file_get_contents() unchanged, so
     * local paths and every registered stream wrapper are accepted. If the
     * value is user controlled, the caller should restrict it (e.g. to
     * http/https) - confirm against the calling controller.
     *
     * @NoAdminRequired
     * @param array $option
     * @return array
     */
    public function read($option)
    {
        $url = isset($option['url']) ? (string)$option['url'] : '';
        $regex = isset($option['regex']) ? (string)$option['regex'] : '';
        $limit = isset($option['limit']) ? (int)$option['limit'] : 0;

        if ($url === '' || $regex === '') {
            $this->logger->error('Regex datasource: url and regex are required');
            return $this->buildResult([], 1);
        }

        // file_get_contents() and preg_match_all() report failures as PHP
        // warnings; collect them so they end up in the log with context
        // instead of being emitted unhandled.
        $warnings = [];
        set_error_handler(function ($severity, $message) use (&$warnings) {
            $warnings[] = $message;
            return true;
        });
        $matches = [];
        try {
            $html = file_get_contents($url);
            $matched = ($html === false) ? false : preg_match_all($regex, $html, $matches);
        } finally {
            restore_error_handler();
        }

        if ($html === false) {
            $this->logger->error('Regex datasource: unable to load url: ' . implode('; ', $warnings));
            return $this->buildResult([], 1);
        }
        if ($matched === false) {
            $this->logger->error('Regex datasource: unable to apply regex: ' . implode('; ', $warnings));
            return $this->buildResult([], 1);
        }
        if (!isset($matches['dimension'])) {
            $this->logger->error('Regex datasource: regex has no named group "dimension"');
            return $this->buildResult([], 1);
        }

        $dimensions = $matches['dimension'];
        // Without a "value" group the third column stays null.
        $values = isset($matches['value']) ? $matches['value'] : [];

        $total = count($dimensions);
        $this->logger->debug('count: ' . $total);

        // Only a positive limit below the number of matches shortens the result.
        $rows = ($limit > 0 && $limit < $total) ? $limit : $total;

        $data = [];
        for ($i = 0; $i < $rows; $i++) {
            $data[] = [
                'dimension1' => '',
                'dimension2' => $dimensions[$i],
                'dimension3' => isset($values[$i]) ? $values[$i] : null,
            ];
        }

        return $this->buildResult($data, 0);
    }

    /**
     * template for options & settings
     *
     * Each entry describes one option understood by read(): its id, its
     * display name and the placeholder shown for it.
     *
     * @NoAdminRequired
     * @return array
     */
    public function getTemplate()
    {
        return [
            ['id' => 'url', 'name' => 'URL', 'placeholder' => 'url'],
            ['id' => 'regex', 'name' => 'valid regex', 'placeholder' => '//'],
            ['id' => 'limit', 'name' => 'Limit', 'placeholder' => 'Number of records'],
            ['id' => 'timestamp', 'name' => 'Timestamp of dataload', 'placeholder' => 'true/false'],
        ];
    }

    /**
     * Wrap rows and an error code in the result structure returned by read().
     *
     * @param array $data rows with the keys dimension1, dimension2, dimension3
     * @param int $error 0 for success, non-zero for a failed read
     * @return array
     */
    private function buildResult(array $data, $error)
    {
        return [
            'header' => [
                'dimension1' => '',
                'dimension2' => 'Dimension2',
                'dimension3' => 'Count',
            ],
            'data' => $data,
            'error' => $error,
        ];
    }
}