Passed
Push — master ( 9c2d3e...b67124 )
by Marcel
02:34
created

Regex::getId()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
c 0
b 0
f 0
nc 1
nop 0
dl 0
loc 3
rs 10
1
<?php
2
/**
3
 * Analytics
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the LICENSE.md file.
7
 *
8
 * @author Marcel Scherello <[email protected]>
9
 * @copyright 2020 Marcel Scherello
10
 */
11
12
namespace OCA\Analytics\Datasource;
13
14
use OCP\IL10N;
15
use OCP\ILogger;
16
17
/**
 * Datasource that downloads an HTML page and extracts records from it with a
 * user-supplied regular expression.
 *
 * The pattern is expected to contain the named groups `dimension` and
 * `value`; every match becomes one data row.
 */
class Regex implements IDatasource
{
    /** @var ILogger logger used to report fetch and pattern failures */
    private $logger;

    /** @var IL10N translation service for user-visible strings */
    private $l10n;

    /**
     * @param IL10N $l10n translation service
     * @param ILogger $logger application logger
     */
    public function __construct(
        IL10N $l10n,
        ILogger $logger
    )
    {
        $this->l10n = $l10n;
        $this->logger = $logger;
    }

    /**
     * @return string Display Name of the datasource
     */
    public function getName(): string
    {
        return $this->l10n->t('HTML grabber');
    }

    /**
     * @return int unique numeric id of this datasource
     */
    public function getId(): int
    {
        return 5;
    }

    /**
     * Describe the options a user can configure for this datasource.
     *
     * @return array list of option descriptors, each with the keys
     *               `id`, `name` and `placeholder`
     */
    public function getTemplate(): array
    {
        return [
            ['id' => 'url', 'name' => 'URL', 'placeholder' => 'url'],
            ['id' => 'regex', 'name' => 'valid regex', 'placeholder' => '//'],
            ['id' => 'limit', 'name' => 'Limit', 'placeholder' => 'Number of records'],
            ['id' => 'timestamp', 'name' => 'Timestamp of dataload', 'placeholder' => 'true/false'],
            ['id' => 'delete', 'name' => 'Delete all data before load', 'placeholder' => 'true/false'],
        ];
    }

    /**
     * Download the configured page and turn every regex match into a data row.
     *
     * Reads the options `url`, `regex` and (optionally) `limit`. A positive
     * `limit` caps the number of returned rows; `0`, a negative number or a
     * missing value means "no limit".
     *
     * When the URL is not HTTP(S), the download fails, or the pattern is
     * missing/invalid, a warning is logged and an empty `data` list is
     * returned.
     * NOTE(review): `error` stays 0 in those cases because the error codes
     * expected by callers are not visible in this file - confirm and adjust.
     *
     * @param array $option datasource options as entered by the user
     * @return array associative array with the keys `header` (column titles),
     *               `data` (rows of [empty string, dimension, value]) and `error`
     */
    public function readData($option): array
    {
        $result = [
            'header' => ['', 'Dimension2', 'Count'],
            'data' => [],
            'error' => 0,
        ];

        $url = $option['url'] ?? '';
        $regex = $option['regex'] ?? '';
        if (!is_string($url) || !is_string($regex) || $regex === '') {
            $this->logger->warning('Regex datasource: missing or invalid "url"/"regex" option');
            return $result;
        }

        // file_get_contents() would also open local paths and php:// streams,
        // so only real web addresses are accepted.
        // NOTE(review): internal hosts are still reachable over HTTP; add an
        // allow/deny list here if that matters for the deployment.
        $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
        if ($scheme !== 'http' && $scheme !== 'https') {
            $this->logger->warning('Regex datasource: refusing to load non-HTTP(S) URL');
            return $result;
        }

        $context = stream_context_create([
            'http' => [
                'header' => 'User-Agent: NextCloud Analytics APP',
            ],
        ]);

        $html = file_get_contents($url, false, $context);
        if ($html === false) {
            $this->logger->warning('Regex datasource: could not download ' . $url);
            return $result;
        }

        $matches = [];
        if (preg_match_all($regex, $html, $matches) === false) {
            $this->logger->warning('Regex datasource: the regular expression could not be applied');
            return $result;
        }

        // Both named groups are optional from PHP's point of view; fall back
        // to "no matches" instead of failing on a missing key.
        $dimensions = $matches['dimension'] ?? [];
        $values = $matches['value'] ?? [];

        $count = count($dimensions);
        $limit = (int)($option['limit'] ?? 0);
        if ($limit > 0 && $limit < $count) {
            $count = $limit;
        }

        for ($i = 0; $i < $count; $i++) {
            $result['data'][] = ['', $dimensions[$i], $values[$i] ?? null];
        }

        return $result;
    }
}
143