Passed
Push — master ( 14e493...9c2d3e )
by Marcel
02:30
created

RegexService::read()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 42
Code Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 5
eloc 27
c 2
b 0
f 0
nc 4
nop 1
dl 0
loc 42
rs 9.1768
1
<?php
2
/**
3
 * Analytics
4
 *
5
 * This file is licensed under the Affero General Public License version 3 or
6
 * later. See the LICENSE.md file.
7
 *
8
 * @author Marcel Scherello <[email protected]>
9
 * @copyright 2020 Marcel Scherello
10
 */
11
12
namespace OCA\Analytics\Datasource;
13
14
use OCP\IL10N;
15
use OCP\ILogger;
16
17
/**
 * Datasource that downloads a document and extracts rows from it with a
 * user-supplied regular expression.
 *
 * The expression is expected to contain the named groups "dimension" and
 * "value"; every match becomes one row of the result.
 */
class RegexService
{
    /** @var ILogger Receives a message whenever fetching or matching fails. */
    private $logger;

    /** @var IL10N Translates the user-visible datasource name. */
    private $l10n;

    public function __construct(
        IL10N $l10n,
        ILogger $logger
    ) {
        $this->l10n = $l10n;
        $this->logger = $logger;
    }

    /**
     * Translated display name of this datasource.
     */
    public function getName(): string
    {
        return $this->l10n->t('HTML grabber');
    }

    /**
     * Grab data from external html via regex
     *
     * Reads the options "url", "regex" and (optionally) "limit". A limit that
     * is missing, zero or negative means "no limit". When the document cannot
     * be fetched, the pattern is invalid, or the pattern lacks the named group
     * "dimension", the failure is handled and an empty data set is returned
     * instead of raising.
     *
     * @NoAdminRequired
     * @param array $option
     * @return array with the keys 'header', 'data' and 'error'
     */
    public function read($option)
    {
        $url = $option['url'] ?? '';
        $regex = $option['regex'] ?? '';

        if (!is_string($url) || $url === '' || !is_string($regex) || $regex === '') {
            $this->logger->error('Regex datasource: the options "url" and "regex" are required');
            return $this->buildResult([]);
        }

        $context = stream_context_create([
            'http' => [
                'header' => 'User-Agent: NextCloud Analytics APP',
            ],
        ]);

        // NOTE(review): $url comes straight from the options and is not
        // restricted to http(s); file_get_contents() also accepts local paths
        // and other stream wrappers. Confirm whether callers validate it.
        $html = file_get_contents($url, false, $context);
        if ($html === false) {
            $this->logger->error('Regex datasource: unable to read ' . $url);
            return $this->buildResult([]);
        }

        $matches = [];
        if (preg_match_all($regex, $html, $matches) === false) {
            $this->logger->error('Regex datasource: the regular expression could not be applied');
            return $this->buildResult([]);
        }

        // A pattern without the named groups yields no such keys at all.
        $dimensions = $matches['dimension'] ?? [];
        $values = $matches['value'] ?? [];

        $limit = (int)($option['limit'] ?? 0);
        if ($limit > 0) {
            $dimensions = array_slice($dimensions, 0, $limit);
        }

        $data = [];
        foreach ($dimensions as $index => $dimension) {
            // First column is intentionally empty, mirroring the header.
            $data[] = ['', $dimension, $values[$index] ?? null];
        }

        return $this->buildResult($data);
    }

    /**
     * template for options & settings
     *
     * Each entry describes one option by its id, label and placeholder text.
     *
     * @NoAdminRequired
     * @return array
     */
    public function getTemplate()
    {
        return [
            ['id' => 'url', 'name' => 'URL', 'placeholder' => 'url'],
            ['id' => 'regex', 'name' => 'valid regex', 'placeholder' => '//'],
            ['id' => 'limit', 'name' => 'Limit', 'placeholder' => 'Number of records'],
            ['id' => 'timestamp', 'name' => 'Timestamp of dataload', 'placeholder' => 'true/false'],
            ['id' => 'delete', 'name' => 'Delete all data before load', 'placeholder' => 'true/false'],
        ];
    }

    /**
     * Wrap the extracted rows in the result structure returned by read().
     *
     * @param array $data rows of the form ['', dimension, value]
     * @return array
     */
    private function buildResult(array $data)
    {
        return [
            'header' => ['', 'Dimension2', 'Count'],
            'data' => $data,
            'error' => 0,
        ];
    }
}
137