Issues (1270)

plugins/cache_starred_images/init.php (1 issue)

Labels
Severity
1
<?php
2
/**
 * Plugin that downloads media referenced by starred articles into a local
 * disk cache and rewrites article/enclosure URLs to point at the cached copy.
 *
 * Cached files are named "<article_id>-<sha1(url)>"; a per-article
 * "<article_id>-<sha1(site_url)>.status" file tracks download attempts.
 */
class Cache_Starred_Images extends Plugin {

    /** @var PluginHost $host */
    private $host;

    /** @var DiskCache $cache */
    private $cache;

    /** @var int maximum number of caching attempts per article */
    private $max_cache_attempts = 5;

    /**
     * Plugin metadata.
     *
     * @return array version, description, author
     */
    public function about() {
        return [
            1.0,
            "Automatically cache media files in Starred articles",
            "fox",
        ];
    }

    /**
     * Prepares the cache directory and registers the plugin hooks.
     *
     * Hooks are only registered when the cache directory is writable;
     * otherwise a warning is raised and the plugin stays inactive.
     *
     * @param PluginHost $host
     */
    public function init($host) {
        $this->host = $host;
        $this->cache = new DiskCache("starred-images");

        if ($this->cache->makeDir()) {
            // NOTE(review): world-writable on purpose, presumably because
            // housekeeping may run as a different user than the web server
            // (see cache_article_images()) - confirm before tightening.
            chmod($this->cache->getDir(), 0777);
        }

        // marker file; presumably tells the generic cache expiry to leave
        // this directory alone - TODO confirm against DiskCache
        if (!$this->cache->exists(".no-auto-expiry")) {
            $this->cache->touch(".no-auto-expiry");
        }

        if (!$this->cache->isWritable()) {
            user_error("Starred cache directory ".$this->cache->getDir()." is not writable.", E_USER_WARNING);
            return;
        }

        $host->add_hook($host::HOOK_HOUSE_KEEPING, $this);
        $host->add_hook($host::HOOK_ENCLOSURE_ENTRY, $this);
        $host->add_hook($host::HOOK_SANITIZE, $this);
    }

    /**
     * Housekeeping hook: caches media of not-yet-processed starred articles
     * of the plugin owner (at most 100 per run), then removes cache files
     * belonging to articles that no longer exist.
     */
    public function hook_house_keeping() {
        /* since HOOK_UPDATE_TASK is not available to user plugins, this hook is a next best thing */

        Debug::log("caching media of starred articles for user ".$this->host->get_owner_uid()."...");

        $sth = $this->pdo->prepare("SELECT content, ttrss_entries.title,
                ttrss_user_entries.owner_uid, link, site_url, ttrss_entries.id, plugin_data
            FROM ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON
                (ttrss_user_entries.feed_id = ttrss_feeds.id)
            WHERE ref_id = ttrss_entries.id AND
                marked = true AND
                site_url != '' AND
                ttrss_user_entries.owner_uid = ? AND
                plugin_data NOT LIKE '%starred_cache_images%'
            ORDER BY ".sql_random_function()." LIMIT 100");

        if ($sth->execute([$this->host->get_owner_uid()])) {

            $usth = $this->pdo->prepare("UPDATE ttrss_entries SET plugin_data = ? WHERE id = ?");

            while ($line = $sth->fetch()) {
                Debug::log("processing article ".$line["title"], Debug::$LOG_VERBOSE);

                if (!$line["site_url"]) {
                    continue;
                }

                $success = $this->cache_article_images($line["content"], $line["site_url"], $line["owner_uid"], $line["id"]);

                if ($success) {
                    // mark the article as processed so the query above skips it next time
                    $plugin_data = "starred_cache_images,{$line['owner_uid']}:".$line["plugin_data"];

                    $usth->execute([$plugin_data, $line['id']]);
                }
            }
        }

        /* actual housekeeping */

        Debug::log("expiring ".$this->cache->getDir()."...");

        // One glob() per extension instead of GLOB_BRACE, which is not
        // available on every platform; glob() may also return false on error.
        // NOTE(review): cache_url() stores media without a file extension, so
        // these patterns presumably only match status files and legacy media -
        // confirm whether extension-less files should be expired here as well.
        $cache_dir = $this->cache->getDir();
        $files = [];

        foreach (["png", "mp4", "status"] as $extension) {
            $matches = glob($cache_dir."/*.".$extension);

            if (is_array($matches)) {
                $files = array_merge($files, $matches);
            }
        }

        // prepared once and re-executed for every distinct article id
        $csth = $this->pdo->prepare("SELECT id FROM ttrss_entries WHERE id = ?");

        $last_article_id = 0;
        $article_exists = 1;

        foreach ($files as $file) {
            // file names look like "<article_id>-<hash>[.ext]"; only the id is needed
            $article_id = explode("-", basename($file), 2)[0];

            // consecutive files of the same article share a single lookup
            if ($article_id != $last_article_id) {
                $last_article_id = $article_id;

                $csth->execute([$article_id]);

                $article_exists = $csth->fetch();
            }

            if (!$article_exists) {
                unlink($file);
            }
        }
    }

    /**
     * Enclosure hook: points the enclosure at the cached copy when one exists.
     *
     * @param array $enc enclosure data; "content_url" is read and possibly replaced
     * @param mixed $article_id
     * @return array the (possibly modified) enclosure
     */
    public function hook_enclosure_entry($enc, $article_id) {
        $local_filename = $article_id."-".sha1($enc["content_url"]);

        if ($this->cache->exists($local_filename)) {
            $enc["content_url"] = $this->cache->getUrl($local_filename);
        }

        return $enc;
    }

    /**
     * Sanitize hook: rewrites <img src> and <video><source src> to cached
     * copies where available. Nothing is rewritten without an article id.
     *
     * @param DOMDocument $doc
     * @param string $site_url base used to resolve relative src values
     * @param mixed $allowed_elements unused here
     * @param mixed $disallowed_attributes unused here
     * @param mixed $article_id
     * @return DOMDocument the same document, modified in place
     */
    public function hook_sanitize($doc, $site_url, $allowed_elements, $disallowed_attributes, $article_id) {
        if (!$article_id) {
            return $doc;
        }

        $xpath = new DOMXPath($doc);
        $entries = $xpath->query('(//img[@src])|(//video/source[@src])');

        foreach ($entries as $entry) {
            if (!$entry->hasAttribute('src')) {
                continue;
            }

            $src = rewrite_relative_url($site_url, $entry->getAttribute('src'));
            $local_filename = $article_id."-".sha1($src);

            if ($this->cache->exists($local_filename)) {
                $entry->setAttribute("src", $this->cache->getUrl($local_filename));
                // srcset would still reference the remote originals
                $entry->removeAttribute("srcset");
            }
        }

        return $doc;
    }

    /**
     * Downloads a single URL into the cache unless it is already there.
     *
     * @param mixed $article_id
     * @param string $url absolute URL of the media file
     * @return mixed true if already cached, the result of DiskCache::put()
     *               after a download, false if nothing could be fetched
     */
    private function cache_url($article_id, $url) {
        $local_filename = $article_id."-".sha1($url);

        if ($this->cache->exists($local_filename)) {
            return true;
        }

        Debug::log("cache_images: downloading: $url to $local_filename", Debug::$LOG_VERBOSE);

        // MAX_CACHE_FILE_SIZE is not defined in this file; assumed to be a
        // project-wide constant - TODO confirm
        $data = fetch_file_contents(["url" => $url, "max_size" => MAX_CACHE_FILE_SIZE]);

        if ($data) {
            return $this->cache->put($local_filename, $data);
        }

        return false;
    }

    /**
     * Caches all images/videos of one article (inline media and enclosures).
     *
     * Download attempts are counted in a per-article status file; once
     * $max_cache_attempts is exceeded the article is skipped.
     *
     * @param string $content article HTML
     * @param string $site_url base used to resolve relative URLs
     * @param mixed $owner_uid unused here, kept for interface compatibility
     * @param mixed $article_id
     * @return bool true if at least one file is cached or the article has no
     *              media at all; false otherwise
     */
    private function cache_article_images($content, $site_url, $owner_uid, $article_id) {
        $status_filename = $article_id."-".sha1($site_url).".status";

        /* housekeeping might run as a separate user, in this case status/media might not be writable */
        if (!$this->cache->isWritable($status_filename)) {
            Debug::log("status not writable: $status_filename", Debug::$LOG_VERBOSE);
            return false;
        }

        Debug::log("status: $status_filename", Debug::$LOG_VERBOSE);

        $status = [];

        if ($this->cache->exists($status_filename)) {
            $decoded = json_decode($this->cache->get($status_filename), true);

            // an unreadable/corrupt status file is treated as "no attempts yet"
            if (is_array($decoded)) {
                $status = $decoded;
            }
        }

        // the key is absent on the first attempt, so do not increment it blindly
        $previous_attempts = isset($status["attempt"]) ? (int) $status["attempt"] : 0;
        $status["attempt"] = $previous_attempts + 1;

        // only allow several download attempts for article
        if ($status["attempt"] > $this->max_cache_attempts) {
            Debug::log("too many attempts for $site_url", Debug::$LOG_VERBOSE);
            return false;
        }

        if (!$this->cache->put($status_filename, json_encode($status))) {
            user_error("unable to write status file: $status_filename", E_USER_WARNING);
            return false;
        }

        $doc = new DOMDocument();

        $has_images = false;
        $success = false;

        // the XML prolog makes loadHTML() treat the content as UTF-8
        if ($doc->loadHTML('<?xml encoding="UTF-8">'.$content)) {
            $xpath = new DOMXPath($doc);
            $entries = $xpath->query('(//img[@src])|(//video/source[@src])');

            foreach ($entries as $entry) {
                // inline data: URIs carry their payload already, nothing to download
                if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) {

                    $has_images = true;

                    $src = rewrite_relative_url($site_url, $entry->getAttribute('src'));

                    if ($this->cache_url($article_id, $src)) {
                        $success = true;
                    }
                }
            }
        }

        $esth = $this->pdo->prepare("SELECT content_url FROM ttrss_enclosures WHERE post_id = ? AND
            (content_type LIKE '%image%' OR content_type LIKE '%video%')");

        if ($esth->execute([$article_id])) {
            while ($enc = $esth->fetch()) {

                $has_images = true;
                $url = rewrite_relative_url($site_url, $enc["content_url"]);

                if ($this->cache_url($article_id, $url)) {
                    $success = true;
                }
            }
        }

        // articles without any media count as done so they are not retried
        return $success || !$has_images;
    }

    /**
     * @return int plugin API version
     */
    public function api_version() {
        return 2;
    }
}
230