Passed
Pull Request — master (#5)
by Cody
03:17
created

Cache_Starred_Images   A

Complexity

Total Complexity 38

Size/Duplication

Total Lines 221
Duplicated Lines 0 %

Importance

Changes 5
Bugs 1 Features 0
Metric Value
eloc 105
c 5
b 1
f 0
dl 0
loc 221
rs 9.36
wmc 38
1
<?php
2
class Cache_Starred_Images extends Plugin {
3
4
	/** @var PluginHost $host plugin host handed to init(); used to register hooks and to look up the owner uid */
5
	private $host;
6
	/** @var DiskCache $cache the "starred-images" disk cache, created in init() */
7
	private $cache;
8
    private $max_cache_attempts = 5; // per-article; cache_article_images() gives up once the recorded attempt count exceeds this
9
10
	/**
	 * Describe this plugin.
	 *
	 * @return array three-element list: 1.0, a one-line description and "fox"
	 *               (presumably version, description and author -- confirm
	 *               against the Plugin base class contract)
	 */
	public function about() {
		$version = 1.0;
		$description = "Automatically cache media files in Starred articles";
		$author = "fox";

		return [$version, $description, $author];
	}
15
16
	/**
	 * Create the "starred-images" disk cache and register this plugin's hooks.
	 *
	 * Hooks are registered only when the cache reports itself writable;
	 * otherwise an E_USER_WARNING is raised and no hook is added.
	 *
	 * @param PluginHost $host plugin host to register the hooks on
	 */
	public function init($host) {
		$this->host = $host;
		$this->cache = new DiskCache("starred-images");

		// a truthy makeDir() result is followed by opening up the directory permissions
		if ($this->cache->makeDir()) {
			chmod($this->cache->getDir(), 0777);
		}

		// marker file, created once when missing
		if (!$this->cache->exists(".no-auto-expiry")) {
			$this->cache->touch(".no-auto-expiry");
		}

		if (!$this->cache->isWritable()) {
			user_error("Starred cache directory ".$this->cache->getDir()." is not writable.", E_USER_WARNING);
			return;
		}

		$hooks = [
			$host::HOOK_HOUSE_KEEPING,
			$host::HOOK_ENCLOSURE_ENTRY,
			$host::HOOK_SANITIZE,
		];

		foreach ($hooks as $hook) {
			$host->add_hook($hook, $this);
		}
	}
34
35
	/**
	 * Housekeeping hook.
	 *
	 * Phase 1: picks up to 100 random starred articles of the current owner
	 * whose plugin_data does not yet contain "starred_cache_images", tries to
	 * cache their media, and on success prepends the
	 * "starred_cache_images,<owner_uid>:" marker to plugin_data.
	 *
	 * Phase 2: walks the cache directory and deletes matching files whose
	 * article id (the part of the file name before the first "-") no longer
	 * exists in ttrss_entries.
	 *
	 * $this->pdo is not declared in this class -- presumably provided by the
	 * Plugin base class.
	 */
	public function hook_house_keeping() {
		/* since HOOK_UPDATE_TASK is not available to user plugins, this hook is a next best thing */

		$owner_uid = $this->host->get_owner_uid();

		Debug::log("caching media of starred articles for user " . $owner_uid . "...");

		$sth = $this->pdo->prepare("SELECT content, ttrss_entries.title,
       		ttrss_user_entries.owner_uid, link, site_url, ttrss_entries.id, plugin_data
			FROM ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON
				(ttrss_user_entries.feed_id = ttrss_feeds.id)
			WHERE ref_id = ttrss_entries.id AND
				marked = true AND
				site_url != '' AND
			    ttrss_user_entries.owner_uid = ? AND
				plugin_data NOT LIKE '%starred_cache_images%'
			ORDER BY ".sql_random_function()." LIMIT 100");

		if ($sth->execute([$owner_uid])) {

			$usth = $this->pdo->prepare("UPDATE ttrss_entries SET plugin_data = ? WHERE id = ?");

			while ($line = $sth->fetch()) {
				Debug::log("processing article " . $line["title"], Debug::$LOG_VERBOSE);

				if (!$line["site_url"]) {
					continue;
				}

				$success = $this->cache_article_images($line["content"], $line["site_url"], $line["owner_uid"], $line["id"]);

				if ($success) {
					// plain concatenation: the "${...}" interpolation form is deprecated as of PHP 8.2
					$plugin_data = "starred_cache_images," . $line["owner_uid"] . ":" . $line["plugin_data"];

					$usth->execute([$plugin_data, $line["id"]]);
				}
			}
		}

		/* actual housekeeping */

		Debug::log("expiring " . $this->cache->getDir() . "...");

		// NOTE(review): cache_url() stores media under extension-less names, so this
		// pattern appears to match only *.status files (plus any .png/.mp4 leftovers);
		// confirm whether extension-less media files are meant to be expired as well.
		$files = glob($this->cache->getDir() . "/*.{png,mp4,status}", GLOB_BRACE);

		// glob() returns false on error; treat that as "nothing to expire"
		if (!is_array($files)) {
			$files = [];
		}

		// prepared once and re-executed per distinct article id
		$exists_sth = $this->pdo->prepare("SELECT id FROM ttrss_entries WHERE id = ?");

		$last_article_id = 0;
		$article_exists = 1;

		foreach ($files as $file) {
			// file names are "<article id>-<hash>[.ext]"; only the id part is needed,
			// and indexing [0] is safe even when the name contains no "-"
			$article_id = explode("-", basename($file), 2)[0];

			// glob() sorts its result by default, so files of one article are adjacent
			// and the lookup runs once per article rather than once per file
			if ($article_id != $last_article_id) {
				$last_article_id = $article_id;

				$exists_sth->execute([$article_id]);
				$article_exists = $exists_sth->fetch();

				// release the result set so the statement can be executed again
				$exists_sth->closeCursor();
			}

			if (!$article_exists) {
				unlink($file);
			}
		}
	}
96
97
	/**
	 * Enclosure hook: point an enclosure at its cached copy when one exists.
	 *
	 * The cache entry name is "<article_id>-<sha1 of content_url>".
	 *
	 * @param array $enc enclosure record; only "content_url" is read and possibly replaced
	 * @param mixed $article_id article id used as the cache name prefix
	 * @return array the enclosure, with "content_url" swapped for the cache URL on a hit
	 */
	public function hook_enclosure_entry($enc, $article_id) {
		$cached_name = $article_id . "-" . sha1($enc["content_url"]);

		if (!$this->cache->exists($cached_name)) {
			return $enc;
		}

		$enc["content_url"] = $this->cache->getUrl($cached_name);

		return $enc;
	}
106
107
	/**
	 * Sanitize hook: rewrite <img> and <video><source> "src" attributes to the
	 * cached copy when the cache holds an entry for that article and URL.
	 *
	 * Nothing is rewritten when $article_id is falsy. On a cache hit the
	 * element's "srcset" attribute is removed as well.
	 *
	 * @param DOMDocument $doc document being sanitized; modified in place
	 * @param string $site_url base URL passed to rewrite_relative_url()
	 * @param mixed $allowed_elements not used here
	 * @param mixed $disallowed_attributes not used here
	 * @param mixed $article_id article id used as the cache name prefix
	 * @return DOMDocument the same document object
	 */
	public function hook_sanitize($doc, $site_url, $allowed_elements, $disallowed_attributes, $article_id) {
		$xpath = new DOMXPath($doc);

		if (!$article_id) {
			return $doc;
		}

		foreach ($xpath->query('(//img[@src])|(//video/source[@src])') as $node) {
			if (!$node->hasAttribute('src')) {
				continue;
			}

			$absolute_src = rewrite_relative_url($site_url, $node->getAttribute('src'));
			$cached_name = $article_id . "-" . sha1($absolute_src);

			if ($this->cache->exists($cached_name)) {
				$node->setAttribute("src", $this->cache->getUrl($cached_name));
				$node->removeAttribute("srcset");
			}
		}

		return $doc;
	}
129
130
	/**
	 * Make sure a cache entry exists for the given article and URL.
	 *
	 * The entry name is "<article_id>-<sha1 of url>". An existing entry is
	 * left untouched; otherwise the URL is fetched (size-limited by
	 * MAX_CACHE_FILE_SIZE) and stored.
	 *
	 * @param mixed $article_id article id used as the cache name prefix
	 * @param string $url media URL to download
	 * @return mixed true when the entry already exists, the result of
	 *               DiskCache::put() after a download, false when the fetch
	 *               yielded nothing
	 */
	private function cache_url($article_id, $url) {
		$local_filename = $article_id . "-" . sha1($url);

		// already cached: nothing to download
		if ($this->cache->exists($local_filename)) {
			return true;
		}

		Debug::log("cache_images: downloading: $url to $local_filename", Debug::$LOG_VERBOSE);

		$data = fetch_file_contents(["url" => $url, "max_size" => MAX_CACHE_FILE_SIZE]);

		return $data ? $this->cache->put($local_filename, $data) : false;
	}
149
150
	/**
	 * Cache all images and videos referenced by one article.
	 *
	 * Sources are the <img>/<video><source> "src" attributes in $content
	 * (data: URIs skipped) and the article's image/video enclosures from
	 * ttrss_enclosures. A per-article ".status" file in the cache records the
	 * number of attempts; once it exceeds $max_cache_attempts the article is
	 * skipped.
	 *
	 * $this->pdo is not declared in this class -- presumably provided by the
	 * Plugin base class.
	 *
	 * @param string $content article HTML
	 * @param string $site_url base URL for resolving relative media URLs; also part of the status file name
	 * @param mixed $owner_uid not used here; kept so the signature stays unchanged
	 * @param mixed $article_id article id used as the cache name prefix
	 * @return bool true when at least one media file is cached or the article
	 *              has no media at all; false otherwise (including: status
	 *              file not writable, too many attempts)
	 */
	private function cache_article_images($content, $site_url, $owner_uid, $article_id) {
		$status_filename = $article_id . "-" . sha1($site_url) . ".status";

		/* housekeeping might run as a separate user, in this case status/media might not be writable */
		if (!$this->cache->isWritable($status_filename)) {
			Debug::log("status not writable: $status_filename", Debug::$LOG_VERBOSE);
			return false;
		}

		Debug::log("status: $status_filename", Debug::$LOG_VERBOSE);

		$status = [];

		if ($this->cache->exists($status_filename)) {
			$decoded = json_decode($this->cache->get($status_filename), true);

			// an empty, truncated or otherwise invalid status file does not decode
			// to an array; start from a clean status instead of using the bad value
			if (is_array($decoded)) {
				$status = $decoded;
			}
		}

		// the counter is absent on the first attempt, so read it defensively
		// instead of incrementing an undefined array key
		$previous_attempts = isset($status["attempt"]) ? (int) $status["attempt"] : 0;
		$status["attempt"] = $previous_attempts + 1;

		// only allow several download attempts for article
		if ($status["attempt"] > $this->max_cache_attempts) {
			Debug::log("too many attempts for $site_url", Debug::$LOG_VERBOSE);
			return false;
		}

		if (!$this->cache->put($status_filename, json_encode($status))) {
			user_error("unable to write status file: $status_filename", E_USER_WARNING);
			return false;
		}

		$doc = new DOMDocument();

		$has_images = false;
		$success = false;

		// the XML encoding prefix makes loadHTML() treat the content as UTF-8
		if ($doc->loadHTML('<?xml encoding="UTF-8">' . $content)) {
			$xpath = new DOMXPath($doc);
			$entries = $xpath->query('(//img[@src])|(//video/source[@src])');

			foreach ($entries as $entry) {

				// inline data: URIs need no download
				if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) {

					$has_images = true;

					$src = rewrite_relative_url($site_url, $entry->getAttribute('src'));

					if ($this->cache_url($article_id, $src)) {
						$success = true;
					}
				}
			}
		}

		$esth = $this->pdo->prepare("SELECT content_url FROM ttrss_enclosures WHERE post_id = ? AND
			(content_type LIKE '%image%' OR content_type LIKE '%video%')");

		if ($esth->execute([$article_id])) {
			while ($enc = $esth->fetch()) {

				$has_images = true;
				$url = rewrite_relative_url($site_url, $enc["content_url"]);

				if ($this->cache_url($article_id, $url)) {
					$success = true;
				}
			}
		}

		// an article without any media counts as done
		return $success || !$has_images;
	}
220
221
	/**
	 * Report the plugin API version this plugin declares.
	 *
	 * @return int always 2
	 */
	public function api_version() {
		return 2;
	}
224
}
225