Issues (1061)

Sources/tasks/ExportProfileData.php (3 issues)

1
<?php
2
/**
3
 * This file incrementally exports a member's profile data to a downloadable file.
4
 *
5
 * Simple Machines Forum (SMF)
6
 *
7
 * @package SMF
8
 * @author Simple Machines https://www.simplemachines.org
9
 * @copyright 2020 Simple Machines and individual contributors
10
 * @license https://www.simplemachines.org/about/smf/license.php BSD
11
 *
12
 * @version 2.1 RC2
13
 */
14
15
/**
 * Class ExportProfileData_Background
 *
 * Background task that incrementally exports a member's profile data to a
 * downloadable file. Each run appends one batch of data to a temporary file
 * and, unless the export is complete, queues another run of itself.
 */
class ExportProfileData_Background extends SMF_BackgroundTask
{
	/**
	 * This is the main dispatcher for the class.
	 * It calls the correct private function based on the information stored in
	 * the task details.
	 *
	 * @return bool Always returns true
	 */
	public function execute()
	{
		if (!defined('EXPORTING'))
			define('EXPORTING', 1);

		// For exports only, members can always see their own posts, even in boards that they can no longer access.
		$member_info = $this->getMinUserInfo(array($this->_details['uid']));
		$member_info = array_merge($member_info[$this->_details['uid']], array(
			'buddies' => array(),
			'query_see_board' => '1=1',
			'query_see_message_board' => '1=1',
			'query_see_topic_board' => '1=1',
			'query_wanna_see_board' => '1=1',
			'query_wanna_see_message_board' => '1=1',
			'query_wanna_see_topic_board' => '1=1',
		));

		// Use some temporary integration hooks to manipulate BBC parsing during export.
		add_integration_function('integrate_pre_parsebbc', 'ExportProfileData_Background::pre_parsebbc', false);
		add_integration_function('integrate_post_parsebbc', 'ExportProfileData_Background::post_parsebbc', false);

		// For now, XML is the only export format we support.
		if ($this->_details['format'] == 'XML')
			$this->exportXml($member_info);

		return true;
	}

	/**
	 * The workhorse of this class. Compiles profile data to XML files.
	 *
	 * One call handles one batch of the datatype named in the task details:
	 * it fetches the data, appends it to the temporary export file just before
	 * the feed footer, records progress, and then either finalizes the export
	 * (renaming the temporary file to its numbered name) or queues another
	 * background task to continue.
	 *
	 * @param array $member_info Member info built by execute(); it is assigned
	 *    to the global $user_info while the export runs.
	 */
	protected function exportXml($member_info)
	{
		global $smcFunc, $sourcedir, $context, $modSettings, $settings, $user_info, $mbname;
		global $user_profile, $txt, $scripturl, $query_this_board;

		// For convenience...
		$uid = $this->_details['uid'];
		$lang = $this->_details['lang'];
		$included = $this->_details['included'];
		$start = $this->_details['start'];
		$latest = $this->_details['latest'];
		$datatype = $this->_details['datatype'];

		// Without a data function and a language file for this datatype, there is nothing we can do.
		if (!isset($included[$datatype]['func']) || !isset($included[$datatype]['langfile']))
			return;

		require_once($sourcedir . DIRECTORY_SEPARATOR . 'News.php');
		require_once($sourcedir . DIRECTORY_SEPARATOR . 'ScheduledTasks.php');

		// Make sure this has been loaded for use in News.php.
		if (!function_exists('cleanXml'))
			require_once($sourcedir . DIRECTORY_SEPARATOR . 'QueryString.php');

		// Setup.
		$done = false;
		$context['xmlnews_uid'] = $uid;
		$context['xmlnews_limit'] = !empty($modSettings['export_rate']) ? $modSettings['export_rate'] : 250;
		$context[$datatype . '_start'] = $start[$datatype];
		$datatypes = array_keys($included);

		// Fake a wee bit of $user_info so that loading the member data & language doesn't choke.
		$user_info = $member_info;

		loadEssentialThemeData();
		$settings['actual_theme_dir'] = $settings['theme_dir'];
		$context['user']['language'] = $lang;
		loadMemberData($uid);
		loadLanguage(implode('+', array_unique(array('index', 'Modifications', 'Stats', 'Profile', $included[$datatype]['langfile']))), $lang);

		// @todo Ask lawyers whether the GDPR requires us to include posts in the recycle bin.
		$query_this_board = '{query_see_board}' . (!empty($modSettings['recycle_enable']) && $modSettings['recycle_board'] > 0 ? ' AND b.id_board != ' . $modSettings['recycle_board'] : '');

		// We need a valid export directory.
		if (empty($modSettings['export_dir']) || !file_exists($modSettings['export_dir']))
		{
			require_once($sourcedir . DIRECTORY_SEPARATOR . 'Profile-Actions.php');
			if (create_export_dir() === false)
				return;
		}

		$export_dir_slash = $modSettings['export_dir'] . DIRECTORY_SEPARATOR;

		$idhash = hash_hmac('sha1', $uid, get_auth_secret());
		$idhash_ext = $idhash . '.xml';

		// Increment the file number until we reach one that doesn't exist.
		$filenum = 1;
		$realfile = $export_dir_slash . $filenum . '_' . $idhash_ext;
		while (file_exists($realfile))
			$realfile = $export_dir_slash . ++$filenum . '_' . $idhash_ext;

		$tempfile = $export_dir_slash . $idhash_ext . '.tmp';
		$progressfile = $export_dir_slash . $idhash_ext . '.progress.json';

		$feed_meta = array(
			'title' => sprintf($txt['profile_of_username'], $user_profile[$uid]['real_name']),
			'desc' => sentence_list(array_map(function ($datatype) use ($txt) { return $txt[$datatype]; }, array_keys($included))),
			'author' => $mbname,
			'source' => $scripturl . '?action=profile;u=' . $uid,
			'self' => $scripturl . '?action=profile;area=download;u=' . $uid . ';t=' . hash_hmac('sha1', $idhash, get_auth_secret()),
		);

		// If a necessary file is missing, we need to start over.
		if (!file_exists($progressfile) || !file_exists($tempfile))
		{
			// glob() returns false on error, which array_merge() cannot accept.
			$old_exports = glob($export_dir_slash . '*_' . $idhash_ext);
			if (!is_array($old_exports))
				$old_exports = array();

			foreach (array_merge(array($tempfile, $progressfile), $old_exports) as $fpath)
				@unlink($fpath);

			// The numbered files we counted above are gone now, so restart the numbering.
			$filenum = 1;
			$realfile = $export_dir_slash . $filenum . '_' . $idhash_ext;

			// Start a fresh temporary file containing only the feed header and footer.
			buildXmlFeed('smf', array(), $feed_meta, 'profile');
			file_put_contents($tempfile, implode('', array($context['feed']['header'], $context['feed']['footer'])));
		}

		$progress = file_exists($progressfile) ? $smcFunc['json_decode'](file_get_contents($progressfile), true) : array_fill_keys($datatypes, 0);

		// Get the data, always in ascending order.
		$xml_data = call_user_func($included[$datatype]['func'], 'smf', true);

		// Build the XML string from the data.
		buildXmlFeed('smf', $xml_data, $feed_meta, $datatype);

		// If the last item starts with an 'id' element, remember that ID so we can track progress by it.
		$last_item = end($xml_data);
		if (isset($last_item['content'][0]['content']) && $last_item['content'][0]['tag'] === 'id')
			$last_id = $last_item['content'][0]['content'];

		// Some paranoid hosts disable or hamstring the disk space functions in an attempt at security via obscurity.
		$diskspace = function_exists('disk_free_space') ? @disk_free_space($modSettings['export_dir']) : false;

		// disk_free_space() gives us a float on success and false on failure, so accept any real number.
		if (!is_int($diskspace) && !is_float($diskspace))
			$diskspace = PHP_INT_MAX;

		if (empty($modSettings['export_min_diskspace_pct']))
			$minspace = 0;
		else
		{
			$totalspace = function_exists('disk_total_space') ? @disk_total_space($modSettings['export_dir']) : false;
			$minspace = intval($totalspace) < 1440 ? 0 : $totalspace * $modSettings['export_min_diskspace_pct'] / 100;
		}

		// Append the string (assuming there's enough disk space).
		if ($diskspace - $minspace > strlen($context['feed']['items']))
		{
			// If the temporary file has grown to 250MB, save it and start a new one.
			if (file_exists($tempfile) && (filesize($tempfile) + strlen($context['feed']['items'])) >= 1024 * 1024 * 250)
			{
				rename($tempfile, $realfile);
				$realfile = $export_dir_slash . ++$filenum . '_' . $idhash_ext;

				file_put_contents($tempfile, implode('', array($context['feed']['header'], $context['feed']['footer'])));
			}

			// Insert the new data before the feed footer.
			$handle = fopen($tempfile, 'r+');
			if (is_resource($handle))
			{
				// Position the pointer at the start of the existing footer, so that we overwrite it.
				fseek($handle, strlen($context['feed']['footer']) * -1, SEEK_END);

				$new_content = $context['feed']['items'] . $context['feed']['footer'];
				$bytes_written = fwrite($handle, $new_content);

				// If we couldn't write everything, revert the changes and consider the write to have failed.
				if ($bytes_written > 0 && $bytes_written < strlen($new_content))
				{
					// Chop off the partial write, then put the footer back.
					fseek($handle, $bytes_written * -1, SEEK_END);
					$pointer_pos = ftell($handle);
					ftruncate($handle, $pointer_pos);
					rewind($handle);
					fseek($handle, 0, SEEK_END);
					fwrite($handle, $context['feed']['footer']);

					$bytes_written = false;
				}

				fclose($handle);
			}

			// All went well.
			if (!empty($bytes_written))
			{
				// Track progress by ID where appropriate, and by time otherwise.
				$progress[$datatype] = !isset($last_id) ? time() : $last_id;

				// Decide what to do next.
				if (!isset($last_id) || $last_id >= $latest[$datatype])
				{
					// This datatype is finished, so move on to the next one, if any.
					$datatype_key = array_search($datatype, $datatypes);
					$done = !isset($datatypes[$datatype_key + 1]);

					if (!$done)
						$datatype = $datatypes[$datatype_key + 1];
				}

				$delay = 0;
			}
			// Write failed. We'll try again next time.
			else
				$delay = MAX_CLAIM_THRESHOLD;
		}
		// Not enough disk space, so pause for a day to give the admin a chance to fix it.
		else
			$delay = 86400;

		// Remove the .tmp extension so the system knows that the file is ready for download.
		if (!empty($done))
			rename($tempfile, $realfile);

		// Oops. Apparently some sneaky monkey cancelled the export while we weren't looking.
		elseif (!file_exists($progressfile))
		{
			@unlink($tempfile);
			return;
		}

		// We have more work to do again later.
		else
		{
			$start[$datatype] = $progress[$datatype];

			$data = $smcFunc['json_encode'](array(
				'format' => $this->_details['format'],
				'uid' => $uid,
				'lang' => $lang,
				'included' => $included,
				'start' => $start,
				'latest' => $latest,
				'datatype' => $datatype,
			));

			// NOTE(review): the single quotes store '$sourcedir' as a literal placeholder,
			// presumably expanded by whatever runs the queued task - confirm before changing.
			// The claimed_time is offset so that the task becomes claimable after $delay seconds.
			$smcFunc['db_insert']('insert', '{db_prefix}background_tasks',
				array('task_file' => 'string-255', 'task_class' => 'string-255', 'task_data' => 'string', 'claimed_time' => 'int'),
				array('$sourcedir/tasks/ExportProfileData.php', 'ExportProfileData_Background', $data, time() - MAX_CLAIM_THRESHOLD + $delay),
				array()
			);
		}

		file_put_contents($progressfile, $smcFunc['json_encode']($progress));
	}

	/**
	 * Callback registered on the integrate_pre_parsebbc hook by execute().
	 *
	 * Sets $smileys to false, empties $cache_id, and makes sure 'attach' is in
	 * $modSettings['disabledBBC'] for the duration of the parse. The original
	 * value is stashed in $context['real_disabledBBC'] so that post_parsebbc()
	 * can restore it.
	 *
	 * @param string &$message Not used here.
	 * @param bool &$smileys Always set to false.
	 * @param string &$cache_id Always set to an empty string.
	 * @param array &$parse_tags Not used here.
	 */
	public static function pre_parsebbc(&$message, &$smileys, &$cache_id, &$parse_tags)
	{
		global $modSettings, $context;

		$smileys = false;
		$cache_id = '';

		if (!isset($modSettings['disabledBBC']))
			$modSettings['disabledBBC'] = '';

		// Remember the real setting so that it can be put back afterwards.
		$context['real_disabledBBC'] = $modSettings['disabledBBC'];

		// "O, that way madness lies; let me shun that; No more of that."
		if (strpos($modSettings['disabledBBC'], 'attach') === false)
			$modSettings['disabledBBC'] = implode(',', array_merge(array_filter(explode(',', $modSettings['disabledBBC'])), array('attach')));
	}

	/**
	 * Callback registered on the integrate_post_parsebbc hook by execute().
	 *
	 * Restores $modSettings['disabledBBC'] to the value that pre_parsebbc()
	 * saved in $context['real_disabledBBC'], if there is one.
	 *
	 * @param string &$message Not used here.
	 * @param bool &$smileys Not used here.
	 * @param string &$cache_id Not used here.
	 * @param array &$parse_tags Not used here.
	 */
	public static function post_parsebbc(&$message, &$smileys, &$cache_id, &$parse_tags)
	{
		global $modSettings, $context;

		if (isset($context['real_disabledBBC']))
			$modSettings['disabledBBC'] = $context['real_disabledBBC'];
	}
}
289
290
?>