Completed
Pull Request — master (#23)
by
unknown
02:00
created

similar_topics::clean_topic_title()   B

Complexity

Conditions 6
Paths 2

Size

Total Lines 12
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 8.8571
c 0
b 0
f 0
cc 6
eloc 5
nc 2
nop 1
1
<?php
2
/**
3
 *
4
 * Precise Similar Topics
5
 *
6
 * @copyright (c) 2013 Matt Friedman
7
 * @license GNU General Public License, version 2 (GPL-2.0)
8
 *
9
 */
10
11
namespace vse\similartopics\core;
12
13
class similar_topics
14
{
15
	/** @var \phpbb\auth\auth */
16
	protected $auth;
17
18
	/** @var \phpbb\cache\service */
19
	protected $cache;
20
21
	/** @var \phpbb\config\config */
22
	protected $config;
23
24
	/** @var \phpbb\db\driver\driver_interface */
25
	protected $db;
26
27
	/** @var \phpbb\event\dispatcher_interface */
28
	protected $dispatcher;
29
30
	/** @var \phpbb\pagination */
31
	protected $pagination;
32
33
	/** @var \phpbb\request\request */
34
	protected $request;
35
36
	/** @var \phpbb\template\template */
37
	protected $template;
38
39
	/** @var \phpbb\user */
40
	protected $user;
41
42
	/** @var \phpbb\content_visibility */
43
	protected $content_visibility;
44
45
	/** @var string phpBB root path  */
46
	protected $root_path;
47
48
	/** @var string PHP file extension */
49
	protected $php_ext;
50
51
	/**
	 * Constructor
	 *
	 * Stores the injected phpBB services and path settings on the instance;
	 * no other work is done here.
	 *
	 * @access public
	 * @param \phpbb\auth\auth                  $auth               Auth object
	 * @param \phpbb\cache\service              $cache              Cache service
	 * @param \phpbb\config\config              $config             Config object
	 * @param \phpbb\db\driver\driver_interface $db                 Database driver
	 * @param \phpbb\event\dispatcher_interface $dispatcher         Event dispatcher
	 * @param \phpbb\pagination                 $pagination         Pagination object
	 * @param \phpbb\request\request            $request            Request object
	 * @param \phpbb\template\template          $template           Template object
	 * @param \phpbb\user                       $user               User object
	 * @param \phpbb\content_visibility         $content_visibility Content visibility object
	 * @param string                            $root_path          phpBB root path
	 * @param string                            $php_ext            PHP file extension
	 */
	public function __construct(
		\phpbb\auth\auth $auth,
		\phpbb\cache\service $cache,
		\phpbb\config\config $config,
		\phpbb\db\driver\driver_interface $db,
		\phpbb\event\dispatcher_interface $dispatcher,
		\phpbb\pagination $pagination,
		\phpbb\request\request $request,
		\phpbb\template\template $template,
		\phpbb\user $user,
		\phpbb\content_visibility $content_visibility,
		$root_path,
		$php_ext
	)
	{
		// Path settings
		$this->root_path = $root_path;
		$this->php_ext = $php_ext;

		// Permissions, configuration and storage
		$this->auth = $auth;
		$this->config = $config;
		$this->cache = $cache;
		$this->db = $db;
		$this->content_visibility = $content_visibility;

		// Request handling and output
		$this->request = $request;
		$this->user = $user;
		$this->template = $template;
		$this->pagination = $pagination;
		$this->dispatcher = $dispatcher;
	}
83
84
	/**
	 * Is similar topics available?
	 *
	 * Available means: configured by the admin, viewable by the current
	 * user, and running on one of the supported database layers
	 * (MySQL or PostgreSQL).
	 *
	 * @access public
	 * @return bool True if available, false otherwise
	 */
	public function is_available()
	{
		// Checked in this order so the database layer is only inspected
		// once the cheaper config and permission checks have passed
		if (!$this->is_enabled() || !$this->is_viewable())
		{
			return false;
		}

		return $this->is_mysql() || $this->is_postgres();
	}
94
95
	/**
	 * Is similar topics configured?
	 *
	 * Both the 'similar_topics' and the 'similar_topics_limit' config
	 * values must be non-empty.
	 *
	 * @access public
	 * @return bool True if configured, false otherwise
	 */
	public function is_enabled()
	{
		return !(empty($this->config['similar_topics']) || empty($this->config['similar_topics_limit']));
	}
105
106
	/**
	 * Is similar topics viewable by the user?
	 *
	 * Requires the user's own 'user_similar_topics' setting to be non-empty
	 * and the 'u_similar_topics' permission to be granted.
	 *
	 * @access public
	 * @return bool True if viewable, false otherwise
	 */
	public function is_viewable()
	{
		// The user's setting is checked first; the permission is only
		// consulted when that setting is on
		if (empty($this->user->data['user_similar_topics']))
		{
			return false;
		}

		return (bool) $this->auth->acl_get('u_similar_topics');
	}
116
117
	/**
	 * Get similar topics by matching topic titles and assign them to the
	 * 'similar' template block
	 *
	 * NOTE: Requires database full-text search. On MySQL this uses FULLTEXT
	 * indexes with MATCH and AGAINST and UNIX_TIMESTAMP; on PostgreSQL it
	 * uses ts_rank_cd over to_tsvector. MySQL FULLTEXT has built-in
	 * English ignore words. We use phpBB's ignore words for non-English
	 * languages. We also remove any admin-defined special ignore words.
	 *
	 * Returns early, without touching the template, when the forum hides
	 * similar topics, when the cleaned topic title is empty, or when the
	 * forum is limited to a set of forums and none of them remain after
	 * passworded forums are removed.
	 *
	 * Triggers the events vse.similartopics.get_topic_data,
	 * vse.similartopics.modify_rowset and vse.similartopics.modify_topicrow.
	 *
	 * @access public
	 * @param array $topic_data Array with topic data. Keys read here:
	 *                          similar_topics_hide, topic_title, topic_id
	 *                          and similar_topic_forums (JSON encoded list
	 *                          of forum ids, may be empty)
	 */
	public function display_similar_topics($topic_data)
	{
		// If the forum should not display similar topics, no need to continue
		if ($topic_data['similar_topics_hide'])
		{
			return;
		}

		$topic_title = $this->clean_topic_title($topic_data['topic_title']);

		// If the cleaned up topic_title is empty, no need to continue
		if (empty($topic_title))
		{
			return;
		}

		// Get stored sensitivity value and divide by 10. In query it should be a number between 0.0 to 1.0.
		$sensitivity = (float) ($this->config->offsetExists('similar_topics_sense') ? $this->config['similar_topics_sense'] / 10 : '0.5');

		// Build the relevance expression once. It is used both as the selected
		// score and as the filter, so the two can never drift apart.
		if ($this->is_postgres())
		{
			$ts_query_text = $this->db->sql_escape(str_replace(' ', '|', $topic_title));
			// The text search configuration name is a stored config value that
			// ends up inside a quoted SQL literal, so it is escaped as well
			$ts_name = $this->db->sql_escape($this->config['similar_topics_postgres_ts_name']);

			$score_sql = "ts_rank_cd(to_tsvector('$ts_name', t.topic_title), '$ts_query_text', 32)";
			$unix_ts = 'extract(epoch from current_timestamp)::integer';
		}
		else
		{
			$score_sql = "MATCH (t.topic_title) AGAINST ('" . $this->db->sql_escape($topic_title) . "')";
			$unix_ts = 'UNIX_TIMESTAMP()';
		}

		$select = 'f.forum_id, f.forum_name, t.*, ' . $score_sql . ' AS score';
		$where = $score_sql . ' >= ' . $sensitivity;

		// Similar Topics query
		$sql_array = array(
			'SELECT'	=> $select,
			'FROM'		=> array(
				TOPICS_TABLE	=> 't',
			),
			'LEFT_JOIN'	=> array(
				array(
					'FROM'	=>	array(FORUMS_TABLE	=> 'f'),
					'ON'	=> 'f.forum_id = t.forum_id',
				),
			),
			'WHERE'		=> $where . '
				AND t.topic_status <> ' . ITEM_MOVED . '
				AND t.topic_visibility = ' . ITEM_APPROVED . '
				AND t.topic_time > (' . $unix_ts . ' - ' . (int) $this->config['similar_topics_time'] . ')
				AND t.topic_id <> ' . (int) $topic_data['topic_id'],
		);

		// Decide once how read-tracking data is obtained. The same decision is
		// needed while building the query and again for every result row.
		$is_registered = $this->user->data['is_registered'];
		$use_db_tracking = $is_registered && $this->config['load_db_lastread'] && !$this->config['similar_topics_cache'];
		$use_fallback_tracking = !$use_db_tracking && ($this->config['load_anon_lastread'] || $is_registered);

		$tracking_topics = array();

		// Add topic tracking data to the query (only if query caching is off)
		if ($use_db_tracking)
		{
			$user_id = (int) $this->user->data['user_id'];

			$sql_array['LEFT_JOIN'][] = array('FROM' => array(TOPICS_TRACK_TABLE => 'tt'), 'ON' => 'tt.topic_id = t.topic_id AND tt.user_id = ' . $user_id);
			$sql_array['LEFT_JOIN'][] = array('FROM' => array(FORUMS_TRACK_TABLE => 'ft'), 'ON' => 'ft.forum_id = f.forum_id AND ft.user_id = ' . $user_id);
			$sql_array['SELECT'] .= ', tt.mark_time, ft.mark_time as f_mark_time';
		}
		else if ($use_fallback_tracking)
		{
			// Cookie based tracking copied from search.php
			$tracking_topics = $this->request->variable($this->config['cookie_name'] . '_track', '', true, \phpbb\request\request_interface::COOKIE);
			$tracking_topics = $tracking_topics ? tracking_unserialize($tracking_topics) : array();
		}

		// We need to exclude passworded forums so we do not leak the topic title
		$passworded_forums = $this->user->get_passworded_forums();

		// See if the admin set this forum to only search a specific group of other forums, and include them
		if (!empty($topic_data['similar_topic_forums']))
		{
			// A malformed or non-list value decodes to something other than an
			// array; treat that as "no forums to search" instead of passing it
			// on to array_diff()
			$search_forums = json_decode($topic_data['similar_topic_forums'], true);

			// Remove any passworded forums from this group of forums we will be searching
			$included_forums = is_array($search_forums) ? array_diff($search_forums, $passworded_forums) : array();

			// if there's nothing left to display (user has no access to the forums we want to search)
			if (empty($included_forums))
			{
				return;
			}

			$sql_array['WHERE'] .= ' AND ' . $this->db->sql_in_set('f.forum_id', $included_forums);
		}
		// Otherwise exclude any ignored forums
		else
		{
			// Remove any passworded forums
			if (!empty($passworded_forums))
			{
				$sql_array['WHERE'] .= ' AND ' . $this->db->sql_in_set('f.forum_id', $passworded_forums, true);
			}

			$sql_array['WHERE'] .= ' AND f.similar_topics_ignore = 0';
		}

		$sql_array['ORDER_BY'] = 'score DESC, t.topic_time DESC';

		/**
		 * Event to modify the sql_array for similar topics
		 *
		 * @event vse.similartopics.get_topic_data
		 * @var array sql_array SQL array to get similar topics data
		 * @since 1.3.0
		 */
		$vars = array('sql_array');
		extract($this->dispatcher->trigger_event('vse.similartopics.get_topic_data', compact($vars)));

		$rowset = array();

		$sql = $this->db->sql_build_query('SELECT', $sql_array);
		$result = $this->db->sql_query_limit($sql, (int) $this->config['similar_topics_limit'], 0, (int) $this->config['similar_topics_cache']);
		while ($row = $this->db->sql_fetchrow($result))
		{
			$rowset[(int) $row['topic_id']] = $row;
		}
		$this->db->sql_freeresult($result);

		// Grab icons
		$icons = $this->cache->obtain_icons();

		/**
		 * Modify the rowset data for similar topics
		 *
		 * @event vse.similartopics.modify_rowset
		 * @var	array rowset Array with the search results data
		 * @since 1.4.2
		 */
		$vars = array('rowset');
		extract($this->dispatcher->trigger_event('vse.similartopics.modify_rowset', compact($vars)));

		foreach ($rowset as $row)
		{
			$similar_forum_id = (int) $row['forum_id'];
			$similar_topic_id = (int) $row['topic_id'];

			// Skip topics in forums the user is not allowed to read
			if (!$this->auth->acl_get('f_read', $similar_forum_id))
			{
				continue;
			}

			// Get topic tracking info
			$topic_tracking_info = array();
			if ($use_db_tracking)
			{
				$topic_tracking_info = get_topic_tracking($similar_forum_id, $similar_topic_id, $rowset, array($similar_forum_id => $row['f_mark_time']));
			}
			else if ($use_fallback_tracking)
			{
				$topic_tracking_info = get_complete_topic_tracking($similar_forum_id, $similar_topic_id);

				if (!$is_registered)
				{
					$this->user->data['user_lastmark'] = isset($tracking_topics['l']) ? ((int) base_convert($tracking_topics['l'], 36, 10) + (int) $this->config['board_startdate']) : 0;
				}
			}

			// Replies
			$replies = $this->content_visibility->get_count('topic_posts', $row, $similar_forum_id) - 1;

			// Get folder img, topic status/type related information
			$folder_img = $folder_alt = $topic_type = '';
			$unread_topic = isset($topic_tracking_info[$similar_topic_id]) && $row['topic_last_post_time'] > $topic_tracking_info[$similar_topic_id];
			topic_status($row, $replies, $unread_topic, $folder_img, $folder_alt, $topic_type);

			$can_approve = $this->auth->acl_get('m_approve', $similar_forum_id);
			$topic_unapproved = $row['topic_visibility'] == ITEM_UNAPPROVED && $can_approve;
			$posts_unapproved = $row['topic_visibility'] == ITEM_APPROVED && $row['topic_posts_unapproved'] && $can_approve;
			$u_mcp_queue = ($topic_unapproved || $posts_unapproved) ? append_sid("{$this->root_path}mcp.{$this->php_ext}", 'i=queue&amp;mode=' . ($topic_unapproved ? 'approve_details' : 'unapproved_posts') . "&amp;t=$similar_topic_id", true, $this->user->session_id) : '';

			// Shared pieces of the topic links below
			$viewtopic_file = "{$this->root_path}viewtopic.{$this->php_ext}";
			$topic_params = 'f=' . $similar_forum_id . '&amp;t=' . $similar_topic_id;
			$base_url = append_sid($viewtopic_file, $topic_params);

			$has_icon = !empty($icons[$row['icon_id']]);

			$topic_row = array(
				'TOPIC_AUTHOR_FULL'		=> get_username_string('full', $row['topic_poster'], $row['topic_first_poster_name'], $row['topic_first_poster_colour']),
				'FIRST_POST_TIME'		=> $this->user->format_date($row['topic_time']),
				'LAST_POST_TIME'		=> $this->user->format_date($row['topic_last_post_time']),
				'LAST_POST_AUTHOR_FULL'	=> get_username_string('full', $row['topic_last_poster_id'], $row['topic_last_poster_name'], $row['topic_last_poster_colour']),

				'TOPIC_REPLIES'			=> $replies,
				'TOPIC_VIEWS'			=> $row['topic_views'],
				'TOPIC_TITLE'			=> censor_text($row['topic_title']),
				'FORUM_TITLE'			=> $row['forum_name'],

				'TOPIC_IMG_STYLE'		=> $folder_img,
				'TOPIC_FOLDER_IMG'		=> $this->user->img($folder_img, $folder_alt),
				'TOPIC_FOLDER_IMG_ALT'	=> $this->user->lang($folder_alt),

				'TOPIC_ICON_IMG'		=> $has_icon ? $icons[$row['icon_id']]['img'] : '',
				'TOPIC_ICON_IMG_WIDTH'	=> $has_icon ? $icons[$row['icon_id']]['width'] : '',
				'TOPIC_ICON_IMG_HEIGHT'	=> $has_icon ? $icons[$row['icon_id']]['height'] : '',
				'ATTACH_ICON_IMG'		=> ($this->auth->acl_get('u_download') && $this->auth->acl_get('f_download', $similar_forum_id) && $row['topic_attachment']) ? $this->user->img('icon_topic_attach', $this->user->lang('TOTAL_ATTACHMENTS')) : '',
				'UNAPPROVED_IMG'		=> ($topic_unapproved || $posts_unapproved) ? $this->user->img('icon_topic_unapproved', $topic_unapproved ? 'TOPIC_UNAPPROVED' : 'POSTS_UNAPPROVED') : '',

				'S_UNREAD_TOPIC'		=> $unread_topic,
				'S_TOPIC_REPORTED'		=> !empty($row['topic_reported']) && $this->auth->acl_get('m_report', $similar_forum_id),
				'S_TOPIC_UNAPPROVED'	=> $topic_unapproved,
				'S_POSTS_UNAPPROVED'	=> $posts_unapproved,
				'S_HAS_POLL'			=> (bool) $row['poll_start'],

				'U_NEWEST_POST'			=> append_sid($viewtopic_file, $topic_params . '&amp;view=unread') . '#unread',
				'U_LAST_POST'			=> append_sid($viewtopic_file, $topic_params . '&amp;p=' . $row['topic_last_post_id']) . '#p' . $row['topic_last_post_id'],
				'U_VIEW_TOPIC'			=> append_sid($viewtopic_file, $topic_params),
				'U_VIEW_FORUM'			=> append_sid("{$this->root_path}viewforum.{$this->php_ext}", 'f=' . $similar_forum_id),
				'U_MCP_REPORT'			=> append_sid("{$this->root_path}mcp.{$this->php_ext}", 'i=reports&amp;mode=reports&amp;' . $topic_params, true, $this->user->session_id),
				'U_MCP_QUEUE'			=> $u_mcp_queue,
			);

			/**
			 * Event to modify the similar topics template block
			 *
			 * @event vse.similartopics.modify_topicrow
			 * @var array row       Array with similar topic data
			 * @var array topic_row Template block array
			 * @since 1.3.0
			 */
			$vars = array('row', 'topic_row');
			extract($this->dispatcher->trigger_event('vse.similartopics.modify_topicrow', compact($vars)));

			$this->template->assign_block_vars('similar', $topic_row);

			$this->pagination->generate_template_pagination($base_url, 'similar.pagination', 'start', $replies + 1, $this->config['posts_per_page'], 1, true, true);
		}

		$this->user->add_lang_ext('vse/similartopics', 'similar_topics');

		$this->template->assign_vars(array(
			'L_SIMILAR_TOPICS'	=> $this->user->lang('SIMILAR_TOPICS'),
			'NEWEST_POST_IMG'	=> $this->user->img('icon_topic_newest', 'VIEW_NEWEST_POST'),
			'LAST_POST_IMG'		=> $this->user->img('icon_topic_latest', 'VIEW_LATEST_POST'),
			'REPORTED_IMG'		=> $this->user->img('icon_topic_reported', 'TOPIC_REPORTED'),
			'POLL_IMG'			=> $this->user->img('icon_topic_poll', 'TOPIC_POLL'),
			'S_PST_BRANCH'		=> phpbb_version_compare(max($this->config['phpbb_version'], PHPBB_VERSION), '3.2.0-dev', '<') ? '31x' : '32x',
		));
	}
358
359
	/**
	 * Clean topic title (and if needed, ignore-words)
	 *
	 * The &quot; and &amp; entities are always removed. Stop-words are
	 * stripped as well when any of the following holds:
	 *  - the database is MySQL and the user's language is not English
	 *  - custom ignore words have been defined
	 *  - the database is PostgreSQL using the 'simple' text search config
	 *
	 * @access public
	 * @param string $text The topic title
	 * @return string The topic title
	 */
	public function clean_topic_title($text)
	{
		// Strip quotes, ampersands
		$title = str_replace(array('&quot;', '&amp;'), '', $text);

		// Each step is only evaluated when the previous ones were false
		$strip_words = $this->is_mysql() && !$this->english_lang();
		$strip_words = $strip_words || $this->has_ignore_words();
		$strip_words = $strip_words || ($this->is_postgres() && $this->config['similar_topics_postgres_ts_name'] == 'simple');

		return $strip_words ? $this->strip_stop_words($title) : $title;
	}
378
379
	/**
	 * Remove any non-english and/or custom defined ignore-words
	 *
	 * The title is split into words, every stop-word is dropped, and the
	 * remaining words are joined back together with single spaces.
	 *
	 * @access protected
	 * @param string $text The topic title
	 * @return string The topic title
	 */
	protected function strip_stop_words($text)
	{
		// NOTE: the included language file runs in this scope and is
		// presumably what fills $words - verify against the language files
		$words = array();

		// If non-English, look for a list of stop-words to be ignored
		// in either the core or the extension (deprecated from core)
		if (!$this->english_lang())
		{
			$candidates = array(
				"{$this->user->lang_path}{$this->user->lang_name}/search_ignore_words.{$this->php_ext}",
				"{$this->root_path}ext/vse/similartopics/language/{$this->user->lang_name}/search_ignore_words.{$this->php_ext}",
			);

			// Only the first file found is loaded
			foreach ($candidates as $candidate)
			{
				if (file_exists($candidate))
				{
					include($candidate);
					break;
				}
			}
		}

		// Merge any custom defined ignore words from the ACP to the stop-words array
		$stop_words = $this->has_ignore_words() ? array_merge($this->make_word_array($this->config['similar_topics_words']), $words) : $words;

		// Remove stop-words from the topic title text and
		// convert the remaining words back to a string
		return implode(' ', array_diff($this->make_word_array($text), $stop_words));
	}
413
414
	/**
	 * Helper function to split string into an array of words
	 *
	 * Every run of characters that is neither a letter nor a number counts
	 * as a separator. Words are lower-cased, and words shorter than three
	 * characters are dropped. Keys of the returned array are not re-indexed.
	 *
	 * @access protected
	 * @param string $text String of plain text words
	 * @return array Array of plaintext words
	 */
	protected function make_word_array($text)
	{
		// Strip out any non-alpha-numeric characters using PCRE regex syntax
		$plain_text = trim(preg_replace('#[^\p{L}\p{N}]+#u', ' ', $text));

		// Keep only words of 3 characters or more
		return array_filter(explode(' ', utf8_strtolower($plain_text)), function ($word) {
			return utf8_strlen(trim($word)) >= 3;
		});
	}
438
439
	/**
	 * Check if English is the current user's language
	 *
	 * @access protected
	 * @return bool True if lang is 'en' or 'en_us', false otherwise
	 */
	protected function english_lang()
	{
		// Strict comparison: only these exact language names count as English
		return in_array($this->user->lang_name, array('en', 'en_us'), true);
	}
449
450
	/**
	 * Check if custom ignore words have been defined for similar topics
	 *
	 * @access protected
	 * @return bool True if the 'similar_topics_words' config value is
	 *              non-empty, false otherwise
	 */
	protected function has_ignore_words()
	{
		if (empty($this->config['similar_topics_words']))
		{
			return false;
		}

		return true;
	}
460
461
	/**
	 * Check if the database layer is MySQL4 or later
	 *
	 * @access protected
	 * @return bool True if the SQL layer is 'mysql4' or 'mysqli', false otherwise
	 */
	protected function is_mysql()
	{
		return in_array($this->db->get_sql_layer(), array('mysql4', 'mysqli'), true);
	}
471
472
	/**
	 * Check if the database is using PostgreSQL
	 *
	 * @access protected
	 * @return bool True if the SQL layer is 'postgres', false otherwise
	 */
	protected function is_postgres()
	{
		$sql_layer = $this->db->get_sql_layer();

		return $sql_layer === 'postgres';
	}
482
}
483