Issues (1963)

html/inc/text_transform.inc (2 issues)

1
<?php
2
// This file is part of BOINC.
3
// http://boinc.berkeley.edu
4
// Copyright (C) 2008 University of California
5
//
6
// BOINC is free software; you can redistribute it and/or modify it
7
// under the terms of the GNU Lesser General Public License
8
// as published by the Free Software Foundation,
9
// either version 3 of the License, or (at your option) any later version.
10
//
11
// BOINC is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14
// See the GNU Lesser General Public License for more details.
15
//
16
// You should have received a copy of the GNU Lesser General Public License
17
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
18
19
// Functions that process user-supplied text (e.g. messages)
20
// prior to displaying it to users.
21
// Goals:
22
// - Security (don't send evil javascript)
23
// - obey user preferences
24
// - improve formatting (e.g., convert newlines to <br> tags)
25
26
require_once('../inc/sanitize_html.inc');
27
28
// Settings that control how output_transform() renders user-supplied text.
// Flags hold 1 (enabled) or 0 (disabled).
//
class output_options {
    public $bb2html;            // BBCode as HTML? (default: on)
    public $images_as_links;    // Images as hyperlinks? (default: off)
    public $link_popup;         // Links in new windows? (default: off)
    public $nl2br;              // Convert newlines to <br>'s? (default: on)
    public $htmlitems;          // Convert special chars to HTML entities? (default: on)
    public $htmlscrub;          // Scrub "bad" HTML tags? (default: off)
    public $highlight_terms;    // Array of terms to be highlighted, or 0 for none (default: 0)

    // Constructor - set the defaults.
    //
    public function __construct() {
        $this->bb2html = 1;
        $this->images_as_links = 0;
        $this->link_popup = 0;
        $this->nl2br = 1;
        $this->htmlitems = 1;
        $this->htmlscrub = 0;
        $this->highlight_terms = 0;
    }

    // Define the terms to be highlighted (for use with searches and such).
    // Returns true if $terms is an array and was stored;
    // returns false (leaving the current setting untouched) otherwise.
    //
    public function setHighlightTerms($terms) {
        if (!is_array($terms)) {
            return false;
        }
        $this->highlight_terms = $terms;
        return true;
    }
}
61
62
// Do the actual transformation of the text.
// TODO: Make this part of the above class.
//
// $options is an output_options object; if it is omitted (or falsy)
// a new one carrying the class defaults is used.
// The enabled steps always run in this order:
//   HTML-entity escaping, term highlighting, newline -> <br> conversion,
//   BBCode -> HTML, images -> links, links -> new window.
// NOTE: the htmlscrub option is not acted on here.
//
function output_transform($text, $options = NULL) {
    $opts = $options ? $options : new output_options;

    if ($opts->htmlitems) {
        $text = htmlspecialchars($text, ENT_COMPAT | ENT_HTML401 | ENT_SUBSTITUTE);
    }

    // highlighting is requested by storing an array of terms
    $terms = $opts->highlight_terms;
    if (is_array($terms)) {
        $text = highlight_terms($text, $terms);
    }

    if ($opts->nl2br) {
        $text = nl2br($text);
    }
    if ($opts->bb2html) {
        $text = bb2html($text);
    }
    if ($opts->images_as_links) {
        $text = image_as_link($text);
    }
    if ($opts->link_popup) {
        $text = externalize_links($text);
    }
    return $text;
}
93
94
// Build the output options to use when showing text to $user.
// Starts from the output_options defaults; if $user is given,
// images_as_links and link_popup are switched on when the
// corresponding fields of $user->prefs are set.
//
function get_output_options($user) {
    $options = new output_options();
    if (!$user) {
        return $options;
    }
    if ($user->prefs->images_as_links) {
        $options->images_as_links = 1;
    }
    if ($user->prefs->link_popup) {
        $options->link_popup = 1;
    }
    return $options;
}
102
103
// Converts bbcode to HTML (this note describes bb2html(), defined further down)
104
// If $export is true, don't use BOINC CSS
105
106
// Return the part of $s from offset $n1 up to (not including) offset $n2,
// i.e. substr() taking an end offset instead of a length.
//
function substr2($s, $n1, $n2) {
    $length = $n2 - $n1;
    return substr($s, $n1, $length);
}
109
110
// Process the non-nestable constructs [pre] and [code].
//
// $text is scanned left to right.  Text outside these tags is copied
// through unchanged.  The contents of each [pre]...[/pre] or
// [code]...[/code] pair
//  - have "<br />" tags removed,
//  - are HTML-escaped (existing entities are not double-encoded),
//  - have "[" replaced by "&#91;" so that later bbcode processing
//    does not touch them.
// If $export is true the result is wrapped in a plain <pre> or <code>;
// otherwise both tags produce a <pre> with a pre-wrap style.
// An open tag without a matching close tag is dropped and the rest of
// the text is copied through unprocessed.
//
function replace_pre_code($text, $export) {
    $out = '';
    $pos = 0;

    // There may be several sections.  Each pass moves $pos forward past
    // one complete section, so the loop always terminates.
    //
    while (true) {
        // find the first instance of either [code] or [pre]
        //
        $code_pos = strpos($text, '[code]', $pos);
        $pre_pos = strpos($text, '[pre]', $pos);
        if ($code_pos === false && $pre_pos === false) {
            $out .= substr($text, $pos);
            break;
        }

        // Compare offsets only when both tags were found:
        // "false < int" is true in PHP, which would wrongly select
        // [code] when only [pre] is present.
        //
        if ($pre_pos === false || ($code_pos !== false && $code_pos < $pre_pos)) {
            $start = $code_pos;
            $tag = '[code]';
            $tag2 = '[/code]';
        } else {
            $start = $pre_pos;
            $tag = '[pre]';
            $tag2 = '[/pre]';
        }

        // output the part before that
        //
        $out .= substr($text, $pos, $start - $pos);

        // find the close tag
        //
        $start += strlen($tag);
        $end = strpos($text, $tag2, $start);

        // if none, output rest of string and we're done
        //
        if ($end === false) {
            $out .= substr($text, $start);
            break;
        }

        // get the text between open and close tags, and sanitize it.
        // ENT_SUBSTITUTE keeps the section from being blanked
        // if it contains invalid UTF-8.
        //
        $x = substr($text, $start, $end - $start);
        $x = str_replace("<br />", "", $x);
        $x = htmlspecialchars($x, ENT_COMPAT | ENT_SUBSTITUTE, "UTF-8", false);
        $x = str_replace("[", "&#91;", $x);

        if ($export) {
            if ($tag == '[pre]') {
                $out .= "<pre>$x</pre>";
            } else {
                $out .= "<code>$x</code>";
            }
        } else {
            $out .= "<pre style=\"white-space:pre-wrap;\">$x</pre>";
        }

        // move past the close tag
        //
        $pos = $end + strlen($tag2);
    }
    return $out;
}
180
181
// Process the nestable bbcode constructs: convert the supported tags
// in $text to HTML and return the result.
//
// $export selects the markup for two constructs:
//  - [quote=name]: <i> when exporting, <em> otherwise
//  - [img]: class="img-responsive" is added only when not exporting
// All other tags produce the same HTML in both modes.
//
function bb2html_aux($text, $export) {
    $urlregex = "(?:\"?)(?:(http\:\/\/)?)([^\[\"<\ ]+)(?:\"?)";
    // NOTE:
    // This matches https:// URLs too: the "http://" group is optional,
    // so for an https URL it stays empty and the whole URL
    // (scheme included) ends up in group 2.
    // sample results:
    // Array
    // (
    //     [0] => [img]https://a.b.c[/img]
    //     [1] =>
    //     [2] => https://a.b.c
    // )
    // Array
    // (
    //     [0] => [img]http://a.b.c[/img]
    //     [1] => http://
    //     [2] => a.b.c
    // )

    $email_addr_regex = "([A-Za-z0-9\.\-\_\@]*)";
        // should match all valid email addrs,
        // but not any hacker stuff like " alert(1)
    $httpsregex = "(?:\"?)https\:\/\/([^\[\"<\ ]+)(?:\"?)";

    // the only parts of the output that depend on $export
    $quote_tag = $export ? "i" : "em";
    $img_attrs = $export ? "" : " class=\"img-responsive\"";

    // Allowable tags and what they are turned in to.
    // Keeping each pattern next to its replacement means the two
    // cannot drift out of step.
    // The https variants of [url] and [link] must come before the
    // generic ones: the generic replacements always prepend "http://",
    // which would produce "http://https://..." for an https URL.
    //
    $rules = array(
        "@\[b\](.*?)\[/b\]@is"
            => "<b>\\1</b>",
        "@\[i\](.*?)\[/i\]@is"
            => "<i>\\1</i>",
        "@\[u\](.*?)\[/u\]@is"
            => "<u>\\1</u>",
        "@\[s\](.*?)\[/s\]@is"
            => "<s>\\1</s>",
        "@\[sup\](.*?)\[/sup\]@is"
            => "<sup>\\1</sup>",
        "@\[url=$httpsregex\](.*?)\[/url\]@is"
            => "<a href=\"https://\\1\" rel=\"nofollow\">\\2</a>",
        "@\[url\]$httpsregex\[/url\]@is"
            => "<a href=\"https://\\1\" rel=\"nofollow\">https://\\1</a>",
        "@\[link=$httpsregex\](.*?)\[/link\]@is"
            => "<a href=\"https://\\1\" rel=\"nofollow\">\\2</a>",
        "@\[link\]$httpsregex\[/link\]@is"
            => "<a href=\"https://\\1\" rel=\"nofollow\">https://\\1</a>",
        "@\[link=$urlregex\](.*?)\[/link\]@is"
            => "<a href=\"http://\\2\" rel=\"nofollow\">\\3</a>",
        "@\[link\]$urlregex\[/link\]@is"
            => "<a href=\"http://\\2\" rel=\"nofollow\">http://\\2</a>",
        "@\[url=$urlregex\](.*?)\[/url\]@is"
            => "<a href=\"http://\\2\" rel=\"nofollow\">\\3</a>",
        "@\[url\]$urlregex\[/url\]@is"
            => "<a href=\"http://\\2\" rel=\"nofollow\">http://\\2</a>",
        "@\[quote=(.*?)\](.*?)\[/quote\]@is"
            => "<$quote_tag>\\1 wrote:</$quote_tag><blockquote>\\2</blockquote>",
        "@\[quote\](.*?)\[/quote\]@is"
            => "<blockquote>\\1</blockquote>",
        "@\[list\](.*?)\[/list\]@is"
            => "<ul>\\1</ul><p>",
        "@\[list=1\](.*?)\[/list\]@is"
            => "<ol>\\1</ol><p>",
        "@\[img\]$urlregex\[/img\]@is"
            => "<img hspace=\"8\"$img_attrs src=\"\\1\\2\"> ",
        "@\[sm_img\]$urlregex\[/sm_img\]@is"
            => "<img hspace=\"8\" width=400 src=\"\\1\\2\"> ",
        "@\[color=(?:\"?)(.{3,8})(?:\"?)\](.*?)\[/color\]@is"
            => "<font color=\"\\1\">\\2</font>",
        // turns one "*item" line inside a list into <li>item
        "@((?:<ol>|<ul>).*?)\n\*([^\n]+)\n(.*?(</ol>|</ul>))@is"
            => "\\1<li>\\2\n\\3",
        "@\[size=([1-9]|[0-2][0-9])\](.*?)\[/size\]@is"
            => "<span style=\"font-size: \\1px;\">\\2</span>",
        "@\[mailto\]$email_addr_regex\[/mailto\]@is"
            => "<a href=\"mailto:\\1\">\\1</a>",
        "@\[email\]$email_addr_regex\[/email\]@is"
            => "<a href=\"mailto:\\1\">\\1</a>",
        "@\[github\](?:\#|ticket:)(\d+)\[/github\]@is"
            => "<a href=\"https://github.com/BOINC/boinc/issues/\\1\">#\\1</a>",
        "@\[github\]wiki:(.*?)\[/github\]@is"
            => "<a href=\"https://github.com/BOINC/boinc/wiki/\\1\">\\1</a>",
    );
    $bbtags = array_keys($rules);
    $htmltags = array_values($rules);

    // Do the actual replacing - repeat until nothing changes,
    // so that nested items are handled.
    // At most 1000 passes, to prevent DoS.
    //
    for ($i = 0; $i < 1000; $i++) {
        $next = preg_replace($bbtags, $htmltags, $text);
        if ($next === null) {
            // PCRE error (e.g. backtrack limit reached):
            // keep what we have rather than losing the whole text
            break;
        }
        if ($next === $text) {
            break;
        }
        $text = $next;
    }
    $text = str_replace("<ul>", '<ul style="word-break:break-word;">', $text);
    $text = str_replace("<ol>", '<ol style="word-break:break-word;">', $text);
    return $text;
}
303
304
// Convert the bbcode in $text to HTML.
// The non-nestable [pre]/[code] sections are handled first
// (replace_pre_code), then the nestable tags (bb2html_aux).
// $export is passed on unchanged to both steps.
//
function bb2html($text, $export=false) {
    return bb2html_aux(replace_pre_code($text, $export), $export);
}
308
309
// Strip the <br> tags that nl2br added in places where they are
// not wanted, for example inside <pre> containers.
// The original \n was retained after the br when it was added,
// so the line breaks themselves stay in the text.
//
function remove_br($text){
    $unwanted = "<br />";
    return str_replace($unwanted, "", $text);
}
316
317
// Make links open in new windows:
// every occurrence of "<a " in $text (case-sensitive) gets a
// target="_new" attribute inserted right after the tag name.
// Returns the modified text.
//
function externalize_links($text) {
    return str_replace("<a ", "<a target=\"_new\" ", $text);
}
338
339
// Replace each <img ... src=X> tag in $text with a link to the image:
// <a href=X>[Image link]</a>, where X is everything between "src="
// and the closing ">" of the tag (so it keeps its own quotes).
//
function image_as_link($text){
    return preg_replace(
        '@<img([\S\s]+?)src=([^>]+?)>@si',
        '<a href=${2}>[Image link]</a>',
        $text
    );
}
346
347
// Highlight terms in text (used in search results).
//
// Every case-insensitive occurrence of one of $terms in $text is wrapped
// in <span class="mark">...</span>.  The text is scanned in a single
// pass, so
//  - the matched text keeps its original upper/lower case, and
//  - markup inserted for one term is never re-scanned for another
//    (a term like "span" or "mark" cannot corrupt it).
// When terms overlap at the same position, the one listed first wins.
// Empty terms are ignored.  If there is nothing to highlight, or the
// regex engine reports an error, $text is returned unchanged.
//
function highlight_terms($text, $terms) {
    $alternatives = array();
    foreach ((array)$terms as $term) {
        $term = (string)$term;
        if ($term !== '') {
            // terms are literal text, not patterns
            $alternatives[] = preg_quote($term, '@');
        }
    }
    if (!$alternatives) {
        return $text;
    }

    $pattern = '@(' . implode('|', $alternatives) . ')@i';
    $marked = preg_replace($pattern, '<span class="mark">$1</span>', $text);
    return ($marked === null) ? $text : $marked;
}
358
359
?>
360