Issues (1963)

html/user/job_file.php (7 issues)

1
<?php
2
// This file is part of BOINC.
3
// https://boinc.berkeley.edu
4
// Copyright (C) 2024 University of California
5
//
6
// BOINC is free software; you can redistribute it and/or modify it
7
// under the terms of the GNU Lesser General Public License
8
// as published by the Free Software Foundation,
9
// either version 3 of the License, or (at your option) any later version.
10
//
11
// BOINC is distributed in the hope that it will be useful,
12
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14
// See the GNU Lesser General Public License for more details.
15
//
16
// You should have received a copy of the GNU Lesser General Public License
17
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
18
19
// Web RPCs for managing input files for remote job submission
20
// These support systems where users - possibly lots of them -
21
// process jobs without logging on to the BOINC server.
22
//
23
// Issues:
24
//
25
// 1) how are files named on the server (i.e. physical names)?
26
//  That's up to the clients, but they must enforce immutability:
27
//  different files must have different physical names.
28
//  One way to achieve this is to include the MD5 in the name.
29
//
30
// 2) how does the server keep track of the files?
31
//  In the MySQL database, in a table called "job_file".
32
//  Each row describes a file currently on the server.
33
//  In addition, we maintain a table "batch_file_assoc" to record
34
//  that a file is used by a particular batch.
35
//  (Note: the association could be at the job level instead,
36
//  but this way is more efficient if many jobs in a batch use
37
//  a particular file.)
38
//
39
// 3) how does the server clean up unused files?
40
//  A daemon (job_file_deleter) deletes files for which
41
//  - the delete date (if given) is in the past, and
42
//  - there are no associations to active batches
43
//
44
// 4) what are the RPC operations?
45
//  query_files
46
//      in:
47
//          authenticator
48
//          list of physical names
49
//          batch ID (optional)
50
//          new delete time (optional)
51
//      out:
52
//          error message,
53
//          or list of files (indices in the name list) not present on server
54
//      action: for each name in the name list:
55
//          if present on server
56
//              update delete time
57
//              create batch/file association
58
//  upload_files
59
//      in:
60
//          authenticator
61
//          delete time (optional)
62
//          batch ID (optional)
63
//          list of names
64
//          files (as multipart attachments)
65
//      out:
66
//          error message, or success
67
//      action:
68
//          for each file in list
69
//              stage:
70
//                  move to project download dir w/ appropriate name
71
//                  generate .md5 file
72
//              create job_file record
73
//              create batch_file_assoc record if needed
74
75
require_once("../inc/boinc_db.inc");
76
require_once("../inc/submit_db.inc");
77
require_once("../inc/dir_hier.inc");
78
require_once("../inc/xml.inc");
79
require_once("../inc/submit_util.inc");
80
81
// Translate a PHP upload error code (the 'error' field of a $_FILES entry)
// into a human-readable message.
//
// $errno: one of PHP's UPLOAD_ERR_* constants
// returns: a description string.
//      Codes not listed below get a generic message rather than null,
//      so the result can always be interpolated into an error reply.
//
function upload_error_description($errno) {
    switch ($errno) {
        case UPLOAD_ERR_INI_SIZE:
            return "The uploaded file exceeds upload_max_filesize of php.ini.";
        case UPLOAD_ERR_FORM_SIZE:
            return "The uploaded file exceeds the MAX_FILE_SIZE specified in the HTML form.";
        case UPLOAD_ERR_PARTIAL:
            return "The uploaded file was only partially uploaded.";
        case UPLOAD_ERR_NO_FILE:
            return "No file was uploaded.";
        case UPLOAD_ERR_NO_TMP_DIR:
            return "Missing a temporary folder.";
        case UPLOAD_ERR_CANT_WRITE:
            return "Failed to write file to disk.";
        case UPLOAD_ERR_EXTENSION:
            return "A PHP extension stopped the file upload.";
        default:
            return "Unknown upload error (code $errno).";
    }
}
99
100
// Handle a query_files RPC.
// For each physical name in the request:
// - if the file is in the download hierarchy, make sure it has a
//   job_file record and (if a batch ID was given) a batch association
// - otherwise remove any stale job_file record and report the file's
//   index (position in the name list) as absent.
//
// $r: the parsed request (SimpleXMLElement); reads the elements
//      phys_name (repeated), delete_time and batch_id
// Output: XML written with echo.
//      Errors go through xml_error(), which this code assumes
//      does not return.
//
function query_files($r) {
    xml_start_tag("query_files");
    // called for its checking side effect only;
    // the value it returns isn't used here
    check_remote_submit_permissions($r, null);
    $absent_files = [];
    $now = time();
    $delete_time = (int)$r->delete_time;
    $batch_id = (int)$r->batch_id;
    $fanout = parse_config(get_config(), "<uldl_dir_fanout>");

    $phys_names = [];
    foreach ($r->phys_name as $f) {
        $phys_names[] = (string)$f;
    }

    // $phys_names is a list, so the key is the file's index in the request
    foreach ($phys_names as $i => $fname) {
        if (!is_valid_filename($fname)) {
            xml_error(-1, 'bad filename');
        }
        $path = dir_hier_path($fname, project_dir()."/download", $fanout);

        // if the job_file record is there,
        // update the delete time first to avoid race condition
        // with job file deleter
        //
        $job_file = BoincJobFile::lookup_name($fname);
        if ($job_file && $job_file->delete_time < $delete_time) {
            // A falsy result is treated as failure, the same convention
            // used for insert() and delete() below.
            // (This check used to be inverted, reporting an error
            // when the update succeeded.)
            $retval = $job_file->update("delete_time=$delete_time");
            if (!$retval) {
                xml_error(-1, "job_file->update() failed: ".BoincDb::error());
            }
        }

        if (!file_exists($path)) {
            // file is gone: drop the stale DB record, if any
            //
            if ($job_file) {
                $ret = $job_file->delete();
                if (!$ret) {
                    xml_error(-1,
                        "BoincJobFile::delete() failed: ".BoincDb::error()
                    );
                }
            }
            $absent_files[] = $i;
            continue;
        }

        // create the DB record if needed
        //
        if ($job_file) {
            $jf_id = $job_file->id;
        } else {
            $jf_id = BoincJobFile::insert(
                "(name, create_time, delete_time) values ('$fname', $now, $delete_time)"
            );
            if (!$jf_id) {
                xml_error(-1, "query_files(): BoincJobFile::insert($fname) failed: ".BoincDb::error());
            }
        }

        // create batch association if needed
        //
        if ($batch_id) {
            BoincBatchFileAssoc::insert(
                "(batch_id, job_file_id) values ($batch_id, $jf_id)"
            );
            // this returns an error if the assoc already exists; ignore
        }
    }

    echo "<absent_files>\n";
    foreach ($absent_files as $idx) {
        echo "<file>$idx</file>\n";
    }
    echo "</absent_files>
        </query_files>
    ";
}
172
173
// if an error occurs, delete the uploaded temp files
//
// Walks every entry of $_FILES and removes its temp file(s).
// Entries whose upload failed have an empty tmp_name, and fields declared
// as arrays (name="f[]") carry an array of tmp_names;
// both are handled so that no unlink() warning leaks into the XML reply.
//
function delete_uploaded_files() {
    foreach ($_FILES as $f) {
        if (!isset($f['tmp_name'])) {
            continue;
        }
        foreach ((array)$f['tmp_name'] as $tmp_name) {
            if (is_string($tmp_name) && $tmp_name !== '' && is_file($tmp_name)) {
                unlink($tmp_name);
            }
        }
    }
}
180
181
// Handle an upload_files RPC.
// The request lists physical names; the files themselves arrive as
// multipart attachments in $_FILES, matched to the names by position.
// Each file is moved into the download hierarchy (unless an identical
// copy is already there), gets a job_file record, and - if a batch ID
// was given - a batch association.
//
// $r: the parsed request (SimpleXMLElement); reads the elements
//      phys_name (repeated), delete_time and batch_id
// Output: XML written with echo.
//      Errors go through xml_error(), which this code assumes
//      does not return.
//
function upload_files($r) {
    xml_start_tag("upload_files");
    // called for its checking side effect only;
    // the value it returns isn't used here
    check_remote_submit_permissions($r, null);
    $fanout = parse_config(get_config(), "<uldl_dir_fanout>");
    $delete_time = (int)$r->delete_time;
    $batch_id = (int)$r->batch_id;

    if (count($_FILES) != count($r->phys_name)) {
        delete_uploaded_files();
        xml_error(-1,
            sprintf("# of uploaded files (%d) doesn't agree with request (%d)",
                count($_FILES), count($r->phys_name)
            )
        );
    }

    $phys_names = [];
    foreach ($r->phys_name as $cs) {
        $fname = (string)$cs;
        if (!is_valid_filename($fname)) {
            delete_uploaded_files();
            xml_error(-1, 'bad filename');
        }
        $phys_names[] = $fname;
    }

    // first pass: check all the uploads before touching the download dir
    //
    foreach ($_FILES as $f) {
        // the client-supplied name is echoed in error messages below,
        // so validate it (this used to re-check the last phys_name instead)
        $name = $f['name'];
        if (!is_valid_filename($name)) {
            delete_uploaded_files();
            xml_error(-1, 'bad FILES filename');
        }
        $tmp_name = $f['tmp_name'];

        if ($f['error'] != UPLOAD_ERR_OK) {
            delete_uploaded_files();
            $reason = upload_error_description($f['error']);
            xml_error(-1, "$name upload failed because: $reason");
        }

        if (!is_uploaded_file($tmp_name)) {
            delete_uploaded_files();
            xml_error(-1, "$name was not uploaded correctly");
        }
    }

    // second pass: stage each file and record it in the DB
    //
    $i = 0;
    $now = time();
    foreach ($_FILES as $f) {
        $tmp_name = $f['tmp_name'];
        $fname = $phys_names[$i];
        $path = dir_hier_path($fname, project_dir()."/download", $fanout);

        // see if file is in download hierarchy
        //
        switch (check_download_file($tmp_name, $path)) {
        case 0:
            // file is already there; nothing to move
            break;
        case 1:
            // file is not there; move
            //
            if (!move_uploaded_file($tmp_name, $path)) {
                xml_error(-1, "could not move $tmp_name to $path");
            }
            touch("$path.md5");
            break;
        case -1:
            // file is there but different contents
            //
            xml_error(-1, "file immutability violation for $fname");
            break;
        case -2:
            xml_error(-1, "file operation failed; check permissions in download/*");
            break;
        default:
            // don't register a file whose staging status is unknown
            //
            xml_error(-1, "unexpected result from check_download_file() for $fname");
            break;
        }

        $jf_id = BoincJobFile::insert(
            "(name, create_time, delete_time) values ('$fname', $now, $delete_time)"
        );
        if (!$jf_id) {
            xml_error(-1, "BoincJobFile::insert($fname) failed: ".BoincDb::error());
        }
        if ($batch_id) {
            BoincBatchFileAssoc::insert(
                "(batch_id, job_file_id) values ($batch_id, $jf_id)"
            );
        }
        $i++;
    }

    echo "<success/>
        </upload_files>
    ";
}
274
275
// Disabled debugging harness: change the 0 to 1 to run query_files()
// on a canned request instead of the POSTed one.
// (This used to call submit_batch(), which is not the handler for a
// <query_files> request.)
//
if (0) {
    $r = simplexml_load_string("<query_files>\n<batch_id>0</batch_id>\n   <md5>80bf244b43fb5d39541ea7011883b7e0</md5>\n   <md5>a6037b05afb05f36e6a85a7c5138cbc1</md5>\n</query_files>\n ");
    query_files($r);
    exit;
}
// Disabled debugging harness: same as above, for upload_files().
//
if (0) {
    $r = simplexml_load_string("<upload_files>\n<authenticator>157f96a018b0b2f2b466e2ce3c7f54db</authenticator>\n<batch_id>1</batch_id>\n<md5>80bf244b43fb5d39541ea7011883b7e0</md5>\n<md5>a6037b05afb05f36e6a85a7c5138cbc1</md5>\n</upload_files>");
    upload_files($r);
    exit;
}
285
286
// If the project config names a remote-submission log file,
// append the raw request to it (in <log_dir> if that is configured).
// NOTE(review): the request is logged verbatim, and requests carry an
// authenticator element - confirm the log file is suitably protected.
//
$request_log = parse_config(get_config(), "<remote_submission_log>");
if ($request_log) {
    $request_log_dir = parse_config(get_config(), "<log_dir>");
    if ($request_log_dir) {
        $request_log = $request_log_dir."/".$request_log;
    }
    // the request may be missing or malformed (e.g. sent as an array);
    // log an empty body rather than raising a notice
    $logged_request = '';
    if (isset($_POST['request']) && is_string($_POST['request'])) {
        $logged_request = $_POST['request'];
    }
    if ($file = fopen($request_log, "a+")) {
        fwrite($file, "\n<job_file date=\"".date(DATE_ATOM)."\">\n".$logged_request."\n</job_file>\n");
        fclose($file);
    }
}
297
298
// Main entry point: parse the POSTed request and dispatch on the name
// of its root element.
// xml_error() is assumed not to return.
//
xml_header();
$req = '';
if (isset($_POST['request']) && is_string($_POST['request'])) {
    $req = $_POST['request'];
}
if ($req === '') {
    xml_error(-1, "no request message", __FILE__, __LINE__);
}
$r = simplexml_load_string($req);
// compare with false explicitly: a SimpleXMLElement for an empty,
// attributeless root element is itself falsy, but it did parse
if ($r === false) {
    xml_error(-1, "can't parse request message: ".htmlspecialchars($req), __FILE__, __LINE__);
}

switch ($r->getName()) {
case 'query_files':
    query_files($r);
    break;
case 'upload_files':
    upload_files($r);
    break;
default:
    xml_error(-1, "no such action");
    break;
}
315
316
?>
317