Completed
Push — master ( 1d37d4...bf8214 )
by Ori
01:34
created

Factory::isFileZipSource()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace frictionlessdata\datapackage;
4
5
use frictionlessdata\datapackage\Datapackages\BaseDatapackage;
6
use frictionlessdata\datapackage\Resources\BaseResource;
7
use Alchemy\Zippy\Zippy;
8
9
/**
 * datapackage and resource have different classes depending on the corresponding profile
 * this factory interface allows to validate and create object instances without having to check the profile first.
 */
class Factory
{
    /**
     * how many lines to validate sample when validating data streams.
     */
    const VALIDATE_PEEK_LINES = 10;

    /**
     * custom datapackage classes, consulted before the core classes.
     *
     * @var array
     */
    protected static $registeredDatapackageClasses = [];

    /**
     * custom resource classes, consulted before the core classes.
     *
     * @var array
     */
    protected static $registeredResourceClasses = [];

    /**
     * load, validate and create a datapackage object
     * supports loading from the following sources:
     *  - native PHP object containing the descriptor
     *  - JSON encoded object
     *  - URL (must be in either 'http' or 'https' schemes)
     *  - local filesystem (POSIX) path.
     *  - local or remote zip file
     *
     * @param mixed       $source
     * @param null|string $basePath optional, required only if you want to use relative paths
     *
     * @return Datapackages\BaseDatapackage
     *
     * @throws Exceptions\DatapackageInvalidSourceException
     * @throws Exceptions\DatapackageValidationFailedException
     */
    public static function datapackage($source, $basePath = null)
    {
        $loaded = static::loadSource($source, $basePath);
        $datapackageClass = static::getDatapackageClass($loaded->descriptor);

        return new $datapackageClass($loaded->descriptor, $loaded->basePath);
    }

    /**
     * create a resource object.
     *
     * @param object      $descriptor
     * @param null|string $basePath
     * @param bool        $skipValidations
     *
     * @return Resources\BaseResource
     *
     * @throws Exceptions\ResourceValidationFailedException
     */
    public static function resource($descriptor, $basePath = null, $skipValidations = false)
    {
        $resourceClass = static::getResourceClass($descriptor);

        return new $resourceClass($descriptor, $basePath, $skipValidations);
    }

    /**
     * validates a given datapackage descriptor
     * will load all resources, and sample up to VALIDATE_PEEK_LINES lines of data from each data source.
     *
     * @param mixed       $source   datapackage source - same as in datapackage function
     * @param null|string $basePath same as in datapackage function
     *
     * @return Validators\DatapackageValidationError[] empty when no validation errors were found
     */
    public static function validate($source, $basePath = null)
    {
        // 1-based position of the resource / data line being processed,
        // reported back to the caller when a failure is caught below
        $curResource = 1;
        $curLine = null;
        try {
            $datapackage = static::datapackage($source, $basePath);
            foreach ($datapackage as $resource) {
                $curLine = 1;
                // iterating is what triggers the validation - the values themselves are not needed
                foreach ($resource as $line) {
                    if ($curLine === self::VALIDATE_PEEK_LINES) {
                        break;
                    }
                    ++$curLine;
                }
                ++$curResource;
            }

            // no validation errors
            return [];
        } catch (Exceptions\DatapackageInvalidSourceException $e) {
            // failed to load the datapackage descriptor
            // return a list containing a single LOAD_FAILED validation error
            return [
                new Validators\DatapackageValidationError(
                    Validators\DatapackageValidationError::LOAD_FAILED, $e->getMessage()
                ),
            ];
        } catch (Exceptions\DatapackageValidationFailedException $e) {
            // datapackage descriptor failed validation - return the validation errors
            return $e->validationErrors;
        } catch (Exceptions\ResourceValidationFailedException $e) {
            // resource descriptor failed validation - return the validation errors
            return [
                new Validators\DatapackageValidationError(
                    Validators\DatapackageValidationError::RESOURCE_FAILED_VALIDATION,
                    [
                        'resource' => $curResource,
                        'validationErrors' => $e->validationErrors,
                    ]
                ),
            ];
        } catch (Exceptions\DataStreamOpenException $e) {
            // failed to open data stream - no line was read, hence line 0
            return [
                new Validators\DatapackageValidationError(
                    Validators\DatapackageValidationError::DATA_STREAM_FAILURE,
                    [
                        'resource' => $curResource,
                        'line' => 0,
                        'error' => $e->getMessage(),
                    ]
                ),
            ];
        } catch (Exceptions\DataStreamValidationException $e) {
            // failed to validate the data stream
            return [
                new Validators\DatapackageValidationError(
                    Validators\DatapackageValidationError::DATA_STREAM_FAILURE,
                    [
                        'resource' => $curResource,
                        'line' => $curLine,
                        'error' => $e->getMessage(),
                    ]
                ),
            ];
        }
    }

    /**
     * register a custom datapackage class, it takes precedence over the core classes.
     *
     * @param string $datapackageClass class providing a handlesDescriptor($descriptor) method
     */
    public static function registerDatapackageClass($datapackageClass)
    {
        static::$registeredDatapackageClasses[] = $datapackageClass;
    }

    /**
     * remove all the custom datapackage classes.
     */
    public static function clearRegisteredDatapackageClasses()
    {
        static::$registeredDatapackageClasses = [];
    }

    /**
     * find the datapackage class which handles the given descriptor
     * custom (registered) classes are checked first, then the core classes.
     *
     * @param $descriptor
     *
     * @return string name of the matching class, CustomDatapackage if no class matched
     */
    public static function getDatapackageClass($descriptor)
    {
        return self::findHandlingClass(
            array_merge(
                // custom classes
                static::$registeredDatapackageClasses,
                // core classes
                [
                    'frictionlessdata\\datapackage\\Datapackages\\TabularDatapackage',
                    'frictionlessdata\\datapackage\\Datapackages\\DefaultDatapackage',
                ]
            ),
            $descriptor,
            // not matched by any known classes
            'frictionlessdata\\datapackage\\Datapackages\\CustomDatapackage'
        );
    }

    /**
     * register a custom resource class, it takes precedence over the core classes.
     *
     * @param string $resourceClass class providing a handlesDescriptor($descriptor) method
     */
    public static function registerResourceClass($resourceClass)
    {
        static::$registeredResourceClasses[] = $resourceClass;
    }

    /**
     * remove all the custom resource classes.
     */
    public static function clearRegisteredResourceClasses()
    {
        static::$registeredResourceClasses = [];
    }

    /**
     * find the resource class which handles the given descriptor
     * custom (registered) classes are checked first, then the core classes.
     *
     * @param $descriptor passed through Utils::objectify before matching
     *
     * @return string name of the matching class, CustomResource if no class matched
     */
    public static function getResourceClass($descriptor)
    {
        return self::findHandlingClass(
            array_merge(
                // custom classes
                static::$registeredResourceClasses,
                // core classes
                [
                    'frictionlessdata\\datapackage\\Resources\\TabularResource',
                    'frictionlessdata\\datapackage\\Resources\\DefaultResource',
                ]
            ),
            Utils::objectify($descriptor),
            // not matched by any known classes
            'frictionlessdata\\datapackage\\Resources\\CustomResource'
        );
    }

    /**
     * allows extending classes to add custom sources
     * used by unit tests to add a mock http source.
     */
    protected static function normalizeHttpSource($source)
    {
        return $source;
    }

    /**
     * allows extending classes to add custom sources
     * used by unit tests to add a mock http source.
     */
    protected static function isHttpSource($source)
    {
        return Utils::isHttpSource($source);
    }

    /**
     * loads the datapackage descriptor from different sources
     * returns an object containing:
     *   - the datapackage descriptor as native php object
     *   - normalized basePath.
     *
     * @param $source
     * @param $basePath
     *
     * @return object
     *
     * @throws Exceptions\DatapackageInvalidSourceException
     */
    protected static function loadSource($source, $basePath)
    {
        if (is_object($source)) {
            // already a native descriptor - nothing to load
            return (object) ['descriptor' => $source, 'basePath' => $basePath];
        }
        if (!is_string($source)) {
            throw new Exceptions\DatapackageInvalidSourceException(
                'Invalid source: '.json_encode($source)
            );
        }

        if (Utils::isJsonString($source)) {
            $descriptor = self::decodeDescriptor($source, $source);
        } elseif (static::isHttpSource($source)) {
            if (static::isHttpZipSource($source)) {
                return static::loadHttpZipSource($source);
            }
            try {
                $location = static::normalizeHttpSource($source);
            } catch (\Exception $e) {
                throw new Exceptions\DatapackageInvalidSourceException(
                    'Failed to load source: '.json_encode($source).': '.$e->getMessage()
                );
            }
            $descriptor = self::decodeDescriptor(self::readSource($location, $source), $source);
            // http sources don't allow relative paths, hence basePath should remain null
            $basePath = null;
        } else {
            // not a json string and not a url - assume it's a file path
            if (static::isFileZipSource($source)) {
                return static::loadFileZipSource($source);
            }
            if (empty($basePath)) {
                // no basePath
                // - assume source is the absolute path of the file
                // - set it's directory as the basePath
                $basePath = dirname($source);
            } else {
                // got a basePath
                // - try to prepend it to the source and see if such a file exists
                // - if not - assume it's an absolute path
                $absPath = $basePath.DIRECTORY_SEPARATOR.$source;
                if (file_exists($absPath)) {
                    $source = $absPath;
                }
            }
            $descriptor = self::decodeDescriptor(self::readSource($source, $source), $source);
        }

        return (object) ['descriptor' => $descriptor, 'basePath' => $basePath];
    }

    /**
     * a remote source is a zip file when the url, or the path part of the url, ends with .zip
     * checking the path part as well allows urls with a query string (e.g. "package.zip?raw=true").
     *
     * @param string $source
     *
     * @return bool
     */
    protected static function isHttpZipSource($source)
    {
        if (self::hasZipExtension($source)) {
            return true;
        }
        $path = parse_url($source, PHP_URL_PATH);

        return is_string($path) && self::hasZipExtension($path);
    }

    /**
     * a local source is a zip file when the path ends with .zip (case insensitive).
     *
     * @param string $source
     *
     * @return bool
     */
    protected static function isFileZipSource($source)
    {
        return self::hasZipExtension($source);
    }

    /**
     * downloads a remote zip file into a temporary file and loads the datapackage from it
     * the temporary file is removed when the script terminates.
     *
     * @param string $source
     *
     * @return object same as loadSource
     *
     * @throws Exceptions\DatapackageInvalidSourceException
     */
    protected static function loadHttpZipSource($source)
    {
        // tempnam creates the file without an extension, it is replaced
        // with a path which has the .zip suffix
        $tempfile = tempnam(sys_get_temp_dir(), 'datapackage-php');
        unlink($tempfile);
        $tempfile .= '.zip';
        register_shutdown_function(function () use ($tempfile) {
            if (file_exists($tempfile)) {
                unlink($tempfile);
            }
        });

        $input = false;
        $output = false;
        $copied = false;
        $reason = null;
        try {
            // go through normalizeHttpSource, same as non-zip http sources
            $input = fopen(static::normalizeHttpSource($source), 'rb');
            if ($input !== false) {
                $output = fopen($tempfile, 'wb');
            }
            if ($input !== false && $output !== false) {
                $copied = (stream_copy_to_stream($input, $output) !== false);
            }
        } catch (\Exception $e) {
            // reached when an error handler converts php warnings to exceptions
            $reason = $e->getMessage();
        }
        // release the handles whether the download succeeded or not
        if (is_resource($input)) {
            fclose($input);
        }
        if (is_resource($output)) {
            fclose($output);
        }
        if (!$copied) {
            throw new Exceptions\DatapackageInvalidSourceException(
                'Failed to load source: '.json_encode($source).($reason === null ? '' : ': '.$reason)
            );
        }

        return static::loadFileZipSource($tempfile);
    }

    /**
     * extracts a local zip file into a temporary directory and loads the datapackage.json it contains
     * the temporary directory is removed when the script terminates.
     *
     * @param string $source
     *
     * @return object same as loadSource, basePath is the temporary directory
     *
     * @throws Exceptions\DatapackageInvalidSourceException
     */
    protected static function loadFileZipSource($source)
    {
        $zippy = Zippy::load();
        // tempnam creates a file, it is replaced with a directory of the same name
        $tempdir = tempnam(sys_get_temp_dir(), 'datapackage-php');
        unlink($tempdir);
        if (!mkdir($tempdir) && !is_dir($tempdir)) {
            throw new Exceptions\DatapackageInvalidSourceException(
                'Failed to load source: '.json_encode($source).': failed to create a temporary directory'
            );
        }
        register_shutdown_function(function () use ($tempdir) {
            Utils::removeDir($tempdir);
        });
        // NOTE(review): the archive may come from a remote url, confirm the extraction
        // can't write outside of $tempdir for entries with relative ("../") names
        $zippy->open($source)->extract($tempdir);
        if (!file_exists($tempdir.'/datapackage.json')) {
            throw new Exceptions\DatapackageInvalidSourceException('zip file must contain a datapackage.json file');
        }

        return static::loadSource($tempdir.'/datapackage.json', $tempdir);
    }

    /**
     * returns the first candidate class which handles the descriptor, or the fallback class.
     *
     * @param array  $candidates class names, in order of precedence
     * @param mixed  $descriptor
     * @param string $fallback   class name to return when no candidate matched
     *
     * @return string
     */
    private static function findHandlingClass(array $candidates, $descriptor, $fallback)
    {
        foreach ($candidates as $candidate) {
            if (call_user_func([$candidate, 'handlesDescriptor'], $descriptor)) {
                return $candidate;
            }
        }

        return $fallback;
    }

    /**
     * case insensitive check that the given path ends with ".zip".
     *
     * @param string $path
     *
     * @return bool
     */
    private static function hasZipExtension($path)
    {
        return strtolower(substr($path, -4)) === '.zip';
    }

    /**
     * reads the raw descriptor from the given location.
     *
     * @param string $location where to read from (file path or url)
     * @param string $source   the source as reported in the error message
     *
     * @return string
     *
     * @throws Exceptions\DatapackageInvalidSourceException
     */
    private static function readSource($location, $source)
    {
        $reason = 'failed to read '.json_encode($location);
        try {
            $contents = file_get_contents($location);
        } catch (\Exception $e) {
            // reached when an error handler converts php warnings to exceptions
            $contents = false;
            $reason = $e->getMessage();
        }
        // without such an error handler file_get_contents reports failure by returning false
        if ($contents === false) {
            throw new Exceptions\DatapackageInvalidSourceException(
                'Failed to load source: '.json_encode($source).': '.$reason
            );
        }

        return $contents;
    }

    /**
     * decodes a json encoded descriptor.
     *
     * @param string $json
     * @param string $source the source as reported in the error message
     *
     * @return mixed
     *
     * @throws Exceptions\DatapackageInvalidSourceException
     */
    private static function decodeDescriptor($json, $source)
    {
        $descriptor = json_decode($json);
        // json_decode doesn't throw - failure is only visible via json_last_error
        if (json_last_error() !== JSON_ERROR_NONE) {
            // fetch the message first, the json_encode call below resets the last error
            $reason = json_last_error_msg();
            throw new Exceptions\DatapackageInvalidSourceException(
                'Failed to load source: '.json_encode($source).': '.$reason
            );
        }

        return $descriptor;
    }
}
358