platine-php /
etl
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * Platine ETL |
||
| 5 | * |
||
| 6 | * Platine ETL is a library to Extract-Transform-Load Data from various sources |
||
| 7 | * |
||
| 8 | * This content is released under the MIT License (MIT) |
||
| 9 | * |
||
| 10 | * Copyright (c) 2020 Platine ETL |
||
| 11 | * Copyright (c) 2019 Benoit POLASZEK |
||
| 12 | * |
||
| 13 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
||
| 14 | * of this software and associated documentation files (the "Software"), to deal |
||
| 15 | * in the Software without restriction, including without limitation the rights |
||
| 16 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
||
| 17 | * copies of the Software, and to permit persons to whom the Software is |
||
| 18 | * furnished to do so, subject to the following conditions: |
||
| 19 | * |
||
| 20 | * The above copyright notice and this permission notice shall be included in all |
||
| 21 | * copies or substantial portions of the Software. |
||
| 22 | * |
||
| 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
| 24 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
| 25 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
||
| 26 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
| 27 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
| 28 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
| 29 | * SOFTWARE. |
||
| 30 | */ |
||
| 31 | |||
| 32 | declare(strict_types=1); |
||
| 33 | |||
| 34 | namespace Platine\Etl\Extractor; |
||
| 35 | |||
| 36 | use Exception; |
||
| 37 | use InvalidArgumentException; |
||
| 38 | use Platine\Etl\Etl; |
||
| 39 | use Platine\Stdlib\Helper\Json; |
||
| 40 | use RuntimeException; |
||
| 41 | use SplFileObject; |
||
| 42 | |||
/**
 * @class JsonExtractor
 * @package Platine\Etl\Extractor
 *
 * Extractor that produces its records from JSON data supplied as a PHP array,
 * a JSON string or a JSON file. The kind of source is either fixed (through
 * the constructor or the "type" option) or detected from the input when
 * EXTRACT_AUTO is used.
 */
class JsonExtractor implements ExtractorInterface
{
    public const EXTRACT_AUTO = 0;
    public const EXTRACT_FROM_STRING = 1;
    public const EXTRACT_FROM_FILE = 2;
    public const EXTRACT_FROM_ARRAY = 3;

    /**
     * In auto mode only strings strictly shorter than this many bytes are
     * tested as file paths (presumably to avoid filesystem lookups on large
     * JSON payloads).
     */
    private const AUTO_PATH_MAX_LENGTH = 3000;

    /**
     * The extract source type (one of the EXTRACT_* constants)
     * @var int
     */
    protected int $type;

    /**
     * Create new instance
     * @param int $type one of the EXTRACT_* constants
     */
    public function __construct(int $type = self::EXTRACT_AUTO)
    {
        $this->type = $type;
    }

    /**
     * {@inheritdoc}
     *
     * The $etl instance is not used by this extractor. A "type" entry in
     * $options replaces the source type configured on this instance.
     *
     * @throws InvalidArgumentException if the source type is not one of the
     * EXTRACT_* constants, or if the JSON does not decode to an array
     * @throws RuntimeException if the source file can not be read
     */
    public function extract(mixed $input, Etl $etl, array $options = []): iterable
    {
        $this->setOptions($options);

        return match ($this->type) {
            self::EXTRACT_FROM_ARRAY => $this->extractFromArray($input),
            self::EXTRACT_FROM_FILE => $this->extractFromFile($input),
            self::EXTRACT_FROM_STRING => $this->extractFromString($input),
            // extractAuto() only accepts array|string, so a file object is
            // routed to the file extractor instead of raising a TypeError.
            self::EXTRACT_AUTO => $input instanceof SplFileObject
                ? $this->extractFromFile($input)
                : $this->extractAuto($input),
            default => throw new InvalidArgumentException(sprintf(
                'Invalid extract source data type provided [%d], must be one of [%s]',
                $this->type,
                implode(',', [
                    self::EXTRACT_AUTO,
                    self::EXTRACT_FROM_STRING,
                    self::EXTRACT_FROM_FILE,
                    self::EXTRACT_FROM_ARRAY
                ])
            )),
        };
    }

    /**
     * Extract source data from array
     * @param array<mixed> $data
     * @return iterable<int|string, mixed> the given array, unchanged
     */
    protected function extractFromArray(array $data): iterable
    {
        return $data;
    }

    /**
     * Extract source data from string
     * @param string $data the JSON document
     * @return iterable<int|string, mixed>
     *
     * @throws InvalidArgumentException if the JSON does not decode to an array
     */
    protected function extractFromString(string $data): iterable
    {
        return $this->decodeJson($data);
    }

    /**
     * Extract source data from file
     * @param SplFileObject|string $file the file object or the file path
     * @return iterable<int|string, mixed>
     *
     * @throws RuntimeException if the path is not a readable regular file or
     * its content can not be read
     * @throws InvalidArgumentException if the JSON does not decode to an array
     */
    protected function extractFromFile(SplFileObject|string $file): iterable
    {
        $path = $file instanceof SplFileObject ? $file->getPathname() : $file;

        // is_readable() alone is also true for directories, which can not be
        // loaded as a JSON document.
        if (is_file($path) === false || is_readable($path) === false) {
            throw new RuntimeException(sprintf(
                'File %s is not readable or does not exist',
                $path
            ));
        }

        $content = file_get_contents($path);
        if ($content === false) {
            // Report the read failure itself instead of decoding an empty string
            throw new RuntimeException(sprintf(
                'Unable to read the content of file %s',
                $path
            ));
        }

        return $this->decodeJson($content);
    }

    /**
     * Extract source data by detecting the type
     *
     * An array is returned as is. A string is first decoded as JSON; when
     * that fails and the string is the path of an existing file, the file
     * is loaded instead.
     *
     * @param array<mixed>|string $data
     * @return iterable<int|string, mixed>
     *
     * @throws Exception the decoding error when the string is neither JSON
     * decoding to an array nor the path of an existing file
     */
    protected function extractAuto(array|string $data): iterable
    {
        if (is_array($data)) {
            return $this->extractFromArray($data);
        }

        try {
            return $this->extractFromArray($this->decodeJson($data));
        } catch (Exception $e) {
            if (strlen($data) < self::AUTO_PATH_MAX_LENGTH && file_exists($data)) {
                return $this->extractFromFile($data);
            }

            throw $e;
        }
    }

    /**
     * Set the options
     *
     * Only the integer "type" option is read; it overwrites the source type
     * of this instance, so it also applies to later extract() calls.
     *
     * @param array<string, mixed> $options
     * @return $this
     */
    protected function setOptions(array $options): self
    {
        if (isset($options['type']) && is_int($options['type'])) {
            $this->type = $options['type'];
        }

        return $this;
    }

    /**
     * Decode a JSON document and make sure the result is an array
     *
     * Scalar documents ("12", "true", "null", ...) are valid JSON but can not
     * be iterated; they are rejected with an Exception so that callers
     * catching Exception (see extractAuto()) can handle them like any other
     * decoding failure.
     *
     * @param string $json
     * @return array<int|string, mixed>
     *
     * @throws InvalidArgumentException if the decoded value is not an array
     */
    private function decodeJson(string $json): array
    {
        // NOTE(review): the second argument is presumably the "decode objects
        // as associative arrays" flag, and decoding errors are presumably
        // reported by exception - confirm against Platine\Stdlib\Helper\Json.
        $decoded = Json::decode($json, true);

        if (is_array($decoded) === false) {
            throw new InvalidArgumentException(sprintf(
                'The JSON data must decode to an array, got [%s]',
                get_debug_type($decoded)
            ));
        }

        return $decoded;
    }
}
||
| 179 |