1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace ByJG\AnyDataset\Dataset; |
4
|
|
|
|
5
|
|
|
use ByJG\AnyDataset\Enum\FixedTextDefinition; |
6
|
|
|
use ByJG\AnyDataset\Exception\DatasetException; |
7
|
|
|
use ByJG\AnyDataset\Exception\NotFoundException; |
8
|
|
|
use Exception; |
9
|
|
|
use InvalidArgumentException; |
10
|
|
|
|
11
|
|
|
/**
 * Dataset backed by a fixed-width text source.
 *
 * The source is either a local file path or an http(s) URL. The column layout
 * is described by an array of FixedTextDefinition objects that is handed,
 * together with the opened stream, to FixedTextFileIterator.
 */
class FixedTextFileDataset
{

    /**
     * Local path or http(s) URL given to the constructor.
     *
     * @var string
     */
    protected $source;

    /**
     * @var FixedTextDefinition[]
     */
    protected $fieldDefinition;

    /**
     * Either "HTTP" or "FILE"; decided in the constructor from $source.
     *
     * @var string
     */
    protected $sourceType;

    /**
     * Text File Data Set
     *
     * @param string $source Local file path, or a URL starting with http:// or https://
     * @param FixedTextDefinition[] $fieldDefinition
     * @throws InvalidArgumentException When $fieldDefinition is not an array
     * @throws NotFoundException When $source is not a URL and the file does not exist
     */
    public function __construct($source, $fieldDefinition)
    {
        if (!is_array($fieldDefinition)) {
            throw new InvalidArgumentException("You must define an array of field definition.");
        }

        $this->source = $source;
        $this->sourceType = "HTTP";

        // Anything that does not look like an http(s) URL is treated as a local file.
        if (!preg_match("~^https?://~", $source)) {
            if (!file_exists($this->source)) {
                throw new NotFoundException("The specified file " . $this->source . " does not exists");
            }

            $this->sourceType = "FILE";
        }

        $this->fieldDefinition = $fieldDefinition;
    }

    /**
     * Open the source and return an iterator over it.
     *
     * @access public
     * @return GenericIterator
     * @throws DatasetException
     * @throws Exception
     */
    public function getIterator()
    {
        if ($this->sourceType === "HTTP") {
            return $this->getIteratorHttp();
        }
        return $this->getIteratorFile();
    }

    /**
     * Break the URL stored in $this->source into the pieces needed to issue a
     * plain HTTP GET over a socket.
     *
     * @return array Map with the keys:
     *               'transport'  => string host to pass to fsockopen ("ssl://" prefixed for https),
     *               'port'       => int TCP port (URL port, else 443 for https, else 80),
     *               'hostHeader' => string value for the Host header (port appended when non-default),
     *               'target'     => string request target: path plus query, "/" when the URL has no path
     * @throws DatasetException When the URL cannot be parsed or has no host
     */
    protected function parseHttpSource()
    {
        $parts = parse_url($this->source);
        if ($parts === false || empty($parts['host'])) {
            throw new DatasetException("TextFileDataset invalid URL: " . $this->source);
        }

        $isSecure = isset($parts['scheme']) && strtolower($parts['scheme']) === 'https';
        $defaultPort = $isSecure ? 443 : 80;
        $port = isset($parts['port']) ? (int)$parts['port'] : $defaultPort;

        // An empty request target is not valid HTTP; fall back to the root.
        $target = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
        if (isset($parts['query'])) {
            $target .= '?' . $parts['query'];
        }

        return array(
            'transport' => ($isSecure ? 'ssl://' : '') . $parts['host'],
            'port' => $port,
            'hostHeader' => $parts['host'] . ($port === $defaultPort ? '' : ':' . $port),
            'target' => $target,
        );
    }

    /**
     * Open a socket to the remote host, send a GET request for the source and
     * wrap the resulting stream in a FixedTextFileIterator.
     *
     * NOTE(review): the stream handed to the iterator starts at the HTTP status
     * line, i.e. response headers are not stripped here. Whether the iterator
     * copes with that is not visible from this class -- confirm.
     *
     * @return FixedTextFileIterator
     * @throws DatasetException When the URL is invalid or the socket cannot be opened
     * @throws Exception Whatever FixedTextFileIterator's constructor throws (socket is closed first)
     */
    protected function getIteratorHttp()
    {
        $request = $this->parseHttpSource();

        $handle = fsockopen($request['transport'], $request['port'], $errno, $errstr, 30);
        if (!$handle) {
            throw new DatasetException("TextFileDataset Socket error: $errstr ($errno)");
        }

        $out = "GET " . $request['target'] . " HTTP/1.1\r\n";
        $out .= "Host: " . $request['hostHeader'] . "\r\n";
        $out .= "Connection: Close\r\n\r\n";

        fwrite($handle, $out);

        try {
            return new FixedTextFileIterator($handle, $this->fieldDefinition);
        } catch (Exception $ex) {
            // Do not leak the socket when the iterator cannot be built.
            fclose($handle);
            throw $ex;
        }
    }

    /**
     * Open the local file for reading and wrap the handle in a
     * FixedTextFileIterator.
     *
     * @return FixedTextFileIterator
     * @throws DatasetException When the file cannot be opened
     * @throws Exception Whatever FixedTextFileIterator's constructor throws (file is closed first)
     */
    protected function getIteratorFile()
    {
        $handle = fopen($this->source, "r");
        if (!$handle) {
            throw new DatasetException("TextFileDataset File open error");
        }

        try {
            return new FixedTextFileIterator($handle, $this->fieldDefinition);
        } catch (Exception $ex) {
            // Do not leak the file handle when the iterator cannot be built.
            fclose($handle);
            throw $ex;
        }
    }
}
118
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.