|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* CSVelte: Slender, elegant CSV for PHP |
|
4
|
|
|
* |
|
5
|
|
|
* Inspired by Python's CSV module and Frictionless Data and the W3C's CSV |
|
6
|
|
|
* standardization efforts, CSVelte was written in an effort to take all the |
|
7
|
|
|
* suck out of working with CSV. |
|
8
|
|
|
* |
|
9
|
|
|
* @copyright Copyright (c) 2018 Luke Visinoni |
|
10
|
|
|
* @author Luke Visinoni <[email protected]> |
|
11
|
|
|
* @license See LICENSE file (MIT license) |
|
12
|
|
|
*/ |
|
13
|
|
|
namespace CSVelte\Sniffer; |
|
14
|
|
|
|
|
15
|
|
|
use function Noz\collect; |
|
16
|
|
|
use function Stringy\create as s; |
|
17
|
|
|
use Stringy\Stringy; |
|
18
|
|
|
|
|
19
|
|
|
class SniffHeaderByDataType extends AbstractSniffer
{
    /**
     * Guess whether there is a header row.
     *
     * Profiles every field of the first row (data type and character length) and compares it with the
     * field in the same column of up to ten of the rows that follow. Each compared field casts one vote:
     * +1 when its type or its length equals that of the first row, -1 otherwise. A positive total is
     * reported as "has a header".
     *
     * NOTE(review): votes are cast in FAVOUR of a header when a row matches the first row. Python's
     * csv.Sniffer.has_header(), which this class appears to be modelled on, votes the other way round
     * (a first row that differs from the data is what suggests a header). The original direction is
     * kept here; please confirm it is intended.
     *
     * @todo The original note on this method says it should also determine HOW MANY header rows there
     *       are by comparing against rows at the END of the sample. That is not implemented: the first
     *       ten rows after the header are used.
     *
     * @param string $data The data to analyze
     *
     * @return bool
     */
    public function sniff($data)
    {
        $delimiter = $this->getOption('delimiter');

        // Presumably masks delimiters/line breaks inside quoted fields so that splitting on the
        // delimiter below is safe -- see AbstractSniffer::replaceQuotedSpecialChars().
        $lines = collect(s($data)->lines())
            ->map(function ($line) use ($delimiter) {
                return s($this->replaceQuotedSpecialChars($line, $delimiter));
            });

        // One profiling routine for header AND data fields, so both are unquoted and measured the same
        // way (previously only the header fields were unquoted).
        $profile = function ($field) {
            $value = $this->unQuote($field);

            return [
                'type'   => $this->getType($value),
                'length' => s($value)->length(),
            ];
        };

        $header = collect($lines->shift()->split($delimiter))->map($profile);

        // get rid of the last line because it may be incomplete
        $lines->pop();

        $score = 0;
        foreach ($lines->slice(0, 10) as $line) {
            /** @var Stringy $line */
            foreach ($line->split($delimiter) as $pos => $field) {
                $expected = $header->get($pos);
                if (!isset($expected['type'], $expected['length'])) {
                    // Row is wider than the first row: nothing to compare against, so no vote.
                    continue;
                }

                $actual     = $profile($field);
                $sameType   = $expected['type'] === $actual['type'];
                $sameLength = $expected['length'] === $actual['length'];

                $score += ($sameType || $sameLength) ? 1 : -1;
            }
        }

        return $score > 0;
    }

    /**
     * Classify a single field value.
     *
     * The value is converted with Stringy\create() first and its string content is what gets classified,
     * so plain strings, numbers and Stringy instances (which is what Stringy::split() yields) are all
     * treated alike. Checks run from most to least specific; the first match wins.
     *
     * @param mixed $value The field value; anything Stringy\create() accepts
     *
     * @return string One of "numeric", "blank", "datetime", "currency", "alpha", "alnum", "json"
     *                or "unknown"
     */
    protected function getType($value)
    {
        $str  = s($value);
        $text = (string) $str;

        if (is_numeric($text)) {
            return 'numeric';
        }

        // Must run before the date/alpha/alnum tests: strtotime() and Stringy's isAlpha()/isAlphanumeric()
        // appear to accept empty or whitespace-only input, which used to make "blank" unreachable.
        if ($str->isBlank()) {
            return 'blank';
        }

        if (strtotime($text) !== false) {
            return 'datetime';
        }

        // "u" is required for the multibyte currency symbols inside the character class to be matched
        // as characters rather than single bytes; the decimal part is optional so "$5" counts too.
        if (preg_match('/^[+-]?[¥£€$]\d+(?:\.\d+)?$/u', $text)) {
            return 'currency';
        }

        if ($str->isAlpha()) {
            return 'alpha';
        }

        if ($str->isAlphanumeric()) {
            return 'alnum';
        }

        if ($str->isJson()) {
            return 'json';
        }

        return 'unknown';
    }
}
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.