Completed
Push — master ( b10a48...29cde4 )
by Ori
05:27
created

FieldsInferrer::castRows()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
rs 10
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
namespace frictionlessdata\tableschema\Fields;
3
4
use frictionlessdata\tableschema\Exceptions\FieldValidationException;
5
6
class FieldsInferrer
{
    /**
     * every row accepted by addRows(), in insertion order
     * each row is an array of field name => raw value.
     *
     * @var array
     */
    protected $inputRows = [];

    /**
     * cast rows produced by the last call to infer(); indexed like $inputRows
     * each row is an array of field name => cast value.
     *
     * @var array
     */
    protected $castRows = [];

    /**
     * field name => [infer identifier => number of rows inferred as that identifier]
     * each inner array is re-sorted (arsort) after every increment, most popular first.
     *
     * @var array
     */
    protected $fieldsPopularity = [];

    /**
     * field name => [infer identifier => first inferred field object seen for that identifier].
     *
     * @var array
     */
    protected $fieldsPopularityObjects = [];

    /**
     * flag forwarded as-is to FieldsFactory::infer() and to getInferIdentifier().
     *
     * @var bool
     */
    protected $lenient;

    /**
     * @param null|array $rows optional initial rows to infer by, each row is an array of field name => field value
     * @param bool $lenient forwarded to FieldsFactory::infer() and to the inferred fields' getInferIdentifier()
     * @throws FieldValidationException
     */
    public function __construct($rows = null, $lenient = false)
    {
        $this->lenient = $lenient;
        if (!empty($rows)) {
            $this->addRows($rows);
        }
    }

    /**
     * add rows and updates the fieldsPopularity array - to make the inferred fields more accurate
     *
     * a row is only retained once inferRow() succeeded for it, so a row that fails inference
     * does not linger in the input rows and does not get cast by a later infer() call
     *
     * @param $rows
     * @throws FieldValidationException
     */
    public function addRows($rows)
    {
        foreach ($rows as $row) {
            // infer before storing: if this throws, the inferrer state is left untouched for this row
            $inferredRow = $this->inferRow($row);
            $this->inputRows[] = $row;
            foreach ($this->getFieldNames() as $fieldName) {
                /** @var BaseField $inferredField */
                $inferredField = $inferredRow[$fieldName];
                $inferredFieldType = $inferredField->getInferIdentifier($this->lenient);
                if (!array_key_exists($fieldName, $this->fieldsPopularity)) {
                    $this->fieldsPopularity[$fieldName] = [];
                    $this->fieldsPopularityObjects[$fieldName] = [];
                }
                if (!array_key_exists($inferredFieldType, $this->fieldsPopularity[$fieldName])) {
                    $this->fieldsPopularity[$fieldName][$inferredFieldType] = 0;
                    // keep the first field object seen for this identifier, infer() casts with it
                    $this->fieldsPopularityObjects[$fieldName][$inferredFieldType] = $inferredField;
                }
                $this->fieldsPopularity[$fieldName][$inferredFieldType]++;
                // keep the most popular identifier first, inferField() relies on this order
                arsort($this->fieldsPopularity[$fieldName]);
            }
        }
    }

    /**
     * return the best inferred fields along with the best value casting according to the rows received so far
     * as a side effect the cast rows returned by castRows() are refreshed
     *
     * @return array field name => inferred field object
     * @throws FieldValidationException
     */
    public function infer()
    {
        $bestInferredFields = [];
        foreach ($this->fieldsPopularity as $fieldName => $fieldTypesPopularity) {
            $bestInferredFields[$fieldName] = $this->inferField($fieldName, $fieldTypesPopularity);
        }
        return $bestInferredFields;
    }

    /**
     * returns all the input rows got so far with the best cast value for each field
     * the values are computed by infer() - call it first, otherwise the result reflects the previous infer() call
     *
     * @return array of arrays of field name => best cast value
     */
    public function castRows()
    {
        return $this->castRows;
    }

    /**
     * infer field objects for the given row
     * raises exception if fails to infer a field
     *
     * @param $row array field name => value to infer by
     * @return array field name => inferred field object
     * @throws FieldValidationException
     */
    protected function inferRow($row)
    {
        $rowFields = [];
        foreach ($row as $k => $v) {
            $rowFields[$k] = FieldsFactory::infer($v, (object)["name" => $k], $this->lenient);
        }
        return $rowFields;
    }

    /**
     * @return array field names, taken from the keys of the first input row
     */
    protected function getFieldNames()
    {
        // we assume csv file where all rows have the same column positions
        // so we can use the first row to get the field names
        return array_keys($this->inputRows[0]);
    }

    /**
     * finds the best inferred fields for the given field name according to the popularity
     * also updates the castRows array with the latest cast values
     *
     * candidates are tried from most to least popular; the first one able to cast the value of
     * every input row wins. cast values are staged per candidate and only written to castRows
     * once the candidate succeeded, so values from a rejected candidate never leak into castRows
     *
     * NOTE: when every candidate fails, the last candidate tried is still returned and castRows
     * is left untouched for this field
     *
     * @param $fieldName
     * @param $fieldTypesPopularity
     * @return BaseField|null null only when $fieldTypesPopularity is empty
     */
    protected function inferField($fieldName, $fieldTypesPopularity)
    {
        // the $fieldTypesPopularity array is already sorted with most popular fields first
        $inferredField = null;
        foreach (array_keys($fieldTypesPopularity) as $inferredFieldType) {
            /** @var BaseField $inferredField */
            $inferredField = $this->fieldsPopularityObjects[$fieldName][$inferredFieldType];
            try {
                $castValues = [];
                foreach ($this->inputRows as $inputRow) {
                    $castValues[] = $inferredField->castValue($inputRow[$fieldName]);
                }
            } catch (FieldValidationException $e) {
                // a row failed validation for this field type, will continue to the next one according to popularity
                continue;
            }
            // the candidate cast every row: commit the staged values ($inputRows is a 0-based list)
            foreach ($castValues as $rowNum => $castValue) {
                if (!array_key_exists($rowNum, $this->castRows)) {
                    $this->castRows[$rowNum] = [];
                }
                $this->castRows[$rowNum][$fieldName] = $castValue;
            }
            break;
        }
        return $inferredField;
    }
}