| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | namespace Cocur\Arff; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | use Cocur\Arff\Column\DateColumn; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | use Cocur\Arff\Column\NominalColumn; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | use Cocur\Arff\Column\NumericColumn; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | use Cocur\Arff\Column\StringColumn; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | class Reader | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |      * @param string $filename | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |      * @return Document | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 16 |  |  |      */ | 
            
                                                                        
                            
            
                                    
            
            
                | 17 | 1 |  |     public function readFile($filename) | 
            
                                                                        
                            
            
                                    
            
            
                | 18 |  |  |     { | 
            
                                                                        
                            
            
                                    
            
            
                | 19 | 1 |  |         $lines    = explode("\n", file_get_contents($filename)); | 
            
                                                                        
                            
            
                                    
            
            
                | 20 | 1 |  |         $document = new Document($this->parseName($lines[0])); | 
            
                                                                        
                            
            
                                    
            
            
                | 21 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 22 | 1 |  |         $this->parseColumns($document, $lines); | 
            
                                                                        
                            
            
                                    
            
            
                | 23 | 1 |  |         $this->parseData($document, $lines); | 
            
                                                                        
                            
            
                                    
            
            
                | 24 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 25 | 1 |  |         return $document; | 
            
                                                                        
                            
            
                                    
            
            
                | 26 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |      * @param string $line | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |      * @return string|null | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |     protected function parseName($line) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         if (preg_match('/^@RELATION ([a-zA-Z-_\.\/\d]+)$/i', $line, $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |             return $matches[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |         return null; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |      * @param Document $document | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |      * @param string[] $lines | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |     protected function parseColumns(Document $document, array $lines) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         foreach ($lines as $line) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |             if (preg_match('/ATTRIBUTE\s([a-zA-Z0-9_-]+)\s(.*)/i', $line, $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |                 $type   = $matches[2]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |                 $column = null; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |                 if (strcasecmp($type, 'string') === 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |                     $column = new StringColumn($matches[1]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |                 } else if (strcasecmp($type, 'numeric') === 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |                     $column = new NumericColumn($matches[1]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |                 } else if (preg_match('/^\{(.*)\}$/', $matches[2], $classMatches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |                     $column = new NominalColumn($matches[1], array_map(function ($value) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |                         return trim($value, "'"); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |                     }, preg_split( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |                             "/,(?=(?:[^\']*\'[^\']*\')*(?![^\']*\'))/", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |                             $classMatches[1] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |                         ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |                     )); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |                 } else if (preg_match('/date\s\"/', $matches[2])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |                     preg_match('/date\s"([A-Za-z0-9-: ]+)"/', $line, $dateMatches); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |                     $column = new DateColumn($matches[1], $dateMatches[1]); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |                 if ($column) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |                     $document->addColumn($column); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |      * @param Document $document | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |      * @param string[] $lines | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |     protected function parseData(Document $document, array $lines) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |         $index = 0; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         while (!preg_match('/@DATA/i', $lines[$index])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |             $index++; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         $columns     = $document->getColumns(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         $columnNames = array_keys($columns); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         for ($i = $index+1; $i < count($lines); $i += 1) { | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |             $row    = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |             $splits = preg_split( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |                 "/,(?=(?:[^\']*\'[^\']*\')*(?![^\']*\'))/", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |                 $lines[$i], | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |                 -1, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |                 PREG_SPLIT_DELIM_CAPTURE | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |             ); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |             foreach ($splits as $j => $value) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |                 if (isset($columnNames[$j])) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |                     $row[$columns[$columnNames[$j]]->getName()] = trim($value, "'"); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |             if (count($row) != count($columnNames)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |                 continue; // malformed, probably and empty line | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |             $document->addData($row); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |     } | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 108 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 109 |  |  |  | 
            
                        
If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: