| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  * CSVelte: Slender, elegant CSV for PHP | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * Inspired by Python's CSV module and Frictionless Data and the W3C's CSV | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * standardization efforts, CSVelte was written in an effort to take all the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * suck out of working with CSV. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * @copyright Copyright (c) 2018 Luke Visinoni | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * @author    Luke Visinoni <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * @license   See LICENSE file (MIT license) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | namespace CSVelte\Sniffer; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | use CSVelte\Sniffer; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | use CSVelte\Exception\SnifferException; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | use RuntimeException; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | use function Noz\collect; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Sniffer that guesses the quote and delimiter characters together, based on
                 * which characters appear directly next to quoted values in the data.
                 */
                class SniffQuoteAndDelimByAdjacency extends AbstractSniffer
                {
                    /**
                     * Guess quote and delimiter character(s)
                     *
                     * If there are quoted values within the data, it is often easiest to guess the quote and delimiter
                     * characters at the same time by analyzing their adjacency to one another. That is to say, in cases
                     * where certain values are wrapped in quotes, it can often be determined not only what that quote
                     * character is, but also the delimiter, because it is often on either side of the quote character.
                     *
                     * Four patterns are tried in order and the first one that matches anything wins:
                     *   1. ,"something",  delimiter, optional space, quoted value, same delimiter
                     *   2. "something",   start of line, quoted value, delimiter, optional space
                     *   3. ,"something"   delimiter, optional space, quoted value, end of line
                     *   4. "something"    start of line, quoted value, end of line (no delimiter captured)
                     *
                     * @param string $data The data to analyze
                     *
                     * @throws SnifferException If the quote character and delimiter cannot both be determined
                     *
                     * @return string[] Two entries; ksort() + reverse() on the keys "delim"/"quoteChar" is expected to
                     *                  order them as quote character first, delimiter second
                     */
                    public function sniff($data)
                    {
                        // The line terminator is configuration, not a regex fragment, so escape it before it is
                        // interpolated into the patterns below.
                        $lineTerminator = preg_quote($this->getOption('lineTerminator') ?: PHP_EOL, '/');

                        // A delimiter candidate is any single character that is NOT a carriage return, a line feed,
                        // a word character (alphanumeric/underscore), a double quote, a single quote, or the
                        // character identified by Sniffer::SPACE.
                        $antidelims = implode('', [
                            "\r",
                            "\n",
                            '\w',
                            preg_quote('"', '/'),
                            preg_quote("'", '/'),
                            preg_quote(chr(Sniffer::SPACE), '/'),
                        ]);
                        $delim = "(?P<delim>[^{$antidelims}])";
                        // @todo I think MS Excel uses some strange encoding for fancy open/close quotes
                        $quote = "(?P<quoteChar>\"|'|`)";

                        // Named back-references keep each pattern correct no matter which order the groups appear in.
                        // The last pattern previously referenced group 2 although it only defines one group, which
                        // made PCRE reject it at compile time.
                        $sameQuote   = '(?P=quoteChar)';
                        $sameDelim   = '(?P=delim)';
                        $startOfLine = '(?:^|' . $lineTerminator . ')';
                        $endOfLine   = '(?:$|' . $lineTerminator . ')';

                        // @todo something happened when I changed to double quotes that causes this to match things
                        //     like ,"0.8"\n"2", as one when it should be two
                        $patterns = [
                            "/{$delim} ?{$quote}.*?{$sameQuote}{$sameDelim}/ms",     // ,"something",
                            "/{$startOfLine}{$quote}.*?{$sameQuote}{$delim} ?/ms",   // "something",
                            "/{$delim} ?{$quote}.*?{$sameQuote}{$endOfLine}/ms",     // ,"something"
                            "/{$startOfLine}{$quote}.*?{$sameQuote}{$endOfLine}/ms", // "something"
                        ];

                        $matches = [];
                        $matched = false;
                        foreach ($patterns as $pattern) {
                            // preg_match_all() fills $matches with empty sub-arrays even when nothing matched, so the
                            // array itself cannot be used as a success indicator; rely on the match count instead.
                            if (preg_match_all($pattern, $data, $matches) > 0) {
                                $matched = true;
                                break;
                            }
                        }

                        if ($matched) {
                            try {
                                // Keep only the named groups, reduce each to a single key taken from its frequency
                                // table, then order the pair by key name.
                                $guess = collect($matches)
                                    ->kintersect(array_flip(['quoteChar', 'delim']))
                                    ->map(function ($val) {
                                        return collect($val)->frequency()->sort()->reverse()->getKeyAt(1);
                                    })
                                    ->ksort()
                                    ->reverse()
                                    ->values()
                                    ->toArray();

                                // The delimiter-less pattern yields only a quote character. Callers expect both
                                // values, so treat that case as undetermined and fall through to the exception.
                                if (count($guess) === 2) {
                                    return $guess;
                                }
                            } catch (RuntimeException $e) {
                                // eat this exception and let the sniffer exception below be thrown instead...
                            }
                        }

                        throw new SnifferException(
                            'quoteChar and delimiter cannot be determined',
                            SnifferException::ERR_QUOTE_AND_DELIM
                        );
                    }
                }
            
                        
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.