@@ -15,12 +15,10 @@ |
||
| 15 | 15 | |
| 16 | 16 | use CSVelte\Collection\AbstractCollection; |
| 17 | 17 | use CSVelte\Collection\CharCollection; |
| 18 | -use CSVelte\Collection\Collection; |
|
| 19 | 18 | use CSVelte\Collection\NumericCollection; |
| 20 | 19 | use CSVelte\Collection\TabularCollection; |
| 21 | 20 | use CSVelte\Contract\Streamable; |
| 22 | 21 | use CSVelte\Exception\TasterException; |
| 23 | - |
|
| 24 | 22 | use DateTime; |
| 25 | 23 | use Exception; |
| 26 | 24 | use OutOfBoundsException; |
@@ -232,12 +232,12 @@ discard block |
||
| 232 | 232 | $types = new TabularCollection(); |
| 233 | 233 | |
| 234 | 234 | // callback to build the aforementioned collection |
| 235 | - $buildTypes = function ($line, $line_no) use ($types, $delim, $eol) { |
|
| 235 | + $buildTypes = function($line, $line_no) use ($types, $delim, $eol) { |
|
| 236 | 236 | if ($line_no > 2) { |
| 237 | 237 | return; |
| 238 | 238 | } |
| 239 | 239 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
| 240 | - $getType = function ($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
| 240 | + $getType = function($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
| 241 | 241 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
| 242 | 242 | $fieldMeta = [ |
| 243 | 243 | 'value' => $field, |
@@ -264,8 +264,8 @@ discard block |
||
| 264 | 264 | |
| 265 | 265 | $hasHeader = new NumericCollection(); |
| 266 | 266 | $possibleHeader = collect($types->shift()); |
| 267 | - $types->walk(function (AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
| 268 | - $row->walk(function (AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
| 267 | + $types->walk(function(AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
| 268 | + $row->walk(function(AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
| 269 | 269 | try { |
| 270 | 270 | $col = collect($possibleHeader->get($col_no, null, true)); |
| 271 | 271 | if ($fieldMeta->get('type') == self::TYPE_STRING) { |
@@ -329,9 +329,9 @@ discard block |
||
| 329 | 329 | { |
| 330 | 330 | $str = $this->removeQuotedStrings($this->sample); |
| 331 | 331 | $eols = [ |
| 332 | - self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
| 333 | - self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
| 334 | - self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
| 332 | + self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
| 333 | + self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
| 334 | + self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
| 335 | 335 | ]; |
| 336 | 336 | |
| 337 | 337 | $curCount = 0; |
@@ -374,12 +374,12 @@ discard block |
||
| 374 | 374 | $patterns = []; |
| 375 | 375 | // delim can be anything but line breaks, quotes, alphanumeric, underscore, backslash, or any type of spaces |
| 376 | 376 | $antidelims = implode(["\r", "\n", "\w", preg_quote('"', '/'), preg_quote("'", '/'), preg_quote(chr(self::SPACE), '/')]); |
| 377 | - $delim = '(?P<delim>[^' . $antidelims . '])'; |
|
| 377 | + $delim = '(?P<delim>[^'.$antidelims.'])'; |
|
| 378 | 378 | $quote = '(?P<quoteChar>"|\'|`)'; // @todo I think MS Excel uses some strange encoding for fancy open/close quotes |
| 379 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
| 380 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\1' . $delim . ' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
| 381 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
| 382 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
| 379 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
| 380 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\1'.$delim.' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
| 381 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
| 382 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
| 383 | 383 | foreach ($patterns as $pattern) { |
| 384 | 384 | // @todo I had to add the error suppression char here because it was |
| 385 | 385 | // causing undefined offset errors with certain data sets. strange... |
@@ -430,15 +430,15 @@ discard block |
||
| 430 | 430 | // each frequency (in 10 lines, "tab" occurred 5 times on 7 of those |
| 431 | 431 | // lines, 6 times on 2 lines, and 7 times on 1 line) |
| 432 | 432 | collect(explode($eol, $this->removeQuotedStrings($this->sample))) |
| 433 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
| 433 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
| 434 | 434 | collect(str_split($line)) |
| 435 | - ->filter(function ($c) { |
|
| 435 | + ->filter(function($c) { |
|
| 436 | 436 | return collect($this->delims)->contains($c); |
| 437 | 437 | }) |
| 438 | 438 | ->frequency() |
| 439 | 439 | ->sort() |
| 440 | 440 | ->reverse() |
| 441 | - ->walk(function ($count, $char) use ($frequencies, $line_no) { |
|
| 441 | + ->walk(function($count, $char) use ($frequencies, $line_no) { |
|
| 442 | 442 | try { |
| 443 | 443 | $char_counts = $frequencies->get($char, null, true); |
| 444 | 444 | } catch (OutOfBoundsException $e) { |
@@ -451,8 +451,8 @@ discard block |
||
| 451 | 451 | // the above only finds frequencies for characters if they exist in |
| 452 | 452 | // a given line. This will go back and fill in zeroes where a char |
| 453 | 453 | // didn't occur at all in a given line (needed to determine mode) |
| 454 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
| 455 | - $frequencies->walk(function ($counts, $char) use ($line_no, $frequencies) { |
|
| 454 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
| 455 | + $frequencies->walk(function($counts, $char) use ($line_no, $frequencies) { |
|
| 456 | 456 | try { |
| 457 | 457 | $char_counts = $frequencies->get($char, null, true); |
| 458 | 458 | } catch (OutOfBoundsException $e) { |
@@ -471,8 +471,8 @@ discard block |
||
| 471 | 471 | foreach ($frequencies as $char => $freq) { |
| 472 | 472 | $modes->set($char, (new NumericCollection($freq))->mode()); |
| 473 | 473 | } |
| 474 | - $frequencies->walk(function ($f, $chr) use ($modes, $consistencies) { |
|
| 475 | - collect($f)->walk(function ($num) use ($modes, $chr, $consistencies) { |
|
| 474 | + $frequencies->walk(function($f, $chr) use ($modes, $consistencies) { |
|
| 475 | + collect($f)->walk(function($num) use ($modes, $chr, $consistencies) { |
|
| 476 | 476 | if ($expected = $modes->get($chr)) { |
| 477 | 477 | if ($num == $expected) { |
| 478 | 478 | // met the goal, yay! |
@@ -563,19 +563,19 @@ discard block |
||
| 563 | 563 | // @todo Write a method that does this... |
| 564 | 564 | $lines = collect(explode($eol, $this->removeQuotedStrings($this->sample))); |
| 565 | 565 | |
| 566 | - return $delims[collect($delims)->map(function ($delim) use (&$distrib, $lines) { |
|
| 566 | + return $delims[collect($delims)->map(function($delim) use (&$distrib, $lines) { |
|
| 567 | 567 | $linedist = collect(); |
| 568 | - $lines->walk(function ($line, $line_no) use (&$linedist, $delim) { |
|
| 568 | + $lines->walk(function($line, $line_no) use (&$linedist, $delim) { |
|
| 569 | 569 | if (!strlen($line)) { |
| 570 | 570 | return; |
| 571 | 571 | } |
| 572 | 572 | $sectstot = 10; |
| 573 | 573 | $sectlen = (int) (strlen($line) / $sectstot); |
| 574 | 574 | $sections = collect(str_split($line, $sectlen)) |
| 575 | - ->map(function ($section) use ($delim) { |
|
| 575 | + ->map(function($section) use ($delim) { |
|
| 576 | 576 | return substr_count($section, $delim); |
| 577 | 577 | }) |
| 578 | - ->filter(function ($count) { |
|
| 578 | + ->filter(function($count) { |
|
| 579 | 579 | return (bool) $count; |
| 580 | 580 | }); |
| 581 | 581 | if (is_numeric($count = $sections->count())) { |
@@ -584,7 +584,7 @@ discard block |
||
| 584 | 584 | }); |
| 585 | 585 | |
| 586 | 586 | return $linedist; |
| 587 | - })->map(function ($dists) { |
|
| 587 | + })->map(function($dists) { |
|
| 588 | 588 | return $dists->average(); |
| 589 | 589 | })->sort() |
| 590 | 590 | ->reverse() |
@@ -634,9 +634,9 @@ discard block |
||
| 634 | 634 | |
| 635 | 635 | // walk through each line from the data sample to determine which fields |
| 636 | 636 | // are quoted and which aren't |
| 637 | - $qsFunc = function ($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
| 637 | + $qsFunc = function($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
| 638 | 638 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
| 639 | - $qnqaFunc = function ($field) use (&$quoting_styles, &$freq, $delim) { |
|
| 639 | + $qnqaFunc = function($field) use (&$quoting_styles, &$freq, $delim) { |
|
| 640 | 640 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
| 641 | 641 | if ($this->isQuoted($field)) { |
| 642 | 642 | $field = $this->unQuote($field); |
@@ -657,7 +657,7 @@ discard block |
||
| 657 | 657 | $lines->walk($qsFunc->bindTo($this)); |
| 658 | 658 | |
| 659 | 659 | $types = $freq->get('quoted')->unique(); |
| 660 | - $quoting_styles = $quoting_styles->filter(function ($val) { |
|
| 660 | + $quoting_styles = $quoting_styles->filter(function($val) { |
|
| 661 | 661 | return (bool) $val; |
| 662 | 662 | }); |
| 663 | 663 | // if quoting_styles still has QUOTE_ALL or QUOTE_NONE, then return |
@@ -677,7 +677,7 @@ discard block |
||
| 677 | 677 | if ($types->contains(self::DATA_NONNUMERIC)) { |
| 678 | 678 | // allow for a SMALL amount of error here |
| 679 | 679 | $counts = collect([self::DATA_SPECIAL => 0, self::DATA_NONNUMERIC => 0]); |
| 680 | - $freq->get('quoted')->walk(function ($type) use (&$counts) { |
|
| 680 | + $freq->get('quoted')->walk(function($type) use (&$counts) { |
|
| 681 | 681 | $counts->increment($type); |
| 682 | 682 | }); |
| 683 | 683 | // @todo is all this even necessary? seems unnecessary to me... |
@@ -770,7 +770,7 @@ discard block |
||
| 770 | 770 | */ |
| 771 | 771 | protected function replaceQuotedSpecialChars($data, $delim) |
| 772 | 772 | { |
| 773 | - return preg_replace_callback('/([\'"])(.*)\1/imsU', function ($matches) use ($delim) { |
|
| 773 | + return preg_replace_callback('/([\'"])(.*)\1/imsU', function($matches) use ($delim) { |
|
| 774 | 774 | $ret = preg_replace("/([\r\n])/", self::PLACEHOLDER_NEWLINE, $matches[0]); |
| 775 | 775 | $ret = str_replace($delim, self::PLACEHOLDER_DELIM, $ret); |
| 776 | 776 | |
@@ -810,7 +810,7 @@ discard block |
||
| 810 | 810 | $day = '[0-3]?[0-9]'; |
| 811 | 811 | $sep = '[\/\.\-]?'; |
| 812 | 812 | $time = '([0-2]?[0-9](:[0-5][0-9]){1,2}(am|pm)?|[01]?[0-9](am|pm))'; |
| 813 | - $date = '(' . $month . $sep . $day . $sep . $year . '|' . $day . $sep . $month . $sep . $year . '|' . $year . $sep . $month . $sep . $day . ')'; |
|
| 813 | + $date = '('.$month.$sep.$day.$sep.$year.'|'.$day.$sep.$month.$sep.$year.'|'.$year.$sep.$month.$sep.$day.')'; |
|
| 814 | 814 | $dt = new DateTime($data); |
| 815 | 815 | $dt->setTime(0, 0, 0); |
| 816 | 816 | $now = new DateTime(); |
@@ -297,7 +297,7 @@ discard block |
||
| 297 | 297 | return $this->data[$index]; |
| 298 | 298 | } |
| 299 | 299 | if ($throw) { |
| 300 | - throw new OutOfBoundsException(__CLASS__ . ' could not find value at index ' . $index); |
|
| 300 | + throw new OutOfBoundsException(__CLASS__.' could not find value at index '.$index); |
|
| 301 | 301 | } |
| 302 | 302 | |
| 303 | 303 | return $default; |
@@ -338,7 +338,7 @@ discard block |
||
| 338 | 338 | unset($this->data[$index]); |
| 339 | 339 | } else { |
| 340 | 340 | if ($throw) { |
| 341 | - throw new OutOfBoundsException('No value found at given index: ' . $index); |
|
| 341 | + throw new OutOfBoundsException('No value found at given index: '.$index); |
|
| 342 | 342 | } |
| 343 | 343 | } |
| 344 | 344 | |
@@ -493,7 +493,7 @@ discard block |
||
| 493 | 493 | */ |
| 494 | 494 | public function contains($value, $index = null) |
| 495 | 495 | { |
| 496 | - return (bool) $this->first(function ($val, $key) use ($value, $index) { |
|
| 496 | + return (bool) $this->first(function($val, $key) use ($value, $index) { |
|
| 497 | 497 | if (is_callable($value)) { |
| 498 | 498 | $found = $value($val, $key); |
| 499 | 499 | } else { |
@@ -525,11 +525,11 @@ discard block |
||
| 525 | 525 | public function duplicates() |
| 526 | 526 | { |
| 527 | 527 | $dups = []; |
| 528 | - $this->walk(function ($val, $key) use (&$dups) { |
|
| 528 | + $this->walk(function($val, $key) use (&$dups) { |
|
| 529 | 529 | $dups[$val][] = $key; |
| 530 | 530 | }); |
| 531 | 531 | |
| 532 | - return static::factory($dups)->filter(function ($val) { |
|
| 532 | + return static::factory($dups)->filter(function($val) { |
|
| 533 | 533 | return count($val) > 1; |
| 534 | 534 | }); |
| 535 | 535 | } |
@@ -671,7 +671,7 @@ discard block |
||
| 671 | 671 | public function pairs() |
| 672 | 672 | { |
| 673 | 673 | return static::factory(array_map( |
| 674 | - function ($key, $val) { |
|
| 674 | + function($key, $val) { |
|
| 675 | 675 | return [$key, $val]; |
| 676 | 676 | }, |
| 677 | 677 | array_keys($this->data), |
@@ -880,7 +880,7 @@ discard block |
||
| 880 | 880 | } |
| 881 | 881 | } |
| 882 | 882 | // if row contains an array it isn't tabular |
| 883 | - if (array_reduce($row, function ($carry, $item) { |
|
| 883 | + if (array_reduce($row, function($carry, $item) { |
|
| 884 | 884 | return is_array($item) && $carry; |
| 885 | 885 | }, true)) { |
| 886 | 886 | return false; |
@@ -989,7 +989,7 @@ discard block |
||
| 989 | 989 | protected function assertCorrectInputDataType($data) |
| 990 | 990 | { |
| 991 | 991 | if (!$this->isConsistentDataStructure($data)) { |
| 992 | - throw new InvalidArgumentException(__CLASS__ . ' expected traversable data, got: ' . gettype($data)); |
|
| 992 | + throw new InvalidArgumentException(__CLASS__.' expected traversable data, got: '.gettype($data)); |
|
| 993 | 993 | } |
| 994 | 994 | } |
| 995 | 995 | |
@@ -40,7 +40,7 @@ discard block |
||
| 40 | 40 | return call_user_func_array([$column, $method], $args); |
| 41 | 41 | } |
| 42 | 42 | } |
| 43 | - throw new BadMethodCallException('Method does not exist: ' . __CLASS__ . "::{$method}()"); |
|
| 43 | + throw new BadMethodCallException('Method does not exist: '.__CLASS__."::{$method}()"); |
|
| 44 | 44 | } |
| 45 | 45 | |
| 46 | 46 | /** |
@@ -76,7 +76,7 @@ discard block |
||
| 76 | 76 | return static::factory($values); |
| 77 | 77 | } |
| 78 | 78 | if ($throw) { |
| 79 | - throw new OutOfBoundsException(__CLASS__ . ' could not find column: ' . $column); |
|
| 79 | + throw new OutOfBoundsException(__CLASS__.' could not find column: '.$column); |
|
| 80 | 80 | } |
| 81 | 81 | |
| 82 | 82 | return false; |