@@ -513,12 +513,12 @@ discard block |
||
| 513 | 513 | return $this->guessDelimByDistribution($decision, $eol); |
| 514 | 514 | } catch (TasterException $e) { |
| 515 | 515 | // if somehow we STILL can't come to a consensus, then fall back to a |
| 516 | - // "preferred delimiters" list... |
|
| 517 | - foreach ($this->delims as $key => $chr) { |
|
| 518 | - if (collect($decision)->contains($chr)) { |
|
| 519 | - return $chr; |
|
| 520 | - } |
|
| 521 | - } |
|
| 516 | + // "preferred delimiters" list... |
|
| 517 | + foreach ($this->delims as $key => $chr) { |
|
| 518 | + if (collect($decision)->contains($chr)) { |
|
| 519 | + return $chr; |
|
| 520 | + } |
|
| 521 | + } |
|
| 522 | 522 | } |
| 523 | 523 | } |
| 524 | 524 | |
@@ -588,8 +588,8 @@ discard block |
||
| 588 | 588 | })->map(function ($dists) { |
| 589 | 589 | return $dists->average(); |
| 590 | 590 | })->sort() |
| 591 | - ->reverse() |
|
| 592 | - ->getKeyAtPosition(0)]; |
|
| 591 | + ->reverse() |
|
| 592 | + ->getKeyAtPosition(0)]; |
|
| 593 | 593 | } catch (Exception $e) { |
| 594 | 594 | throw new TasterException('delimiter cannot be determined by distribution', TasterException::ERR_DELIMITER); |
| 595 | 595 | } |
@@ -233,11 +233,11 @@ discard block |
||
| 233 | 233 | $types = new TabularCollection(); |
| 234 | 234 | |
| 235 | 235 | // callback to build the aforementioned collection |
| 236 | - $buildTypes = function ($line, $line_no) use ($types, $delim, $eol) { |
|
| 236 | + $buildTypes = function($line, $line_no) use ($types, $delim, $eol) { |
|
| 237 | 237 | |
| 238 | 238 | if ($line_no > 2) return; |
| 239 | 239 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
| 240 | - $getType = function ($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
| 240 | + $getType = function($field, $colpos) use ($types, $line, $line_no, $delim) { |
|
| 241 | 241 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
| 242 | 242 | $fieldMeta = [ |
| 243 | 243 | "value" => $field, |
@@ -265,8 +265,8 @@ discard block |
||
| 265 | 265 | |
| 266 | 266 | $hasHeader = new NumericCollection(); |
| 267 | 267 | $possibleHeader = collect($types->shift()); |
| 268 | - $types->walk(function (AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
| 269 | - $row->walk(function (AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
| 268 | + $types->walk(function(AbstractCollection $row) use ($hasHeader, $possibleHeader) { |
|
| 269 | + $row->walk(function(AbstractCollection $fieldMeta, $col_no) use ($hasHeader, $possibleHeader) { |
|
| 270 | 270 | try { |
| 271 | 271 | $col = collect($possibleHeader->get($col_no, null, true)); |
| 272 | 272 | if ($fieldMeta->get('type') == self::TYPE_STRING) { |
@@ -330,9 +330,9 @@ discard block |
||
| 330 | 330 | { |
| 331 | 331 | $str = $this->removeQuotedStrings($this->sample); |
| 332 | 332 | $eols = [ |
| 333 | - self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
| 334 | - self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
| 335 | - self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
| 333 | + self::EOL_WINDOWS => "\r\n", // 0x0D - 0x0A - Windows, DOS OS/2 |
|
| 334 | + self::EOL_UNIX => "\n", // 0x0A - - Unix, OSX |
|
| 335 | + self::EOL_TRS80 => "\r", // 0x0D - - Apple ][, TRS80 |
|
| 336 | 336 | ]; |
| 337 | 337 | |
| 338 | 338 | $curCount = 0; |
@@ -375,12 +375,12 @@ discard block |
||
| 375 | 375 | $patterns = []; |
| 376 | 376 | // delim can be anything but line breaks, quotes, alphanumeric, underscore, backslash, or any type of spaces |
| 377 | 377 | $antidelims = implode(["\r", "\n", "\w", preg_quote('"', '/'), preg_quote("'", '/'), preg_quote(chr(self::SPACE), '/')]); |
| 378 | - $delim = '(?P<delim>[^' . $antidelims . '])'; |
|
| 378 | + $delim = '(?P<delim>[^'.$antidelims.'])'; |
|
| 379 | 379 | $quote = '(?P<quoteChar>"|\'|`)'; // @todo I think MS Excel uses some strange encoding for fancy open/close quotes |
| 380 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
| 381 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\1' . $delim . ' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
| 382 | - $patterns[] = '/' . $delim . ' ?' . $quote . '.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
| 383 | - $patterns[] = '/(?:^|\n)' . $quote . '.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
| 380 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2\1/ms'; // ,"something", - anything but whitespace or quotes followed by a possible space followed by a quote followed by anything followed by same quote, followed by same anything but whitespace |
|
| 381 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\1'.$delim.' ?/ms'; // 'something', - beginning of line or line break, followed by quote followed by anything followed by quote followed by anything but whitespace or quotes |
|
| 382 | + $patterns[] = '/'.$delim.' ?'.$quote.'.*?\2(?:^|\n)/ms'; // ,'something' - anything but whitespace or quote followed by possible space followed by quote followed by anything followed by quote, followed by end of line |
|
| 383 | + $patterns[] = '/(?:^|\n)'.$quote.'.*?\2(?:$|\n)/ms'; // 'something' - beginning of line followed by quote followed by anything followed by quote followed by same quote followed by end of line |
|
| 384 | 384 | foreach ($patterns as $pattern) { |
| 385 | 385 | // @todo I had to add the error suppression char here because it was |
| 386 | 386 | // causing undefined offset errors with certain data sets. strange... |
@@ -389,7 +389,7 @@ discard block |
||
| 389 | 389 | } |
| 390 | 390 | } |
| 391 | 391 | if ($matches) { |
| 392 | - $qcad = array_intersect_key($matches, array_flip(['quoteChar','delim'])); |
|
| 392 | + $qcad = array_intersect_key($matches, array_flip(['quoteChar', 'delim'])); |
|
| 393 | 393 | if (!empty($matches['quoteChar']) && !empty($matches['delim'])) { |
| 394 | 394 | try { |
| 395 | 395 | return [ |
@@ -431,15 +431,15 @@ discard block |
||
| 431 | 431 | // each frequency (in 10 lines, "tab" occurred 5 times on 7 of those |
| 432 | 432 | // lines, 6 times on 2 lines, and 7 times on 1 line) |
| 433 | 433 | collect(explode($eol, $this->removeQuotedStrings($this->sample))) |
| 434 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
| 434 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
| 435 | 435 | collect(str_split($line)) |
| 436 | - ->filter(function ($c) { |
|
| 436 | + ->filter(function($c) { |
|
| 437 | 437 | return collect($this->delims)->contains($c); |
| 438 | 438 | }) |
| 439 | 439 | ->frequency() |
| 440 | 440 | ->sort() |
| 441 | 441 | ->reverse() |
| 442 | - ->walk(function ($count, $char) use ($frequencies, $line_no) { |
|
| 442 | + ->walk(function($count, $char) use ($frequencies, $line_no) { |
|
| 443 | 443 | try { |
| 444 | 444 | $char_counts = $frequencies->get($char, null, true); |
| 445 | 445 | } catch (OutOfBoundsException $e) { |
@@ -452,8 +452,8 @@ discard block |
||
| 452 | 452 | // the above only finds frequencies for characters if they exist in |
| 453 | 453 | // a given line. This will go back and fill in zeroes where a char |
| 454 | 454 | // didn't occur at all in a given line (needed to determine mode) |
| 455 | - ->walk(function ($line, $line_no) use ($frequencies) { |
|
| 456 | - $frequencies->walk(function ($counts, $char) use ($line_no, $frequencies) { |
|
| 455 | + ->walk(function($line, $line_no) use ($frequencies) { |
|
| 456 | + $frequencies->walk(function($counts, $char) use ($line_no, $frequencies) { |
|
| 457 | 457 | try { |
| 458 | 458 | $char_counts = $frequencies->get($char, null, true); |
| 459 | 459 | } catch (OutOfBoundsException $e) { |
@@ -472,8 +472,8 @@ discard block |
||
| 472 | 472 | foreach ($frequencies as $char => $freq) { |
| 473 | 473 | $modes->set($char, (new NumericCollection($freq))->mode()); |
| 474 | 474 | } |
| 475 | - $frequencies->walk(function ($f, $chr) use ($modes, $consistencies) { |
|
| 476 | - collect($f)->walk(function ($num) use ($modes, $chr, $consistencies) { |
|
| 475 | + $frequencies->walk(function($f, $chr) use ($modes, $consistencies) { |
|
| 476 | + collect($f)->walk(function($num) use ($modes, $chr, $consistencies) { |
|
| 477 | 477 | if ($expected = $modes->get($chr)) { |
| 478 | 478 | if ($num == $expected) { |
| 479 | 479 | // met the goal, yay! |
@@ -564,19 +564,19 @@ discard block |
||
| 564 | 564 | // @todo Write a method that does this... |
| 565 | 565 | $lines = collect(explode($eol, $this->removeQuotedStrings($this->sample))); |
| 566 | 566 | |
| 567 | - return $delims[collect($delims)->map(function ($delim) use (&$distrib, $lines) { |
|
| 567 | + return $delims[collect($delims)->map(function($delim) use (&$distrib, $lines) { |
|
| 568 | 568 | $linedist = collect(); |
| 569 | - $lines->walk(function ($line, $line_no) use (&$linedist, $delim) { |
|
| 569 | + $lines->walk(function($line, $line_no) use (&$linedist, $delim) { |
|
| 570 | 570 | if (!strlen($line)) { |
| 571 | 571 | return; |
| 572 | 572 | } |
| 573 | 573 | $sectstot = 10; |
| 574 | 574 | $sectlen = (int) (strlen($line) / $sectstot); |
| 575 | 575 | $sections = collect(str_split($line, $sectlen)) |
| 576 | - ->map(function ($section) use ($delim) { |
|
| 576 | + ->map(function($section) use ($delim) { |
|
| 577 | 577 | return substr_count($section, $delim); |
| 578 | 578 | }) |
| 579 | - ->filter(function ($count) { |
|
| 579 | + ->filter(function($count) { |
|
| 580 | 580 | return (bool) $count; |
| 581 | 581 | }); |
| 582 | 582 | if (is_numeric($count = $sections->count())) { |
@@ -585,7 +585,7 @@ discard block |
||
| 585 | 585 | }); |
| 586 | 586 | |
| 587 | 587 | return $linedist; |
| 588 | - })->map(function ($dists) { |
|
| 588 | + })->map(function($dists) { |
|
| 589 | 589 | return $dists->average(); |
| 590 | 590 | })->sort() |
| 591 | 591 | ->reverse() |
@@ -635,9 +635,9 @@ discard block |
||
| 635 | 635 | |
| 636 | 636 | // walk through each line from the data sample to determine which fields |
| 637 | 637 | // are quoted and which aren't |
| 638 | - $qsFunc = function ($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
| 638 | + $qsFunc = function($line) use (&$quoting_styles, &$freq, $eol, $delim) { |
|
| 639 | 639 | $line = str_replace(self::PLACEHOLDER_NEWLINE, $eol, $line); |
| 640 | - $qnqaFunc = function ($field) use (&$quoting_styles, &$freq, $delim) { |
|
| 640 | + $qnqaFunc = function($field) use (&$quoting_styles, &$freq, $delim) { |
|
| 641 | 641 | $field = str_replace(self::PLACEHOLDER_DELIM, $delim, $field); |
| 642 | 642 | if ($this->isQuoted($field)) { |
| 643 | 643 | $field = $this->unQuote($field); |
@@ -658,7 +658,7 @@ discard block |
||
| 658 | 658 | $lines->walk($qsFunc->bindTo($this)); |
| 659 | 659 | |
| 660 | 660 | $types = $freq->get('quoted')->unique(); |
| 661 | - $quoting_styles = $quoting_styles->filter(function ($val) { |
|
| 661 | + $quoting_styles = $quoting_styles->filter(function($val) { |
|
| 662 | 662 | return (bool) $val; |
| 663 | 663 | }); |
| 664 | 664 | // if quoting_styles still has QUOTE_ALL or QUOTE_NONE, then return |
@@ -678,7 +678,7 @@ discard block |
||
| 678 | 678 | if ($types->contains(self::DATA_NONNUMERIC)) { |
| 679 | 679 | // allow for a SMALL amount of error here |
| 680 | 680 | $counts = collect([self::DATA_SPECIAL => 0, self::DATA_NONNUMERIC => 0]); |
| 681 | - $freq->get('quoted')->walk(function ($type) use (&$counts) { |
|
| 681 | + $freq->get('quoted')->walk(function($type) use (&$counts) { |
|
| 682 | 682 | $counts->increment($type); |
| 683 | 683 | }); |
| 684 | 684 | // @todo is all this even necessary? seems unnecessary to me... |
@@ -771,7 +771,7 @@ discard block |
||
| 771 | 771 | */ |
| 772 | 772 | protected function replaceQuotedSpecialChars($data, $delim) |
| 773 | 773 | { |
| 774 | - return preg_replace_callback('/([\'"])(.*)\1/imsU', function ($matches) use ($delim) { |
|
| 774 | + return preg_replace_callback('/([\'"])(.*)\1/imsU', function($matches) use ($delim) { |
|
| 775 | 775 | $ret = preg_replace("/([\r\n])/", self::PLACEHOLDER_NEWLINE, $matches[0]); |
| 776 | 776 | $ret = str_replace($delim, self::PLACEHOLDER_DELIM, $ret); |
| 777 | 777 | |
@@ -811,7 +811,7 @@ discard block |
||
| 811 | 811 | $day = '[0-3]?[0-9]'; |
| 812 | 812 | $sep = '[\/\.\-]?'; |
| 813 | 813 | $time = '([0-2]?[0-9](:[0-5][0-9]){1,2}(am|pm)?|[01]?[0-9](am|pm))'; |
| 814 | - $date = '(' . $month . $sep . $day . $sep . $year . '|' . $day . $sep . $month . $sep . $year . '|' . $year . $sep . $month . $sep . $day . ')'; |
|
| 814 | + $date = '('.$month.$sep.$day.$sep.$year.'|'.$day.$sep.$month.$sep.$year.'|'.$year.$sep.$month.$sep.$day.')'; |
|
| 815 | 815 | $dt = new DateTime($data); |
| 816 | 816 | $dt->setTime(0, 0, 0); |
| 817 | 817 | $now = new DateTime(); |
@@ -64,7 +64,7 @@ discard block |
||
| 64 | 64 | return static::factory($values); |
| 65 | 65 | } |
| 66 | 66 | if ($throw) { |
| 67 | - throw new OutOfBoundsException(__CLASS__ . " could not find column: " . $column); |
|
| 67 | + throw new OutOfBoundsException(__CLASS__." could not find column: ".$column); |
|
| 68 | 68 | } |
| 69 | 69 | return false; |
| 70 | 70 | } |
@@ -169,6 +169,6 @@ discard block |
||
| 169 | 169 | return call_user_func_array([$column, $method], $args); |
| 170 | 170 | } |
| 171 | 171 | } |
| 172 | - throw new BadMethodCallException("Method does not exist: " . __CLASS__ . "::{$method}()"); |
|
| 172 | + throw new BadMethodCallException("Method does not exist: ".__CLASS__."::{$method}()"); |
|
| 173 | 173 | } |
| 174 | 174 | } |
| 175 | 175 | \ No newline at end of file |