Tokenizer - Code Metrics - Inspection of "[WIP] Rework the PHP compactor" - humbug/box - Measure and Improve Code Quality continuously with Scrutinizer

Passed
Pull Request — master (#315)

by Théo
created 2018-11-04 23:08 UTC
Tokenizer C

↳ Parent: Project
Complexity

Total Complexity
Size/Duplication

Total Lines	556
Duplicated Lines	0 %
Importance

Changes
Metric	Value
wmc	57
eloc	199
dl	0
loc	556
rs	5.04
c	0
b	0
f	0
16 Methods

Rating	Name	Size	Complexity
B	getPlainValue()	44	10
A	retrieveAnnotationValues()	50	5
A	match()	11	2
A	createLexer()	12	3
A	parse()	29	4
A	getArray()	39	4
A	retrieveAnnotation()	30	5
A	ignore()	3	1
A	getConstant()	21	4
A	getAssignedValue()	12	1
A	getValue()	10	2
A	parseDocblock()	7	1
A	getArrayEntry()	49	5
A	retrieveIdentifier()	28	4
A	retrieveAnnotations()	43	4
A	matchAny()	14	2
How to fix Complexity

<?php

declare(strict_types=1);

/*
 * This file is part of the box project.
 *
 * (c) Kevin Herrera <[email protected]>
 *     Théo Fidry <[email protected]>
 *
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace KevinGH\Box\Annotation;

use function array_keys;
use function array_merge;
use function array_values;
use Assert\Assertion;
use Doctrine\Common\Annotations\DocLexer;
use Hoa\Compiler\Llk\Llk;
use Hoa\Compiler\Llk\TreeNode;
use Hoa\Compiler\Visitor\Dump;
use Hoa\File\Read;
use function in_array;
use KevinGH\Box\Annotation\Exception\Exception;
use KevinGH\Box\Annotation\Exception\SyntaxException;
use function ltrim;
use function strlen;
use function strpos;
use function trim;

/**
 * Parses annotation tokens from a docblock.
 *
 * This class will use a lexer to parse out a series of tokens from a given docblock. Each token in the series
 * represents a portion of an annotation that was parsed. These tokens can be used to generate alternative
 * representations, such as native values.
 *
 * @private
 */
final class Tokenizer
{
    /**
     * The namespace aliases.
     *
     * Read by retrieveAnnotation(): the part of an annotation identifier before its first backslash (or the
     * whole identifier when it has none) is looked up as a key, and the stored value replaces it.
     * NOTE(review): presumably array<string, string> (alias => namespace) — confirm against the caller.
     */
    private $aliases = [];

    /**
     * @var int[] The list of valid class identifiers
     */
    private static $classIdentifiers = [
        DocLexer::T_IDENTIFIER,
        DocLexer::T_TRUE,
        DocLexer::T_FALSE,
        DocLexer::T_NULL,
    ];

    /**
     * The list of ignored annotation identifiers.
     *
     * Replaced wholesale by ignore() and checked by retrieveAnnotation() with a strict in_array().
     */
    private $ignored = [];

    /**
     * Parses the docblock and returns its syntax tree.
     *
     * The docblock is handed to the compiler built from the annotation grammar (see parseDocblock()).
     *
     * @param string                $docblock          the raw docblock; once leading whitespace is ignored it
     *                                                 must start with the docblock opening marker
     * @param array<string, string> $annotationAliases
     *
     * Annotation aliases are for when the annotations are imported and aliased via a use statement, for example:
     *
     * ```php
     * use Doctrine\ORM\Mapping as ORM;
     * ```
     *
     * Would require the following alias configuration:
     *
     * ```php
     * $annotationAliases = [
     *     'ORM' => 'Doctrine\ORM\Mapping',
     * ];
     * ```
     *
     * NOTE(review): the aliases are validated here but are not applied to the returned tree yet.
     *
     * @throws \InvalidArgumentException if the aliases are not a string map or the input is not a docblock
     *
     * @return TreeNode the tree produced by the grammar compiler
     */
    public function parse(string $docblock, array $annotationAliases = []): TreeNode
    {
        Assertion::allString($annotationAliases);
        Assertion::allString(array_keys($annotationAliases));

        if (0 !== strpos(ltrim($docblock), '/**')) {
            // This branch used to `return []`, which cannot satisfy the declared TreeNode return type and
            // surfaced as an opaque TypeError. Fail with an explicit, catchable error instead.
            throw new \InvalidArgumentException('Expected a docblock starting with "/**".');
        }

        // Everything that followed the first `return` (the DocLexer based pipeline and a stray sample
        // function) was unreachable and has been dropped; the grammar based parser is the only live path.
        return $this->parseDocblock($docblock);
    }

    /**
     * Loads the annotation grammar and parses the docblock with the resulting compiler.
     *
     * The grammar file (res/annotation-grammar.pp, resolved relative to this file) is loaded on every call.
     *
     * @param string $docblock the docblock text handed to the compiler as-is
     *
     * @return TreeNode whatever the compiler returns for the docblock
     */
    private function parseDocblock(string $docblock): TreeNode
    {
        // The previously instantiated Dump visitor was never used, so it is no longer created.
        $compiler = Llk::load(new Read(__DIR__ . '/../../res/annotation-grammar.pp'));

        return $compiler->parse($docblock);
    }

    /**
     * Creates a lexer for the docblock and advances it to the first "@" token.
     *
     * On return the lexer's current token is the first T_AT token. When the input yields no such token
     * (for example because the only "@" sits inside a quoted string) the lexer is returned exhausted,
     * i.e. with a null current token, instead of spinning forever.
     *
     * @param string $docblock the text to tokenize
     *
     * @return DocLexer the positioned lexer
     */
    private function createLexer(string $docblock): DocLexer
    {
        $lexer = new DocLexer();

        $lexer->setInput($docblock);

        // Start at the first @ symbol
        while (null === $lexer->token || DocLexer::T_AT !== $lexer->token['type']) {
            $hasLookahead = $lexer->moveNext();

            // No lookahead and no current token means the input is exhausted: the loop condition would
            // stay true forever, so bail out explicitly.
            if (false === $hasLookahead && null === $lexer->token) {
                break;
            }
        }

        return $lexer;
    }

    /**
     * Sets the annotation identifiers to ignore.
     *
     * The given list replaces the current one entirely. retrieveAnnotation() compares each identifier
     * against it with a strict in_array() before any alias expansion, so entries must match the
     * identifier exactly as it is written in the docblock.
     *
     * @param array $ignore the list of ignored identifiers
     */
    public function ignore(array $ignore): void
    {
        $this->ignored = $ignore;
    }

    /**
     * Builds the token list of the annotation the lexer is currently positioned on.
     *
     * The identifier is read first; an ignored identifier short-circuits with null. Otherwise the leading
     * segment of the identifier (everything before the first backslash, or the whole identifier) is
     * swapped for its configured alias target, if any, and the annotation values are appended.
     *
     * @return array|null the "@" token, the identifier token and the value tokens, or null when ignored
     */
    private function retrieveAnnotation(DocLexer $lexer): ?array
    {
        $name = $this->retrieveIdentifier($lexer);

        if (in_array($name, $this->ignored, true)) {
            return null;
        }

        // Work out which part of the name is eligible for alias expansion.
        $separatorAt = strpos($name, '\\');
        $prefix = false === $separatorAt ? $name : substr($name, 0, $separatorAt);

        if (isset($this->aliases[$prefix])) {
            $name = false === $separatorAt
                ? $this->aliases[$prefix]
                : $this->aliases[$prefix].'\\'.substr($name, $separatorAt + 1);
        }

        $header = [
            [DocLexer::T_AT],
            [DocLexer::T_IDENTIFIER, $name],
        ];

        return array_merge($header, $this->retrieveAnnotationValues($lexer));
    }

    /**
     * Collects the tokens of every annotation the lexer yields.
     *
     * retrieveAnnotation() is invoked at least once and then again for as long as the lexer still has a
     * lookahead token; annotations it reports as ignored (null) contribute nothing.
     *
     * @param array<string, string> $annotationAliases alias => replacement, made available to
     *                                                 retrieveAnnotation() through $this->aliases
     *
     * @return array the flat list of tokens
     */
    private function retrieveAnnotations(DocLexer $lexer, array $annotationAliases): array
    {
        // retrieveAnnotation() expands identifiers through $this->aliases; the aliases received here were
        // previously dropped, so no alias was ever applied.
        $this->aliases = $annotationAliases;

        $tokens = [];

        do {
            // TODO: two checks of the previous implementation are still disabled and need to be ported:
            //  - skip the current token when the lookahead starts exactly where it ends (the original note
            //    said "something about being preceded by a non-catchable pattern"), and
            //  - skip the current token unless it is followed by a namespace separator or by one of
            //    self::$classIdentifiers, i.e. make sure we get a valid annotation name.
            // The locals that only fed those checks were unused (and failed on a null token), so they
            // have been removed along with them.

            // find them all and merge them to the list
            if (null !== ($annotationToken = $this->retrieveAnnotation($lexer))) {
                $tokens = array_merge($tokens, $annotationToken);
            }
        } while (null !== $lexer->lookahead);

        return $tokens;
    }

    /**
     * Reads a curly-brace delimited array and returns its tokens.
     *
     * Accepts an empty array, a comma separated list of entries, and a trailing comma before the
     * closing brace. Brace and comma tokens are emitted without a value.
     *
     * @return array the tokens, starting with the opening and ending with the closing brace token
     */
    private function getArray(DocLexer $lexer): array
    {
        $this->match($lexer, DocLexer::T_OPEN_CURLY_BRACES);

        $collected = [
            [DocLexer::T_OPEN_CURLY_BRACES],
        ];

        // Only look for entries when the array is not closed right away.
        if (!$lexer->isNextToken(DocLexer::T_CLOSE_CURLY_BRACES)) {
            $collected = array_merge($collected, $this->getArrayEntry($lexer));

            while ($lexer->isNextToken(DocLexer::T_COMMA)) {
                $this->match($lexer, DocLexer::T_COMMA);

                $collected[] = [DocLexer::T_COMMA];

                // A comma directly followed by the closing brace is a trailing comma.
                if ($lexer->isNextToken(DocLexer::T_CLOSE_CURLY_BRACES)) {
                    break;
                }

                $collected = array_merge($collected, $this->getArrayEntry($lexer));
            }
        }

        $this->match($lexer, DocLexer::T_CLOSE_CURLY_BRACES);

        $collected[] = [DocLexer::T_CLOSE_CURLY_BRACES];

        return $collected;
    }

    /**
     * Returns the tokens for the next array entry.
     *
     * An entry is either a plain value, or a key followed by ":" or "=" and a plain value. Whether a key
     * is present is decided by glimpsing at the token after the lookahead. A key is read as a constant
     * when the lookahead is an identifier, otherwise it must be an integer or a string.
     *
     * @return array the tokens: optional key and assignment tokens followed by the value tokens
     */
    private function getArrayEntry(DocLexer $lexer): array
    {
        // glimpse() may yield null when nothing follows the lookahead; treat that as "no assignment"
        // instead of reading an offset of null.
        $glimpse = $lexer->glimpse();
        $glimpsedType = $glimpse['type'] ?? null;

        // the assignment token, ":" or "=", when the entry has a key
        $assignment = in_array($glimpsedType, [DocLexer::T_COLON, DocLexer::T_EQUALS], true)
            ? [$glimpsedType]
            : null;

        $tokens = [];

        if (null !== $assignment) {
            if ($lexer->isNextToken(DocLexer::T_IDENTIFIER)) {
                // if the key is a constant, hand off
                $tokens = $this->getConstant($lexer);
            } else {
                // match only integer and string keys
                $this->matchAny(
                    $lexer,
                    [
                        DocLexer::T_INTEGER,
                        DocLexer::T_STRING,
                    ]
                );

                $tokens = [
                    [
                        $lexer->token['type'],
                        $lexer->token['value'],
                    ],
                ];
            }

            $tokens[] = $assignment;

            // consume the assignment token itself
            $this->matchAny(
                $lexer,
                [
                    DocLexer::T_COLON,
                    DocLexer::T_EQUALS,
                ]
            );
        }

        // merge in the value
        return array_merge($tokens, $this->getPlainValue($lexer));
    }

    /**
     * Reads a `name = value` pair and returns its tokens.
     *
     * The name must be an identifier token and must be followed by "="; the value is read as a plain
     * value.
     *
     * @return array the identifier token, the "=" token and the value tokens, in that order
     */
    private function getAssignedValue(DocLexer $lexer): array
    {
        $this->match($lexer, DocLexer::T_IDENTIFIER);

        // Remember the name before the lexer moves on to the "=".
        $name = $lexer->token['value'];

        $this->match($lexer, DocLexer::T_EQUALS);

        $assigned = $this->getPlainValue($lexer);

        return array_merge(
            [
                [DocLexer::T_IDENTIFIER, $name],
                [DocLexer::T_EQUALS],
            ],
            $assigned
        );
    }

    /**
     * Reads the next identifier and returns it as a single constant token.
     *
     * The identifiers "true", "false" and "null" (compared case-insensitively) are typed as T_TRUE,
     * T_FALSE and T_NULL; anything else is typed as T_IDENTIFIER. The token value keeps the original
     * spelling.
     *
     * @return array a list holding exactly one [type, identifier] token
     */
    private function getConstant(DocLexer $lexer): array
    {
        $keywordTypes = [
            'true' => DocLexer::T_TRUE,
            'false' => DocLexer::T_FALSE,
            'null' => DocLexer::T_NULL,
        ];

        $identifier = $this->retrieveIdentifier($lexer);
        $type = $keywordTypes[strtolower($identifier)] ?? DocLexer::T_IDENTIFIER;

        return [
            [$type, $identifier],
        ];
    }

    /**
     * Returns the next identifier.
     *
     * The lookahead must be one of self::$classIdentifiers. A namespace separator that starts exactly
     * where that token ends, followed by another class identifier, is appended to the name.
     *
     * NOTE(review): $position is not advanced inside the loop, so the loop can only ever append one
     * additional segment — confirm whether longer separator-delimited names are expected here.
     *
     * @throws Exception
     * @throws SyntaxException if a syntax error is found
     *
     * @return string the identifier
     */
    private function retrieveIdentifier(DocLexer $lexer): string
    {
        // grab the first bit of the identifier
        if (!$lexer->isNextTokenAny(self::$classIdentifiers)) {
            throw SyntaxException::expectedToken(
                'namespace separator or identifier',
                null,
                $lexer
            );
        }

        $lexer->moveNext();

        $name = $lexer->token['value'];

        // grab the remaining bits
        $position = $lexer->token['position']
                  + strlen($lexer->token['value']);

        // isNextToken() is checked first so that a missing (null) lookahead is never offset-accessed.
        while ($lexer->isNextToken(DocLexer::T_NAMESPACE_SEPARATOR)
            && ($lexer->lookahead['position'] === $position)) {
            $this->match($lexer, DocLexer::T_NAMESPACE_SEPARATOR);

            // The lexer has to be passed along: the previous call omitted it, and a second, bare
            // matchAny() call referenced a function that does not exist.
            $this->matchAny($lexer, self::$classIdentifiers);

            $name .= '\\'.$lexer->token['value'];
        }

        return $name;
    }

    /**
     * Returns the tokens for the next "plain" value.
     *
     * A plain value is, in order of precedence: an array, a nested annotation, a constant, or a scalar
     * literal (false, float, integer, null, string, true).
     *
     * @throws Exception
     * @throws SyntaxException if a syntax error is found
     *
     * @return array the tokens; empty when the value is a nested annotation that is ignored
     */
    private function getPlainValue(DocLexer $lexer): array
    {
        // check if array, then hand off
        if ($lexer->isNextToken(DocLexer::T_OPEN_CURLY_BRACES)) {
            return $this->getArray($lexer);
        }

        // check if nested annotation, then hand off
        if ($lexer->isNextToken(DocLexer::T_AT)) {
            // retrieveAnnotation() starts by reading an identifier and does not consume the "@" itself,
            // so without this the nested case could only ever end in a syntax error.
            $this->match($lexer, DocLexer::T_AT);

            // An ignored annotation yields null, which the array return type does not allow.
            return $this->retrieveAnnotation($lexer) ?? [];
        }

        // check if constant, then hand off
        if ($lexer->isNextToken(DocLexer::T_IDENTIFIER)) {
            return $this->getConstant($lexer);
        }

        $scalarTypes = [
            DocLexer::T_FALSE,
            DocLexer::T_FLOAT,
            DocLexer::T_INTEGER,
            DocLexer::T_NULL,
            DocLexer::T_STRING,
            DocLexer::T_TRUE,
        ];

        // a missing lookahead (end of input) is reported as a syntax error like any other unknown type
        $nextType = $lexer->lookahead['type'] ?? null;

        // determine type, or throw syntax error if unrecognized
        if (!in_array($nextType, $scalarTypes, true)) {
            throw SyntaxException::expectedToken(
                'PlainValue',
                null,
                $lexer
            );
        }

        $this->match($lexer, $nextType);

        return [
            [
                $lexer->token['type'],
                $lexer->token['value'],
            ],
        ];
    }

    /**
     * Returns the tokens for the next value.
     *
     * The token after the lookahead decides the kind of value: when it is "=" the value is read as an
     * assigned value, otherwise as a plain value.
     *
     * @return array the tokens
     */
    private function getValue(DocLexer $lexer): array
    {
        // glimpse() may yield null when nothing follows the lookahead; avoid reading an offset of null.
        $glimpse = $lexer->glimpse();
        $glimpsedType = $glimpse['type'] ?? null;

        // check if it's an assigned value: @example(assigned="value")
        if (DocLexer::T_EQUALS === $glimpsedType) {
            return $this->getAssignedValue($lexer);
        }

        return $this->getPlainValue($lexer);
    }

    /**
     * Reads the parenthesised value list of the current annotation, if there is one.
     *
     * Without an opening parenthesis nothing is consumed and an empty list is returned. Otherwise the
     * parentheses, the comma separators and every value are returned as tokens; `@example()` produces
     * just the two parenthesis tokens.
     *
     * @throws Exception
     * @throws SyntaxException if a syntax error is found
     *
     * @return array the tokens
     */
    private function retrieveAnnotationValues(DocLexer $lexer): array
    {
        // No value list at all.
        if (!$lexer->isNextToken(DocLexer::T_OPEN_PARENTHESIS)) {
            return [];
        }

        $this->match($lexer, DocLexer::T_OPEN_PARENTHESIS);

        $collected = [
            [DocLexer::T_OPEN_PARENTHESIS],
        ];

        // Anything other than an immediate ")" means there is at least one value to read.
        if (!$lexer->isNextToken(DocLexer::T_CLOSE_PARENTHESIS)) {
            $collected = array_merge($collected, $this->getValue($lexer));

            while ($lexer->isNextToken(DocLexer::T_COMMA)) {
                $this->match($lexer, DocLexer::T_COMMA);

                $collected[] = [DocLexer::T_COMMA];

                // Keep the token the value starts at for the error report below.
                $startToken = $lexer->lookahead;
                $next = $this->getValue($lexer);

                // a comma must be followed by an actual value
                if (empty($next)) {
                    throw SyntaxException::expectedToken('Value', $startToken);
                }

                $collected = array_merge($collected, $next);
            }
        }

        $this->match($lexer, DocLexer::T_CLOSE_PARENTHESIS);

        $collected[] = [DocLexer::T_CLOSE_PARENTHESIS];

        return $collected;
    }

    /**
     * Requires the lookahead to be the given token and advances the lexer past it.
     *
     * @param int $token the token type the lookahead has to be
     *
     * @throws Exception
     * @throws SyntaxException if the lookahead is a different token
     *
     * @return bool the result of advancing the lexer
     */
    private function match(DocLexer $lexer, $token): bool
    {
        if ($lexer->isNextToken($token)) {
            return $lexer->moveNext();
        }

        throw SyntaxException::expectedToken(
            $lexer->getLiteral($token),
            null,
            $lexer
        );
    }

    /**
     * Requires the lookahead to be one of the given tokens and advances the lexer past it.
     *
     * On a mismatch the error names every accepted token literal, joined with " or ".
     *
     * @param array $tokens the accepted token types
     *
     * @throws Exception
     * @throws SyntaxException if the lookahead is none of the given tokens
     *
     * @return bool the result of advancing the lexer
     */
    private function matchAny(DocLexer $lexer, array $tokens): bool
    {
        if ($lexer->isNextTokenAny($tokens)) {
            return $lexer->moveNext();
        }

        $literals = [];

        foreach ($tokens as $candidate) {
            $literals[] = $lexer->getLiteral($candidate);
        }

        throw SyntaxException::expectedToken(
            implode(' or ', $literals),
            null,
            $lexer
        );
    }
}

humbug / box

Pull Request — master (#315)

Tokenizer C

Complexity

Size/Duplication

Importance

16 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like