CardinalityAggregator - Code Metrics - level23/druid-client - Measure and Improve Code Quality continuously with Scrutinizer

CardinalityAggregator A
last analyzed 2025-10-22 07:59 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	79
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
wmc	2
eloc	15
dl	0
loc	79
ccs	12
cts	12
cp	1
rs	10
c	1
b	0
f	0

2 Methods

Rating	Name	Duplication	Size	Complexity
A	__construct()	0	10	1
A	toArray()	0	8	1

<?php
declare(strict_types=1);

namespace Level23\Druid\Aggregations;

use Level23\Druid\Collections\DimensionCollection;

/**
 * Aggregator definition for druid's "cardinality" aggregation.
 *
 * The object only stores its configuration; toArray() turns that configuration into the array structure which is
 * placed in a druid query.
 */
class CardinalityAggregator implements AggregatorInterface
{
    /**
     * Value emitted under the "name" key of the aggregator definition.
     */
    protected string $outputName;

    /**
     * Value emitted under the "byRow" key of the aggregator definition.
     */
    protected bool $byRow;

    /**
     * Value emitted under the "round" key of the aggregator definition.
     */
    protected bool $round;

    /**
     * The dimensions whose array representation is emitted under the "fields" key.
     */
    protected DimensionCollection $dimensions;

    /**
     * CardinalityAggregator constructor.
     *
     * The cardinality aggregator estimates, by means of HyperLogLog, the cardinality of a set of Apache Druid
     * dimensions. Be aware that it is much slower than indexing a column with the hyperUnique aggregator. Because it
     * runs over a dimension column, that string dimension cannot be dropped from the dataset to improve rollup.
     * When the individual values of a dimension are of no interest, the hyperUnique aggregator is strongly
     * recommended over this one.
     *
     * HyperLogLog produces decimal estimates with some error. Set "round" to true to round the estimated values off
     * to whole numbers; the result remains an estimate even then. The "round" field only influences query-time
     * behavior and is ignored at ingestion-time.
     *
     * With byRow set to false (the default), the cardinality is computed over the set formed by the union of all
     * dimension values of all given dimensions. For one dimension this is equivalent to:
     * ```
     * SELECT COUNT(DISTINCT(dimension)) FROM <datasource>
     * ```
     *
     * For several dimensions it is equivalent to something akin to:
     * ```
     * SELECT COUNT(DISTINCT(value)) FROM (
     * SELECT dim_1 as value FROM <datasource>
     * UNION
     * SELECT dim_2 as value FROM <datasource>
     * UNION
     * SELECT dim_3 as value FROM <datasource>
     * )
     * ```
     *
     * With byRow set to true, the cardinality is computed by row, i.e. it is the cardinality of the distinct
     * dimension combinations. This is equivalent to something akin to:
     *
     * ```
     * SELECT COUNT(*) FROM ( SELECT DIM1, DIM2, DIM3 FROM <datasource> GROUP BY DIM1, DIM2, DIM3 )
     * ```
     *
     * @see https://druid.apache.org/docs/latest/querying/hll-old.html
     *
     * @param string                                         $outputName Name emitted as "name" in the definition.
     * @param \Level23\Druid\Collections\DimensionCollection $dimensions Dimensions emitted as "fields".
     * @param bool                                           $byRow      Emitted as "byRow"; see description above.
     * @param bool                                           $round      Emitted as "round"; see description above.
     */
    public function __construct(
        string $outputName,
        DimensionCollection $dimensions,
        bool $byRow = false,
        bool $round = false
    ) {
        // Stored in parameter order; the assignments are independent of each other.
        $this->outputName = $outputName;
        $this->dimensions = $dimensions;
        $this->byRow      = $byRow;
        $this->round      = $round;
    }

    /**
     * Build the array representation of this aggregator for use in a druid query.
     *
     * @return array<string|bool|array<int,array<mixed>>>
     */
    public function toArray(): array
    {
        $fields = $this->dimensions->toArray();

        $definition = [
            'type'   => 'cardinality',
            'name'   => $this->outputName,
            'fields' => $fields,
            'byRow'  => $this->byRow,
            'round'  => $this->round,
        ];

        return $definition;
    }
}

1		<?php
2		declare(strict_types=1);
3
4		namespace Level23\Druid\Aggregations;
5
6		use Level23\Druid\Collections\DimensionCollection;
7
8		class CardinalityAggregator implements AggregatorInterface
9		{
10		protected string $outputName;
11
12		protected bool $byRow;
13
14		protected bool $round;
15
16		protected DimensionCollection $dimensions;
17
18		/**
19		* CardinalityAggregator constructor.
20		*
21		* Computes the cardinality of a set of Apache Druid (incubating) dimensions, using HyperLogLog to estimate the
22		* cardinality. Please note that this aggregator will be much slower than indexing a column with the hyperUnique
23		* aggregator. This aggregator also runs over a dimension column, which means the string dimension cannot be
24		* removed from the dataset to improve rollup. In general, we strongly recommend using the hyperUnique aggregator
25		* instead of the cardinality aggregator if you do not care about the individual values of a dimension.
26		*
27		* The HyperLogLog algorithm generates decimal estimates with some error. "round" can be set to true to round off
28		* estimated values to whole numbers. Note that even with rounding, the cardinality is still an estimate. The
29		* "round" field only affects query-time behavior, and is ignored at ingestion-time.
30		*
31		* When setting byRow to false (the default) it computes the cardinality of the set composed of the union of all
32		* dimension values for all the given dimensions. For a single dimension, this is equivalent to:
33		* ```
34		* SELECT COUNT(DISTINCT(dimension)) FROM <datasource>
35		* ```
36		*
37		* For multiple dimensions, this is equivalent to something akin to
38		* ```
39		* SELECT COUNT(DISTINCT(value)) FROM (
40		* SELECT dim_1 as value FROM <datasource>
41		* UNION
42		* SELECT dim_2 as value FROM <datasource>
43		* UNION
44		* SELECT dim_3 as value FROM <datasource>
45		* )
46		* ```
47		*
48		* When setting byRow to true it computes the cardinality by row, i.e. the cardinality of distinct dimension
49		* combinations. This is equivalent to something akin to
50		*
51		* ```
52		* SELECT COUNT(*) FROM ( SELECT DIM1, DIM2, DIM3 FROM <datasource> GROUP BY DIM1, DIM2, DIM3 )
53		* ```
54		*
55		* @see https://druid.apache.org/docs/latest/querying/hll-old.html
56		*
57		* @param string $outputName
58		* @param \Level23\Druid\Collections\DimensionCollection $dimensions
59		* @param bool $byRow
60		* @param bool $round
61		*/
62	4	public function __construct(
63		string $outputName,
64		DimensionCollection $dimensions,
65		bool $byRow = false,
66		bool $round = false
67		) {
68	4	$this->outputName = $outputName;
69	4	$this->byRow = $byRow;
70	4	$this->round = $round;
71	4	$this->dimensions = $dimensions;
72		}
73
74		/**
75		* Return the aggregator as it can be used in a druid query.
76		*
77		* @return array<string|bool|array<int,array<mixed>>>
78		*/
79	4	public function toArray(): array
80		{
81	4	return [
82	4	'type' => 'cardinality',
83	4	'name' => $this->outputName,
84	4	'fields' => $this->dimensions->toArray(),
85	4	'byRow' => $this->byRow,
86	4	'round' => $this->round,
87	4	];
88		}
89		}

level23 / druid-client

CardinalityAggregator A last analyzed 2025-10-22 07:59 UTC

Complexity

Size/Duplication

Test Coverage

Importance

2 Methods

Duplication Side-by-Side

Filter issues like

CardinalityAggregator A
last analyzed 2025-10-22 07:59 UTC