Passed
Branch feature/2.0 (d2af8f)
by Jonathan
13:07
created

SosaStatisticsService::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 2
c 1
b 0
f 0
nc 1
nop 2
dl 0
loc 4
rs 10
1
<?php
2
3
/**
4
 * webtrees-lib: MyArtJaub library for webtrees
5
 *
6
 * @package MyArtJaub\Webtrees
7
 * @subpackage Sosa
8
 * @author Jonathan Jaubart <[email protected]>
9
 * @copyright Copyright (c) 2009-2020, Jonathan Jaubart
10
 * @license http://www.gnu.org/licenses/gpl.html GNU General Public License, version 3
11
 */
12
13
declare(strict_types=1);
14
15
namespace MyArtJaub\Webtrees\Module\Sosa\Services;
16
17
use Fisharebest\Webtrees\Individual;
18
use Fisharebest\Webtrees\Registry;
19
use Fisharebest\Webtrees\Tree;
20
use Fisharebest\Webtrees\User;
21
use Illuminate\Database\Capsule\Manager as DB;
22
use Illuminate\Database\Query\Builder;
23
use Illuminate\Database\Query\JoinClause;
24
use Illuminate\Support\Collection;
25
26
/**
27
 * Service for retrieving Sosa statistics
28
 *
29
 */
30
class SosaStatisticsService
31
{
32
    
33
    /**
34
     * Reference user
35
     * @var User $user
36
     */
37
    private $user;
38
    
39
    /**
40
     * Reference tree
41
     * @var Tree $tree
42
     */
43
    private $tree;
44
    
45
    /**
46
     * Constructor for Sosa Statistics Service
47
     * 
48
     * @param Tree $tree
49
     * @param User $user
50
     */
51
    public function __construct(Tree $tree, User $user)
52
    {
53
        $this->tree = $tree;
54
        $this->user = $user;
55
    }
56
    
57
    /**
58
     * Return the root individual for the reference tree and user.
59
     * 
60
     * @return Individual|NULL
61
     */
62
    public function rootIndividual() : ?Individual
63
    {
64
        $root_indi_id = $this->tree->getUserPreference($this->user, 'MAJ_SOSA_ROOT_ID');
65
        return Registry::individualFactory()->make($root_indi_id, $this->tree);
66
    }
67
    
68
    /**
69
     * Returns how many individuals exist in the tree.
70
     *
71
     * @return int
72
     */
73
    public function totalIndividuals(): int
74
    {
75
        return DB::table('individuals')
76
            ->where('i_file', '=', $this->tree->id())
77
            ->count();
78
    }
79
    
80
    /**
81
     * Get the total count of Sosa ancestors for all generations
82
     * 
83
     * @return int
84
     */
85
    public function totalAncestors() : int
86
    {
87
        return DB::table('maj_sosa')
88
            ->where('majs_gedcom_id', '=', $this->tree->id())
89
            ->where('majs_user_id', '=', $this->user->id())
90
            ->count();
91
    }
92
    
93
    /**
94
     * Get the total count of distinct Sosa ancestors for all generations
95
     * 
96
     * @return int
97
     */
98
    public function totalDistinctAncestors() : int
99
    {
100
        return DB::table('maj_sosa')
101
        ->where('majs_gedcom_id', '=', $this->tree->id())
102
        ->where('majs_user_id', '=', $this->user->id())
103
        ->distinct()
104
        ->count('majs_i_id');
105
    }
106
    
107
    /**
108
     * Get the mean generation time, as the slope of the linear regression of birth years vs generations
109
     * 
110
     * @return float
111
     */
112
    public function meanGenerationTime() : float
113
    {
114
        $row = DB::table('maj_sosa')
115
            ->where('majs_gedcom_id', '=', $this->tree->id())
116
            ->where('majs_user_id', '=', $this->user->id())
117
            ->whereNotNull('majs_birth_year')
118
            ->selectRaw('COUNT(majs_sosa) AS n')
119
            ->selectRaw('SUM(majs_gen * majs_birth_year) AS sum_xy')
120
            ->selectRaw('SUM(majs_gen) AS sum_x')
121
            ->selectRaw('SUM(majs_birth_year) AS sum_y')
122
            ->selectRaw('SUM(majs_gen * majs_gen) AS sum_x2')
123
            ->get()->first();
124
        
125
        return $row->n == 0 ? 0 :
126
            -($row->n * $row->sum_xy - $row->sum_x * $row->sum_y) / ($row->n * $row->sum_x2 - pow($row->sum_x, 2));
127
    }
128
    
129
    /**
130
     * Get the statistic array detailed by generation.
131
     * Statistics for each generation are:
132
     * 	- The number of Sosa in generation
133
     * 	- The number of Sosa up to generation
134
     *  - The number of distinct Sosa up to generation
135
     *  - The year of the first birth in generation
136
     *  - The year of the first estimated birth in generation
137
     *  - The year of the last birth in generation
138
     *  - The year of the last estimated birth in generation
139
     *  - The average year of birth in generation
140
     *
141
     * @return array Statistics array
142
     */
143
    public function statisticsByGenerations() : array
144
    {
145
        $stats_by_gen = $this->statisticsByGenerationBasicData();
146
        $cumul_stats_by_gen = $this->statisticsByGenerationCumulativeData();
147
        
148
        $statistics_by_gen = [];
149
        foreach($stats_by_gen as $gen => $stats_gen) {
150
            $statistics_by_gen[(int) $stats_gen->gen] = array(
151
                'sosaCount'				=>	(int) $stats_gen->total_sosa,
152
                'sosaTotalCount'		=>	(int) $cumul_stats_by_gen[$gen]->total_cumul,
153
                'diffSosaTotalCount'	=>	(int) $cumul_stats_by_gen[$gen]->total_distinct_cumul,
154
                'firstBirth'			=>	$stats_gen->first_year,
155
                'firstEstimatedBirth'	=>	$stats_gen->first_est_year,
156
                'lastBirth'				=>	$stats_gen->last_year,
157
                'lastEstimatedBirth'	=>	$stats_gen->last_est_year
158
            );
159
        }
160
        
161
        return $statistics_by_gen;
162
    }
163
    
164
    /**
165
     * Returns the basic statistics data by generation.
166
     * 
167
     * @return Collection
168
     */
169
    private function statisticsByGenerationBasicData() : Collection
170
    {
171
        return DB::table('maj_sosa')
172
            ->where('majs_gedcom_id', '=', $this->tree->id())
173
            ->where('majs_user_id', '=', $this->user->id())
174
            ->groupBy('majs_gen')
175
            ->orderBy('majs_gen', 'asc')
176
            ->select('majs_gen AS gen')
177
            ->selectRaw('COUNT(majs_sosa) AS total_sosa')
178
            ->selectRaw('MIN(majs_birth_year) AS first_year')
179
            ->selectRaw('MIN(majs_birth_year_est) AS first_est_year')
180
            ->selectRaw('MAX(majs_birth_year) AS last_year')
181
            ->selectRaw('MAX(majs_birth_year_est) AS last_est_year')
182
            ->get()->keyBy('gen');
183
    }
184
    
185
    /**
186
     * Returns the cumulative statistics data by generation
187
     * 
188
     * @return Collection
189
     */
190
    private function statisticsByGenerationCumulativeData() : Collection
191
    {
192
        $list_gen = DB::table('maj_sosa')->select('majs_gen')->distinct()
193
            ->where('majs_gedcom_id', '=', $this->tree->id())
194
            ->where('majs_user_id', '=', $this->user->id());
195
        
196
        return DB::table('maj_sosa')
197
            ->joinSub($list_gen, 'list_gen', function(JoinClause $join) : void {
198
                $join->on('maj_sosa.majs_gen', '<=', 'list_gen.majs_gen')
199
                ->where('majs_gedcom_id', '=', $this->tree->id())
200
                ->where('majs_user_id', '=', $this->user->id());
201
            })
202
            ->groupBy('list_gen.majs_gen')
203
            ->select('list_gen.majs_gen AS gen')
204
            ->selectRaw('COUNT(majs_i_id) AS total_cumul')
205
            ->selectRaw('COUNT(DISTINCT majs_i_id) AS total_distinct_cumul')
206
            ->get()->keyBy('gen');
207
    }
208
    
209
    /**
210
     * Return a Collection of the mean generation depth and standard deviation for all Sosa ancestors at a given generation.
211
     * Sosa 1 is of generation 1.
212
     *
213
     * Mean generation depth and deviation are calculated based on the works of Marie-Héléne Cazes and Pierre Cazes,
214
     * published in Population (French Edition), Vol. 51, No. 1 (Jan. - Feb., 1996), pp. 117-140
215
     * http://kintip.net/index.php?option=com_jdownloads&task=download.send&id=9&catid=4&m=0
216
     *
217
     * Format:
218
     *  - key : sosa number of the ancestor
219
     *  - values: 
220
     *      - root_ancestor_id : ID of the ancestor
221
     *      - mean_gen_depth : Mean generation depth
222
     *      - stddev_gen_depth : Standard deviation of generation depth
223
     *
224
     * @param int $gen Sosa generation
225
     * @return Collection
226
     */
227
    public function generationDepthStatsAtGeneration(int $gen) : Collection
228
    {
229
        $table_prefix = DB::connection()->getTablePrefix();
230
        $missing_ancestors_by_gen = DB::table('maj_sosa AS sosa')
231
            ->selectRaw($table_prefix. 'sosa.majs_gen - ? AS majs_gen_norm', [$gen])
232
            ->selectRaw('FLOOR(((' . $table_prefix .'sosa.majs_sosa / POW(2, ' . $table_prefix .'sosa.majs_gen -1 )) - 1) * POWER(2, ? - 1)) + POWER(2, ? - 1) AS root_ancestor', [$gen, $gen])
233
            ->selectRaw('SUM(CASE WHEN ' . $table_prefix .'sosa_fat.majs_i_id IS NULL AND ' . $table_prefix .'sosa_mot.majs_i_id IS NULL THEN 1 ELSE 0 END) AS full_root_count')
234
            ->selectRaw('SUM(CASE WHEN ' . $table_prefix .'sosa_fat.majs_i_id IS NULL AND ' . $table_prefix .'sosa_mot.majs_i_id IS NULL THEN 0 ELSE 1 END) As semi_root_count')
235
            ->leftJoin('maj_sosa AS sosa_fat', function(JoinClause $join) use ($table_prefix) : void {    // Link to sosa's father
236
                $join->whereRaw($table_prefix . 'sosa_fat.majs_sosa = 2 * ' . $table_prefix . 'sosa.majs_sosa')
237
                ->where('sosa_fat.majs_gedcom_id', '=', $this->tree->id())
238
                ->where('sosa_fat.majs_user_id', '=', $this->user->id());
239
            })
240
            ->leftJoin('maj_sosa AS sosa_mot', function(JoinClause $join) use ($table_prefix) : void {    // Link to sosa's mother
241
                $join->whereRaw($table_prefix . 'sosa_mot.majs_sosa = 2 * ' . $table_prefix . 'sosa.majs_sosa + 1')
242
                ->where('sosa_mot.majs_gedcom_id', '=', $this->tree->id())
243
                ->where('sosa_mot.majs_user_id', '=', $this->user->id());
244
            })
245
            ->where('sosa.majs_gedcom_id', '=', $this->tree->id())
246
            ->where('sosa.majs_user_id', '=', $this->user->id())
247
            ->where('sosa.majs_gen', '>=', $gen)
248
            ->where(function(Builder $query) : void {
249
                $query->whereNull('sosa_fat.majs_i_id')
250
                    ->orWhereNull('sosa_mot.majs_i_id');
251
            })
252
            ->groupBy(['sosa.majs_gen', 'root_ancestor']);
253
254
        return DB::table('maj_sosa AS sosa_list')
255
            ->select(['stats_by_gen.root_ancestor AS root_ancestor_sosa', 'sosa_list.majs_i_id as root_ancestor_id'])
256
            ->selectRaw('1 + SUM( (majs_gen_norm) * ( 2 * full_root_count + semi_root_count) /  (2 * POWER(2, majs_gen_norm))) AS mean_gen_depth')
257
            ->selectRaw(' SQRT('.
258
                '   SUM(POWER(majs_gen_norm, 2) * ( 2 * full_root_count + semi_root_count) /  (2 * POWER(2, majs_gen_norm)))'.
259
                '   - POWER( SUM( (majs_gen_norm) * ( 2 * full_root_count + semi_root_count) /  (2 * POWER(2, majs_gen_norm))), 2)'.
260
                ' ) AS stddev_gen_depth')
261
            ->joinSub($missing_ancestors_by_gen, 'stats_by_gen', function(JoinClause $join) : void {
262
                $join->on('sosa_list.majs_sosa', '=', 'stats_by_gen.root_ancestor')
263
                    ->where('sosa_list.majs_gedcom_id', '=', $this->tree->id())
264
                    ->where('sosa_list.majs_user_id', '=', $this->user->id());
265
            })
266
            ->groupBy(['stats_by_gen.root_ancestor', 'sosa_list.majs_i_id'])
267
            ->orderBy('stats_by_gen.root_ancestor')
268
            ->get()->keyBy('root_ancestor_sosa');
269
    }
270
    
271
    /**
272
     * Return a collection of the most duplicated root Sosa ancestors.
273
     * The number of ancestors to return is limited by the parameter $limit.
274
     * If several individuals are tied when reaching the limit, none of them are returned,
275
     * which means that there can be less individuals returned than requested.
276
     *
277
     * Format:
278
     *  - value:
279
     *      - sosa_i_id : sosa individual
280
     *      - sosa_count: number of duplications of the ancestor (e.g. 3 if it appears 3 times)
281
     *
282
     * @param int $limit
283
     * @return Collection
284
     */
285
    public function topMultipleAncestorsWithNoTies(int $limit) : Collection
286
    {
287
        $table_prefix = DB::connection()->getTablePrefix();
288
        $multiple_ancestors = DB::table('maj_sosa AS sosa')
289
            ->select('sosa.majs_i_id AS sosa_i_id')
290
            ->selectRaw('COUNT('. $table_prefix .'sosa.majs_sosa) AS sosa_count')
291
            ->leftJoin('maj_sosa AS sosa_fat', function(JoinClause $join) use ($table_prefix) : void {    // Link to sosa's father
292
                $join->whereRaw($table_prefix . 'sosa_fat.majs_sosa = 2 * ' . $table_prefix . 'sosa.majs_sosa')
293
                    ->where('sosa_fat.majs_gedcom_id', '=', $this->tree->id())
294
                    ->where('sosa_fat.majs_user_id', '=', $this->user->id());
295
            })
296
            ->leftJoin('maj_sosa AS sosa_mot', function(JoinClause $join) use ($table_prefix) : void {    // Link to sosa's mother
297
                $join->whereRaw($table_prefix . 'sosa_mot.majs_sosa = 2 * ' . $table_prefix . 'sosa.majs_sosa + 1')
298
                ->where('sosa_mot.majs_gedcom_id', '=', $this->tree->id())
299
                ->where('sosa_mot.majs_user_id', '=', $this->user->id());
300
            })
301
            ->where('sosa.majs_gedcom_id', '=', $this->tree->id())
302
            ->where('sosa.majs_user_id', '=', $this->user->id())
303
            ->whereNull('sosa_fat.majs_sosa')   // We keep only root individuals, i.e. those with no father or mother
304
            ->whereNull('sosa_mot.majs_sosa')
305
            ->groupBy('sosa.majs_i_id')
306
            ->havingRaw('COUNT('. $table_prefix .'sosa.majs_sosa) > 1')    // Limit to the duplicate sosas.
307
            ->orderByRaw('COUNT('. $table_prefix .'sosa.majs_sosa) DESC, MIN('. $table_prefix .'sosa.majs_sosa) ASC')
308
            ->limit($limit + 1)     // We want to select one more than required, for ties
309
            ->get();
310
            
311
        if($multiple_ancestors->count() > $limit)
312
        {
313
            $last_count = $multiple_ancestors->last()->sosa_count;
314
            $multiple_ancestors = $multiple_ancestors->reject(function($element) use ($last_count) : bool {
315
                return $element->sosa_count ==  $last_count;
316
            });
317
        }
318
        return $multiple_ancestors;
319
    }
320
    
321
    /**
322
     * Return a computed array of statistics about the dispersion of ancestors across the ancestors
323
     * at a specified generation.
324
     * 
325
     * Format: 
326
     *  - key : rank of the ancestor in generation G for which exclusive ancestors have been found
327
     *          For instance 3 represent the maternal grand father
328
     *          0 is used for shared ancestors
329
     *  - values: number of ancestors exclusively in the ancestors of the ancestor in key
330
     *  
331
     *  For instance a result at generation 3 could be :
332
     *      array (   0     =>  12      -> 12 ancestors are shared by the grand-parents
333
     *                1     =>  32      -> 32 ancestors are exclusive to the paternal grand-father
334
     *                2     =>  25      -> 25 ancestors are exclusive to the paternal grand-mother
335
     *                3     =>  12      -> 12 ancestors are exclusive to the maternal grand-father
336
     *                4     =>  30      -> 30 ancestors are exclusive to the maternal grand-mother
337
     *            )
338
     * 
339
     * @param int $gen
340
     * @return Collection
341
     */
342
    public function ancestorsDispersionForGeneration(int $gen) : Collection
343
    {
344
        $ancestors_branches = DB::table('maj_sosa')
345
            ->select('majs_i_id AS i_id')
346
            ->selectRaw('FLOOR(majs_sosa / POW(2, (majs_gen - ?))) - POW(2, ? -1) + 1 AS branch', [$gen, $gen])
347
            ->where('majs_gedcom_id', '=', $this->tree->id())
348
            ->where('majs_user_id', '=', $this->user->id())
349
            ->where('majs_gen', '>=', $gen)
350
            ->groupBy('majs_i_id', 'branch');
351
        
352
            
353
        $consolidated_ancestors_branches = DB::table('maj_sosa')
354
            ->fromSub($ancestors_branches, 'indi_branch')
355
            ->select('i_id')
356
            ->selectRaw('CASE WHEN COUNT(branch) > 1 THEN 0 ELSE MIN(branch) END AS branches')
357
            ->groupBy('i_id');
358
            
359
        return DB::table('maj_sosa')
360
            ->fromSub($consolidated_ancestors_branches, 'indi_branch_consolidated')
361
            ->select('branches')
362
            ->selectRaw('COUNT(i_id) AS count_indi')
363
            ->groupBy('branches')
364
            ->get()->pluck('count_indi', 'branches');
365
    }
366
}
367