Passed
Push — 1.11.x ( bce6cd...c146d9 )
by Angel Fernando Quiroz
12:25
created

main/cron/lang/langstats_file_builder.php (1 issue)

1
<?php
2
/* For licensing terms, see /license.txt */
3
4
/**
5
 * This script generates a directory based on the English language variables
6
 * but only composed of the 10,000 (can be configured) most frequent words
7
 * used in Chamilo. This implies first using the langstats.php script, which
8
 * in turn implies configuring an additional variable in configuration.php
9
 * (see langstats.php for more info).
10
 * When running the language_builder, please make sure this parameter is
11
 * set to 0 in the configuration.php file, otherwise it will take *ages*.
12
 */
13
require_once '../../inc/global.inc.php';
14
require_once 'langstats.class.php';
15
// Switch frequency measurement off for this run (presumably the parameter the
// file header asks to have set to 0 - confirm against langstats.php).
global $_configuration;
$_configuration['language_measure_frequency'] = 0;

/**
 * Init.
 */
// Reference language and the directories the script reads from / writes to.
$orig_lang = 'english';
$lang_dir = api_get_path(SYS_LANG_PATH);
$arch_dir = api_get_path(SYS_ARCHIVE_PATH);

// Tunable limits.
$words_limit = 10000; //change this if you want more words
$terms_limit = 3000; //change this if you think you'll need more terms

// Most popular terms as reported by langstats, plus the running counters
// filled in while those terms are walked.
$langstats = new langstats();
$terms = $langstats->get_popular_terms($terms_limit);
$terms_in_limit = [];
$words_counter = 0;
$i = 0;
30
/**
31
 * Code run.
32
 */
33
/*
 * Walk the popular terms in the order langstats returned them and keep each
 * one until the cumulated number of words in their English translations
 * exceeds $words_limit. The limit is checked before a term is added, so the
 * term that makes the counter cross the limit is still kept.
 * $terms_in_limit maps term name => position in the popular-terms list.
 */
foreach ($terms as $row) {
    // Honour the configurable limit instead of a hard-coded 10000.
    if ($words_counter > $words_limit) {
        break;
    }
    $words_counter += str_word_count(get_lang($row['term_name'], null, $orig_lang));
    $terms_in_limit[$row['term_name']] = $i;
    $i++;
}
46
//echo $words_counter.'<br />';
47
48
echo "Reached ".count($terms_in_limit)." terms for the $words_counter most-used words<br /><br />\n";

echo "Scanning English files, trying to find these terms...<br />\n";
// Make sure the per-language output directory exists. The check targets the
// final directory (not only its parent) and the creation is recursive, so a
// pre-existing "langstats" folder without the language subfolder is handled.
// The trailing is_dir() covers the case where another process created the
// directory between the first check and mkdir().
$langstats_dir = $arch_dir.'/langstats/'.$orig_lang;
if (!is_dir($langstats_dir) && !mkdir($langstats_dir, 0777, true) && !is_dir($langstats_dir)) {
    // Nothing can be written without the target directory: stop here rather
    // than failing once per language file further down.
    echo "Could not create directory ".$langstats_dir."<br />\n";
    exit(1);
}
55
/*
 * Scan every file of the reference language, keep only the lines that define
 * one of the retained terms and write the filtered copy under
 * <archive>/langstats/<language>/ using the same file name.
 */
$list_files = scandir($lang_dir.'/'.$orig_lang);
if ($list_files === false) {
    // Unreadable language directory: nothing to scan.
    $list_files = [];
}
$terms_found = [];
$words_found = 0;
$global_var = []; //keep the combination of all vars
// Keep the name-keyed map for O(1) membership tests before turning
// $terms_in_limit into a plain list of names (position => term name), which
// is the shape the code after this loop works with.
$terms_lookup = $terms_in_limit;
$terms_in_limit = array_flip($terms_in_limit);
foreach ($list_files as $file) {
    // Skip ".", ".." and hidden entries.
    if (substr($file, 0, 1) == '.') {
        continue;
    }
    $vars = file($lang_dir.'/'.$orig_lang.'/'.$file);
    if ($vars === false) {
        // Unreadable entry: an empty stub file is still written below.
        $vars = [];
    }
    $local_var = [];
    $file_string = '<?php'."\n";
    foreach ($vars as $line) {
        $var = [];
        // Only lines starting with a variable name are of interest.
        if (preg_match('/^(\$\w*)/', $line, $var) !== 1) {
            continue;
        }
        // e.g. "$Tools" -> "Tools"
        $plain_name = (string) substr($var[1], 1);
        // e.g. "$langTools" -> "Tools"; the four characters following the
        // "$" are dropped without checking that they spell "lang".
        $legacy_name = (string) substr($var[1], 5);
        if (isset($terms_lookup[$plain_name])) {
            $term = $plain_name;
        } elseif (isset($terms_lookup[$legacy_name])) {
            $term = $legacy_name;
        } else {
            // Not one of the retained terms: do not care.
            continue;
        }
        $local_var[$var[1]] = $line;
        $file_string .= $line;
        $terms_found[] = $term;
        // Look the term up by its bare name (without the leading "$") in
        // both cases.
        $words_found += str_word_count(get_lang($term, null, $orig_lang));
    }
    echo "Writing ".$arch_dir.'/langstats/'.$orig_lang.'/'.$file."<br />\n";
    file_put_contents($arch_dir.'/langstats/'.$orig_lang.'/'.$file, $file_string);
    // Union by variable name: the first file defining a variable wins.
    $global_var += $local_var;
}
94
// Report how many distinct variables were collected and how that number
// compares with the number of retained terms.
$terms_diff = count($global_var) - count($terms_in_limit);
echo count($global_var)." terms found in English files (summing up to $words_found words). Some terms ($terms_diff in this case) might have appeared in two different files<br />";
/**
 * Display results.
 */
echo "Difference between filtered and found in English:<br />";
// Retained terms for which no matching variable was found in any file.
$missing_terms = array_diff($terms_in_limit, $terms_found);
echo "<pre>".print_r($missing_terms, true)."</pre>";
0 ignored issues
show
Are you sure print_r(array_diff($term...imit, $terms_found), 1) of type string|true can be used in concatenation? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the `@scrutinizer ignore-type` annotation:

103
echo "<pre>"./** @scrutinizer ignore-type */ print_r(array_diff($terms_in_limit, $terms_found), 1)."</pre>";
Loading history...
104
// NOTE(review): the trailing "#" is presumably an end-of-output marker - confirm before changing it.
echo "#";
105