1 | <?php |
||
2 | /* For licensing terms, see /license.txt */ |
||
3 | |||
4 | /** |
||
5 | * This script generates a directory based on the English language variables |
||
6 | * but only composed of the 10,000 (can be configured) most frequent words |
||
7 | * used in Chamilo. This implies first using the langstats.php script, which |
||
8 | * in turn implies configuring an additional variable in configuration.php |
||
9 | * (see langstats.php for more info). |
||
10 | * When running the language_builder, please make sure this parameter is |
||
11 | * set to 0 in the configuration.php file, otherwise it will take *ages*. |
||
12 | */ |
||
// Bootstrap Chamilo and load the langstats helper class.
// Both paths are anchored on __DIR__ so that they resolve the same way
// whatever the current working directory is (e.g. when run from cron or CLI).
// NOTE(review): assumes langstats.class.php sits next to this script - confirm.
require_once __DIR__.'/../../inc/global.inc.php';
require_once __DIR__.'/langstats.class.php';
global $_configuration;
// Force frequency measurement off for this run: the header comment of this
// script warns that leaving it enabled makes the build take ages.
$_configuration['language_measure_frequency'] = 0;
$langstats = new langstats();
// Language whose files are used as the reference for the extraction.
$orig_lang = 'english';
/**
 * Init.
 */
$words_limit = 10000; //change this if you want more words
$terms_limit = 3000; //change this if you think you'll need more terms
// Most popular terms as returned by langstats; each row is read through its
// 'term_name' key by the selection loop further down.
$terms = $langstats->get_popular_terms($terms_limit);
$words_counter = 0; // running total of English words for the selected terms
$i = 0; // position of the current term in the list of selected terms
$terms_in_limit = []; // term name => position, filled by the selection loop
$lang_dir = api_get_path(SYS_LANG_PATH);
$arch_dir = api_get_path(SYS_ARCHIVE_PATH);
||
/**
 * Code run.
 */
// Walk the terms in the order returned by get_popular_terms() and keep each
// one until the cumulated number of English words goes past $words_limit.
foreach ($terms as $row) {
    // Use the configurable limit rather than a hard-coded 10000, so that
    // changing $words_limit actually has an effect.
    if ($words_counter > $words_limit) {
        break;
    }
    // Number of words in the English translation of this term.
    $words = str_word_count(get_lang($row['term_name'], null, $orig_lang));
    $words_counter += $words;
    // Remember the term together with its position in the selection.
    $terms_in_limit[$row['term_name']] = $i;
    $i++;
}

echo "Reached ".count($terms_in_limit)." terms for the $words_counter most-used words<br /><br />\n";
||
49 | |||
echo "Scanning English files, trying to find these terms...<br />\n";
// Make sure the per-language output directory exists. The check is done on
// the leaf directory (and the creation is recursive) so that it is also
// created when "langstats" already exists but the language folder does not.
$langstats_lang_dir = $arch_dir.'/langstats/'.$orig_lang;
if (!is_dir($langstats_lang_dir)) {
    // The second is_dir() covers the case where the directory appeared
    // between the check and the mkdir() call.
    if (!mkdir($langstats_lang_dir, 0777, true) && !is_dir($langstats_lang_dir)) {
        echo "Could not create directory ".$langstats_lang_dir."<br />\n";
    }
}
||
// List the reference language files; scandir() returns false on failure, in
// which case there is simply nothing to scan.
$list_files = scandir($lang_dir.'/'.$orig_lang);
if ($list_files === false) {
    $list_files = [];
}
$terms_found = [];
$words_found = 0;
$global_var = []; //keep the combination of all vars
// Keep a "term name => position" copy for O(1) membership tests, then flip
// $terms_in_limit to "position => term name", which is the shape the code
// after this loop works with.
$term_lookup = $terms_in_limit;
$terms_in_limit = array_flip($terms_in_limit);
foreach ($list_files as $file) {
    // Skip ".", ".." and any hidden entry.
    if (substr($file, 0, 1) == '.') {
        continue;
    }
    $vars = file($lang_dir.'/'.$orig_lang.'/'.$file);
    if ($vars === false) {
        // Unreadable entry: treat it as empty instead of iterating over false.
        $vars = [];
    }
    $local_var = [];
    $file_string = '<?php'."\n";
    foreach ($vars as $line) {
        // Only lines starting with a PHP variable are of interest.
        $var = [];
        if (preg_match('/^(\$\w*)/', $line, $var) !== 1) {
            continue;
        }
        // Candidate term names: the variable without its leading "$", and
        // the variable without its first five characters (presumably a
        // legacy "$lang" prefix, e.g. $langTools -> Tools - confirm).
        $plain_name = (string) substr($var[1], 1);
        $short_name = (string) substr($var[1], 5);
        if (isset($term_lookup[$plain_name])) {
            $term = $plain_name;
        } elseif (isset($term_lookup[$short_name])) {
            $term = $short_name;
        } else {
            // Not one of the selected terms: do not care.
            continue;
        }
        $local_var[$var[1]] = $line;
        $file_string .= $line;
        $terms_found[] = $term;
        // Look the term up by its name without the "$" prefix, the same way
        // the selection loop does, so the word count is computed on the
        // actual translation.
        $words_found += str_word_count(get_lang($term, null, $orig_lang));
    }
    echo "Writing ".$arch_dir.'/langstats/'.$orig_lang.'/'.$file."<br />\n";
    file_put_contents($arch_dir.'/langstats/'.$orig_lang.'/'.$file, $file_string);
    // Array union: a variable already seen in a previous file is kept as is.
    $global_var += $local_var;
}
||
// Summary: number of distinct variables kept, compared with the number of
// terms that were selected for extraction.
$found_count = count($global_var);
$terms_diff = $found_count - count($terms_in_limit);
echo $found_count." terms found in English files (summing up to $words_found words). Some terms ($terms_diff in this case) might have appeared in two different files<br />";
/**
 * Display results.
 */
echo "Difference between filtered and found in English:<br />";
// Selected terms that were not matched in any of the scanned files.
$missing_terms = array_diff($terms_in_limit, $terms_found);
echo "<pre>".print_r($missing_terms, true)."</pre>";
echo "#";
||
105 |