| @@ 59-151 (lines=93) @@ | ||
| 56 | predicted_proba = model.predict_proba(test_data[0]) |
|
| 57 | return predicted_y, predicted_proba |
|
| 58 | ||
| 59 | if __name__ == '__main__': |
|
| 60 | args_ok = False |
|
| 61 | parser = argparse.ArgumentParser(description='Run Decision Tree on single resident CASAS datasets.') |
|
| 62 | parser.add_argument('-d', '--dataset', help='Directory to original datasets') |
|
| 63 | parser.add_argument('-o', '--output', help='Output folder') |
|
| 64 | parser.add_argument('--h5py', help='HDF5 dataset folder') |
|
| 65 | args = parser.parse_args() |
|
| 66 | # Default parameters |
|
| 67 | log_filename = os.path.basename(__file__).split('.')[0] + \ |
|
| 68 | '-%s.log' % datetime.now().strftime('%y%m%d_%H:%M:%S') |
|
| 69 | # Setup output directory |
|
| 70 | output_dir = args.output |
|
| 71 | if output_dir is not None: |
|
| 72 | output_dir = os.path.abspath(os.path.expanduser(output_dir)) |
|
| 73 | if os.path.exists(output_dir): |
|
| 74 | # Found output_dir, check if it is a directory |
|
| 75 | if not os.path.isdir(output_dir): |
|
| 76 | exit('Output directory %s is found, but not a directory. Abort.' % output_dir) |
|
| 77 | else: |
|
| 78 | # Create directory |
|
| 79 | os.makedirs(output_dir) |
|
| 80 | else: |
|
| 81 | output_dir = '.' |
|
| 82 | # If the original dataset is specified, it can be used to regenerate the h5py dataset |
|
| 83 | casas_data_dir = args.dataset |
|
| 84 | if casas_data_dir is not None: |
|
| 85 | casas_data_dir = os.path.abspath(os.path.expanduser(casas_data_dir)) |
|
| 86 | if not os.path.isdir(casas_data_dir): |
|
| 87 | exit('CASAS dataset at %s does not exist. Abort.' % casas_data_dir) |
|
| 88 | # Find h5py dataset first |
|
| 89 | h5py_dir = args.h5py |
|
| 90 | if h5py_dir is not None: |
|
| 91 | h5py_dir = os.path.abspath(os.path.expanduser(h5py_dir)) |
|
| 92 | else: |
|
| 93 | # Default location |
|
| 94 | h5py_dir = os.path.join(output_dir, 'h5py') |
|
| 95 | if os.path.exists(h5py_dir): |
|
| 96 | if not os.path.isdir(h5py_dir): |
|
| 97 | exit('h5py dataset location %s is not a directory. Abort.' % h5py_dir) |
|
| 98 | log_filename = os.path.join(output_dir, log_filename) |
|
| 99 | # Set up logging as early as possible |
|
| 100 | logging.basicConfig(level=logging.DEBUG, |
|
| 101 | format='[%(asctime)s] %(name)s:%(levelname)s:%(message)s', |
|
| 102 | handlers=[logging.FileHandler(log_filename), |
|
| 103 | logging.StreamHandler()]) |
|
| 104 | if not CASASFuel.files_exist(h5py_dir): |
|
| 105 | # Checks done and directories created - now load the datasets |
|
| 106 | if casas_data_dir is not None: |
|
| 107 | casas_data = CASASData(path=casas_data_dir) |
|
| 108 | casas_data.summary() |
|
| 109 | # Decision tree uses plain statistical features, without normalization or per-sensor grouping |
|
| 110 | casas_data.populate_feature(method='stat', normalized=False, per_sensor=False) |
|
| 111 | casas_data.export_hdf5(h5py_dir) |
|
| 112 | casas_fuel = CASASFuel(dir_name=h5py_dir) |
|
| 113 | # Prepare learning result |
|
| 114 | result_pkl_file = os.path.join(output_dir, 'result.pkl') |
|
| 115 | result = None |
|
| 116 | if os.path.isfile(result_pkl_file): |
|
| 117 | f = open(result_pkl_file, 'rb') |
|
| 118 | result = pickle.load(f) |
|
| 119 | f.close() |
|
| 120 | if result.data != h5py_dir: |
|
| 121 | logger.error('Result pickle file found for different dataset %s' % result.data) |
|
| 122 | exit('Cannot save learning result at %s' % result_pkl_file) |
|
| 123 | else: |
|
| 124 | result = LearningResult(name='DecisionTree', data=h5py_dir, mode='by_week') |
|
| 125 | num_classes = casas_fuel.get_output_dims() |
|
| 126 | # Open Fuel and get all splits |
|
| 127 | split_list = casas_fuel.get_set_list() |
|
| 128 | train_names = ('week 24', 'week 23', 'week 22', 'week 21') |
|
| 129 | test_names = ('week 25', 'week 26', 'week 27', 'week 28') |
|
| 130 | test_name = 'single_test' |
|
| 131 | train_set = casas_fuel.get_dataset(train_names, load_in_memory=True) |
|
| 132 | train_set_data = train_set.data_sources |
|
| 133 | test_set = casas_fuel.get_dataset(test_names, load_in_memory=True) |
|
| 134 | test_set_data = test_set.data_sources |
|
| 135 | # Prepare Back Annotation |
|
| 136 | fp_back_annotated = open(os.path.join(output_dir, 'back_annotated.txt'), 'w') |
|
| 137 | fp_back_probability = open(os.path.join(output_dir, 'back_annotated_proba.txt'), 'w') |
|
| 138 | # Run the decision tree |
|
| 139 | logger.info('Training on %s, Testing on %s' % (str(train_names), str(test_names))) |
|
| 140 | if result.get_record_by_key(test_name) is None: |
|
| 141 | prediction, prediction_proba = training_and_test(test_name, train_set_data, test_set_data, num_classes, result) |
|
| 142 | else: |
|
| 143 | prediction, prediction_proba = load_and_test(test_name, test_set_data, num_classes, result) |
|
| 144 | casas_fuel.back_annotate(fp_back_annotated, prediction=prediction, split_name=test_names) |
|
| 145 | casas_fuel.back_annotate_with_proba(fp_back_probability, prediction_proba=prediction_proba, split_name=test_names) |
|
| 146 | train_name = test_name |
|
| 147 | train_set_data = test_set_data |
|
| 148 | f = open(result_pkl_file, 'wb') |
|
| 149 | pickle.dump(obj=result, file=f, protocol=pickle.HIGHEST_PROTOCOL) |
|
| 150 | f.close() |
|
| 151 | result.export_to_xlsx(os.path.join(output_dir, 'result.xlsx')) |
|
| 152 | ||
| 153 | ||
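
The two fragments in this report differ only in the name of the fitted classifier (`model` above, `decision_tree` below); the 93-line `__main__` driver is otherwise identical. Below is a minimal sketch of how the shared driver could be factored out, assuming `CASASFuel`, `LearningResult`, `training_and_test`, and `load_and_test` behave exactly as they are used in the fragments; the helper name `run_casas_experiment` and its signature are invented here for illustration.

```python
import os


def run_casas_experiment(casas_fuel, result, train_names, test_names,
                         test_name, training_and_test, load_and_test,
                         output_dir='.'):
    """Hypothetical shared driver extracted from the duplicated blocks."""
    num_classes = casas_fuel.get_output_dims()
    # Load the training/testing splits in memory, as both copies do
    train_set = casas_fuel.get_dataset(train_names, load_in_memory=True)
    train_set_data = train_set.data_sources
    test_set = casas_fuel.get_dataset(test_names, load_in_memory=True)
    test_set_data = test_set.data_sources
    # Train from scratch unless a record for this split already exists
    if result.get_record_by_key(test_name) is None:
        prediction, prediction_proba = training_and_test(
            test_name, train_set_data, test_set_data, num_classes, result)
    else:
        prediction, prediction_proba = load_and_test(
            test_name, test_set_data, num_classes, result)
    # Back-annotate predictions; 'with' also closes the file handles
    # that the original copies leave open
    with open(os.path.join(output_dir, 'back_annotated.txt'), 'w') as fp:
        casas_fuel.back_annotate(fp, prediction=prediction,
                                 split_name=test_names)
    with open(os.path.join(output_dir, 'back_annotated_proba.txt'), 'w') as fp:
        casas_fuel.back_annotate_with_proba(
            fp, prediction_proba=prediction_proba, split_name=test_names)
    return prediction, prediction_proba
```

Each script would then keep only its classifier-specific `training_and_test`/`load_and_test` pair and call the helper once, removing the duplication this report flags.
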
| @@ 57-149 (lines=93) @@ | ||
| 54 | predicted_proba = decision_tree.predict_proba(test_data[0]) |
|
| 55 | return predicted_y, predicted_proba |
|
| 56 | ||
| 57 | if __name__ == '__main__': |
|
| 58 | args_ok = False |
|
| 59 | parser = argparse.ArgumentParser(description='Run Decision Tree on single resident CASAS datasets.') |
|
| 60 | parser.add_argument('-d', '--dataset', help='Directory to original datasets') |
|
| 61 | parser.add_argument('-o', '--output', help='Output folder') |
|
| 62 | parser.add_argument('--h5py', help='HDF5 dataset folder') |
|
| 63 | args = parser.parse_args() |
|
| 64 | # Default parameters |
|
| 65 | log_filename = os.path.basename(__file__).split('.')[0] + \ |
|
| 66 | '-%s.log' % datetime.now().strftime('%y%m%d_%H:%M:%S') |
|
| 67 | # Setup output directory |
|
| 68 | output_dir = args.output |
|
| 69 | if output_dir is not None: |
|
| 70 | output_dir = os.path.abspath(os.path.expanduser(output_dir)) |
|
| 71 | if os.path.exists(output_dir): |
|
| 72 | # Found output_dir, check if it is a directory |
|
| 73 | if not os.path.isdir(output_dir): |
|
| 74 | exit('Output directory %s is found, but not a directory. Abort.' % output_dir) |
|
| 75 | else: |
|
| 76 | # Create directory |
|
| 77 | os.makedirs(output_dir) |
|
| 78 | else: |
|
| 79 | output_dir = '.' |
|
| 80 | # If the original dataset is specified, it can be used to regenerate the h5py dataset |
|
| 81 | casas_data_dir = args.dataset |
|
| 82 | if casas_data_dir is not None: |
|
| 83 | casas_data_dir = os.path.abspath(os.path.expanduser(casas_data_dir)) |
|
| 84 | if not os.path.isdir(casas_data_dir): |
|
| 85 | exit('CASAS dataset at %s does not exist. Abort.' % casas_data_dir) |
|
| 86 | # Find h5py dataset first |
|
| 87 | h5py_dir = args.h5py |
|
| 88 | if h5py_dir is not None: |
|
| 89 | h5py_dir = os.path.abspath(os.path.expanduser(h5py_dir)) |
|
| 90 | else: |
|
| 91 | # Default location |
|
| 92 | h5py_dir = os.path.join(output_dir, 'h5py') |
|
| 93 | if os.path.exists(h5py_dir): |
|
| 94 | if not os.path.isdir(h5py_dir): |
|
| 95 | exit('h5py dataset location %s is not a directory. Abort.' % h5py_dir) |
|
| 96 | log_filename = os.path.join(output_dir, log_filename) |
|
| 97 | # Set up logging as early as possible |
|
| 98 | logging.basicConfig(level=logging.DEBUG, |
|
| 99 | format='[%(asctime)s] %(name)s:%(levelname)s:%(message)s', |
|
| 100 | handlers=[logging.FileHandler(log_filename), |
|
| 101 | logging.StreamHandler()]) |
|
| 102 | if not CASASFuel.files_exist(h5py_dir): |
|
| 103 | # Checks done and directories created - now load the datasets |
|
| 104 | if casas_data_dir is not None: |
|
| 105 | casas_data = CASASData(path=casas_data_dir) |
|
| 106 | casas_data.summary() |
|
| 107 | # Decision tree uses plain statistical features, without normalization or per-sensor grouping |
|
| 108 | casas_data.populate_feature(method='stat', normalized=False, per_sensor=False) |
|
| 109 | casas_data.export_hdf5(h5py_dir) |
|
| 110 | casas_fuel = CASASFuel(dir_name=h5py_dir) |
|
| 111 | # Prepare learning result |
|
| 112 | result_pkl_file = os.path.join(output_dir, 'result.pkl') |
|
| 113 | result = None |
|
| 114 | if os.path.isfile(result_pkl_file): |
|
| 115 | f = open(result_pkl_file, 'rb') |
|
| 116 | result = pickle.load(f) |
|
| 117 | f.close() |
|
| 118 | if result.data != h5py_dir: |
|
| 119 | logger.error('Result pickle file found for different dataset %s' % result.data) |
|
| 120 | exit('Cannot save learning result at %s' % result_pkl_file) |
|
| 121 | else: |
|
| 122 | result = LearningResult(name='DecisionTree', data=h5py_dir, mode='by_week') |
|
| 123 | num_classes = casas_fuel.get_output_dims() |
|
| 124 | # Open Fuel and get all splits |
|
| 125 | split_list = casas_fuel.get_set_list() |
|
| 126 | train_names = ('week 24', 'week 23', 'week 22', 'week 21') |
|
| 127 | test_names = ('week 25', 'week 26', 'week 27', 'week 28') |
|
| 128 | test_name = 'single_test' |
|
| 129 | train_set = casas_fuel.get_dataset(train_names, load_in_memory=True) |
|
| 130 | train_set_data = train_set.data_sources |
|
| 131 | test_set = casas_fuel.get_dataset(test_names, load_in_memory=True) |
|
| 132 | test_set_data = test_set.data_sources |
|
| 133 | # Prepare Back Annotation |
|
| 134 | fp_back_annotated = open(os.path.join(output_dir, 'back_annotated.txt'), 'w') |
|
| 135 | fp_back_probability = open(os.path.join(output_dir, 'back_annotated_proba.txt'), 'w') |
|
| 136 | # Run the decision tree |
|
| 137 | logger.info('Training on %s, Testing on %s' % (str(train_names), str(test_names))) |
|
| 138 | if result.get_record_by_key(test_name) is None: |
|
| 139 | prediction, prediction_proba = training_and_test(test_name, train_set_data, test_set_data, num_classes, result) |
|
| 140 | else: |
|
| 141 | prediction, prediction_proba = load_and_test(test_name, test_set_data, num_classes, result) |
|
| 142 | casas_fuel.back_annotate(fp_back_annotated, prediction=prediction, split_name=test_names) |
|
| 143 | casas_fuel.back_annotate_with_proba(fp_back_probability, prediction_proba=prediction_proba, split_name=test_names) |
|
| 144 | train_name = test_name |
|
| 145 | train_set_data = test_set_data |
|
| 146 | f = open(result_pkl_file, 'wb') |
|
| 147 | pickle.dump(obj=result, file=f, protocol=pickle.HIGHEST_PROTOCOL) |
|
| 148 | f.close() |
|
| 149 | result.export_to_xlsx(os.path.join(output_dir, 'result.xlsx')) |
|
| 150 | ||
| 151 | ||
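
Both copies also share the same fragile result-persistence pattern: bare `open`/`close` pairs around `pickle.load`/`pickle.dump`, and a dataset-mismatch check that aborts only after the file has been read. Here is a sketch of a safer version under the same assumptions (the `LearningResult` object carries a `.data` attribute, as in the fragments); `load_result`, `save_result`, and the `make_default` factory are hypothetical names introduced here.

```python
import os
import pickle
import tempfile


def load_result(result_pkl_file, h5py_dir, make_default):
    """Load a LearningResult pickle, or build a fresh one via make_default.

    make_default is a hypothetical factory, e.g.
    lambda: LearningResult(name='DecisionTree', data=h5py_dir, mode='by_week').
    """
    if not os.path.isfile(result_pkl_file):
        return make_default()
    with open(result_pkl_file, 'rb') as f:
        result = pickle.load(f)
    # Refuse to mix results from different h5py datasets, as the fragments do
    if result.data != h5py_dir:
        raise SystemExit('Result pickle file found for different dataset %s'
                         % result.data)
    return result


def save_result(result, result_pkl_file):
    # Dump to a temp file in the same directory, then atomically replace
    # the target, so a crash mid-dump cannot truncate an existing result.pkl
    dir_name = os.path.dirname(result_pkl_file) or '.'
    fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix='.pkl.tmp')
    with os.fdopen(fd, 'wb') as f:
        pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)
    os.replace(tmp_path, result_pkl_file)
```

Writing to a temporary file and `os.replace`-ing it means an interrupted run can no longer leave a truncated `result.pkl` behind, and the context managers close the handles even when an exception is raised.
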