training_and_test() - Code Metrics - Inspection of "example: Add SVM script for single resident activi..." - TinghuiWang/pyActLearn - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 154b9f...3cbafe )

by Tinghui

created 2016-11-29 03:22 UTC

training_and_test() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	1
c	1
b	0
f	0
dl	0
loc	18
rs	9.4285

import os
import pickle
import logging
import argparse
import sklearn.svm
from datetime import datetime
from pyActLearn.CASAS.data import CASASData
from pyActLearn.CASAS.fuel import CASASFuel
from pyActLearn.performance.record import LearningResult
from pyActLearn.performance import get_confusion_matrix

logger = logging.getLogger(__file__)


def training_and_test(token, train_data, test_data, num_classes, result, kernel='rbf'):
    """Train an SVM on one split, test it on another and record the outcome.

    Args:
        token (:obj:`str`): token representing this run
        train_data (:obj:`tuple` of :obj:`numpy.array`): Tuple of training feature and label
        test_data (:obj:`tuple` of :obj:`numpy.array`): Tuple of testing feature and label
        num_classes (:obj:`int`): Number of classes
        result (:obj:`pyActLearn.performance.record.LearningResult`): LearningResult object to hold learning result
        kernel (:obj:`str`): Kernel name passed to :class:`sklearn.svm.SVC`. Defaults to ``'rbf'``,
            the value that used to be hard-coded, so existing callers are unaffected.
    """
    svm_model = sklearn.svm.SVC(kernel=kernel)
    # Labels are flattened to a 1-D vector before being handed to the classifier.
    svm_model.fit(train_data[0], train_data[1].flatten())
    # Test
    predicted_y = svm_model.predict(test_data[0])
    # Evaluate the Test and Store Result
    confusion_matrix = get_confusion_matrix(num_classes=num_classes,
                                            label=test_data[1].flatten(), predicted=predicted_y)
    result.add_record(svm_model, key=token, confusion_matrix=confusion_matrix)


def _ensure_directory(path, not_a_dir_message):
    """Make sure ``path`` is a directory, creating it (and missing parents) when absent.

    Args:
        path (:obj:`str`): Directory that must exist once this function returns.
        not_a_dir_message (:obj:`str`): ``%``-style template with a single ``%s`` placeholder
            for ``path``; used as the exit message when ``path`` exists but is not a directory.

    Raises:
        SystemExit: If ``path`` exists and is not a directory.
    """
    if os.path.exists(path) and not os.path.isdir(path):
        raise SystemExit(not_a_dir_message % path)
    # makedirs also copes with nested paths, which a plain os.mkdir rejects.
    os.makedirs(path, exist_ok=True)


def main():
    """Command line entry point: run a week-by-week SVM evaluation on a CASAS dataset.

    The model is trained on one split and tested on the following one, walking through the
    splits in the order returned by ``CASASFuel.get_set_list()``. Results are accumulated in
    ``result.pkl`` inside the output folder and exported to ``result.xlsx``.
    """
    parser = argparse.ArgumentParser(description='Run Support Vector Machine on single resident CASAS datasets.')
    parser.add_argument('-d', '--dataset', help='Directory to original datasets')
    parser.add_argument('-o', '--output', help='Output folder')
    parser.add_argument('--h5py', help='HDF5 dataset folder')
    # The kernel option used to be parsed and then ignored; it is now validated up front
    # and forwarded to the classifier.
    parser.add_argument('-k', '--kernel', default='rbf',
                        choices=('linear', 'poly', 'rbf', 'sigmoid'),
                        help='svm kernel (default: %(default)s)')
    args = parser.parse_args()
    # Default parameters
    # NOTE(review): the timestamp contains ':' which some filesystems reject in file names;
    # the format is kept unchanged so existing log naming stays the same.
    log_filename = os.path.basename(__file__).split('.')[0] + \
        '-%s.log' % datetime.now().strftime('%y%m%d_%H:%M:%S')
    # Setup output directory
    if args.output is not None:
        output_dir = os.path.abspath(os.path.expanduser(args.output))
        _ensure_directory(output_dir, 'Output directory %s is found, but not a directory. Abort.')
    else:
        output_dir = '.'
    log_filename = os.path.join(output_dir, log_filename)
    # Setup Logging as early as possible
    logging.basicConfig(level=logging.DEBUG,
                        format='[%(asctime)s] %(name)s:%(levelname)s:%(message)s',
                        handlers=[logging.FileHandler(log_filename),
                                  logging.StreamHandler()])
    # If dataset is specified, update h5py
    casas_data_dir = args.dataset
    if casas_data_dir is not None:
        casas_data_dir = os.path.abspath(os.path.expanduser(casas_data_dir))
        if not os.path.isdir(casas_data_dir):
            raise SystemExit('CASAS dataset at %s does not exist. Abort.' % casas_data_dir)
    # Find h5py dataset first
    if args.h5py is not None:
        h5py_dir = os.path.abspath(os.path.expanduser(args.h5py))
    else:
        # Default location
        h5py_dir = os.path.join(output_dir, 'h5py')
    _ensure_directory(h5py_dir, 'h5py dataset location %s is not a directory. Abort.')
    # All directories are checked and created - now load datasets
    if casas_data_dir is not None:
        casas_data = CASASData(path=casas_data_dir)
        casas_data.summary()
        # SVM needs to use statistical feature with per-sensor and normalization
        casas_data.populate_feature(method='stat', normalized=True, per_sensor=True)
        casas_data.export_hdf5(h5py_dir)
    casas_fuel = CASASFuel(dir_name=h5py_dir)
    # Prepare learning result
    result_pkl_file = os.path.join(output_dir, 'result.pkl')
    if os.path.isfile(result_pkl_file):
        # NOTE: unpickling can execute arbitrary code; only resume from a result file
        # that was produced by this script.
        with open(result_pkl_file, 'rb') as f:
            result = pickle.load(f)
        if result.data != h5py_dir:
            logger.error('Result pickle file found for different dataset %s', result.data)
            raise SystemExit('Cannot save learning result at %s' % result_pkl_file)
    else:
        result = LearningResult(name='SVM', data=h5py_dir, mode='by_week')
    num_classes = casas_fuel.get_output_dims()
    # Open Fuel and get all splits
    split_list = casas_fuel.get_set_list()
    if len(split_list) == 0:
        # Previously this surfaced as a bare IndexError on split_list[0].
        logger.error('No data split found in %s', h5py_dir)
        raise SystemExit('No data split found in %s. Abort.' % h5py_dir)
    train_name = split_list[0]
    train_set = casas_fuel.get_dataset((train_name,), load_in_memory=True)
    train_set_data = train_set.data_sources
    for i in range(1, len(split_list)):
        test_name = split_list[i]
        test_set = casas_fuel.get_dataset((test_name,), load_in_memory=True)
        test_set_data = test_set.data_sources
        # run svm
        logger.info('Training on %s, Testing on %s', train_name, test_name)
        # Splits that already have a record (e.g. from a previous run) are not recomputed.
        if result.get_record_by_key(test_name) is None:
            training_and_test(test_name, train_set_data, test_set_data, num_classes, result,
                              kernel=args.kernel)
        # The split just tested becomes the training split of the next iteration.
        train_name = test_name
        train_set_data = test_set_data
    with open(result_pkl_file, 'wb') as f:
        pickle.dump(obj=result, file=f, protocol=pickle.HIGHEST_PROTOCOL)
    result.export_to_xlsx(os.path.join(output_dir, 'result.xlsx'))


if __name__ == '__main__':
    main()



1			import os
2			import pickle
3			import logging
4			import argparse
5			import sklearn.svm
6			from datetime import datetime
7			from pyActLearn.CASAS.data import CASASData
8			from pyActLearn.CASAS.fuel import CASASFuel
9			from pyActLearn.performance.record import LearningResult
10			from pyActLearn.performance import get_confusion_matrix
11
12			logger = logging.getLogger(__file__)
13
14
15			def training_and_test(token, train_data, test_data, num_classes, result):
16			"""Train and test
17
18			Args:
19			token (:obj:`str`): token representing this run
20			train_data (:obj:`tuple` of :obj:`numpy.array`): Tuple of training feature and label
21			test_data (:obj:`tuple` of :obj:`numpy.array`): Tuple of testing feature and label
22			num_classes (:obj:`int`): Number of classes
23			result (:obj:`pyActLearn.performance.record.LearningResult`): LearningResult object to hold learning result
24			"""
25			svm_model = sklearn.svm.SVC(kernel='rbf')
26			svm_model.fit(train_data[0], train_data[1].flatten())
27			# Test
28			predicted_y = svm_model.predict(test_data[0])
29			# Evaluate the Test and Store Result
30			confusion_matrix = get_confusion_matrix(num_classes=num_classes,
31			label=test_data[1].flatten(), predicted=predicted_y)
32			result.add_record(svm_model, key=token, confusion_matrix=confusion_matrix)
33
34
35			if __name__ == '__main__':
36			args_ok = False
37			parser = argparse.ArgumentParser(description='Run Support Vector Machine on single resident CASAS datasets.')
38			parser.add_argument('-d', '--dataset', help='Directory to original datasets')
39			parser.add_argument('-o', '--output', help='Output folder')
40			parser.add_argument('--h5py', help='HDF5 dataset folder')
41			parser.add_argument('-k', '--kernel', help='svm kernel')
42			args = parser.parse_args()
43			# Default parameters
44			log_filename = os.path.basename(__file__).split('.')[0] + \
45			'-%s.log' % datetime.now().strftime('%y%m%d_%H:%M:%S')
46			# Setup output directory
47			output_dir = args.output
48			if output_dir is not None:
49			output_dir = os.path.abspath(os.path.expanduser(output_dir))
50			if os.path.exists(output_dir):
51			# Found output_dir, check if it is a directory
52			if not os.path.isdir(output_dir):
53			exit('Output directory %s is found, but not a directory. Abort.' % output_dir)
54			else:
55			# Create directory
56			os.mkdir(output_dir)
57			else:
58			output_dir = '.'
59			log_filename = os.path.join(output_dir, log_filename)
60			# Setup Logging as early as possible
61			logging.basicConfig(level=logging.DEBUG,
62			format='[%(asctime)s] %(name)s:%(levelname)s:%(message)s',
63			handlers=[logging.FileHandler(log_filename),
64			logging.StreamHandler()])
65			# If dataset is specified, update h5py
66			casas_data_dir = args.dataset
67			if casas_data_dir is not None:
68			casas_data_dir = os.path.abspath(os.path.expanduser(casas_data_dir))
69			if not os.path.isdir(casas_data_dir):
70			exit('CASAS dataset at %s does not exist. Abort.' % casas_data_dir)
71			# Find h5py dataset first
72			h5py_dir = args.h5py
73			if h5py_dir is not None:
74			h5py_dir = os.path.abspath(os.path.expanduser(h5py_dir))
75			else:
76			# Default location
77			h5py_dir = os.path.join(output_dir, 'h5py')
78			if os.path.exists(h5py_dir):
79			if not os.path.isdir(h5py_dir):
80			exit('h5py dataset location %s is not a directory. Abort.' % h5py_dir)
81			else:
82			os.mkdir(h5py_dir)
83			# Finish check and creating all directory needed - now load datasets
84			if casas_data_dir is not None:
85			casas_data = CASASData(path=casas_data_dir)
86			casas_data.summary()
87			# SVM needs to use statistical feature with per-sensor and normalization
88			casas_data.populate_feature(method='stat', normalized=True, per_sensor=True)
89			casas_data.export_hdf5(h5py_dir)
90			casas_fuel = CASASFuel(dir_name=h5py_dir)
91			# Prepare learning result
92			result_pkl_file = os.path.join(output_dir, 'result.pkl')
93			result = None
94			if os.path.isfile(result_pkl_file):
95			f = open(result_pkl_file, 'rb')
96			result = pickle.load(f)
97			f.close()
98			if result.data != h5py_dir:
99			logger.error('Result pickle file found for different dataset %s' % result.data)
100			exit('Cannot save learning result at %s' % result_pkl_file)
101			else:
102			result = LearningResult(name='SVM', data=h5py_dir, mode='by_week')
103			num_classes = casas_fuel.get_output_dims()
104			# Open Fuel and get all splits
105			split_list = casas_fuel.get_set_list()
106			train_name = split_list[0]
107			train_set = casas_fuel.get_dataset((train_name,), load_in_memory=True)
108			(train_set_data) = train_set.data_sources
109			for i in range(1, len(split_list)):
110			test_name = split_list[i]
111			test_set = casas_fuel.get_dataset((test_name,), load_in_memory=True)
112			(test_set_data) = test_set.data_sources
113			# run svm
114			logger.info('Training on %s, Testing on %s' % (train_name, test_name))
115			if result.get_record_by_key(test_name) is None:
116			training_and_test(test_name, train_set_data, test_set_data, num_classes, result)
117			train_name = test_name
118			train_set_data = test_set_data
119			f = open(result_pkl_file, 'wb')
120			pickle.dump(obj=result, file=f, protocol=pickle.HIGHEST_PROTOCOL)
121			f.close()
122			result.export_to_xlsx(os.path.join(output_dir, 'result.xlsx'))
123
124

TinghuiWang / pyActLearn

Push — master ( 154b9f...3cbafe )

training_and_test() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like