| Metric | Value |
| --- | --- |
| Conditions | 9 |
| Total Lines | 84 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 1 |
| Bugs | 0 |
| Features | 0 |
Small methods make your code easier to understand, especially when combined with a good name. Moreover, when a method is small, finding a good name for it is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a sign that you should extract the commented part into a new method and use the comment as a starting point for naming it.
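As a minimal, generic sketch of this idea (the billing function and names below are invented for illustration and are not taken from this text), a commented block inside a longer method becomes a method whose name grows out of the comment:

```python
# Before: a comment labels a block buried inside a longer method.
def order_total(items):
    total = sum(price * qty for price, qty in items)
    # apply volume discount
    if total > 1000:
        total *= 0.95
    return total


# After: the commented block is extracted, and the comment becomes the name.
def apply_volume_discount(total):
    return total * 0.95 if total > 1000 else total


def order_total(items):
    total = sum(price * qty for price, qty in items)
    return apply_volume_discount(total)
```

The comment "apply volume discount" does not disappear; it turns into the name apply_volume_discount, which now documents the logic at every call site.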
Commonly applied refactorings include Extract Method; if many parameters or temporary variables are present, Introduce Parameter Object or Replace Method with Method Object are typical choices.
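The sensor2vec function in the listing below is a case in point for the second piece of advice: it takes seven parameters. As a minimal sketch (the SkipGramParams class and the reduced signature are assumptions made here for illustration, not part of the original code), its hyperparameters could be grouped into a parameter object:

```python
from dataclasses import dataclass


# Hypothetical parameter object; field names and defaults mirror the keyword
# arguments of sensor2vec in the listing below.
@dataclass(frozen=True)
class SkipGramParams:
    embedding_size: int = 20
    batch_size: int = 128
    num_skips: int = 8
    skip_window: int = 5
    num_neg_samples: int = 64
    learning_rate: float = 1.0


# The signature could then shrink from seven parameters to three, e.g.
#     def sensor2vec(num_sensors, sensor_event_list, params=SkipGramParams()):
params = SkipGramParams(embedding_size=32)  # override only what differs
print(params.learning_rate)                 # the remaining fields keep their defaults
```

With the parameter list grouped, the remaining problem in the listing below is its sheer length; one possible Extract Method decomposition is sketched after the listing.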
```python
import math

import numpy as np
import tensorflow as tf

# SkipGramInjector is assumed to be provided elsewhere in the same project.


def sensor2vec(num_sensors, sensor_event_list, embedding_size=20,
               batch_size=128, num_skips=8, skip_window=5,
               num_neg_samples=64, learning_rate=1.0):
    """Sensor to Vector
    """
    if num_neg_samples > num_sensors:
        num_neg_samples = num_sensors
    # Initialize a SkipGram Injector
    injector = SkipGramInjector(sensor_event_list, batch_size, num_skips, skip_window)
    # Build Training Model
    graph = tf.Graph()
    with graph.as_default():
        # Input Place Holder
        train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
        train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
        # As we normally do not have too many sensors - it is OK to use all of them
        valid_dataset = tf.constant([i for i in range(num_sensors)], dtype=tf.int32)
        # Only CPU supports NCE loss
        with tf.device('/cpu:0'):
            # Look up embeddings for inputs.
            embeddings = tf.Variable(
                tf.random_uniform([num_sensors, embedding_size], -1.0, 1.0))
            embed = tf.nn.embedding_lookup(embeddings, train_inputs)

            # Construct the variables for the NCE loss
            nce_weights = tf.Variable(
                tf.truncated_normal([num_sensors, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))
            nce_biases = tf.Variable(tf.zeros([num_sensors]))

        # Compute the average NCE loss for the batch.
        # tf.nce_loss automatically draws a new sample of the negative labels
        # each time we evaluate the loss.
        loss = tf.reduce_mean(
            tf.nn.nce_loss(weights=nce_weights,
                           biases=nce_biases,
                           labels=train_labels,
                           inputs=embed,
                           num_sampled=num_neg_samples,
                           num_classes=num_sensors))

        # Construct the Optimizer
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

        # Compute the cosine similarity between minibatch examples and all embeddings.
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(
            normalized_embeddings, valid_dataset)
        similarity = tf.matmul(
            valid_embeddings, normalized_embeddings, transpose_b=True)

        # Add variable initializer.
        init = tf.initialize_all_variables()

    # Begin training.
    num_steps = 100001

    with tf.Session(graph=graph) as session:
        # We must initialize all variables before we use them.
        init.run()
        print("Initialized")

        average_loss = 0
        for step in range(num_steps):
            batch_inputs, batch_labels = injector.next_batch()
            feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}

            # We perform one update step by evaluating the optimizer op
            # (including it in the list of returned values for session.run()).
            _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict)
            average_loss += loss_val

            if step % 2000 == 0:
                if step > 0:
                    average_loss /= 2000
                # The average loss is an estimate of the loss over the last 2000 batches.
                print("Average loss at step ", step, ": ", average_loss)
                average_loss = 0

        final_embeddings = normalized_embeddings.eval()
        final_similarity = 1 - similarity.eval()
        distance_matrix = final_similarity / np.max(final_similarity, axis=1)[:, None]
        return final_embeddings, distance_matrix
```