forked from cristianmatache/machine_learning_emotion_rec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
180 lines (134 loc) · 6.05 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import pandas as pd
import numpy as np
import random as rand
import sys
from multiprocessing import Process
from multiprocessing import Queue
import time
import decision_tree as dtree
import utilities as util
import constants as cnst
import measures
import decision_forest as dforest
from node import TreeNode
def make_and_save_d_forest(df_labels, df_data, N):
print(">> Running decision forest algorithm on multiple processes.\n")
res = pd.DataFrame(0, index=cnst.EMOTIONS_INDICES, columns=cnst.EMOTIONS_INDICES)
forest_T = []
samples = dforest.split_in_random(df_data, df_labels)
print("Building decision forest...")
for e in cnst.EMOTIONS_LIST:
T= []
processes = []
queue_list = []
for (sample_target, sample_data) in samples:
print("Building decision tree for emotion...", e)
train_binary_targets = util.filter_for_emotion(sample_target, cnst.EMOTIONS_DICT[e])
q = Queue()
queue_list.append(q)
process = Process(target=dtree.decision_tree_parallel, args=(sample_data, set(cnst.AU_INDICES), train_binary_targets, q))
processes.append(process)
process.start()
for p in processes:
p.join()
for q in queue_list:
T.append(q.get())
forest_T.append(T)
util.save_forest_to_file(forest_T)
def load_and_apply_d_forest(test_df_targets, test_df_data):
forest_T = util.load_forest(len(cnst.EMOTIONS_LIST))
predictions_forest = dforest.test_forest_trees(forest_T, test_df_data)
confusion_matrix = dtree.compare_pred_expect(predictions_forest, test_df_targets)
print("----------------------------------- CONFUSION MATRIX -----------------------------------\n")
print(confusion_matrix)
res = confusion_matrix
diag_res = sum(pd.Series(np.diag(res),
index=[res.index, res.columns]))
sum_all_res = res.values.sum()
accuracy_res = (diag_res/sum_all_res) * 100
print("----------------------------------- AVERAGE ACCURACY -----------------------------------\n:", accuracy_res)
res = res.div(res.sum(axis=1), axis=0)
for e in cnst.EMOTIONS_LIST:
print("----------------------------------- MEASUREMENTS -----------------------------------")
print(measures.compute_binary_confusion_matrix(res, cnst.EMOTIONS_DICT[e]))
def make_and_save_d_tree(df_labels, df_data, N):
print(">> Running decision tree algorithm on a single process.\n")
res = pd.DataFrame(0, index=cnst.EMOTIONS_INDICES, columns=cnst.EMOTIONS_INDICES)
segments = util.preprocess_for_cross_validation(N)
T = []
# Split data into 90% Training and 10% Testing
validation_data, validation_targets, train_df_data, train_df_targets = util.divide_data(segments[-1], N, df_data, df_labels)
# Train Trees
for e in cnst.EMOTIONS_LIST:
print("Building decision tree for emotion: ", e)
train_binary_targets = util.filter_for_emotion(train_df_targets, cnst.EMOTIONS_DICT[e])
root = dtree.decision_tree(train_df_data, set(cnst.AU_INDICES), train_binary_targets)
print("Decision tree built. Now appending...")
T.append(root)
# Use validation data to set a priority to each tree based on which is more accurate
percentage = []
T_P = []
for e in cnst.EMOTIONS_LIST:
print("\nValidation phase for emotion: ", e)
validation_binary_targets = util.filter_for_emotion(validation_targets, cnst.EMOTIONS_DICT[e])
results = []
# Calculate how accurate each tree is when predicting emotions
for i in validation_data.index.values:
results.append(TreeNode.dfs2(T[cnst.EMOTIONS_DICT[e]- 1], validation_data.loc[i], validation_binary_targets.loc[i].at[0]))
ones = results.count(1)
percentage.append(ones/len(results))
print("Validation phase ended. Priority levels have been set.")
print("All decision trees built.\n")
# List containing (Tree, Percentage) tuples
T_P = list(zip(T, percentage))
util.save_trees_to_file(T_P)
def load_and_apply_d_trees(df_labels, df_data):
T_P = util.load_trees(len(cnst.EMOTIONS_LIST))
predictions = dtree.test_trees(T_P, df_data)
confusion_matrix = dtree.compare_pred_expect(predictions, df_labels)
print(confusion_matrix)
# Print accuracy for each fold
diag = sum(pd.Series(np.diag(confusion_matrix),
index=[confusion_matrix.index, confusion_matrix.columns]))
sum_all = confusion_matrix.values.sum()
accuracy = (diag/sum_all) * 100
print("Accuracy:", accuracy)
res = confusion_matrix
res = res.div(res.sum(axis=1), axis=0)
for e in cnst.EMOTIONS_LIST:
print("----------------------------------- MEASUREMENTS -----------------------------------")
print(measures.compute_binary_confusion_matrix(res, cnst.EMOTIONS_DICT[e]))
return res
def convert_arguments():
algorithm = None
if (len(sys.argv) == 2):
algorithm = load_and_apply_d_forest
elif (len(sys.argv) == 3):
tree_or_forest = sys.argv[2]
if tree_or_forest == 'tree':
algorithm = load_and_apply_d_trees
elif tree_or_forest == 'forest':
algorithm = load_and_apply_d_forest
else:
print("Second argument not valid")
sys.exit()
return algorithm
def main():
if len(sys.argv) < 1:
print("Please insert the name of the file you want to test on")
sys.exit()
if len(sys.argv) < 2:
print("Examples are being tested on forest implementation by default")
TEST_FILE = sys.argv[1]
START_TIME = time.time()
labels, data = util.load_raw_data(TEST_FILE)
A = np.array(labels)
labels = [row[0] for row in A]
df_labels, df_data = util.to_dataframe(labels, data)
algorithm = convert_arguments()
res = algorithm(df_labels, df_data)
print(res)
print("----------------------------------- TOTAL EXECUTION TIME -----------------------------------\n")
END_TIME = time.time()
print(END_TIME - START_TIME)
if __name__ == "__main__": main()