-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyze_citrination_results.py
128 lines (112 loc) · 4.46 KB
/
analyze_citrination_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import numpy as np
import math
import pandas as pd
import csv
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics import mean_squared_error
# Load the measured ("actual") property values.
# The path is relative to the working directory. An absolute path used
# previously was:
# "C:\\Users\\mvane\\Documents\\Skunkworks\\BMG\\Data\\BMG_full_dataset_with_energies.csv"
actual_values = "summary_data.csv"
actual_vals = pd.read_csv(actual_values)

# Pull each column of interest out as a NumPy array.
# The gamma/omega column names contain a literal backslash. They are written
# as raw strings because "\g" and "\o" are not valid escape sequences and
# newer Python versions warn about them in ordinary string literals; the
# resulting key is character-for-character the same as before.
formulas = actual_vals['formula'].values
act_trg = actual_vals['PROPERTY: Trg'].values
act_gamma = actual_vals[r'PROPERTY: $\gamma$'].values
act_omega = actual_vals[r'PROPERTY: $\omega$'].values
act_tg = actual_vals['PROPERTY: Tg (K)'].values
act_tl = actual_vals['PROPERTY: Tl (K)'].values
act_tx = actual_vals['PROPERTY: Tx (K)'].values

# Load the predicted values. The comment in the original script says this CSV
# is produced by citrination_retrieve_predicted_vals ("predictions_output.csv");
# the script currently reads the same summary file as the actual values.
predicted_csv = "summary_data.csv"
predicted_data = pd.read_csv(predicted_csv)
tg_pred = predicted_data['Predicted Tg'].values
tl_pred = predicted_data['Predicted Tl'].values
tx_pred = predicted_data['Predicted Tx'].values
# The derived quantities are read straight from the file. The alternatives
# that were left commented out computed them from the predicted temperatures:
#   trg   = tg_pred / tl_pred
#   gamma = tx_pred / (tg_pred + tl_pred)
#   omega = (tg_pred / tx_pred) - 2 * (tg_pred / (tg_pred + tl_pred))
trg_pred = predicted_data['Predicted Trg'].values
gamma_pred = predicted_data[r'Predicted $\gamma$'].values
omega_pred = predicted_data[r'Predicted $\omega$'].values
# Convert the NumPy string entries to plain Python strings so they can be
# compared for equality below.
# NOTE(review): pred_form is built from the *actual* formulas, not from a
# column of predicted_data, so the order check below cannot currently find a
# mismatch. Confirm whether the predictions file has its own formula column
# that should be used here instead.
temp_form = [str(formula) for formula in formulas]
pred_form = temp_form

# Check that the formulas are in the same order, collecting the row index of
# every entry whose actual and predicted formulas disagree.
remove_indices = []
for i, (actual_formula, predicted_formula) in enumerate(zip(formulas, pred_form)):
    if str(actual_formula) != str(predicted_formula):
        remove_indices.append(i)
        print("bad formulas: " + str(predicted_formula) + ", " + str(actual_formula))
counter = len(remove_indices)

if remove_indices:
    # np.delete returns a new array rather than modifying its argument, so
    # every array has to be rebound to the filtered result. The mismatched
    # rows are dropped from the actual values, the predictions and the formula
    # list alike so that all of them stay aligned row-for-row.
    formulas = np.delete(formulas, remove_indices)
    act_trg, act_gamma, act_omega, act_tg, act_tl, act_tx = (
        np.delete(values, remove_indices)
        for values in (act_trg, act_gamma, act_omega, act_tg, act_tl, act_tx)
    )
    trg_pred, gamma_pred, omega_pred, tg_pred, tl_pred, tx_pred = (
        np.delete(values, remove_indices)
        for values in (trg_pred, gamma_pred, omega_pred, tg_pred, tl_pred, tx_pred)
    )
    print('Removed %d alloys from set due to mismatched formula orders.' % counter)
# Calculate stats for every property, draw a parity plot for each one and
# save the data behind each plot to its own CSV file.
all_act_props = [act_trg, act_gamma, act_omega, act_tg, act_tl, act_tx]
all_pred_props = [trg_pred, gamma_pred, omega_pred, tg_pred, tl_pred, tx_pred]
names = ["Trg", "gamma", "omega", "Tg", "Tl", "Tx"]
all_rmse = []
all_ndme = []

# The font settings are identical for every figure, so set them once up front.
plt.rc('font', size=20)
plt.rc('axes', titlesize=20)
plt.rc('axes', labelsize=20)
plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)
plt.rc('figure', titlesize=20)

for x, name in enumerate(names):
    print(name)

    # Keep only the rows where both the actual and the predicted value are
    # present (not NaN); the formulas are filtered with the same mask so the
    # exported rows stay aligned.
    keep = ~(np.isnan(all_act_props[x]) | np.isnan(all_pred_props[x]))
    all_act_props[x] = all_act_props[x][keep]
    all_pred_props[x] = all_pred_props[x][keep]
    copy_form = formulas[keep]
    actual = all_act_props[x]
    predicted = all_pred_props[x]

    # RMSE, and RMSE divided by the standard deviation of the actual values
    # (stored and reported as "NDME").
    std = np.std(actual)
    rmse = math.sqrt(mean_squared_error(actual, predicted))
    ndme = rmse / std
    all_rmse.append(rmse)
    all_ndme.append(ndme)

    # Parity plot: predicted against actual, with the y = x line for reference.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(actual, predicted, color='r', edgecolor='k')
    ax.plot(actual, actual, 'k')
    ax.set_title(name)
    ax.set_xlabel("Actual " + str(name) + " Value")
    ax.set_ylabel("Predicted " + str(name) + " Value")
    # transform=ax.transAxes places the label in axes-fraction coordinates, so
    # its position does not depend on the range of the data.
    ax.text(.65, .05, "NDME: %.2f" % ndme, transform=ax.transAxes, fontsize=18)
    fig.tight_layout()
    fig.savefig(str(name) + "_Parity.png")
    # pyplot keeps every figure alive until it is closed explicitly; release
    # it once it is on disk so figures do not pile up across properties.
    plt.close(fig)

    # Save data used to make plots to CSV. The with-block closes the file.
    with open(str(name) + "_predictions.csv", 'w', newline='') as outfile:
        wr = csv.writer(outfile)
        wr.writerow(["Formula", "Actual " + str(name), "Predicted " + str(name)])
        wr.writerows(zip(copy_form, actual, predicted))
# Write the summary table: a header row with an empty corner cell followed by
# the property names, then one row of RMSE values and one row of NDME values,
# each in the same order as `names`.
summary_header = [""] + list(names)
rmse_row = ["RMSE"] + list(all_rmse)
ndme_row = ["NDME"] + list(all_ndme)

with open("RMSE and NDME.csv", 'w', newline='') as fp:
    writer = csv.writer(fp)
    for record in (summary_header, rmse_row, ndme_row):
        writer.writerow(record)