-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyze_err_bar.py
131 lines (110 loc) · 4.12 KB
/
analyze_err_bar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import sys
import requests
import datetime
import pandas as pd
import numpy as np
from numpy import trapz
import csv
import matplotlib.pyplot as plt
from scipy.stats import norm
import math
from citrination_client.client import CitrinationClient
def main(
    pred_file="predictions_output.csv",
    exp_file=(
        "C:\\Users\\mvane\\Documents\\Skunkworks\\BMG\\Data\\"
        "BMG_full_dataset_with_energies - Copy.csv"
    ),
):
    """Compare predicted uncertainties against the actual prediction errors.

    Assumes predictions have already been made and stored in a CSV
    (``make_predictions.py`` in this repo can produce it).  For every row
    whose three measured temperatures (Tg, Tl, Tx) are all present, the
    absolute prediction error and the absolute difference between that error
    and the reported uncertainty are collected for Tg, Tl, Tx and the derived
    GFA metrics (Trg, gamma, omega).  The cumulative histogram of one of
    those series is plotted against a Gaussian with the same mean and
    standard deviation, and the absolute difference between the areas under
    the two curves is printed.

    Rows of the two files are compared by index, so both files should be
    sorted the same way (alphabetically by formula).

    Args:
        pred_file: CSV with the predicted values and their uncertainties.
        exp_file: CSV with the measured (actual) values.

    Raises:
        KeyError: if ``CITRINATION_API_KEY`` is not set or an expected column
            is missing from either CSV.
        ValueError: if no row has all three measured temperatures.
    """
    # Set up client.
    # NOTE(review): `client` is never used below; it is kept only so that any
    # side effects of constructing it (and the API-key requirement) stay as
    # they were -- confirm whether it can be dropped.
    client = CitrinationClient(os.environ["CITRINATION_API_KEY"], 'https://citrination.com')

    # Read in the predicted values and the actual (experimental) values.
    predicted_vals = pd.read_csv(pred_file)
    exp_data = pd.read_csv(exp_file)

    formulas = predicted_vals['Formula']
    tg_pred = predicted_vals['Property Tg']
    tl_pred = predicted_vals['Property Tl']
    tx_pred = predicted_vals['Property Tx']
    tg_act = exp_data['PROPERTY: Tg (K)']
    tl_act = exp_data['PROPERTY: Tl (K)']
    tx_act = exp_data['PROPERTY: Tx (K)']

    # GFA metrics, derived from the predicted and from the actual temperatures.
    trg_calc = tg_pred/tl_pred
    gamma_calc = tx_pred/(tg_pred + tl_pred)
    omega_calc = (tg_pred/tx_pred) - ((2*tg_pred)/(tg_pred + tl_pred))
    trg_act = tg_act/tl_act
    gamma_act = tx_act/(tg_act + tl_act)
    omega_act = (tg_act/tx_act) - 2*(tg_act/(tg_act + tl_act))

    # One (predicted, actual, reported uncertainty) triple per property.
    properties = {
        'tg': (tg_pred, tg_act, predicted_vals['Property Tg Uncertainty'].tolist()),
        'tl': (tl_pred, tl_act, predicted_vals['Property Tl Uncertainty'].tolist()),
        'tx': (tx_pred, tx_act, predicted_vals['Property Tx Uncertainty'].tolist()),
        'trg': (trg_calc, trg_act, predicted_vals['Property Trg Uncertainty'].tolist()),
        'gamma': (gamma_calc, gamma_act,
                  predicted_vals['Property $\\gamma$ Uncertainty'].tolist()),
        'omega': (omega_calc, omega_act,
                  predicted_vals['Property $\\omega$ Uncertainty'].tolist()),
    }
    # act_err[name]: |predicted - actual|
    # err_bar[name]: | |predicted - actual| - reported uncertainty |
    act_err = {name: [] for name in properties}
    err_bar = {name: [] for name in properties}

    for i in reversed(range(len(formulas))):
        # Skip rows where any of the three measured temperatures is missing.
        if np.isnan(tg_act[i]) or np.isnan(tl_act[i]) or np.isnan(tx_act[i]):
            continue
        for name, (pred, act, loss) in properties.items():
            err = abs(pred[i] - act[i])
            act_err[name].append(err)
            err_bar[name].append(abs(err - loss[i]))

    # Plotting Gaussian vs. standard deviations.
    # NOTE(review): the plotted series is the Trg one, while the legend and
    # x-axis label below say gamma (with units of K) -- confirm which is
    # intended before relying on the figure.
    metric = err_bar['trg']
    if not metric:
        # Fail early with a clear message instead of min()/max() on an empty list.
        raise ValueError(
            "no rows with Tg, Tl and Tx all present; nothing to analyze")
    mu = np.mean(metric)
    sigma = np.std(metric)
    n_bins = 100

    fig, ax = plt.subplots(figsize=(8, 4))

    # Plot the cumulative histogram.  Raw strings keep the LaTeX backslash
    # without relying on an invalid '\g' escape sequence.
    n, bins, _ = ax.hist(metric, bins=n_bins, range=(min(metric), max(metric)),
                         density=True, histtype='step', cumulative=True,
                         label=r'$\gamma$ Error', linewidth=3)

    # Add a line showing the expected distribution: running sum of the
    # Gaussian pdf at the bin edges, rescaled so that it ends at 1.
    y = norm.pdf(bins, mu, sigma).cumsum()
    y /= y[-1]
    ax.plot(bins, y, 'k--', linewidth=3, label='Gaussian')

    # Tidy up the figure.
    ax.grid(True)
    ax.legend(loc='right', fontsize=20)
    ax.set_title('Cumulative step histograms', fontsize=24)
    ax.set_xlabel(r'$\gamma$ Error (K)', fontsize=22)
    ax.set_ylabel('Likelihood of occurrence', fontsize=22)
    plt.tick_params(axis='both', labelsize=16)
    plt.tight_layout()
    plt.show()

    # Areas are taken with unit spacing between samples.
    # NOTE(review): `y` is evaluated at the bin edges and `n` holds one value
    # per bin, so `y` presumably has one more sample than `n` -- confirm that
    # comparing these two areas directly is intended.
    area_under_gaussian = trapz(y)
    area_under_err_curve = trapz(n)
    print(abs(area_under_gaussian - area_under_err_curve))
# Script entry point: run the analysis only when this file is executed directly.
if __name__ == "__main__":
    main()