forked from facebookresearch/fairchem
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathadslab_parse_stats.py
More file actions
120 lines (100 loc) · 3.89 KB
/
adslab_parse_stats.py
File metadata and controls
120 lines (100 loc) · 3.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import re
import json
from collections import defaultdict
from pathlib import Path
import matplotlib.cm as cmx
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
from minydra import resolved_args
from tqdm import tqdm
# Text that separates two consecutive samples in the parsed log file.
_SAMPLE_SEPARATOR = "------------------------------\n------------------------------"

# Every timing whose name contains this marker is stored under this exact key.
_ACTIONS_TO_DATA = "Actions to Data"

# Matches "<name> | Done! (<value>s)" lines; <value> is parsed as a float.
_TIME_REGEX = re.compile(r"(.*) \| Done! \((.*)s\)")
_TOTAL_ADSORBED_REGEX = re.compile(r"Total adsorbed_surfaces: (\d+)")
_NON_REASONABLE_REGEX = re.compile(r"Non reasonable configs: (\d+)/(\d+)")

# One single-group pattern per metadata field stored in the JSON output.
_METADATA_REGEXES = {
    "adsorbate_id": re.compile(
        r"args(?:\.actions|)\.adsorbate_id is None, choosing (\d+)"
    ),
    "adsorbate_desc": re.compile(r"# Selected adsorbate: (.+)"),
    "bulk_id": re.compile(r"args\.actions\.bulk_id is None, choosing (\d+)"),
    "bulk_desc": re.compile(r"# Selected bulk: (.+)"),
    "surface_id": re.compile(r"args\.actions\.surface_id is None, choosing (\d+)"),
    "surface_desc": re.compile(r"# Selected surface: (.+)"),
    "bond_indices": re.compile(r"bond_indices: (.+)"),
}


def split_samples(text):
    """Split the raw log text into the samples worth parsing.

    Args:
        text: Full content of the log file.

    Returns:
        The chunks delimited by the sample separator that contain
        "Actions to Data" and do not contain "ABORTING".
    """
    return [
        chunk
        for chunk in text.split(_SAMPLE_SEPARATOR)
        if _ACTIONS_TO_DATA in chunk and "ABORTING" not in chunk
    ]


def parse_sample(sample):
    """Extract the measurements and metadata logged for one sample.

    Args:
        sample: Text of a single sample, as produced by ``split_samples``.

    Returns:
        A ``(measurements, time_keys, metadata)`` tuple:

        * ``measurements``: list of ``(name, float)`` pairs, the timings in
          log order followed by the total number of adsorbed surfaces and
          the proportion of non reasonable ones.
        * ``time_keys``: set of the measurement names that are timings.
        * ``metadata``: dict with one entry per metadata field; ``*_id``
          fields are ints, the others strings, and a field whose line is
          absent from the sample is ``None``.

    Raises:
        ValueError: If the sample lacks the "Total adsorbed_surfaces" or
            "Non reasonable configs" line, or a timing is not a number.
    """
    measurements = []
    for name, seconds in _TIME_REGEX.findall(sample):
        name = name.strip()
        if _ACTIONS_TO_DATA in name:
            # Whatever prefixes the marker, group these under a single key.
            name = _ACTIONS_TO_DATA
        measurements.append((name, float(seconds)))
    time_keys = {name for name, _ in measurements}

    total_match = _TOTAL_ADSORBED_REGEX.search(sample)
    ratio_match = _NON_REASONABLE_REGEX.search(sample)
    if total_match is None or ratio_match is None:
        raise ValueError(
            "Sample has no 'Total adsorbed_surfaces' or 'Non reasonable configs' line"
        )
    non_reasonable, n_configs = (int(group) for group in ratio_match.groups())
    measurements += [
        ("Total adsorbed_surfaces", float(total_match.group(1))),
        (
            "Proportion of non reasonable adsorbed_surfaces",
            # "0/0" would otherwise raise ZeroDivisionError.
            non_reasonable / n_configs if n_configs else 0.0,
        ),
    ]

    metadata = {}
    for field, regex in _METADATA_REGEXES.items():
        match = regex.search(sample)
        if match is None:
            # The line is not in this sample: record the gap instead of crashing.
            metadata[field] = None
            continue
        value = match.group(1)
        metadata[field] = int(value) if field.endswith("_id") else value
    return measurements, time_keys, metadata


def summarize_measurements(times, min_mean=0.1):
    """Compute the mean and standard deviation of each retained measurement.

    Args:
        times: Mapping from measurement name to its list of values.
        min_mean: Measurements whose mean is not strictly greater than this
            threshold are left out.

    Returns:
        A ``(means, stds)`` pair of dicts sharing the same keys, in the
        iteration order of ``times``.
    """
    means = {}
    for name, values in times.items():
        mean = np.mean(values)
        if mean > min_mean:
            means[name] = mean
    stds = {name: np.std(times[name]) for name in means}
    return means, stds


def main():
    """Parse a log file, plot its measurements and dump per-sample data.

    Command-line arguments (read through ``resolved_args``):
        file: Path to the log file to parse (required).
        plot_ncols: Number of columns in the grid of plots (3 if falsy).
        out_png: Output image path (``<file stem>.png`` if falsy).
        out_json: Output JSON path (``<file stem>.json`` if falsy).

    Raises:
        ValueError: If ``file`` is not provided.
        FileNotFoundError: If ``file`` is not an existing regular file.
    """
    args = resolved_args()
    # Explicit checks rather than `assert`, which is stripped under `python -O`.
    if args.file is None:
        raise ValueError("The `file` argument (log file to parse) is required")
    log_path = Path(args.file)
    if not log_path.is_file():
        raise FileNotFoundError(f"{log_path} does not exist or is not a file")

    with open(log_path, "r") as f:
        samples = split_samples(f.read())

    times = defaultdict(list)
    time_keys = set()
    metadatas = []
    for sample in tqdm(samples):
        measurements, sample_time_keys, metadata = parse_sample(sample)
        # Accumulate over every sample so that a timing which first shows up
        # after the first sample is still labelled as a timing.
        time_keys |= sample_time_keys
        record = {}
        for name, value in measurements:
            times[name].append(value)
            record[name] = value
        record.update(metadata)
        metadatas.append(record)

    means, stds = summarize_measurements(times)
    # Plot exactly the retained measurements, in first-seen order, so the
    # grid size always matches the number of plots.
    keys = list(means)

    cmap = plt.get_cmap("viridis")
    cnorm = colors.Normalize(vmin=0, vmax=len(samples))
    scalar_map = cmx.ScalarMappable(norm=cnorm, cmap=cmap)

    ncols = args.plot_ncols or 3
    # Ceil division, with at least one row because subplots() rejects 0 rows.
    nrows = max(1, -(-len(keys) // ncols))
    # squeeze=False: always get a 2D array of axes, even for a 1x1 grid.
    fig, axs = plt.subplots(
        nrows, ncols, figsize=(ncols * 5, nrows * 4), squeeze=False
    )
    axes = list(axs.flat)

    for ax, key in zip(axes, tqdm(keys)):
        bars = ax.bar(range(len(times[key])), times[key])
        # Color each bar by its position in the list of values.
        for b, bar in enumerate(bars):
            bar.set_color(scalar_map.to_rgba(b))
        title = key
        if key in time_keys:
            title += f" ({means[key]:.2f}s +/- {stds[key]:.2f}s)"
        else:
            title += " (count)"
        ax.set_title(title, fontsize=8)
        ax.xaxis.set_tick_params(labelsize=6)
        ax.yaxis.set_tick_params(labelsize=6)
    # Hide the grid cells left over when len(keys) is not a multiple of ncols.
    for ax in axes[len(keys):]:
        ax.set_visible(False)

    fig.suptitle(f"Time (s) for operations or printed counts ({len(samples)} samples)")
    fig.savefig(args.out_png or f"{log_path.stem}.png", dpi=150)

    with open(args.out_json or f"{log_path.stem}.json", "w") as f:
        json.dump(metadatas, f)


if __name__ == "__main__":
    main()