partial update for section 1

This commit is contained in:
Trance-0
2025-11-01 16:28:48 -05:00
parent 0f109ac389
commit ac986ec69a
40 changed files with 1439 additions and 3 deletions

71
plot_result.py Normal file
View File

@@ -0,0 +1,71 @@
# read tensorboard logs to dataframe
# based on https://stackoverflow.com/a/52095336
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
    """Load every TensorBoard run found under *dpath* into a DataFrame.

    Each immediate subdirectory of *dpath* is treated as one run whose
    event files are parsed with ``EventAccumulator``.

    Parameters
    ----------
    dpath : str
        Directory whose immediate subdirectories each contain one run's
        TensorBoard event files.

    Returns
    -------
    list[tuple[str, pandas.DataFrame]]
        One ``(run_name, dataframe)`` pair per subdirectory. The frame has
        one column per scalar tag (except the ignored one) and one row per
        logged event.
    """
    experiment_dirs = os.listdir(dpath)
    summary_iterators = [
        EventAccumulator(os.path.join(dpath, dname)).Reload()
        for dname in experiment_dirs
    ]
    # All runs are expected to log the same scalar tags. Verify this once,
    # against the first run, instead of re-running the full cross-check on
    # every loop iteration as the original did (O(n^2) Tags() calls).
    if summary_iterators:
        reference_tags = summary_iterators[0].Tags()['scalars']
        for it in summary_iterators:
            assert it.Tags()['scalars'] == reference_tags
    df_tables = []
    for experiment_dir, event_iterator in zip(experiment_dirs, summary_iterators):
        tags = event_iterator.Tags()['scalars']
        out = defaultdict(list)
        for tag in tags:
            # This tag has a different event count than the others, which
            # would break the column-aligned DataFrame construction below.
            if tag == 'Initial_DataCollection_AverageReturn':
                continue
            out[tag] = [e.value for e in event_iterator.Scalars(tag)]
        # The frame is freshly built each iteration, so no defensive .copy()
        # is needed before storing it.
        df_tables.append((experiment_dir, pd.DataFrame(out)))
    return df_tables
def _plot_learning_curves(data_subdir, title):
    """Plot Train_AverageReturn vs. environment steps for each run.

    Reads all runs under ``hw3/data/<data_subdir>`` (relative to this
    file), overlays their learning curves on one labeled figure, and
    shows it. Factored out because the original script duplicated this
    entire section verbatim for each batch size.

    Parameters
    ----------
    data_subdir : str
        Name of the experiment group directory under ``hw3/data``.
    title : str
        Title for the resulting figure.
    """
    root_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', data_subdir)
    for experiment_dir, df in tabulate_events(root_dir):
        # debug
        print(f"{experiment_dir}:\n {df}")
        plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'],
                 label=experiment_dir)
    plt.legend()
    plt.xlabel('Number of Environment Steps')
    plt.ylabel('Average Return')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    # First graph: learning curves (average return vs. number of
    # environment steps) for the experiments run with batch size 1000.
    _plot_learning_curves('p1311', 'Learning Curves for Batch Size of 1000')
    # Second graph: learning curves for the experiments run with batch
    # size 4000 (the large-batch experiments).
    _plot_learning_curves('p1312', 'Learning Curves for Batch Size of 4000')