part2 complete

2025-11-02 12:55:49 -06:00
parent ac986ec69a
commit 248051db0d
36 changed files with 680 additions and 953 deletions
--- a/plot_result.py
+++ b/plot_result.py
@@ -19,7 +19,8 @@ def tabulate_events(dpath):
        tags = event_iterator.Tags()['scalars']

        for it in summary_iterators:
-            assert it.Tags()['scalars'] == tags
+            print(f'comparing {it.Tags()["scalars"]} and {tags}')
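+            # assert it.Tags()['scalars'] == tags  # NOTE(review): check disabled; mismatched tags are now only printed, not rejected - confirm this is intended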

        out = defaultdict(list)
        steps = []
@@ -45,20 +46,100 @@ def tabulate_events(dpath):

 if __name__ == '__main__':
    # In the first graph, compare the learning curves (average return vs. number of environment steps) for the experiments running with batch size of 1000.
-    root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1311')
-    df_tables = tabulate_events(root_dir)
-    for experiment_dir, df in df_tables:
-        # debug
-        print(f"{experiment_dir}:\n {df}")
-        plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
-    plt.legend()
-    plt.xlabel('Number of Environment Steps')
-    plt.ylabel('Average Return')
-    plt.title('Learning Curves for Batch Size of 1000')
-    plt.show()
+    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1311')
+    # df_tables = tabulate_events(root_dir)
+    # for experiment_dir, df in df_tables:
+    #     # debug
+    #     print(f"{experiment_dir}:\n {df}")
+    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
+    # plt.legend()
+    # plt.xlabel('Number of Environment Steps')
+    # plt.ylabel('Average Return')
+    # plt.title('Learning Curves for Batch Size of 1000')
+    # plt.show()
+
    # – In the second graph, compare the learning curves for the experiments running with
    # batch size of 4000. (The large batch experiments.)
-    root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1312')
+    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1312')
+    # df_tables = tabulate_events(root_dir)
+    # for experiment_dir, df in df_tables:
+    #     # debug
+    #     print(f"{experiment_dir}:\n {df}")
+    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
+    # plt.legend()
+    # plt.xlabel('Number of Environment Steps')
+    # plt.ylabel('Average Return')
+    # plt.title('Learning Curves for Batch Size of 4000')
+    # plt.show()
+
+    # Plot a learning curve for the baseline loss.
+    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p231')
+    # df_tables = tabulate_events(root_dir)
+    # for experiment_dir, df in df_tables:
+    #     # debug
+    #     print(f"{experiment_dir}:\n {df}")
+    #     plt.plot(df['Train_EnvstepsSoFar'], df['Baseline_Loss'], label=experiment_dir)
+    # plt.legend()
+    # plt.xlabel('Number of Environment Steps')
+    # plt.ylabel('Baseline Loss')
+    # plt.title('Learning Curve for Baseline Loss')
+    # plt.show()
+
+    # Plot a learning curve for the evaluation return. You should expect to converge to the maximum reward of 500.
+    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p232')
+    # df_tables = tabulate_events(root_dir)
+    # for experiment_dir, df in df_tables:
+    #     # debug
+    #     print(f"{experiment_dir}:\n {df}")
+    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
+    # plt.legend()
+    # plt.xlabel('Number of Environment Steps')
+    # plt.ylabel('Average Return')
+    # plt.title('Learning Curve for Average Return')
+    # plt.show()
+
+    # Run another experiment with a decreased number of baseline gradient steps (-bgs in command line) and/or baseline learning rate (-blr in command line). How does this affect
+    # (a) the baseline learning curve, and
+    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p233')
+    # df_tables = tabulate_events(root_dir)
+    # for experiment_dir, df in df_tables:
+    #     # debug
+    #     print(f"{experiment_dir}:\n {df}")
+    #     plt.plot(df['Train_EnvstepsSoFar'], df['Baseline_Loss'], label=experiment_dir)
+    # plt.legend()
+    # plt.xlabel('Number of Environment Steps')
+    # plt.ylabel('Baseline Loss')
+    # plt.title('Learning Curve for Baseline Loss for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate')
+    # plt.show()
+
+    # (b) the performance of the policy?
+    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p233')
+    # df_tables = tabulate_events(root_dir)
+    # for experiment_dir, df in df_tables:
+    #     # debug
+    #     print(f"{experiment_dir}:\n {df}")
+    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
+    # plt.legend()
+    # plt.xlabel('Number of Environment Steps')
+    # plt.ylabel('Average Return')
+    # plt.title('Learning Curve for Average Return for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate')
+    # plt.show()
+
+    # How does the command line argument -na influence the performance? Why is that the case?
+    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p234')
+    # df_tables = tabulate_events(root_dir)
+    # for experiment_dir, df in df_tables:
+    #     # debug
+    #     print(f"{experiment_dir}:\n {df}")
+    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
+    # plt.legend()
+    # plt.xlabel('Number of Environment Steps')
+    # plt.ylabel('Average Return')
+    # plt.title('Learning Curve for Average Return for Batch Size of 5000 with Command Line Argument -na')
+    # plt.show()
+
+    # HalfCheetah
+    root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p24')
    df_tables = tabulate_events(root_dir)
    for experiment_dir, df in df_tables:
        # debug
@@ -67,5 +148,5 @@ if __name__ == '__main__':
    plt.legend()
    plt.xlabel('Number of Environment Steps')
    plt.ylabel('Average Return')
-    plt.title('Learning Curves for Batch Size of 4000')
+    plt.title('Learning Curve for Average Return for HalfCheetah')
    plt.show()