# CSE5100H3/plot_result.py

# read tensorboard logs to dataframe
# based on https://stackoverflow.com/a/52095336
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator


def tabulate_events(dpath):
    """Read every TensorBoard run under `dpath` into a (run_name, DataFrame) pair."""
    experiment_dirs = sorted(os.listdir(dpath))  # sort for deterministic plot order
    # Note: EventAccumulator downsamples scalars by default; pass
    # size_guidance={'scalars': 0} if every logged point is needed.
    summary_iterators = [EventAccumulator(os.path.join(dpath, dname)).Reload()
                         for dname in experiment_dirs]
    df_tables = []
    for i, event_iterator in enumerate(summary_iterators):
        tags = event_iterator.Tags()['scalars']
        # debug: check that every run logged the same scalar tags
        for it in summary_iterators:
            print(f'comparing {it.Tags()["scalars"]} and {tags}')
            # assert it.Tags()['scalars'] == tags
        out = defaultdict(list)
        for tag in tags:
            # skip this tag: it is logged only once, so its column length
            # would not match the per-iteration scalars collected below
            if tag == 'Initial_DataCollection_AverageReturn':
                continue
            for e in event_iterator.Scalars(tag):
                out[tag].append(e.value)
        # debug: check column lengths before building the DataFrame
        # for key, value in out.items():
        #     print(f"{key}: {len(value)}")
        df = pd.DataFrame(out)
        df_tables.append((experiment_dirs[i], df))
    return df_tables
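

# The plotting blocks in __main__ below all repeat the same read-and-plot
# loop. A helper along these lines could consolidate them. This is a minimal
# sketch, not part of the original script: the name `plot_runs` is
# hypothetical, and it assumes each run's DataFrame has a
# 'Train_EnvstepsSoFar' column to use as the x-axis.
def plot_runs(root_dir, y_key, ylabel, title):
    """Plot `y_key` vs. environment steps for every run under `root_dir`."""
    for experiment_dir, df in tabulate_events(root_dir):
        plt.plot(df['Train_EnvstepsSoFar'], df[y_key], label=experiment_dir)
    plt.legend()
    plt.xlabel('Number of Environment Steps')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()
# Example usage (hypothetical):
#   plot_runs(root_dir, 'Train_AverageReturn', 'Average Return',
#             'Learning Curves for Batch Size of 1000')
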
if __name__ == '__main__':
    # In the first graph, compare the learning curves (average return vs.
    # number of environment steps) for the experiments run with a batch size
    # of 1000.
    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p1311')
    # df_tables = tabulate_events(root_dir)
    # for experiment_dir, df in df_tables:
    #     print(f"{experiment_dir}:\n {df}")  # debug
    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
    # plt.legend()
    # plt.xlabel('Number of Environment Steps')
    # plt.ylabel('Average Return')
    # plt.title('Learning Curves for Batch Size of 1000')
    # plt.show()
    # In the second graph, compare the learning curves for the experiments
    # run with a batch size of 4000 (the large-batch experiments).
    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p1312')
    # df_tables = tabulate_events(root_dir)
    # for experiment_dir, df in df_tables:
    #     print(f"{experiment_dir}:\n {df}")  # debug
    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
    # plt.legend()
    # plt.xlabel('Number of Environment Steps')
    # plt.ylabel('Average Return')
    # plt.title('Learning Curves for Batch Size of 4000')
    # plt.show()
    # Plot a learning curve for the baseline loss.
    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p231')
    # df_tables = tabulate_events(root_dir)
    # for experiment_dir, df in df_tables:
    #     print(f"{experiment_dir}:\n {df}")  # debug
    #     plt.plot(df['Train_EnvstepsSoFar'], df['Baseline_Loss'], label=experiment_dir)
    # plt.legend()
    # plt.xlabel('Number of Environment Steps')
    # plt.ylabel('Baseline Loss')
    # plt.title('Learning Curve for Baseline Loss')
    # plt.show()
    # Plot a learning curve for the evaluation return. You should expect it
    # to converge to the maximum reward of 500.
    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p232')
    # df_tables = tabulate_events(root_dir)
    # for experiment_dir, df in df_tables:
    #     print(f"{experiment_dir}:\n {df}")  # debug
    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
    # plt.legend()
    # plt.xlabel('Number of Environment Steps')
    # plt.ylabel('Average Return')
    # plt.title('Learning Curve for Average Return')
    # plt.show()
    # Run another experiment with a decreased number of baseline gradient
    # steps (-bgs on the command line) and/or baseline learning rate (-blr on
    # the command line). How does this affect
    # (a) the baseline learning curve and
    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p233')
    # df_tables = tabulate_events(root_dir)
    # for experiment_dir, df in df_tables:
    #     print(f"{experiment_dir}:\n {df}")  # debug
    #     plt.plot(df['Train_EnvstepsSoFar'], df['Baseline_Loss'], label=experiment_dir)
    # plt.legend()
    # plt.xlabel('Number of Environment Steps')
    # plt.ylabel('Baseline Loss')
    # plt.title('Learning Curve for Baseline Loss for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate')
    # plt.show()
    # (b) the performance of the policy?
    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p233')
    # df_tables = tabulate_events(root_dir)
    # for experiment_dir, df in df_tables:
    #     print(f"{experiment_dir}:\n {df}")  # debug
    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
    # plt.legend()
    # plt.xlabel('Number of Environment Steps')
    # plt.ylabel('Average Return')
    # plt.title('Learning Curve for Average Return for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate')
    # plt.show()
    # How does the command-line argument -na influence performance? Why is
    # that the case?
    # root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p234')
    # df_tables = tabulate_events(root_dir)
    # for experiment_dir, df in df_tables:
    #     print(f"{experiment_dir}:\n {df}")  # debug
    #     plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
    # plt.legend()
    # plt.xlabel('Number of Environment Steps')
    # plt.ylabel('Average Return')
    # plt.title('Learning Curve for Average Return for Batch Size of 5000 with Command Line Argument -na')
    # plt.show()
    # HalfCheetah
    root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', 'p24')
    df_tables = tabulate_events(root_dir)
    for experiment_dir, df in df_tables:
        print(f"{experiment_dir}:\n {df}")  # debug
        plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
    plt.legend()
    plt.xlabel('Number of Environment Steps')
    plt.ylabel('Average Return')
    plt.title('Learning Curve for Average Return for HalfCheetah')
    plt.show()
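    # Optional sketch, not in the original script: to save the figure, call
    # plt.savefig before plt.show(), since some backends clear the figure on
    # show. The filename and dpi below are illustrative assumptions.
    # plt.savefig('halfcheetah_average_return.png', dpi=150)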