part2 complete
This commit is contained in:
109
plot_result.py
109
plot_result.py
@@ -19,7 +19,8 @@ def tabulate_events(dpath):
|
||||
tags = event_iterator.Tags()['scalars']
|
||||
|
||||
for it in summary_iterators:
|
||||
assert it.Tags()['scalars'] == tags
|
||||
print(f'comparing {it.Tags()["scalars"]} and {tags}')
|
||||
# assert it.Tags()['scalars'] == tags
|
||||
|
||||
out = defaultdict(list)
|
||||
steps = []
|
||||
@@ -45,20 +46,100 @@ def tabulate_events(dpath):
|
||||
|
||||
if __name__ == '__main__':
|
||||
# In the first graph, compare the learning curves (average return vs. number of environment steps) for the experiments running with a batch size of 1000.
|
||||
root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1311')
|
||||
df_tables = tabulate_events(root_dir)
|
||||
for experiment_dir, df in df_tables:
|
||||
# debug
|
||||
print(f"{experiment_dir}:\n {df}")
|
||||
plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
|
||||
plt.legend()
|
||||
plt.xlabel('Number of Environment Steps')
|
||||
plt.ylabel('Average Return')
|
||||
plt.title('Learning Curves for Batch Size of 1000')
|
||||
plt.show()
|
||||
# root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1311')
|
||||
# df_tables = tabulate_events(root_dir)
|
||||
# for experiment_dir, df in df_tables:
|
||||
# # debug
|
||||
# print(f"{experiment_dir}:\n {df}")
|
||||
# plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
|
||||
# plt.legend()
|
||||
# plt.xlabel('Number of Environment Steps')
|
||||
# plt.ylabel('Average Return')
|
||||
# plt.title('Learning Curves for Batch Size of 1000')
|
||||
# plt.show()
|
||||
|
||||
# In the second graph, compare the learning curves for the experiments running with a
|
||||
# batch size of 4000. (The large batch experiments.)
|
||||
root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1312')
|
||||
# root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p1312')
|
||||
# df_tables = tabulate_events(root_dir)
|
||||
# for experiment_dir, df in df_tables:
|
||||
# # debug
|
||||
# print(f"{experiment_dir}:\n {df}")
|
||||
# plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
|
||||
# plt.legend()
|
||||
# plt.xlabel('Number of Environment Steps')
|
||||
# plt.ylabel('Average Return')
|
||||
# plt.title('Learning Curves for Batch Size of 4000')
|
||||
# plt.show()
|
||||
|
||||
# Plot a learning curve for the baseline loss.
|
||||
# root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p231')
|
||||
# df_tables = tabulate_events(root_dir)
|
||||
# for experiment_dir, df in df_tables:
|
||||
# # debug
|
||||
# print(f"{experiment_dir}:\n {df}")
|
||||
# plt.plot(df['Train_EnvstepsSoFar'], df['Baseline_Loss'], label=experiment_dir)
|
||||
# plt.legend()
|
||||
# plt.xlabel('Number of Environment Steps')
|
||||
# plt.ylabel('Baseline Loss')
|
||||
# plt.title('Learning Curve for Baseline Loss')
|
||||
# plt.show()
|
||||
|
||||
# Plot a learning curve for the evaluation return. You should expect to converge to the maximum reward of 500.
|
||||
# root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p232')
|
||||
# df_tables = tabulate_events(root_dir)
|
||||
# for experiment_dir, df in df_tables:
|
||||
# # debug
|
||||
# print(f"{experiment_dir}:\n {df}")
|
||||
# plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
|
||||
# plt.legend()
|
||||
# plt.xlabel('Number of Environment Steps')
|
||||
# plt.ylabel('Average Return')
|
||||
# plt.title('Learning Curve for Average Return')
|
||||
# plt.show()
|
||||
|
||||
# Run another experiment with a decreased number of baseline gradient steps (-bgs in command line) and/or baseline learning rate (-blr in command line). How does this affect
|
||||
# (a) the baseline learning curve and
|
||||
# root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p233')
|
||||
# df_tables = tabulate_events(root_dir)
|
||||
# for experiment_dir, df in df_tables:
|
||||
# # debug
|
||||
# print(f"{experiment_dir}:\n {df}")
|
||||
# plt.plot(df['Train_EnvstepsSoFar'], df['Baseline_Loss'], label=experiment_dir)
|
||||
# plt.legend()
|
||||
# plt.xlabel('Number of Environment Steps')
|
||||
# plt.ylabel('Baseline Loss')
|
||||
# plt.title('Learning Curve for Baseline Loss for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate')
|
||||
# plt.show()
|
||||
|
||||
# (b) the performance of the policy?
|
||||
# root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p233')
|
||||
# df_tables = tabulate_events(root_dir)
|
||||
# for experiment_dir, df in df_tables:
|
||||
# # debug
|
||||
# print(f"{experiment_dir}:\n {df}")
|
||||
# plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
|
||||
# plt.legend()
|
||||
# plt.xlabel('Number of Environment Steps')
|
||||
# plt.ylabel('Average Return')
|
||||
# plt.title('Learning Curve for Average Return for Batch Size of 5000 with Decreased Baseline Gradient Steps and/or Baseline Learning Rate')
|
||||
# plt.show()
|
||||
|
||||
# How does the command line argument -na influence the performance? Why is that the case?
|
||||
# root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p234')
|
||||
# df_tables = tabulate_events(root_dir)
|
||||
# for experiment_dir, df in df_tables:
|
||||
# # debug
|
||||
# print(f"{experiment_dir}:\n {df}")
|
||||
# plt.plot(df['Train_EnvstepsSoFar'], df['Train_AverageReturn'], label=experiment_dir)
|
||||
# plt.legend()
|
||||
# plt.xlabel('Number of Environment Steps')
|
||||
# plt.ylabel('Average Return')
|
||||
# plt.title('Learning Curve for Average Return for Batch Size of 5000 with Command Line Argument -na')
|
||||
# plt.show()
|
||||
|
||||
# HalfCheetah
|
||||
root_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data','p24')
|
||||
df_tables = tabulate_events(root_dir)
|
||||
for experiment_dir, df in df_tables:
|
||||
# debug
|
||||
@@ -67,5 +148,5 @@ if __name__ == '__main__':
|
||||
plt.legend()
|
||||
plt.xlabel('Number of Environment Steps')
|
||||
plt.ylabel('Average Return')
|
||||
plt.title('Learning Curves for Batch Size of 4000')
|
||||
plt.title('Learning Curve for Average Return for HalfCheetah')
|
||||
plt.show()
|
||||
Reference in New Issue
Block a user