# read tensorboard logs to dataframe
# based on https://stackoverflow.com/a/52095336
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

def tabulate_events(dpath):
    """Load the TensorBoard scalar logs of every run found under *dpath*.

    Each immediate child of *dpath* is treated as one experiment run
    directory and parsed with an ``EventAccumulator``.

    Args:
        dpath: path to a directory whose immediate children are
            TensorBoard run directories.

    Returns:
        A list of ``(experiment_dir_name, DataFrame)`` tuples, one per
        child directory. Each DataFrame has one column per scalar tag
        and one row per logged event; the bookkeeping tag
        'Initial_DataCollection_AverageReturn' is skipped.
    """
    experiment_dirs = os.listdir(dpath)
    summary_iterators = [
        EventAccumulator(os.path.join(dpath, dname)).Reload()
        for dname in experiment_dirs
    ]

    df_tables = []
    for exp_name, accumulator in zip(experiment_dirs, summary_iterators):
        tags = accumulator.Tags()['scalars']

        columns = {}
        for tag in tags:
            # Logged only once at startup; its single value would produce a
            # column whose length disagrees with every other tag's column.
            if tag == 'Initial_DataCollection_AverageReturn':
                continue
            columns[tag] = [event.value for event in accumulator.Scalars(tag)]

        # Build each column as a Series so tags with differing event counts
        # are padded with NaN instead of raising a ValueError.
        df = pd.DataFrame({tag: pd.Series(values) for tag, values in columns.items()})
        df_tables.append((exp_name, df))
    return df_tables
||
if __name__ == '__main__':

    def plot_experiments(subdir, y_tag, ylabel, title):
        """Plot *y_tag* vs. environment steps for every run under hw3/data/<subdir>.

        One curve per experiment directory, labeled with the directory name.
        """
        root_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'hw3', 'data', subdir)
        for experiment_dir, df in tabulate_events(root_dir):
            print(f"{experiment_dir}:\n {df}")  # debug
            plt.plot(df['Train_EnvstepsSoFar'], df[y_tag], label=experiment_dir)
        plt.legend()
        plt.xlabel('Number of Environment Steps')
        plt.ylabel(ylabel)
        plt.title(title)
        plt.show()

    # In the first graph, compare the learning curves (average return vs.
    # number of environment steps) for the experiments running with batch
    # size of 1000.
    # plot_experiments('p1311', 'Train_AverageReturn', 'Average Return',
    #                  'Learning Curves for Batch Size of 1000')

    # In the second graph, compare the learning curves for the experiments
    # running with batch size of 4000. (The large batch experiments.)
    # plot_experiments('p1312', 'Train_AverageReturn', 'Average Return',
    #                  'Learning Curves for Batch Size of 4000')

    # Plot a learning curve for the baseline loss.
    # plot_experiments('p231', 'Baseline_Loss', 'Baseline Loss',
    #                  'Learning Curve for Baseline Loss')

    # Plot a learning curve for the evaluation return. You should expect to
    # converge to the maximum reward of 500.
    # plot_experiments('p232', 'Train_AverageReturn', 'Average Return',
    #                  'Learning Curve for Average Return')

    # Run another experiment with a decreased number of baseline gradient
    # steps (-bgs) and/or baseline learning rate (-blr). How does this affect
    # (a) the baseline learning curve and
    # plot_experiments('p233', 'Baseline_Loss', 'Baseline Loss',
    #                  'Learning Curve for Baseline Loss for Batch Size of 5000 '
    #                  'with Decreased Baseline Gradient Steps and/or Baseline '
    #                  'Learning Rate')

    # (b) the performance of the policy?
    # plot_experiments('p233', 'Train_AverageReturn', 'Average Return',
    #                  'Learning Curve for Average Return for Batch Size of 5000 '
    #                  'with Decreased Baseline Gradient Steps and/or Baseline '
    #                  'Learning Rate')

    # How does the command line argument -na influence the performance?
    # Why is that the case?
    # plot_experiments('p234', 'Train_AverageReturn', 'Average Return',
    #                  'Learning Curve for Average Return for Batch Size of 5000 '
    #                  'with Command Line Argument -na')

    # HalfCheetah
    plot_experiments('p24', 'Train_AverageReturn', 'Average Return',
                     'Learning Curve for Average Return for HalfCheetah')