# adaptive-nback/benchmarks/nb_gm_002_bm.py at aec0248ce4a1a45bec142df8f7dffe34b845b50a
#
# Fork: 0
# morteza / adaptive-nback
# Find file
# Newer
# Older
# adaptive-nback / benchmarks / nb_gm_002_bm.py
# Morteza Ansarinia on 28 Feb 2019 3 KB diagrams to compare nb_gm_002 vs nb_gm_004
# Raw Blame History
import heapq
import csv
import random

from benchmarks.common import *


def to_csv_row(sample_index, seq):
    """Build one CSV record describing a generated sequence.

    The record is laid out as
    ``[index, <count per choice>..., sequence, ralph_skewed]``:

    * ``index`` is ``sample_index`` converted to a string,
    * the counts are floats, one per entry of the module-level ``choices``
      and in the same order,
    * ``sequence`` is the items of ``seq`` joined into a single string,
    * ``ralph_skewed`` is ``'True'`` or ``'False'``.
    """
    counts = []
    for choice in choices:
        counts.append(float(seq.count(choice)))

    # A sequence is flagged as skewed when the most frequent half of the
    # choices accounts for more than two thirds of all trials.
    top_half = int(len(choices) / 2)
    dominant_total = sum(heapq.nlargest(top_half, counts))
    is_skewed = dominant_total > (len(seq) * 2 / 3)

    row = [str(sample_index)]
    row.extend(counts)
    row.append(''.join(seq))
    row.append(str(is_skewed))
    return row


def benchmark(filename):
    """Generate random sequences with nb_gm_002 and write their stats to a CSV.

    ``sample_size`` sequences are generated. Each one gets a random length
    drawn from ``trials_range`` (both ends inclusive) and a requested target
    count of one third of that length; the two remaining ``generate``
    arguments are passed as 0. One row per sequence, built by
    ``to_csv_row``, is written after a header row.

    ``choices``, ``n``, ``sample_size`` and ``trials_range`` are module-level
    names that are not defined in this file (presumably provided by the star
    import from ``benchmarks.common`` -- TODO confirm).

    :param filename: path of the CSV file to create or overwrite.
    """
    from generators.nb_gm_002 import SequenceGenerator

    generator = SequenceGenerator(choices, n)

    # TODO store timing details
    # newline='' is required by the csv module for file objects: the writer
    # emits its own '\r\n' row terminator, and without it platforms that
    # translate '\n' would produce '\r\r\n' (blank lines between rows).
    with open(filename, mode='w', newline='') as benchmark_results_file:
        writer = csv.writer(benchmark_results_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['index'] + choices + ['seq', 'ralph_skewed'])
        for i in range(sample_size):
            trials = random.randint(trials_range[0], trials_range[1])
            seq = generator.generate(trials, trials // 3, 0, 0)
            print(f"sequence {i}/{sample_size}: {trials} trials")
            writer.writerow(to_csv_row(i, seq))


def skewness_diagram(csv_filename, figure_title):
    """Plot the percentage of skewed sequences against sequence length.

    Reads ``csv_filename``, which must contain one count column per entry of
    the module-level ``choices`` plus a ``ralph_skewed`` column. For every
    sequence length that occurs in the data, the share of skewed sequences
    is drawn as a scatter point, and a degree-3 polynomial fit is overlaid
    in red. The figure is saved to ``results/<figure_title>.png`` and then
    shown.

    :param csv_filename: path of the CSV file to read.
    :param figure_title: plot title, also used as the PNG file name.
    """
    import pandas as pd
    import numpy as np
    from matplotlib import pyplot as plt

    data = pd.read_csv(csv_filename)
    # The sequence length is recovered as the sum of the per-choice counts.
    data['trials'] = data[choices].sum(axis=1)
    longest = data['trials'].max()
    shortest = data['trials'].min()

    rows = []
    for sequence_length in range(int(shortest), int(longest) + 1):
        has_length = data.trials == sequence_length
        # Both totals add up the `trials` values of the selected rows; every
        # selected row has the same length, so their quotient equals the
        # fraction of rows that are skewed.
        total = np.sum(data[has_length].trials)
        skewed_total = np.sum(data[np.logical_and(has_length, data.ralph_skewed)].trials)
        if total == 0:
            # No sequence of this length in the data.
            continue
        rows.append([sequence_length, skewed_total * 100.0 / total])
    stats = pd.DataFrame(rows, columns=['trials', 'skewness'])

    plt.ylim([0, 110])
    plt.scatter(stats.trials, stats.skewness, alpha=0.1)
    trend = np.poly1d(np.polyfit(stats.trials, stats.skewness, 3))
    plt.plot(stats.trials, trend(stats.trials), color='red')
    plt.title(figure_title)
    plt.ylabel('skewed blocks (%)')
    plt.xlabel('# of trials')
    plt.savefig(f'results/{figure_title}.png', bbox_inches='tight')
    plt.show()


def targets_ratio_diagram(csv_filename, figure_title):
    """Scatter-plot the percentage of targets against sequence length.

    Reads ``csv_filename``, which must contain one count column per entry of
    the module-level ``choices`` plus a ``seq`` column. For each row, the
    number of targets is the first value returned by
    ``count_targets_and_lures(seq, n)``. The figure is saved to
    ``results/<figure_title>_targets_ratio.png`` and then shown.

    :param csv_filename: path of the CSV file to read.
    :param figure_title: prefix of the plot title and of the PNG file name.
    """
    import pandas as pd
    from matplotlib import pyplot as plt

    def targets_in(row):
        # Only the first element of the returned pair is needed here.
        return count_targets_and_lures(row.seq, n)[0]

    def percent_targets(row):
        return row.targets * 100 / row.trials

    data = pd.read_csv(csv_filename)
    # The sequence length is recovered as the sum of the per-choice counts.
    data['trials'] = data[choices].sum(axis=1)
    data['targets'] = data.apply(targets_in, axis=1)
    data['targets_ratio'] = data.apply(percent_targets, axis=1)

    plt.scatter(data.trials, data.targets_ratio, alpha=0.3)
    plt.title(f"{figure_title} - expected targets ratio = 33%")
    plt.xlabel('# of trials')
    plt.ylabel('targets (%)')
    plt.savefig(f'results/{figure_title}_targets_ratio.png', bbox_inches='tight')
    plt.show()


import time
import numpy as np
import pandas as pd

import benchmarks.common as common

def profile():
    """Time nb_gm_002 on random configurations and save the results as CSV.

    For each of ``common.sample_size`` runs, a sequence length and an
    n-back level are drawn at random, a sequence is generated, and the
    generation time is recorded together with the target/lure counts and
    skewness reported by ``benchmarks.common``. All runs are written to
    ``benchmarks/results/nb_gm_002_profile.csv``.
    """
    from generators.nb_gm_002 import SequenceGenerator

    rows = []

    for s in range(common.sample_size):
        # np.random.randint excludes the upper bound, so trials_range[1]
        # itself is never drawn and n ranges over 2..7.
        trials = np.random.randint(common.trials_range[0], common.trials_range[1])
        targets = int(trials / 3)
        lures = int(targets / 6)  # tl_ratio = 2.0
        n = np.random.randint(2, 8)
        gen = SequenceGenerator(common.choices, n)
        # perf_counter is monotonic and high-resolution, so the measurement
        # is not disturbed by system clock adjustments.
        started = time.perf_counter()
        # The lure budget is split evenly between the last two arguments.
        seq = gen.generate(trials, targets, int(lures/2), int(lures/2))
        elapsed = time.perf_counter() - started
        t, lu = common.count_targets_and_lures(seq, n)
        skewed = common.skewness(seq, common.choices)
        # Label rows with the algorithm actually being profiled here.
        rows.append(['nb_gm_002', n, trials, elapsed, t, lu, skewed, ''.join(seq)])
        print(f"sequence #{s+1} generated")
    res_df = pd.DataFrame(rows, columns=['alg', 'n', 'trials', 'time', 'targets', 'lures', 'skewed', 'sequence'])
    res_df.to_csv('benchmarks/results/nb_gm_002_profile.csv', sep=',', encoding='utf-8')

# Script entry point: running this module directly executes profile().
if __name__ == '__main__':
    alg = 'nb_gm_002'
    # `n` is not defined in this file; presumably it comes from the star
    # import of benchmarks.common -- TODO confirm.
    # NOTE(review): `alg` and `fn` are not passed to the profile() call below.
    fn = f'results/{alg}_{n}back.csv'
    profile()