import heapq
import csv
import random
import time
from benchmarks.common import *
def to_csv_row(sample_index, seq, time_per_block):
    """Build one CSV record describing a generated sequence.

    The returned list holds, in order: the sample index as text, one
    occurrence count (as a float) per entry of the module-level ``choices``,
    ``time_per_block`` exactly as passed in, the sequence joined into a single
    string, and the text form of the skewness flag.

    A sequence is flagged as skewed when the most frequent half of the
    choices (``int(len(choices) / 2)`` of them) together account for more
    than two thirds of its trials.
    """
    two_thirds_of_trials = len(seq) * 2 / 3
    occurrences = [float(seq.count(choice)) for choice in choices]

    top_half_size = int(len(choices) / 2)
    top_half_total = sum(heapq.nlargest(top_half_size, occurrences))
    is_skewed = top_half_total > two_thirds_of_trials

    return [
        str(sample_index),
        *occurrences,
        time_per_block,
        ''.join(seq),
        str(is_skewed),
    ]
def benchmark_skewness(filename):
    """Generate random sequences, log them to a CSV file, and plot skewness.

    For each of ``sample_size`` samples, a trial count is drawn uniformly from
    ``trials_range`` (both ends inclusive), a sequence is produced by the
    ``nb_gm_003`` ``SequenceGenerator`` and timed with ``time.perf_counter``,
    and one row built by ``to_csv_row`` is written to the CSV. After the file
    has been closed, ``plot_skewness`` is run on it.

    ``choices``, ``n``, ``sample_size`` and ``trials_range`` are module-level
    names (presumably provided by ``benchmarks.common`` -- confirm).

    Args:
        filename: Path of the CSV file to create or overwrite.
    """
    from generators.nb_gm_003 import SequenceGenerator

    generator = SequenceGenerator(choices, n)
    # TODO store timing details
    # The csv module expects files it writes to be opened with newline='';
    # otherwise platforms that translate line endings emit blank rows.
    with open(filename, mode='w', newline='') as benchmark_results_file:
        writer = csv.writer(
            benchmark_results_file,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writerow(['index'] + choices + ['time_per_block', 'seq', 'ralph_skewed'])
        for i in range(sample_size):
            trials = random.randint(trials_range[0], trials_range[1])
            # NOTE(review): true division, so `targets` is a float -- confirm
            # that SequenceGenerator.generate accepts a non-integer target count.
            targets = trials / 3
            # perf_counter is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments.
            started = time.perf_counter()
            seq = generator.generate(trials, targets)
            t = time.perf_counter() - started
            print(f"sequence {i}/{sample_size}: {trials} trials, took {t}s.")
            writer.writerow(to_csv_row(i, seq, t))

    # Plot the file that was just written and closed. Use the parameter rather
    # than a script-level global so the function also works when imported.
    plot_skewness(filename, 'nb_gm_003')
def plot_skewness(csv_filename, figure_title):
    """Plot, per sequence length, the percentage of sequences flagged skewed.

    Reads a benchmark CSV, derives each row's trial count by summing the
    per-choice frequency columns (named by the module-level ``choices``), and
    scatters the share of rows whose ``ralph_skewed`` flag is set against the
    trial count. A cubic least-squares fit is drawn in red when there are
    enough distinct lengths to determine it. The figure is saved to
    ``results/<figure_title>.png`` and then shown.

    Args:
        csv_filename: Path of the CSV file to read.
        figure_title: Plot title; also the stem of the saved PNG file.
    """
    import pandas as pd
    import numpy as np
    from matplotlib import pyplot as plt

    fit_degree = 3

    data = pd.read_csv(csv_filename)
    data['trials'] = data[choices].sum(axis=1)

    # Rows without any trial say nothing about skewness, so leave them out.
    scored = data[data['trials'] > 0]
    # The mean of a boolean flag within a group is the fraction of flagged
    # rows; one grouped pass replaces a full-frame scan per sequence length
    # and also copes with a CSV that has no data rows.
    skewed_share = (
        scored['ralph_skewed'].astype(bool).groupby(scored['trials']).mean() * 100.0
    )
    stats = pd.DataFrame({
        'trials': skewed_share.index.to_numpy(),
        'skewness': skewed_share.to_numpy(),
    })

    plt.ylim([0, 110])
    plt.scatter(stats.trials, stats.skewness, alpha=0.1)

    # A degree-3 polynomial needs at least four points to be determined;
    # with fewer, skip the trend line instead of fitting noise or failing.
    if len(stats) > fit_degree:
        p = np.poly1d(np.polyfit(stats.trials, stats.skewness, fit_degree))
        plt.plot(stats.trials, p(stats.trials), color='red')

    plt.title(figure_title)
    plt.ylabel('skewed blocks (%)')
    plt.xlabel('# of trials')
    plt.savefig(f'results/{figure_title}.png', bbox_inches='tight')
    plt.show()
def plot_targets_ratio(csv_filename, figure_title):
    """Scatter the percentage of targets in each sequence against its length.

    Reads a benchmark CSV, derives each row's trial count by summing the
    per-choice frequency columns (named by the module-level ``choices``), and
    takes the first element returned by ``count_targets_and_lures(seq, n)`` as
    that row's number of targets. The figure is saved to
    ``results/<figure_title>_targets_ratio.png`` and then shown.

    Args:
        csv_filename: Path of the CSV file to read.
        figure_title: Prefix of the plot title and stem of the saved PNG file.
    """
    import pandas as pd
    from matplotlib import pyplot as plt

    data = pd.read_csv(csv_filename)
    data['trials'] = data[choices].sum(axis=1)
    # Only the `seq` column is needed here, so iterate over it directly
    # instead of materialising every row with DataFrame.apply(axis=1).
    data['targets'] = [count_targets_and_lures(seq, n)[0] for seq in data['seq']]
    # Plain column arithmetic: vectorised, and well-defined for an empty frame.
    data['targets_ratio'] = data['targets'] * 100 / data['trials']

    plt.title(f"{figure_title} - expected targets ratio = 33%")
    plt.ylabel('targets (%)')
    plt.xlabel('# of trials')
    plt.scatter(data.trials, data.targets_ratio, alpha=0.3)
    plt.savefig(f'results/{figure_title}_targets_ratio.png', bbox_inches='tight')
    plt.show()
def benchmark_targets_ratio():
    """Placeholder: not implemented yet; does nothing and returns ``None``."""
def benchmark_chunking():
    """Placeholder: not implemented yet; does nothing and returns ``None``."""
def benchmark_n():
    """Placeholder: not implemented yet; does nothing and returns ``None``."""
def plot_timing(csv_filename, figure_title):
    """Scatter the average generation time per trial against sequence length.

    Reads a benchmark CSV, derives each row's trial count by summing the
    per-choice frequency columns (named by the module-level ``choices``), and
    divides the row's ``time_per_block`` by that count. The matplotlib
    ``ggplot`` style is activated before drawing. The figure is saved to
    ``results/<figure_title>_timing.png`` and then shown.

    Args:
        csv_filename: Path of the CSV file to read.
        figure_title: Prefix of the plot title and stem of the saved PNG file.
    """
    import pandas as pd
    from matplotlib import pyplot as plt

    data = pd.read_csv(csv_filename)
    data['trials'] = data[choices].sum(axis=1)
    # Plain column division: vectorised, and well-defined for an empty frame,
    # unlike a row-wise DataFrame.apply(axis=1).
    data['time_per_trial'] = data['time_per_block'] / data['trials']

    plt.style.use('ggplot')
    plt.title(f"{figure_title} - average time to generate a trial")
    plt.ylabel('time (s)')
    plt.xlabel('# of trials')
    plt.scatter(data.trials, data.time_per_trial, alpha=0.5)

    # Optional cubic trend line, currently disabled:
    # import numpy as np
    # p = np.poly1d(np.polyfit(data.trials, data.time_per_trial, 3))
    # plt.plot(data.trials, p(data.trials), color='red')

    plt.savefig(f'results/{figure_title}_timing.png', bbox_inches='tight')
    plt.show()
if __name__ == '__main__':
    # Script entry point: run the skewness benchmark and write its results to
    # results/<algorithm>_<n>back.csv, where `n` is a module-level name.
    # `alg` and `fn` stay module-level names with their original spelling.
    alg = 'nb_gm_003'
    fn = 'results/{}_{}back.csv'.format(alg, n)
    benchmark_skewness(fn)