"""Benchmark, profiling and plotting helpers for the ``nb_gm_002`` generator.

The module can

* write a CSV of generated sequences with per-choice frequencies
  (:func:`benchmark`),
* plot how often the generated sequences are skewed
  (:func:`skewness_diagram`),
* plot the share of targets per sequence (:func:`targets_ratio_diagram`),
* time the generator over random parameters (:func:`profile`).

Configuration (``choices``, ``n``, ``sample_size``, ``trials_range``) and the
``count_targets_and_lures`` / ``skewness`` helpers come from
``benchmarks.common``.
"""

import csv
import heapq
import os
import random
import time

import numpy as np
import pandas as pd

import benchmarks.common as common
from benchmarks.common import *


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it is missing."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def to_csv_row(sample_index, seq):
    """Build one CSV row describing a generated sequence.

    Args:
        sample_index: Index of the sample; written as the first cell.
        seq: Sequence of string items; it must support ``len``, ``count``
            and ``''.join``.

    Returns:
        A list laid out as ``[index, <one frequency per choice>, joined
        sequence, ralph_skewed]``. Frequencies are floats; the index and the
        skewness flag are strings.
    """
    trials = len(seq)
    freqs = [float(seq.count(c)) for c in choices]
    # The sequence is flagged as skewed when the most frequent half of the
    # choices accounts for more than two thirds of all trials.
    top_half = heapq.nlargest(len(choices) // 2, freqs)
    ralph_skewed = sum(top_half) > (trials * 2 / 3)
    return [str(sample_index)] + freqs + [''.join(seq), str(ralph_skewed)]


def benchmark(filename):
    """Generate ``sample_size`` sequences and write their statistics as CSV.

    Each sequence has a random length drawn from ``trials_range`` (both ends
    inclusive) and requests ``int(trials / 3)`` targets; the last two
    arguments passed to ``generate`` are 0 (presumably the lure counts, as in
    ``profile`` -- confirm against the generator).

    Args:
        filename: Path of the CSV file to (over)write.
    """
    from generators.nb_gm_002 import SequenceGenerator

    generator = SequenceGenerator(choices, n)
    # TODO store timing details
    _ensure_parent_dir(filename)
    # newline='' is required by the csv module; without it, rows are
    # separated by blank lines on platforms that translate '\n'.
    with open(filename, mode='w', newline='') as benchmark_results_file:
        writer = csv.writer(
            benchmark_results_file,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writerow(['index'] + choices + ['seq', 'ralph_skewed'])
        for i in range(sample_size):
            trials = random.randint(trials_range[0], trials_range[1])
            seq = generator.generate(trials, int(trials / 3), 0, 0)
            print(f"sequence {i}/{sample_size}: {trials} trials")
            writer.writerow(to_csv_row(i, seq))


def skewness_diagram(csv_filename, figure_title):
    """Plot the percentage of skewed sequences against sequence length.

    Reads a CSV in the layout written by :func:`benchmark`, groups the rows
    by their number of trials, and scatters the share of rows flagged
    ``ralph_skewed`` for every length that occurs. A cubic trend line is
    added when at least four distinct lengths are available. The figure is
    saved to ``results/<figure_title>.png`` and then shown.

    Args:
        csv_filename: Path of the benchmark CSV to read.
        figure_title: Plot title; also used as the output file name.
    """
    from matplotlib import pyplot as plt

    data = pd.read_csv(csv_filename)
    # The per-choice frequency columns add up to the sequence length.
    data['trials'] = data[choices].sum(axis=1)
    max_trials = data['trials'].max()
    min_trials = data['trials'].min()

    stats = []
    for sequence_length in range(int(min_trials), int(max_trials) + 1):
        same_length = data.trials == sequence_length
        # Count rows directly instead of summing the trials column.
        num_of_sequences = int(np.sum(same_length))
        if num_of_sequences == 0:
            continue
        skewed_sequences = int(
            np.sum(np.logical_and(same_length, data.ralph_skewed))
        )
        skewness = skewed_sequences * 100.0 / num_of_sequences
        stats.append([sequence_length, skewness])

    stats = pd.DataFrame(stats, columns=['trials', 'skewness'])

    plt.ylim([0, 110])
    plt.scatter(stats.trials, stats.skewness, alpha=0.1)
    # A cubic fit needs at least four points to be determined.
    if len(stats) > 3:
        p = np.poly1d(np.polyfit(stats.trials, stats.skewness, 3))
        plt.plot(stats.trials, p(stats.trials), color='red')
    plt.title(figure_title)
    plt.ylabel('skewed blocks (%)')
    plt.xlabel('# of trials')
    figure_path = f'results/{figure_title}.png'
    _ensure_parent_dir(figure_path)
    plt.savefig(figure_path, bbox_inches='tight')
    plt.show()


def targets_ratio_diagram(csv_filename, figure_title):
    """Plot the percentage of targets in each sequence against its length.

    Reads a CSV in the layout written by :func:`benchmark`, takes the first
    element returned by ``count_targets_and_lures`` as the number of targets
    in each sequence, and scatters ``targets * 100 / trials``. The figure is
    saved to ``results/<figure_title>_targets_ratio.png`` and then shown.

    Args:
        csv_filename: Path of the benchmark CSV to read.
        figure_title: Plot title prefix; also used in the output file name.
    """
    from matplotlib import pyplot as plt

    data = pd.read_csv(csv_filename)
    data['trials'] = data[choices].sum(axis=1)
    data['targets'] = [count_targets_and_lures(seq, n)[0] for seq in data.seq]
    data['targets_ratio'] = data.targets * 100 / data.trials

    plt.title(f"{figure_title} - expected targets ratio = 33%")
    plt.ylabel('targets (%)')
    plt.xlabel('# of trials')
    plt.scatter(data.trials, data.targets_ratio, alpha=0.3)
    figure_path = f'results/{figure_title}_targets_ratio.png'
    _ensure_parent_dir(figure_path)
    plt.savefig(figure_path, bbox_inches='tight')
    plt.show()


def profile():
    """Time the generator over random parameters and store the results.

    For each of ``common.sample_size`` samples a sequence is generated with a
    random length and a random ``n``; the generation time in seconds, the
    counted targets and lures, the value returned by ``common.skewness`` and
    the sequence itself are collected. The table is written to
    ``benchmarks/results/nb_gm_002_profile.csv``.
    """
    from generators.nb_gm_002 import SequenceGenerator

    # Label rows with the generator that is actually imported above; the
    # label previously read 'nb_gm_004'.
    alg = 'nb_gm_002'

    res = []
    for s in range(common.sample_size):
        # NOTE: np.random.randint excludes the upper bound, unlike the
        # inclusive random.randint used in benchmark().
        trials = np.random.randint(common.trials_range[0], common.trials_range[1])
        targets = int(trials / 3)
        lures = int(targets / 6)
        n = np.random.randint(2, 8)  # n is drawn from 2..7
        gen = SequenceGenerator(common.choices, n)

        # perf_counter is monotonic, so clock adjustments cannot distort
        # the measured interval.
        started = time.perf_counter()
        seq = gen.generate(trials, targets, int(lures / 2), int(lures / 2))
        elapsed = time.perf_counter() - started

        t, lu = common.count_targets_and_lures(seq, n)
        skewed = common.skewness(seq, common.choices)
        res.append([alg, n, trials, elapsed, t, lu, skewed, ''.join(seq)])
        print(f"sequence #{s+1} generated")

    res_df = pd.DataFrame(
        res,
        columns=['alg', 'n', 'trials', 'time', 'targets', 'lures', 'skewed', 'sequence'],
    )
    output_path = 'benchmarks/results/nb_gm_002_profile.csv'
    _ensure_parent_dir(output_path)
    res_df.to_csv(output_path, sep=',', encoding='utf-8')


if __name__ == '__main__':
    alg = 'nb_gm_002'
    # Path for benchmark() output; not used by profile().
    fn = f'results/{alg}_{n}back.csv'
    profile()