import csv
import heapq
import math
import random
import sys
from itertools import permutations

import scipy.stats


class SequenceGenerator:
    """nb_gm_003: greedy generator of n-back stimulus sequences.

    Design goals:
      - pseudo-random sampling.
      - specific number of matching trials.
      - even distribution of stimuli.

    The sequence is grown chunk by chunk; every candidate chunk is scored by
    `cost` and the cheapest one is appended.
    """

    def __init__(
        self,
        choices,
        trials,
        n=3,
        match_ratio=0.33  # ratio of matched trials (targets) in all trials
    ):
        """Store the configuration and build the distributions used for scoring.

        Args:
            choices: list of available stimuli.
            trials: number of trials the generated sequence should contain.
            n: the "n" of the n-back task.
            match_ratio: desired ratio of targets among all trials.
        """
        self.trials, self.choices, self.n, self.targets_ratio = trials, choices, n, match_ratio
        self.seq = []
        self.evendist_norm = scipy.stats.norm(0, trials / len(self.choices))
        self.matchratio_norm = scipy.stats.norm(match_ratio, 0.2)
        self.tlratio_norm = scipy.stats.norm(2.0, trials / 2)

    def generate(self):
        """Grow `self.seq` until it holds `trials` items and return it.

        Chunks of `n + 1` items are appended, so the last chunk may overshoot
        the requested length; the surplus is trimmed before returning.

        Raises:
            ValueError: if no chunk can be selected (see
                `__find_best_next_chunk`).
        """
        while not self.seq or len(self.seq) < self.trials:
            self.seq += self.__find_best_next_chunk(self.n + 1)
        # Drop the overshoot of the final chunk so the block has exactly the
        # requested number of trials.
        del self.seq[math.ceil(self.trials):]
        return self.seq

    def __find_best_next_chunk(self, chunk_size) -> list:
        """Return the permutation of `chunk_size` choices with the lowest cost.

        Candidates are shuffled first so that ties are broken randomly.

        Raises:
            ValueError: if no candidate could be selected, e.g. when
                `chunk_size` exceeds the number of choices. Without this the
                caller's loop would never terminate.
        """
        candidates = list(permutations(self.choices, chunk_size))
        random.shuffle(candidates)

        min_cost = sys.float_info.max
        best_chunk = []
        for candidate in candidates:
            chunk = list(candidate)
            cost = self.cost(self.seq + chunk)
            if cost < min_cost:
                min_cost, best_chunk = cost, chunk

        if not best_chunk:
            raise ValueError(
                f"no valid chunk of size {chunk_size} could be selected "
                f"from {len(self.choices)} choices"
            )
        return best_chunk

    def __find_best_next_choice(self) -> list:
        """Return a one-item list holding the single cheapest next choice.

        Returns an empty list when there are no choices to pick from.
        """
        # Iterate over a shuffled copy (to avoid ordering effects) instead of
        # shuffling the caller's list in place.
        shuffled = random.sample(self.choices, len(self.choices))

        min_cost = sys.float_info.max
        best_choice = None
        found = False
        for choice in shuffled:
            cost = self.cost(self.seq + [choice])
            if cost < min_cost:
                min_cost, best_choice, found = cost, choice, True
        # Wrap instead of list(best_choice): list() would split a
        # multi-character string and fail on non-iterable stimuli.
        return [best_choice] if found else []

    def cost(self, seq):
        """Total cost of `seq`: even-distribution cost plus match-ratio cost."""
        return self.evendist_cost(seq) + self.matchratio_cost(seq)

    def evendist_cost(self, seq):
        """Cost of the largest deviation of any choice from an even share.

        Items of `seq` that are not among `self.choices` are ignored.
        """
        counts = {c: 0.0 for c in self.choices}
        for item in seq:
            if item in counts:
                counts[item] += 1.0
        even_share = self.trials / len(self.choices)
        max_deviation = max(abs(count - even_share) for count in counts.values())
        return 1.0 - self.evendist_norm.pdf(max_deviation)

    def matchratio_cost(self, seq):
        """Cost of the observed target ratio in `seq` w.r.t. the desired ratio.

        An empty sequence is treated as having a target ratio of 0.
        """
        matches, _ = self.count_matches_and_lures(seq)
        ratio = matches / len(seq) if len(seq) else 0.0
        return 1.0 - self.matchratio_norm.pdf(ratio)

    def count_matches_and_lures(self, seq):
        """Count targets and lures in `seq`.

        A target is an item equal to the item `n` positions back. A lure is a
        non-target item equal to the item `n - 1` or `n + 1` positions back.

        Returns:
            Tuple `(targets, lures)` of floats.
        """
        n = self.n
        targets, lures = 0.0, 0.0
        # Offset 0 would compare an item with itself, so it is never a lure.
        lure_offsets = [offset for offset in (n - 1, n + 1) if offset > 0]
        for index in range(n, len(seq)):
            item = seq[index]
            if item == seq[index - n]:
                targets += 1.0
            elif any(
                # `index >= offset` prevents negative indices from wrapping
                # around to the end of the sequence.
                index >= offset and seq[index - offset] == item
                for offset in lure_offsets
            ):
                lures += 1.0
        return targets, lures

    def tlratio_cost(self, seq):
        """Calculates the cost of the T/L ratio in a block of trials."""
        matches, lures = self.count_matches_and_lures(seq)
        if lures < 0.01:  # avoid division by zero
            lures = 0.01
        return 1.0 - self.tlratio_norm.pdf(matches / lures)


def __generate_stat_csv(filename):
    """Generate 100 random 2-back sequences, write per-stimulus counts to a
    CSV file at `filename`, then plot the skewness diagram from that file.
    """
    alphabetic_choices = ['A', 'B', 'C', 'D', 'E', 'F']
    min_trials, max_trials = 24, 100
    n = 2

    # newline='' is what the csv module expects for files it writes to.
    with open(filename, mode='w', newline='') as stat_dist_file:
        writer = csv.writer(stat_dist_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['index'] + alphabetic_choices + ['ralph_skewed'])
        for i in range(100):
            trials = random.randint(min_trials, max_trials)
            generator = SequenceGenerator(alphabetic_choices, n=n, trials=trials)
            seq = generator.generate()
            print(f'matches: {generator.count_matches_and_lures(seq)[0]}/{trials} for {"".join(seq)}')
            print('-----------------------------------------')
            dist = [float(seq.count(c)) for c in alphabetic_choices]
            # Skewed: the most frequent half of the stimuli covers more than
            # two thirds of the trials.
            top_half = heapq.nlargest(int(len(alphabetic_choices) / 2), dist)
            ralph_skewed = sum(top_half) > (trials * 2 / 3)
            writer.writerow([str(i)] + dist + [str(ralph_skewed)])

    # Plot only after the file has been closed, so all rows are on disk.
    __show_skweness_diagram(filename, alphabetic_choices)


def __show_skweness_diagram(filename, choices):
    """Scatter-plot the number of skewed sequences per trial count.

    Args:
        filename: CSV file produced by `__generate_stat_csv`.
        choices: names of the per-stimulus count columns in that file.
    """
    import pandas as pd
    from matplotlib import pyplot as plt

    print(filename)
    data = pd.read_csv(filename)
    # The number of trials in a sequence is the sum of its per-stimulus counts.
    data['trials'] = data[choices].sum(axis=1)
    max_trials = data['trials'].max()
    min_trials = data['trials'].min()

    stats = []
    for t in range(int(min_trials), int(max_trials) + 1):
        dt = data[data.trials == t].trials.count()
        st = data[(data.trials == t) & (data['ralph_skewed'] == True)].trials.count()
        stats.append([t, dt, st])

    stats = pd.DataFrame(stats, columns=['trials', 'num_of_sequences', 'num_of_skewed_sequences'])
    plt.scatter(stats.trials, stats.num_of_skewed_sequences)
    plt.show()


if __name__ == '__main__':
    __generate_stat_csv('../benchmarks/nb_gm_003_2back_24trials.csv')