# adaptive-nback / generators / nb_gm_003.py
# Morteza Ansarinia on 25 Feb 2019 (5 KB): refactor benchmark and add visualization
import random
import scipy.stats


class SequenceGenerator:
    """nb_gm_003: greedy generator of n-back stimulus sequences.

        - pseudo-random sampling.
        - specific number of matching trials.
        - even distribution of stimuli.

    The sequence is grown chunk by chunk. For every step all permutations of
    the choices with the requested chunk length are scored with `cost` and the
    cheapest one is appended.
    """

    def __init__(
        self,
        choices,
        trials,
        n=3,
        match_ratio=0.33            # ratio of matched trials (targets) in all trials
    ):
        """Store the configuration and build the distributions used for scoring.

        :param choices: stimuli to draw from (must not be empty).
        :param trials: number of trials in the generated sequence.
        :param n: the "n" of the n-back task.
        :param match_ratio: desired ratio of targets among all trials.
        """
        # A private copy is kept so that shuffling candidates never reorders
        # the list owned by the caller.
        self.trials, self.choices, self.n, self.targets_ratio = trials, list(choices), n, match_ratio
        self.seq = []

        self.evendist_norm = scipy.stats.norm(0, trials/len(self.choices))
        self.matchratio_norm = scipy.stats.norm(match_ratio, 0.2)
        self.tlratio_norm = scipy.stats.norm(2.0, trials/2)

    def generate(self):
        """Extend `self.seq` until it holds exactly `self.trials` items and return it.

        Chunks of `n + 1` stimuli are appended; the last chunk is shortened so
        the sequence never exceeds the requested number of trials.

        :raises ValueError: if a chunk is longer than the number of choices.
        """
        while len(self.seq) < self.trials:
            remaining = self.trials - len(self.seq)
            self.seq += self.__find_best_next_chunk(min(self.n + 1, remaining))
        return self.seq

    def __find_best_next_chunk(self, chunk_size) -> list:
        """Return the permutation of `chunk_size` choices with the lowest cost.

        :raises ValueError: if no permutation of that length exists.
        """
        from itertools import permutations
        all_chunks = list(permutations(self.choices, chunk_size))
        if not all_chunks:
            # An empty chunk would make generate() spin forever.
            raise ValueError(
                f"cannot build a chunk of {chunk_size} distinct stimuli "
                f"from {len(self.choices)} choices"
            )
        random.shuffle(all_chunks)  # ties are broken pseudo-randomly
        best_chunk = min(all_chunks, key=lambda chunk: self.cost(self.seq + list(chunk)))
        return list(best_chunk)

    def __find_best_next_choice(self) -> list:
        """Return a one-item list holding the single choice with the lowest cost."""
        candidates = list(self.choices)
        random.shuffle(candidates)  # to avoid ordering effect
        best_choice = min(candidates, key=lambda choice: self.cost(self.seq + [choice]))
        # Wrap instead of list(best_choice): a stimulus may be a non-iterable
        # value or a multi-character string.
        return [best_choice]

    def cost(self, seq):
        """Return the total cost of `seq`: even-distribution cost plus match-ratio cost."""
        return self.evendist_cost(seq) + self.matchratio_cost(seq)

    def evendist_cost(self, seq):
        """Score how far the most deviating stimulus count is from an even split.

        Items of `seq` that are not part of `self.choices` are ignored.
        """
        dist = {c: 0.0 for c in self.choices}
        for c in seq:
            if c in dist:
                dist[c] += 1.0
        even_dist = self.trials / len(self.choices)
        max_deviation_from_even_dist = max(abs(v - even_dist) for v in dist.values())
        return 1.0 - self.evendist_norm.pdf(max_deviation_from_even_dist)

    def matchratio_cost(self, seq):
        """Score how far the ratio of targets in `seq` is from the desired ratio.

        An empty sequence is treated as having a ratio of zero.
        """
        matches, _ = self.count_matches_and_lures(seq)
        ratio = matches / len(seq) if seq else 0.0
        return 1.0 - self.matchratio_norm.pdf(ratio)

    def count_matches_and_lures(self, seq):
        """Count targets and lures in `seq`.

        A target is a trial equal to the trial `n` positions back. A lure is a
        non-target trial equal to the trial `n - 1` or `n + 1` positions back.

        :return: tuple `(targets, lures)` of floats.
        """
        n, targets, lures = self.n, 0.0, 0.0
        for index in range(n, len(seq)):
            current = seq[index]
            if current == seq[index - n]:
                targets += 1.0
                continue
            # n - 1 == 0 would compare the trial with itself, so skip it for 1-back.
            near_lure = n > 1 and current == seq[index - (n - 1)]
            # Guard the lower bound so the index never wraps around to the tail.
            far_lure = index >= n + 1 and current == seq[index - (n + 1)]
            if near_lure or far_lure:
                lures += 1.0
        return targets, lures

    def tlratio_cost(self, seq):
        """Calculates the T/L ratio in a block of trials."""
        matches, lures = self.count_matches_and_lures(seq)
        if lures < 0.01:  # avoid division by zero
            lures = 0.01
        return 1.0 - self.tlratio_norm.pdf(matches/lures)

def __generate_stat_csv(filename):
    """Benchmark the generator and store the stimulus distribution of each run.

    Generates 100 2-back sequences over six letters, each with a random number
    of trials between 24 and 100, writes one CSV row per sequence (index,
    count of every letter, skewness flag) and finally plots the result.

    :param filename: path of the CSV file to write.
    """
    alphabetic_choices = ['A', 'B', 'C', 'D', 'E', 'F']
    min_trials, max_trials = 24, 100
    n = 2
    import csv
    import heapq
    # newline='' leaves line-ending handling to the csv writer.
    with open(filename, mode='w', newline='') as stat_dist_file:
        writer = csv.writer(stat_dist_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['index'] + alphabetic_choices + ['ralph_skewed'])
        for i in range(100):
            trials = random.randint(min_trials, max_trials)
            generator = SequenceGenerator(alphabetic_choices, n=n, trials=trials)
            seq = generator.generate()
            print(f'matches: {generator.count_matches_and_lures(seq)[0]}/{trials} for {"".join(seq)}')
            print('-----------------------------------------')
            dist = [float(seq.count(c)) for c in alphabetic_choices]
            # Skewed: the most frequent half of the stimuli cover more than 2/3 of the trials.
            ralph_skewed = sum(heapq.nlargest(int(len(alphabetic_choices)/2), dist)) > (trials*2/3)
            writer.writerow([str(i)] + dist + [str(ralph_skewed)])
    # The plot needs the stimulus column names to rebuild the trial counts.
    __show_skweness_diagram(filename, alphabetic_choices)


def __show_skweness_diagram(filename, choices):
    """Plot, per sequence length, how many benchmarked sequences were skewed.

    :param filename: CSV file holding one row per generated sequence.
    :param choices: names of the columns holding the per-stimulus counts.
    """
    import pandas as pd
    from matplotlib import pyplot as plt

    print(filename)
    frame = pd.read_csv(filename)
    # The length of a sequence is the sum of its per-stimulus counts.
    frame['trials'] = frame[choices].sum(axis=1)
    longest = frame['trials'].max()
    shortest = frame['trials'].min()

    rows = []
    for length in range(int(shortest), int(longest) + 1):
        has_length = frame.trials == length
        total = frame[has_length].trials.count()
        skewed = frame[has_length & (frame['ralph_skewed']==True)].trials.count()
        rows.append([length, total, skewed])

    summary = pd.DataFrame(
        rows,
        columns=['trials', 'num_of_sequences', 'num_of_skewed_sequences'],
    )
    plt.scatter(summary.trials, summary.num_of_skewed_sequences)
    plt.show()

if __name__ == '__main__':
    # Script entry point: run the benchmark and store its statistics.
    stat_csv_path = '../benchmarks/nb_gm_003_2back_24trials.csv'
    __generate_stat_csv(stat_csv_path)