# adaptive-nback / generators / nb_gm_004.py
# Morteza Ansarinia on 25 Feb 2019 (5 KB): refactor benchmark and add visualization
import random
import scipy.stats


class SequenceGenerator:
    """Greedily build an n-back sequence, one trial at a time.

    At every step each available stimulus is tentatively appended to the
    sequence built so far and the candidate with the lowest :meth:`cost` is
    kept.  The cost combines three terms: the ratio of targets to planned
    trials, the targets/lures (T/L) ratio, and how evenly the stimuli are used.
    """

    def __init__(self, choices, trials, tl=4.0, n=3, targets_ratio=0.33):
        """Set up the generator and the distributions used to score sequences.

        :param choices: iterable of stimuli (e.g. letters) a trial can take.
        :param trials: number of trials in the finished sequence.
        :param tl: desired targets/lures ratio.
        :param n: the n-back distance that defines a target.
        :param targets_ratio: desired number of targets divided by ``trials``.
        :raises ValueError: if ``choices`` is empty or ``trials`` is below 1;
            either would prevent the generation loop from ever finishing.
        """
        # Keep a private copy so the caller's list is never reordered or shared.
        choices = list(choices)
        if not choices:
            raise ValueError('choices must contain at least one stimulus')
        if trials < 1:
            raise ValueError('trials must be at least 1')
        self.tl, self.trials, self.choices, self.n, self.targets_ratio = tl, trials, choices, n, targets_ratio
        self.sequence = list()
        # Each cost term is ``1 - pdf(x)`` of a normal centred on the desired value.
        self.norm_even_dist = scipy.stats.norm(0, trials/2)
        self.norm_targets_ratio_dist = scipy.stats.norm(targets_ratio, 0.5)
        self.norm_tl_ratio_dist = scipy.stats.norm(tl, trials/2)

    def generate(self):
        """Extend the sequence until it holds ``trials`` items and return it."""
        while len(self.sequence) < self.trials:
            self.sequence = self.__find_best_next_sequence(self.sequence, self.choices)
        return self.sequence

    def next_trial(self):
        """Append one more trial and return it, or ``None`` once the sequence is complete."""
        if len(self.sequence) >= self.trials:
            return None
        self.sequence = self.__find_best_next_sequence(self.sequence, self.choices)
        return self.sequence[-1]

    def __find_best_next_sequence(self, seq: list, choices: list) -> list:
        """Return ``seq`` extended by the choice that yields the lowest cost.

        :param seq: the sequence built so far (left untouched).
        :param choices: candidate stimuli for the next trial (left untouched).
        :return: a new list, one item longer than ``seq``.
        """
        # Shuffle a copy: randomises tie-breaking without reordering the input.
        candidates = list(choices)
        random.shuffle(candidates)
        min_cost = float('inf')
        best_seq = None
        for choice in candidates:
            # Append the choice as a single item; ``list(choice)`` would split
            # multi-character stimuli into separate trials.
            tmp_seq = seq + [choice]
            cost = self.cost(tmp_seq)
            if cost < min_cost:
                min_cost = cost
                best_seq = tmp_seq
        if best_seq is None:
            # No candidate compared below infinity (e.g. NaN costs): still
            # advance so that callers looping on the length terminate.
            best_seq = seq + candidates[:1]
        return best_seq

    def calc_even_distribution_distance(self, seq):
        """Measure how far the stimulus usage is from a uniform distribution.

        :param seq: a sequence of stimuli (list or string); items that are not
            in ``choices`` are ignored.
        :return: the largest ``abs(count - trials/len(choices)) / trials`` over
            all choices.
        """
        counts = {c: 0.0 for c in self.choices}
        for item in seq:
            if item in counts:
                counts[item] += 1.0
        even_ratio = self.trials / len(self.choices)
        return max(abs(count - even_ratio) / self.trials for count in counts.values())

    def cost(self, seq):
        """Calculate the overall cost of a sequence (lower is better).

        The cost is the sum of three ``1 - pdf(x)`` terms, for the targets
        ratio, the T/L ratio and the even-distribution distance.

        :param seq: a sequence of stimuli.
        :return: the summed cost.
        """
        # Count once and reuse the result for both ratio terms.
        targets, lures = self.count_targets_and_lures(seq)
        targets_ratio_cost = 1.0 - self.norm_targets_ratio_dist.pdf(targets/self.trials)
        tl_ratio_cost = 1.0 - self.norm_tl_ratio_dist.pdf(self._tl_ratio(targets, lures))
        even_dist_cost = 1.0 - self.norm_even_dist.pdf(self.calc_even_distribution_distance(seq))
        return targets_ratio_cost + tl_ratio_cost + even_dist_cost

    def count_targets_and_lures(self, seq):
        """Count targets and lures in a sequence.

        A trial is a target when it equals the item ``n`` positions back.
        Otherwise it is a lure when it equals the item ``n-1`` or ``n+1``
        positions back.

        :param seq: a sequence of stimuli.
        :return: ``(targets, lures)`` as floats.
        """
        n = self.n
        # A look-back distance below 1 would compare an item with itself (or
        # with a later item), so such distances are not lure positions.
        lure_distances = [d for d in (n - 1, n + 1) if d >= 1]
        targets = 0.0
        lures = 0.0
        for index in range(n, len(seq)):
            item = seq[index]
            if item == seq[index - n]:
                targets += 1.0
            # ``index - d >= 0`` stops negative indices from wrapping around to
            # the end of the sequence.
            elif any(index - d >= 0 and item == seq[index - d] for d in lure_distances):
                lures += 1.0
        return targets, lures

    @staticmethod
    def _tl_ratio(targets, lures):
        """Divide targets by lures, flooring lures at 0.01 to avoid division by zero."""
        return targets / max(lures, 0.01)

    def calc_tl_ratio(self, seq):
        """Calculates the T/L ratio in a block of trials."""
        targets, lures = self.count_targets_and_lures(seq)
        return self._tl_ratio(targets, lures)


def __generate_stat_csv(filename, trials=64, n=2, num_sequences=10):
    """Generate sequences, write their stimulus distributions to a CSV, then plot them.

    Each row holds the sequence index, the number of occurrences of every
    stimulus and a ``ralph_skewed`` flag. The flag is ``True`` when the most
    frequent half of the stimuli accounts for more than two thirds of the trials.

    :param filename: path of the CSV file to write (overwritten if present).
    :param trials: number of trials per generated sequence.
    :param n: the n-back distance handed to the generator.
    :param num_sequences: how many sequences to generate.
    """
    import csv
    import heapq

    alphabetic_choices = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
    top_half = len(alphabetic_choices) // 2
    # newline='' leaves line endings to the csv module, as its documentation
    # requires (otherwise blank rows can appear on some platforms).
    with open(filename, mode='w', newline='') as stat_dist_file:
        writer = csv.writer(stat_dist_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['index'] + alphabetic_choices + ['ralph_skewed'])
        for i in range(num_sequences):
            print(f'generating sequence {i}...')
            # Hand the generator its own copy, so any in-place reordering it
            # performs cannot break the match between the header and the counts below.
            generator = SequenceGenerator(list(alphabetic_choices), n=n, trials=trials)
            seq = generator.generate()
            dist = [float(seq.count(c)) for c in alphabetic_choices]
            ralph_skewed = sum(heapq.nlargest(top_half, dist)) > (trials*2/3)
            writer.writerow([str(i)] + dist + [str(ralph_skewed)])
    # The plotting helper needs the stimulus columns to total the trials per row.
    __show_skweness_diagram(filename, alphabetic_choices)

def __show_skweness_diagram(filename, choices=None):
    """Plot, per sequence length, how many sequences in a stats CSV are flagged as skewed.

    :param filename: path of a CSV with one count column per stimulus and a
        ``ralph_skewed`` column.
    :param choices: names of the stimulus count columns. When omitted, every
        column other than ``index`` and ``ralph_skewed`` is used.
    """
    import pandas as pd
    from matplotlib import pyplot as plt
    print(filename)
    data = pd.read_csv(filename)
    if choices is None:
        # Infer the stimulus columns so callers that only know the file name still work.
        choices = [col for col in data.columns if col not in ('index', 'ralph_skewed')]
    else:
        choices = list(choices)
    # Total number of trials in each sequence = sum of its per-stimulus counts.
    data['trials'] = data[choices].sum(axis=1)
    skewed = data['ralph_skewed'].eq(True)
    stats = []
    if not data.empty:  # min()/max() are NaN on an empty table
        min_trials = int(data['trials'].min())
        max_trials = int(data['trials'].max())
        for t in range(min_trials, max_trials + 1):
            has_t_trials = data['trials'] == t
            stats.append([t, int(has_t_trials.sum()), int((has_t_trials & skewed).sum())])
    stats = pd.DataFrame(stats, columns=['trials', 'num_of_sequences','num_of_skewed_sequences'])
    plt.scatter(stats.trials,stats.num_of_skewed_sequences)
    plt.xlabel('trials')
    plt.ylabel('num_of_skewed_sequences')
    plt.show()


if __name__ == '__main__':
    # Script entry point: build the benchmark CSV and show the skewness plot.
    # NOTE(review): the file name says "24trials" while __generate_stat_csv
    # produces 64-trial sequences -- confirm which one is intended.
    benchmark_csv = '../benchmarks/nb_gm_004_2back_24trials.csv'
    __generate_stat_csv(benchmark_csv)