import random
import scipy.stats
class SequenceGenerator:
    """Greedily build an n-back trial sequence, one trial at a time.

    At every step each available choice is tentatively appended to the
    current sequence and the candidate with the lowest :meth:`cost` is kept.
    The cost combines three terms: how close the share of targets is to
    ``targets_ratio``, how close the targets/lures ratio is to ``tl``, and how
    evenly the choices are used.

    A *target* is a trial equal to the trial ``n`` positions earlier; a *lure*
    is a non-target trial equal to the trial ``n - 1`` or ``n + 1`` positions
    earlier.
    """

    def __init__(self, choices, trials, tl=4.0, n=3, targets_ratio=0.2):
        """Set up the progressive sequence generator.

        :param choices: non-empty collection of stimulus symbols to draw from.
        :param trials: number of trials in the finished sequence (positive).
        :param tl: desired targets-to-lures ratio.
        :param n: distance, in trials, that defines a target.
        :param targets_ratio: desired fraction of trials that are targets.
        :raises ValueError: if ``choices`` is empty or ``trials`` is not
            positive (either would make generation loop forever or produce
            undefined costs).
        """
        if len(choices) == 0:
            raise ValueError("choices must contain at least one symbol")
        if trials <= 0:
            raise ValueError("trials must be a positive number")
        self.tl = tl
        self.trials = trials
        self.choices = choices
        self.n = n
        self.targets_ratio = targets_ratio
        self.sequence = list()
        # Each cost term is ``1 - pdf(value)`` of one of these distributions.
        # NOTE(review): a normal pdf peaks at 1 / (scale * sqrt(2 * pi)), so
        # the two distributions with scale ``trials / 2`` contribute an almost
        # constant term for large ``trials`` -- confirm this is intended.
        self.norm_even_dist = scipy.stats.norm(0, trials/2)
        self.norm_targets_ratio_dist = scipy.stats.norm(targets_ratio, 0.5)
        self.norm_tl_ratio_dist = scipy.stats.norm(tl, trials/2)

    def generate(self):
        """Extend the stored sequence until it holds ``trials`` items.

        :return: the complete sequence (the same list kept in ``self.sequence``).
        """
        while len(self.sequence) < self.trials:
            self.sequence = self.__find_best_next_sequence(self.sequence, self.choices)
        return self.sequence

    def next_trial(self):
        """Append one more trial to the stored sequence and return it.

        :return: the newly chosen trial, or ``None`` once the sequence already
            holds ``trials`` items.
        """
        if len(self.sequence) >= self.trials:
            return None
        self.sequence = self.__find_best_next_sequence(self.sequence, self.choices)
        return self.sequence[-1]

    def __find_best_next_sequence(self, seq: list, choices: list) -> list:
        """Return ``seq`` extended by the single choice with the lowest cost.

        :param seq: the sequence built so far (not modified).
        :param choices: the symbols that may be appended (not modified).
        :return: a new list one element longer than ``seq``.
        """
        # Shuffle a copy so ties are broken randomly (avoiding an ordering
        # effect) without reordering the caller's list.
        candidates = list(choices)
        random.shuffle(candidates)
        best_seq = None
        min_cost = float('inf')
        for choice in candidates:
            # Append the choice as one element; ``list(choice)`` would split a
            # multi-character symbol into separate characters.
            tmp_seq = seq + [choice]
            cost = self.cost(tmp_seq)
            # Accept the first candidate unconditionally so the sequence always
            # grows, even if every cost is NaN and no comparison succeeds.
            if best_seq is None or cost < min_cost:
                min_cost = cost
                best_seq = tmp_seq
        return best_seq if best_seq is not None else seq

    def calc_even_distribution_distance(self, seq):
        """Measure how far the usage of choices is from a uniform distribution.

        :param seq: sequence of trials; items that are not in ``choices`` are
            ignored.
        :return: the largest per-choice value of
            ``abs(count - trials / len(choices)) / trials``.
        """
        counts = {c: 0.0 for c in self.choices}
        for symbol in seq:
            if symbol in counts:
                counts[symbol] += 1.0
        even_count = self.trials / len(self.choices)
        return max(abs(count - even_count) / self.trials for count in counts.values())

    def cost(self, seq):
        """Calculate the overall cost of a sequence (block of trials).

        Lower is better. The result is the sum of three ``1 - pdf(...)`` terms:
        targets share, targets/lures ratio, and evenness of choice usage.

        :param seq: sequence of trials to score.
        :return: the summed cost.
        """
        # Count once and reuse the counts for both ratio-based terms.
        targets, lures = self.count_targets_and_lures(seq, self.n)
        targets_ratio_cost = 1.0 - self.norm_targets_ratio_dist.pdf(targets/self.trials)
        tl_ratio_cost = 1.0 - self.norm_tl_ratio_dist.pdf(self._safe_ratio(targets, lures))
        even_dist_cost = 1.0 - self.norm_even_dist.pdf(self.calc_even_distribution_distance(seq))
        return targets_ratio_cost + tl_ratio_cost + even_dist_cost

    @staticmethod
    def _safe_ratio(targets, lures):
        """Return ``targets / lures`` with ``lures`` floored at 0.01."""
        if lures < 0.01:  # avoid division by zero
            lures = 0.01
        return targets/lures

    @staticmethod
    def count_targets_and_lures(seq, n: int):
        """Count the targets and lures in a sequence.

        :param seq: sequence of trials.
        :param n: distance, in trials, that defines a target.
        :return: ``(targets, lures)`` as floats.
        """
        targets = 0.0
        lures = 0.0
        for index in range(n, len(seq)):
            current = seq[index]
            if current == seq[index - n]:
                targets += 1.0
                continue
            # An offset of n - 1 == 0 would compare the trial with itself, so
            # the nearer lure position only exists when n > 1.
            near_lure = n > 1 and current == seq[index - (n - 1)]
            # At index == n the farther position would be -1, which Python
            # wraps around to the last element; skip it when out of range.
            far_lure = index - (n + 1) >= 0 and current == seq[index - (n + 1)]
            if near_lure or far_lure:
                lures += 1.0
        return targets, lures

    def calc_tl_ratio(self, seq, n: int):
        """Calculate the targets/lures ratio in a block of trials.

        :param seq: sequence of trials.
        :param n: distance, in trials, that defines a target.
        :return: ``targets / lures``, with ``lures`` floored at 0.01.
        """
        targets, lures = self.count_targets_and_lures(seq, n)
        return self._safe_ratio(targets, lures)
if __name__ == '__main__':
    # Demo run: build one 128-trial 3-back block over eight symbols and print
    # its target/lure counts, T/L ratio and even-distribution distance.
    n_back = 3
    symbols = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
    gen = SequenceGenerator(symbols, trials=128, n=n_back)
    block = gen.generate()
    ratio = gen.calc_tl_ratio(block, n=n_back)
    distance = gen.calc_even_distribution_distance(block)
    counts = gen.count_targets_and_lures(block, n=n_back)
    print(
        'Progressively-Optimized Sequence: targets=%d, lures=%d' % counts,
        'with tl_ratio=%f' % ratio,
        'and even_dist_cost=%f' % distance,
    )