Coverage for /builds/ase/ase/ase/ga/population.py : 26.18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""" Implementation of a population for maintaining a GA population and
2proposing structures to pair. """
3from math import tanh, sqrt, exp
4from operator import itemgetter
5import numpy as np
7from ase.db.core import now
8from ase.ga import get_raw_score
11def count_looks_like(a, all_cand, comp):
12 """Utility method for counting occurrences."""
13 n = 0
14 for b in all_cand:
15 if a.info['confid'] == b.info['confid']:
16 continue
17 if comp.looks_like(a, b):
18 n += 1
19 return n
22class Population:
23 """Population class which maintains the current population
24 and proposes which candidates to pair together.
26 Parameters:
28 data_connection: DataConnection object
29 Bla bla bla.
31 population_size: int
32 The number of candidates in the population.
34 comparator: Comparator object
35 this will tell if two configurations are equal.
36 Default compare atoms objects directly.
38 logfile: str
39 Text file that contains information about the population
40 The format is::
42 timestamp: generation(if available): id1,id2,id3...
44 Using this file greatly speeds up convergence checks.
45 Default None meaning that no file is written.
47 use_extinct: boolean
48 Set this to True if mass extinction and the extinct key
49 are going to be used. Default is False.
51 rng: Random number generator
52 By default numpy.random.
53 """
55 def __init__(self, data_connection, population_size,
56 comparator=None, logfile=None, use_extinct=False,
57 rng=np.random):
58 self.dc = data_connection
59 self.pop_size = population_size
60 if comparator is None:
61 from ase.ga.standard_comparators import AtomsComparator
62 comparator = AtomsComparator()
63 self.comparator = comparator
64 self.logfile = logfile
65 self.use_extinct = use_extinct
66 self.rng = rng
67 self.pop = []
68 self.pairs = None
69 self.all_cand = None
70 self.__initialize_pop__()
72 def __initialize_pop__(self):
73 """ Private method that initializes the population when
74 the population is created. """
76 # Get all relaxed candidates from the database
77 ue = self.use_extinct
78 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
79 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'],
80 reverse=True)
81 # all_cand.sort(key=lambda x: x.get_potential_energy())
83 # Fill up the population with the self.pop_size most stable
84 # unique candidates.
85 i = 0
86 while i < len(all_cand) and len(self.pop) < self.pop_size:
87 c = all_cand[i]
88 i += 1
89 eq = False
90 for a in self.pop:
91 if self.comparator.looks_like(a, c):
92 eq = True
93 break
94 if not eq:
95 self.pop.append(c)
97 for a in self.pop:
98 a.info['looks_like'] = count_looks_like(a, all_cand,
99 self.comparator)
101 self.all_cand = all_cand
102 self.__calc_participation__()
104 def __calc_participation__(self):
105 """ Determines, from the database, how many times each
106 candidate has been used to generate new candidates. """
107 (participation, pairs) = self.dc.get_participation_in_pairing()
108 for a in self.pop:
109 if a.info['confid'] in participation.keys():
110 a.info['n_paired'] = participation[a.info['confid']]
111 else:
112 a.info['n_paired'] = 0
113 self.pairs = pairs
115 def update(self, new_cand=None):
116 """ New candidates can be added to the database
117 after the population object has been created.
118 This method extracts these new candidates from the
119 database and includes them in the population. """
121 if len(self.pop) == 0:
122 self.__initialize_pop__()
124 if new_cand is None:
125 ue = self.use_extinct
126 new_cand = self.dc.get_all_relaxed_candidates(only_new=True,
127 use_extinct=ue)
129 for a in new_cand:
130 self.__add_candidate__(a)
131 self.all_cand.append(a)
132 self.__calc_participation__()
133 self._write_log()
135 def get_current_population(self):
136 """ Returns a copy of the current population. """
137 self.update()
138 return [a.copy() for a in self.pop]
140 def get_population_after_generation(self, gen):
141 """ Returns a copy of the population as it where
142 after generation gen"""
143 if self.logfile is not None:
144 fd = open(self.logfile, 'r')
145 gens = {}
146 for line in fd:
147 _, no, popul = line.split(':')
148 gens[int(no)] = [int(i) for i in popul.split(',')]
149 fd.close()
150 return [c.copy() for c in self.all_cand[::-1]
151 if c.info['relax_id'] in gens[gen]]
153 all_candidates = [c for c in self.all_cand
154 if c.info['key_value_pairs']['generation'] <= gen]
155 cands = [all_candidates[0]]
156 for b in all_candidates:
157 if b not in cands:
158 for a in cands:
159 if self.comparator.looks_like(a, b):
160 break
161 else:
162 cands.append(b)
163 pop = cands[:self.pop_size]
164 return [a.copy() for a in pop]
166 def __add_candidate__(self, a):
167 """ Adds a single candidate to the population. """
169 # check if the structure is too low in raw score
170 raw_score_a = get_raw_score(a)
171 raw_score_worst = get_raw_score(self.pop[-1])
172 if raw_score_a < raw_score_worst \
173 and len(self.pop) == self.pop_size:
174 return
176 # check if the new candidate should
177 # replace a similar structure in the population
178 for (i, b) in enumerate(self.pop):
179 if self.comparator.looks_like(a, b):
180 if get_raw_score(b) < raw_score_a:
181 del self.pop[i]
182 a.info['looks_like'] = count_looks_like(a,
183 self.all_cand,
184 self.comparator)
185 self.pop.append(a)
186 self.pop.sort(key=lambda x: get_raw_score(x),
187 reverse=True)
188 return
190 # the new candidate needs to be added, so remove the highest
191 # energy one
192 if len(self.pop) == self.pop_size:
193 del self.pop[-1]
195 # add the new candidate
196 a.info['looks_like'] = count_looks_like(a,
197 self.all_cand,
198 self.comparator)
199 self.pop.append(a)
200 self.pop.sort(key=lambda x: get_raw_score(x), reverse=True)
202 def __get_fitness__(self, indecies, with_history=True):
203 """Calculates the fitness using the formula from
204 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
206 Sign change on the fitness compared to the formulation in the
207 abovementioned paper due to maximizing raw_score instead of
208 minimizing energy. (Set raw_score=-energy to optimize the energy)
209 """
211 scores = [get_raw_score(x) for x in self.pop]
212 min_s = min(scores)
213 max_s = max(scores)
214 T = min_s - max_s
215 if isinstance(indecies, int):
216 indecies = [indecies]
218 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.))
219 for i in indecies]
220 if with_history:
221 M = [float(self.pop[i].info['n_paired']) for i in indecies]
222 L = [float(self.pop[i].info['looks_like']) for i in indecies]
223 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i])
224 for i in range(len(f))]
225 return f
227 def get_two_candidates(self, with_history=True):
228 """ Returns two candidates for pairing employing the
229 fitness criteria from
230 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
231 and the roulete wheel selection scheme described in
232 R.L. Johnston Dalton Transactions,
233 Vol. 22, No. 22. (2003), pp. 4193-4207
234 """
236 if len(self.pop) < 2:
237 self.update()
239 if len(self.pop) < 2:
240 return None
242 fit = self.__get_fitness__(range(len(self.pop)), with_history)
243 fmax = max(fit)
244 c1 = self.pop[0]
245 c2 = self.pop[0]
246 used_before = False
247 while c1.info['confid'] == c2.info['confid'] and not used_before:
248 nnf = True
249 while nnf:
250 t = self.rng.randint(len(self.pop))
251 if fit[t] > self.rng.random() * fmax:
252 c1 = self.pop[t]
253 nnf = False
254 nnf = True
255 while nnf:
256 t = self.rng.randint(len(self.pop))
257 if fit[t] > self.rng.random() * fmax:
258 c2 = self.pop[t]
259 nnf = False
261 c1id = c1.info['confid']
262 c2id = c2.info['confid']
263 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs
264 return (c1.copy(), c2.copy())
266 def get_one_candidate(self, with_history=True):
267 """Returns one candidate for mutation employing the
268 fitness criteria from
269 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
270 and the roulete wheel selection scheme described in
271 R.L. Johnston Dalton Transactions,
272 Vol. 22, No. 22. (2003), pp. 4193-4207
273 """
274 if len(self.pop) < 1:
275 self.update()
277 if len(self.pop) < 1:
278 return None
280 fit = self.__get_fitness__(range(len(self.pop)), with_history)
281 fmax = max(fit)
282 nnf = True
283 while nnf:
284 t = self.rng.randint(len(self.pop))
285 if fit[t] > self.rng.random() * fmax:
286 c1 = self.pop[t]
287 nnf = False
289 return c1.copy()
291 def _write_log(self):
292 """Writes the population to a logfile.
294 The format is::
296 timestamp: generation(if available): id1,id2,id3..."""
297 if self.logfile is not None:
298 ids = [str(a.info['relax_id']) for a in self.pop]
299 if ids != []:
300 try:
301 gen_nums = [c.info['key_value_pairs']['generation']
302 for c in self.all_cand]
303 max_gen = max(gen_nums)
304 except KeyError:
305 max_gen = ' '
306 fd = open(self.logfile, 'a')
307 fd.write('{time}: {gen}: {pop}\n'.format(time=now(),
308 pop=','.join(ids),
309 gen=max_gen))
310 fd.close()
312 def is_uniform(self, func, min_std, pop=None):
313 """Tests whether the current population is uniform or diverse.
314 Returns True if uniform, False otherwise.
316 Parameters:
318 func: function
319 that takes one argument an atoms object and returns a value that
320 will be used for testing against the rest of the population.
322 min_std: int or float
323 The minimum standard deviation, if the population has a lower
324 std dev it is uniform.
326 pop: list, optional
327 use this list of Atoms objects instead of the current population.
328 """
329 if pop is None:
330 pop = self.pop
331 vals = [func(a) for a in pop]
332 stddev = np.std(vals)
333 if stddev < min_std:
334 return True
335 return False
337 def mass_extinction(self, ids):
338 """Kills every candidate in the database with gaid in the
339 supplied list of ids. Typically used on the main part of the current
340 population if the diversity is to small.
342 Parameters:
344 ids: list
345 list of ids of candidates to be killed.
347 """
348 for confid in ids:
349 self.dc.kill_candidate(confid)
350 self.pop = []
353class RandomPopulation(Population):
354 def __init__(self, data_connection, population_size,
355 comparator=None, logfile=None, exclude_used_pairs=False,
356 bad_candidates=0, use_extinct=False):
357 self.exclude_used_pairs = exclude_used_pairs
358 self.bad_candidates = bad_candidates
359 Population.__init__(self, data_connection, population_size,
360 comparator, logfile, use_extinct)
362 def __initialize_pop__(self):
363 """ Private method that initializes the population when
364 the population is created. """
366 # Get all relaxed candidates from the database
367 ue = self.use_extinct
368 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
369 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True)
370 # all_cand.sort(key=lambda x: x.get_potential_energy())
372 if len(all_cand) > 0:
373 # Fill up the population with the self.pop_size most stable
374 # unique candidates.
375 ratings = []
376 best_raw = get_raw_score(all_cand[0])
377 i = 0
378 while i < len(all_cand):
379 c = all_cand[i]
380 i += 1
381 eq = False
382 for a in self.pop:
383 if self.comparator.looks_like(a, c):
384 eq = True
385 break
386 if not eq:
387 if len(self.pop) < self.pop_size - self.bad_candidates:
388 self.pop.append(c)
389 else:
390 exp_fact = exp(get_raw_score(c) / best_raw)
391 ratings.append([c, (exp_fact - 1) * self.rng.random()])
392 ratings.sort(key=itemgetter(1), reverse=True)
394 for i in range(self.bad_candidates):
395 self.pop.append(ratings[i][0])
397 for a in self.pop:
398 a.info['looks_like'] = count_looks_like(a, all_cand,
399 self.comparator)
401 self.all_cand = all_cand
402 self.__calc_participation__()
404 def update(self):
405 """ The update method in Population will add to the end of
406 the population, that can't be used here since we might have
407 bad candidates that need to stay in the population, therefore
408 just recalc the population every time. """
410 self.pop = []
411 self.__initialize_pop__()
413 self._write_log()
415 def get_one_candidate(self):
416 """Returns one candidates at random."""
417 if len(self.pop) < 1:
418 self.update()
420 if len(self.pop) < 1:
421 return None
423 t = self.rng.randint(len(self.pop))
424 c = self.pop[t]
426 return c.copy()
428 def get_two_candidates(self):
429 """Returns two candidates at random."""
430 if len(self.pop) < 2:
431 self.update()
433 if len(self.pop) < 2:
434 return None
436 c1 = self.pop[0]
437 c2 = self.pop[0]
438 used_before = False
439 while c1.info['confid'] == c2.info['confid'] and not used_before:
440 t = self.rng.randint(len(self.pop))
441 c1 = self.pop[t]
442 t = self.rng.randint(len(self.pop))
443 c2 = self.pop[t]
445 c1id = c1.info['confid']
446 c2id = c2.info['confid']
447 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and
448 self.exclude_used_pairs)
449 return (c1.copy(), c2.copy())
452class FitnessSharingPopulation(Population):
453 """ Fitness sharing population that penalizes structures if they are
454 too similar. This is determined by a distance measure
456 Parameters:
458 comp_key: string
459 Key where the distance measure can be found in the
460 atoms.info['key_value_pairs'] dictionary.
462 threshold: float or int
463 Value above which no penalization of the fitness takes place
465 alpha_sh: float or int
466 Determines the shape of the sharing function.
467 Default is 1, which gives a linear sharing function.
469 """
471 def __init__(self, data_connection, population_size,
472 comp_key, threshold, alpha_sh=1.,
473 comparator=None, logfile=None, use_extinct=False):
474 self.comp_key = comp_key
475 self.dt = threshold # dissimilarity threshold
476 self.alpha_sh = alpha_sh
477 self.fit_scaling = 1.
479 self.sh_cache = dict()
481 Population.__init__(self, data_connection, population_size,
482 comparator, logfile, use_extinct)
484 def __get_fitness__(self, candidates):
485 """Input should be sorted according to raw_score."""
486 max_s = get_raw_score(candidates[0])
487 min_s = get_raw_score(candidates[-1])
488 T = min_s - max_s
490 shared_fit = []
491 for c in candidates:
492 sc = get_raw_score(c)
493 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))
494 m = 1.
495 ck = c.info['key_value_pairs'][self.comp_key]
496 for other in candidates:
497 if other != c:
498 name = tuple(sorted([c.info['confid'],
499 other.info['confid']]))
500 if name not in self.sh_cache:
501 ok = other.info['key_value_pairs'][self.comp_key]
502 d = abs(ck - ok)
503 if d < self.dt:
504 v = 1 - (d / self.dt)**self.alpha_sh
505 self.sh_cache[name] = v
506 else:
507 self.sh_cache[name] = 0
508 m += self.sh_cache[name]
510 shf = (obj_fit ** self.fit_scaling) / m
511 shared_fit.append(shf)
512 return shared_fit
514 def update(self):
515 """ The update method in Population will add to the end of
516 the population, that can't be used here since the shared fitness
517 will change for all candidates when new are added, therefore
518 just recalc the population every time. """
520 self.pop = []
521 self.__initialize_pop__()
523 self._write_log()
525 def __initialize_pop__(self):
526 # Get all relaxed candidates from the database
527 ue = self.use_extinct
528 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
529 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True)
531 if len(all_cand) > 0:
532 shared_fit = self.__get_fitness__(all_cand)
533 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand),
534 reverse=True)))[1]
536 # Fill up the population with the self.pop_size most stable
537 # unique candidates.
538 i = 0
539 while i < len(all_sorted) and len(self.pop) < self.pop_size:
540 c = all_sorted[i]
541 i += 1
542 eq = False
543 for a in self.pop:
544 if self.comparator.looks_like(a, c):
545 eq = True
546 break
547 if not eq:
548 self.pop.append(c)
550 for a in self.pop:
551 a.info['looks_like'] = count_looks_like(a, all_cand,
552 self.comparator)
553 self.all_cand = all_cand
555 def get_two_candidates(self):
556 """ Returns two candidates for pairing employing the
557 fitness criteria from
558 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
559 and the roulete wheel selection scheme described in
560 R.L. Johnston Dalton Transactions,
561 Vol. 22, No. 22. (2003), pp. 4193-4207
562 """
564 if len(self.pop) < 2:
565 self.update()
567 if len(self.pop) < 2:
568 return None
570 fit = self.__get_fitness__(self.pop)
571 fmax = max(fit)
572 c1 = self.pop[0]
573 c2 = self.pop[0]
574 while c1.info['confid'] == c2.info['confid']:
575 nnf = True
576 while nnf:
577 t = self.rng.randint(len(self.pop))
578 if fit[t] > self.rng.random() * fmax:
579 c1 = self.pop[t]
580 nnf = False
581 nnf = True
582 while nnf:
583 t = self.rng.randint(len(self.pop))
584 if fit[t] > self.rng.random() * fmax:
585 c2 = self.pop[t]
586 nnf = False
588 return (c1.copy(), c2.copy())
591class RankFitnessPopulation(Population):
592 """ Ranks the fitness relative to set variable to flatten the surface
593 in a certain direction such that mating across variable is equally
594 likely irrespective of raw_score.
596 Parameters:
598 variable_function: function
599 A function that takes as input an Atoms object and returns
600 the variable that differentiates the ranks.
602 exp_function: boolean
603 If True use an exponential function for ranking the fitness.
604 If False use the same as in Population. Default True.
606 exp_prefactor: float
607 The prefactor used in the exponential fitness scaling function.
608 Default 0.5
609 """
611 def __init__(self, data_connection, population_size, variable_function,
612 comparator=None, logfile=None, use_extinct=False,
613 exp_function=True, exp_prefactor=0.5):
614 self.exp_function = exp_function
615 self.exp_prefactor = exp_prefactor
616 self.vf = variable_function
617 # The current fitness is set at each update of the population
618 self.current_fitness = None
620 Population.__init__(self, data_connection, population_size,
621 comparator, logfile, use_extinct)
623 def get_rank(self, rcand, key=None):
624 # Set the initial order of the candidates, will need to
625 # be returned in this order at the end of ranking.
626 ordered = list(zip(range(len(rcand)), rcand))
628 # Niche and rank candidates.
629 rec_nic = []
630 rank_fit = []
631 for o, c in ordered:
632 if o not in rec_nic:
633 ntr = []
634 ce1 = self.vf(c)
635 rec_nic.append(o)
636 ntr.append([o, c])
637 for oother, cother in ordered:
638 if oother not in rec_nic:
639 ce2 = self.vf(cother)
640 if ce1 == ce2:
641 # put the now processed in oother
642 # in rec_nic as well
643 rec_nic.append(oother)
644 ntr.append([oother, cother])
645 # Each niche is sorted according to raw_score and
646 # assigned a fitness according to the ranking of
647 # the candidates
648 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key],
649 reverse=True)
650 start_rank = -1
651 cor = 0
652 for on, cn in ntr:
653 rank = start_rank - cor
654 rank_fit.append([on, cn, rank])
655 cor += 1
656 # The original order is reformed
657 rank_fit.sort(key=itemgetter(0), reverse=False)
658 return np.array(list(zip(*rank_fit))[2])
660 def __get_fitness__(self, candidates):
661 expf = self.exp_function
662 rfit = self.get_rank(candidates, key='raw_score')
664 if not expf:
665 rmax = max(rfit)
666 rmin = min(rfit)
667 T = rmin - rmax
668 # If using obj_rank probability, must have non-zero T val.
669 # pop_size must be greater than number of permutations.
670 # We test for this here
671 msg = "Equal fitness for best and worst candidate in the "
672 msg += "population! Fitness scaling is impossible! "
673 msg += "Try with a larger population."
674 assert T != 0., msg
675 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.))
676 else:
677 return self.exp_prefactor ** (-rfit - 1)
679 def update(self):
680 """ The update method in Population will add to the end of
681 the population, that can't be used here since the fitness
682 will potentially change for all candidates when new are added,
683 therefore just recalc the population every time. """
685 self.pop = []
686 self.__initialize_pop__()
687 self.current_fitness = self.__get_fitness__(self.pop)
689 self._write_log()
691 def __initialize_pop__(self):
692 # Get all relaxed candidates from the database
693 ue = self.use_extinct
694 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
695 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True)
697 if len(all_cand) > 0:
698 fitf = self.__get_fitness__(all_cand)
699 all_sorted = list(zip(fitf, all_cand))
700 all_sorted.sort(key=itemgetter(0), reverse=True)
701 sort_cand = []
702 for _, t2 in all_sorted:
703 sort_cand.append(t2)
704 all_sorted = sort_cand
706 # Fill up the population with the self.pop_size most stable
707 # unique candidates.
708 i = 0
709 while i < len(all_sorted) and len(self.pop) < self.pop_size:
710 c = all_sorted[i]
711 c_vf = self.vf(c)
712 i += 1
713 eq = False
714 for a in self.pop:
715 a_vf = self.vf(a)
716 # Only run comparator if the variable_function (self.vf)
717 # returns the same. If it returns something different the
718 # candidates are inherently different.
719 # This is done to speed up.
720 if a_vf == c_vf:
721 if self.comparator.looks_like(a, c):
722 eq = True
723 break
724 if not eq:
725 self.pop.append(c)
726 self.all_cand = all_cand
728 def get_two_candidates(self):
729 """ Returns two candidates for pairing employing the
730 roulete wheel selection scheme described in
731 R.L. Johnston Dalton Transactions,
732 Vol. 22, No. 22. (2003), pp. 4193-4207
733 """
735 if len(self.pop) < 2:
736 self.update()
738 if len(self.pop) < 2:
739 return None
741 # Use saved fitness
742 fit = self.current_fitness
743 fmax = max(fit)
744 c1 = self.pop[0]
745 c2 = self.pop[0]
746 while c1.info['confid'] == c2.info['confid']:
747 nnf = True
748 while nnf:
749 t = self.rng.randint(len(self.pop))
750 if fit[t] > self.rng.random() * fmax:
751 c1 = self.pop[t]
752 nnf = False
753 nnf = True
754 while nnf:
755 t = self.rng.randint(len(self.pop))
756 if fit[t] > self.rng.random() * fmax:
757 c2 = self.pop[t]
758 nnf = False
760 return (c1.copy(), c2.copy())
763class MultiObjectivePopulation(RankFitnessPopulation):
764 """ Allows for assignment of fitness based on a set of two variables
765 such that fitness is ranked according to a Pareto-front of
766 non-dominated candidates.
768 Parameters
769 ----------
770 abs_data: list
771 Set of key_value_pairs in atoms object for which fitness should
772 be assigned based on absolute value.
774 rank_data: list
775 Set of key_value_pairs in atoms object for which data should
776 be ranked in order to ascribe fitness.
778 variable_function: function
779 A function that takes as input an Atoms object and returns
780 the variable that differentiates the ranks. Only use if
781 data is ranked.
783 exp_function: boolean
784 If True use an exponential function for ranking the fitness.
785 If False use the same as in Population. Default True.
787 exp_prefactor: float
788 The prefactor used in the exponential fitness scaling function.
789 Default 0.5
791 """
793 def __init__(self, data_connection, population_size,
794 variable_function=None, comparator=None, logfile=None,
795 use_extinct=False, abs_data=None, rank_data=None,
796 exp_function=True, exp_prefactor=0.5):
797 # The current fitness is set at each update of the population
798 self.current_fitness = None
800 if rank_data is None:
801 rank_data = []
802 self.rank_data = rank_data
804 if abs_data is None:
805 abs_data = []
806 self.abs_data = abs_data
808 RankFitnessPopulation.__init__(self, data_connection, population_size,
809 variable_function, comparator, logfile,
810 use_extinct, exp_function,
811 exp_prefactor)
813 def get_nonrank(self, nrcand, key=None):
814 """"Returns a list of fitness values."""
815 nrc_list = []
816 for nrc in nrcand:
817 nrc_list.append(nrc.info['key_value_pairs'][key])
818 return nrc_list
820 def __get_fitness__(self, candidates):
821 # There are no defaults set for the datasets to be
822 # used in this function, as such we test that the
823 # user has specified at least two here.
824 msg = "This is a multi-objective fitness function"
825 msg += " so there must be at least two datasets"
826 msg += " stated in the rank_data and abs_data variables"
827 assert len(self.rank_data) + len(self.abs_data) >= 2, msg
829 expf = self.exp_function
831 all_fitnesses = []
832 used = set()
833 for rd in self.rank_data:
834 used.add(rd)
835 # Build ranked fitness based on rd
836 all_fitnesses.append(self.get_rank(candidates, key=rd))
838 for d in self.abs_data:
839 if d not in used:
840 used.add(d)
841 # Build fitness based on d
842 all_fitnesses.append(self.get_nonrank(candidates, key=d))
844 # Set the initial order of the ranks, will need to
845 # be returned in this order at the end.
846 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses))
847 mvf_rank = -1 # Start multi variable rank at -1.
848 rec_vrc = [] # A record of already ranked candidates.
849 mvf_list = [] # A list for all candidate ranks.
850 # Sort by raw_score_1 in case this is different from
851 # the stored raw_score() variable that all_cands are
852 # sorted by.
853 fordered.sort(key=itemgetter(1), reverse=True)
854 # Niche candidates with equal or better raw_score to
855 # current candidate.
856 for a in fordered:
857 order, rest = a[0], a[1:]
858 if order not in rec_vrc:
859 pff = []
860 pff2 = []
861 rec_vrc.append(order)
862 pff.append((order, rest))
863 for b in fordered:
864 border, brest = b[0], b[1:]
865 if border not in rec_vrc:
866 if np.any(np.array(brest) >= np.array(rest)):
867 pff.append((border, brest))
868 # Remove any candidate from pff list that is dominated
869 # by another in the list.
870 for na in pff:
871 norder, nrest = na[0], na[1:]
872 dom = False
873 for nb in pff:
874 nborder, nbrest = nb[0], nb[1:]
875 if norder != nborder:
876 if np.all(np.array(nbrest) > np.array(nrest)):
877 dom = True
878 if not dom:
879 pff2.append((norder, nrest))
880 # Assign pareto rank from -1 to -N niches.
881 for ffa in pff2:
882 fforder, ffrest = ffa[0], ffa[1:]
883 rec_vrc.append(fforder)
884 mvf_list.append((fforder, mvf_rank, ffrest))
885 mvf_rank = mvf_rank - 1
886 # The original order is reformed
887 mvf_list.sort(key=itemgetter(0), reverse=False)
888 rfro = np.array(list(zip(*mvf_list))[1])
890 if not expf:
891 rmax = max(rfro)
892 rmin = min(rfro)
893 T = rmin - rmax
894 # If using obj_rank probability, must have non-zero T val.
895 # pop_size must be greater than number of permutations.
896 # We test for this here
897 msg = "Equal fitness for best and worst candidate in the "
898 msg += "population! Fitness scaling is impossible! "
899 msg += "Try with a larger population."
900 assert T != 0., msg
901 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.))
902 else:
903 return self.exp_prefactor ** (-rfro - 1)
905 def __initialize_pop__(self):
906 # Get all relaxed candidates from the database
907 ue = self.use_extinct
908 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
909 all_cand.sort(key=lambda x: get_raw_score(x), reverse=True)
911 if len(all_cand) > 0:
912 fitf = self.__get_fitness__(all_cand)
913 all_sorted = list(zip(fitf, all_cand))
914 all_sorted.sort(key=itemgetter(0), reverse=True)
915 sort_cand = []
916 for _, t2 in all_sorted:
917 sort_cand.append(t2)
918 all_sorted = sort_cand
920 # Fill up the population with the self.pop_size most stable
921 # unique candidates.
922 i = 0
923 while i < len(all_sorted) and len(self.pop) < self.pop_size:
924 c = all_sorted[i]
925 # Use variable_function to decide whether to run comparator
926 # if the function has been defined by the user. This does not
927 # need to be dependent on using the rank_data function.
928 if self.vf is not None:
929 c_vf = self.vf(c)
930 i += 1
931 eq = False
932 for a in self.pop:
933 if self.vf is not None:
934 a_vf = self.vf(a)
935 # Only run comparator if the variable_function
936 # (self.vf) returns the same. If it returns something
937 # different the candidates are inherently different.
938 # This is done to speed up.
939 if a_vf == c_vf:
940 if self.comparator.looks_like(a, c):
941 eq = True
942 break
943 else:
944 if self.comparator.looks_like(a, c):
945 eq = True
946 break
947 if not eq:
948 self.pop.append(c)
949 self.all_cand = all_cand