How to fix premature convergence in simple GA (Python)?
Asked Answered
X

1

9

Yesterday i started exploring the genetic algorithms, and when i ended up with some basic theory, i tried to write simple GA on Python, that solves Diophantine equation. I'm new to Python and GAs, so please, don't judge my code strictly.

Problem

I cant get any result due to premature convergence (there is some no-return point (n-population), population[n] == population[n+i], where i is any integer. even the random mutatuion element cant change this, the generation is degradating very quickly)

GA is using crossover to breed, and weighted choice of parents.

  • Q1: Is there any design mistakes in my code (below)?
  • Q1.2: Do i need to add elitism?
  • Q1.3: Do i need to change breed logic?
  • Q2: Is there realy needed deep copy?

Code:

# -*- coding: utf-8 -*-
from random import randint
from copy import deepcopy
from math import floor
import random

class Organism:
    #initiate
    def __init__(self, alleles, fitness, likelihood):
        self.alleles = alleles
        self.fitness = fitness
        self.likelihood = likelihood
        self.result = 0
    def __unicode__(self):
        return '%s [%s - %s]' % (self.alleles, self.fitness, self.likelihood)

class  CDiophantine:
    def __init__(self, coefficients,  result):
        self.coefficients = coefficients
        self.result = result

    maxPopulation = 40
    organisms = []
    def GetGene (self,i):
        return self.organisms[i]

    def OrganismFitness (self,gene):
        gene.result = 0
        for i in range (0, len(self.coefficients)):
            gene.result += self.coefficients[i]*gene.alleles[i]
        gene.fitness = abs(gene.result - self.result)
        return gene.fitness

    def Fitness (self):
        for organism in self.organisms:
            organism.fitness = self.OrganismFitness(organism)
            if organism.fitness == 0:
                return  organism
        return None


    def MultiplyFitness (self):
        coefficientSum = 0
        for organism in self.organisms:
            coefficientSum += 1/float(organism.fitness)
        return coefficientSum

    def GenerateLikelihoods (self):
        last = 0
        multiplyFitness = self.MultiplyFitness()
        for organism in self.organisms:
            last = ((1/float(organism.fitness)/multiplyFitness)*100)
            #print '1/%s/%s*100 - %s' % (organism.fitness, multiplyFitness, last)
            organism.likelihood = last

    def Breed (self, parentOne, parentTwo):
        crossover = randint (1,len(self.coefficients)-1)
        child = deepcopy(parentOne)
        initial = 0
        final = len(parentOne.alleles) - 1
        if randint (1,100) < 50:
            father = parentOne
            mother = parentTwo
        else:
            father = parentTwo
            mother = parentOne
        child.alleles = mother.alleles[:crossover] + father.alleles[crossover:]
        if randint (1,100) < 5:
            for i in range(initial,final):    
                child.alleles[i] = randint (0,self.result)

        return child

    def CreateNewOrganisms (self):
        #generating new population
        tempPopulation = []
        for _ in self.organisms:
            iterations = 0
            father = deepcopy(self.organisms[0])
            mother = deepcopy(self.organisms[1])
            while father.alleles == mother.alleles:
                father = self.WeightedChoice()
                mother = self.WeightedChoice()
                iterations+=1
                if iterations > 35:
                    break
            kid = self.Breed(father,mother)
            tempPopulation.append(kid)
        self.organisms = tempPopulation

    def WeightedChoice (self):
        list = []
        for organism in self.organisms:
            list.append((organism.likelihood,organism))
        list = sorted((random.random() * x[0], x[1]) for x in list)
        return list[-1][1]


    def AverageFitness (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.fitness
        return float(sum)/len(self.organisms)

    def AverageLikelihoods (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.likelihood
        return sum/len(self.organisms)

    def Solve (self):
        solution = None
        for i in range(0,self.maxPopulation):
            alleles = []
            #
            for j in range(0, len(self.coefficients)):
                alleles.append(randint(0, self.result))
            self.organisms.append(Organism(alleles,0,0))
        solution = self.Fitness()
        if solution:
            return solution.alleles
        iterations = 0
        while not solution and  iterations <3000:
            self.GenerateLikelihoods()
            self.CreateNewOrganisms()
            solution = self.Fitness()
            if solution:
                print 'SOLUTION FOUND IN %s ITERATIONS' % iterations
                return solution.alleles
            iterations += 1
        return  -1

if __name__ == "__main__":
    diophantine = CDiophantine ([1,2,3,4],30)
    #cProfile.run('diophantine.Solve()')
    print diophantine.Solve()

I tried to change breed and weighted random choice logic but with no results. This GA supposed to be work, i dont know, what's wrong. I know that there are some GA libraries on Python, i'm trying to understand them at the moment - it seems that they are quite complex to me. Sorry for mistakes, english is not my native language. Thank you for your understanding.

NECROUPDATE: Store chromosomes in Gray Code, not in integer.

Xylol answered 28/6, 2011 at 19:49 Comment(3)
I am confused by the mutation code in your Breed method. Is that giving a 5% chance that after the crossover the child will be completely mutated? Typically you'd only mutate one (or a small randomly chosen subset) of the genes, not the entire genetic code of the offspring. Or am I reading this incorrectly?Hemostat
Joe Holloway, yes, you are reading it correctly. I've tried to mutate only part of gene already, but due to code changes (i fully rewrited Breed, i forgot to fix it back) I tried it now again (changed to mutate only one allele), and it gets 200-300 iterations with default settings to solve equation. It is still to much (or not?). Thank you.Xylol
I'm not particularly knowledgeable with genetic programming, but if you're doing genetic programming in python, you might be interested in github.com/sidious1741/python-genetic-programmingPeewee
A
3

Slight logic error: parentTwo is slightly more likely to be the father than the mother. Even odds would be randint (1,100) <= 50, not randint (1,100) < 50. Won't be what's causing the issue here.

  1. Your population size is fairly small. 40 is very few for most problems. That will cause it to converge quickly.
  2. Elitism will cause your population to converge faster, not slower.
  3. Your WeightedChoice function appears to be rather inefficient, if I'm reading it correctly. I haven't used Python recently enough to really understand what's going on there, but looking at it it certainly feels like something inefficient. If you can improve on that, it should speed up the processing so you can increase the population size (and, seeing as I'm figuring your algorithm there is probably at least O(n^2), that'll be really important).

With such a small population size, 200-300 generations is not surprising to solve the problem. If you increase the population, it should reduce the generations required.

Note: I found some old code that I wrote a few years ago for solving a similar problem. It's in C, and uses tournament selection, but perhaps it can give you a few ideas:

/*Diophantine equation solving genetic algorithm
Copyright (C) 2009- by Joel Rein
Licensed under the terms of the MIT License*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define POP 100
//number of variables to solve for
#define VAR 4
//maximum value for a) result and b) variables
#define MAX 100 
#define MAX_GENS 500
//probability of crossover (otherwise just one parent will be used)
#define CROSSOVER 0.7
//probability of mutation (per gene)
#define MUTATION 0.4
//print out debug information each generation (recommended: if used, keep RUNS low)
#define DEBUG
//print result of each run individually
#define PRINT_RESULT
//how many times to run the GA
#define RUNS 1

int pop[POP][VAR], scores[POP], new[POP][VAR];
int coefficients[VAR];
int result=0;

int score(int index){
    int sum=0;
    for(int i=0;i<VAR;i++)
        sum+=coefficients[i]*pop[index][i];
    return abs(sum-result);
}

int tournament(int size){
    int best=rand()%POP;
    for(int i=1;i<size;i++){
        int comp=rand()%POP;
        if(scores[comp]<scores[best])
            best=comp;
    }
    return best;
}

void breed(int target){
    int a=tournament(3), b=tournament(3);
    //copy a
    for(int i=0;i<VAR;i++)
        new[target][i]=pop[a][i];
    //crossover
    if((float)rand()/RAND_MAX<CROSSOVER){
        int x=rand()%VAR;
        for(int i=x;i<VAR;i++)
            new[target][i]=pop[b][i];
    }
    //mutation
    for(int i=0;i<VAR;i++)
        if((float)rand()/RAND_MAX<MUTATION)
            new[target][i]=rand()%(result*2)-result;
}

void debug(int gen, int best){
#ifdef DEBUG
    printf("Gen: %3i Score: %3i --- ", gen, scores[best]);
    int sum=0;
    for(int i=0;i<VAR;i++){
        sum+=coefficients[i]*pop[best][i];
        printf("%3i*%3i+", coefficients[i], pop[best][i]);
    }
    printf("0= %3i (target: %i)\n", sum, result);
#endif
}

int ga(int run){
    srand(time(NULL)+run);
    //calculate a result for the equation. 
    //this mustn't be 0, else we get division-by-zero errors while initialising & mutating.
    while(!result)
        result=rand()%MAX;
    for(int i=0;i<VAR;i++)
        coefficients[i]=rand()%result;
    //initialise population
    for(int i=0;i<POP;i++)
        for(int j=0;j<VAR;j++)
            pop[i][j]=rand()%(result*2)-result;
    //main loop
    int gen, best;
    for(gen=0;gen<MAX_GENS;gen++){
        best=0;
        //evaluate population
        for(int i=0;i<POP;i++){
            scores[i]=score(i);
            if(scores[i]<scores[best])
                best=i;
        }
        debug(gen, best);
        if(scores[best]==0)
            break;
        //breed and replace
        for(int i=0;i<POP;i++)
            breed(i);
        for(int i=0;i<POP;i++)
            for(int j=0;j<VAR;j++)
                pop[i][j]=new[i][j];
    }
#ifdef PRINT_RESULT
    printf("Terminated after %4i generations with a score of %3i\n", gen, scores[best]); 
#else
    printf(".");
#endif
    return gen;
}

int main(){
    int total=0;
    for(int i=0;i<RUNS;i++)
        total+=ga(i);
    printf("\nAverage runtime: %i generations\n", total/RUNS);
}
Apollonian answered 29/6, 2011 at 2:22 Comment(1)
Jivlain, thank you very much for the research material you gave. You are right about inefficiency of choice function. There is profiler info with my code: gist.github.com/1053666Xylol

© 2022 - 2024 — McMap. All rights reserved.