如何修复简单 GA (Python) 中的过早收敛?

发布于 2024-11-17 14:34:00 字数 5110 浏览 4 评论 0原文

昨天我开始探索遗传算法,当我结束了一些基本理论时,我尝试在Python上编写简单的遗传算法,来解决丢番图方程。我是Python和GA的新手,所以请不要严格判断我的代码。

问题

由于过早收敛,我无法得到任何结果(有一些无返回点(n-population),population[ n] == Population[n+i],其中i是任意整数,即使随机变异元素也无法改变这一点,世代退化​​得非常快)

GA使用交叉来繁殖,并加权选择父母。< /强>

  • Q1:我的设计是否存在任何设计错误? 代码(如下)?
  • Q1.2:我需要添加精英主义吗?
  • Q1.3:我需要改变品种吗 逻辑?
  • Q2:是否真的需要深拷贝?

代码:

# -*- coding: utf-8 -*-
from random import randint
from copy import deepcopy
from math import floor
import random

class Organism:
    #initiate
    def __init__(self, alleles, fitness, likelihood):
        self.alleles = alleles
        self.fitness = fitness
        self.likelihood = likelihood
        self.result = 0
    def __unicode__(self):
        return '%s [%s - %s]' % (self.alleles, self.fitness, self.likelihood)

class  CDiophantine:
    def __init__(self, coefficients,  result):
        self.coefficients = coefficients
        self.result = result

    maxPopulation = 40
    organisms = []
    def GetGene (self,i):
        return self.organisms[i]

    def OrganismFitness (self,gene):
        gene.result = 0
        for i in range (0, len(self.coefficients)):
            gene.result += self.coefficients[i]*gene.alleles[i]
        gene.fitness = abs(gene.result - self.result)
        return gene.fitness

    def Fitness (self):
        for organism in self.organisms:
            organism.fitness = self.OrganismFitness(organism)
            if organism.fitness == 0:
                return  organism
        return None


    def MultiplyFitness (self):
        coefficientSum = 0
        for organism in self.organisms:
            coefficientSum += 1/float(organism.fitness)
        return coefficientSum

    def GenerateLikelihoods (self):
        last = 0
        multiplyFitness = self.MultiplyFitness()
        for organism in self.organisms:
            last = ((1/float(organism.fitness)/multiplyFitness)*100)
            #print '1/%s/%s*100 - %s' % (organism.fitness, multiplyFitness, last)
            organism.likelihood = last

    def Breed (self, parentOne, parentTwo):
        crossover = randint (1,len(self.coefficients)-1)
        child = deepcopy(parentOne)
        initial = 0
        final = len(parentOne.alleles) - 1
        if randint (1,100) < 50:
            father = parentOne
            mother = parentTwo
        else:
            father = parentTwo
            mother = parentOne
        child.alleles = mother.alleles[:crossover] + father.alleles[crossover:]
        if randint (1,100) < 5:
            for i in range(initial,final):    
                child.alleles[i] = randint (0,self.result)

        return child

    def CreateNewOrganisms (self):
        #generating new population
        tempPopulation = []
        for _ in self.organisms:
            iterations = 0
            father = deepcopy(self.organisms[0])
            mother = deepcopy(self.organisms[1])
            while father.alleles == mother.alleles:
                father = self.WeightedChoice()
                mother = self.WeightedChoice()
                iterations+=1
                if iterations > 35:
                    break
            kid = self.Breed(father,mother)
            tempPopulation.append(kid)
        self.organisms = tempPopulation

    def WeightedChoice (self):
        list = []
        for organism in self.organisms:
            list.append((organism.likelihood,organism))
        list = sorted((random.random() * x[0], x[1]) for x in list)
        return list[-1][1]


    def AverageFitness (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.fitness
        return float(sum)/len(self.organisms)

    def AverageLikelihoods (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.likelihood
        return sum/len(self.organisms)

    def Solve (self):
        solution = None
        for i in range(0,self.maxPopulation):
            alleles = []
            #
            for j in range(0, len(self.coefficients)):
                alleles.append(randint(0, self.result))
            self.organisms.append(Organism(alleles,0,0))
        solution = self.Fitness()
        if solution:
            return solution.alleles
        iterations = 0
        while not solution and  iterations <3000:
            self.GenerateLikelihoods()
            self.CreateNewOrganisms()
            solution = self.Fitness()
            if solution:
                print 'SOLUTION FOUND IN %s ITERATIONS' % iterations
                return solution.alleles
            iterations += 1
        return  -1

if __name__ == "__main__":
    diophantine = CDiophantine ([1,2,3,4],30)
    #cProfile.run('diophantine.Solve()')
    print diophantine.Solve()

尝试更改品种和加权随机选择逻辑,但没有结果。这个 GA 应该是工作,我不知道出了什么问题。 我知道 Python 上有一些 GA 库,我现在正在尝试理解它们 - 似乎它们对我来说相当复杂。抱歉,有错误,英语不是我的母语。感谢您的理解。

死灵更新: 以格雷码存储染色体,而不是整数。

Yesterday i started exploring the genetic algorithms, and when i ended up with some basic theory, i tried to write simple GA on Python, that solves Diophantine equation. I'm new to Python and GAs, so please, don't judge my code strictly.

Problem

I cant get any result due to premature convergence (there is some no-return point (n-population), population[n] == population[n+i], where i is any integer. even the random mutatuion element cant change this, the generation is degradating very quickly)

GA is using crossover to breed, and weighted choice of parents.

  • Q1: Is there any design mistakes in my
    code (below)?
  • Q1.2: Do i need to add elitism?
  • Q1.3: Do i need to change breed
    logic?
  • Q2: Is there realy needed deep copy?

Code:

# -*- coding: utf-8 -*-
from random import randint
from copy import deepcopy
from math import floor
import random

class Organism:
    #initiate
    def __init__(self, alleles, fitness, likelihood):
        self.alleles = alleles
        self.fitness = fitness
        self.likelihood = likelihood
        self.result = 0
    def __unicode__(self):
        return '%s [%s - %s]' % (self.alleles, self.fitness, self.likelihood)

class  CDiophantine:
    def __init__(self, coefficients,  result):
        self.coefficients = coefficients
        self.result = result

    maxPopulation = 40
    organisms = []
    def GetGene (self,i):
        return self.organisms[i]

    def OrganismFitness (self,gene):
        gene.result = 0
        for i in range (0, len(self.coefficients)):
            gene.result += self.coefficients[i]*gene.alleles[i]
        gene.fitness = abs(gene.result - self.result)
        return gene.fitness

    def Fitness (self):
        for organism in self.organisms:
            organism.fitness = self.OrganismFitness(organism)
            if organism.fitness == 0:
                return  organism
        return None


    def MultiplyFitness (self):
        coefficientSum = 0
        for organism in self.organisms:
            coefficientSum += 1/float(organism.fitness)
        return coefficientSum

    def GenerateLikelihoods (self):
        last = 0
        multiplyFitness = self.MultiplyFitness()
        for organism in self.organisms:
            last = ((1/float(organism.fitness)/multiplyFitness)*100)
            #print '1/%s/%s*100 - %s' % (organism.fitness, multiplyFitness, last)
            organism.likelihood = last

    def Breed (self, parentOne, parentTwo):
        crossover = randint (1,len(self.coefficients)-1)
        child = deepcopy(parentOne)
        initial = 0
        final = len(parentOne.alleles) - 1
        if randint (1,100) < 50:
            father = parentOne
            mother = parentTwo
        else:
            father = parentTwo
            mother = parentOne
        child.alleles = mother.alleles[:crossover] + father.alleles[crossover:]
        if randint (1,100) < 5:
            for i in range(initial,final):    
                child.alleles[i] = randint (0,self.result)

        return child

    def CreateNewOrganisms (self):
        #generating new population
        tempPopulation = []
        for _ in self.organisms:
            iterations = 0
            father = deepcopy(self.organisms[0])
            mother = deepcopy(self.organisms[1])
            while father.alleles == mother.alleles:
                father = self.WeightedChoice()
                mother = self.WeightedChoice()
                iterations+=1
                if iterations > 35:
                    break
            kid = self.Breed(father,mother)
            tempPopulation.append(kid)
        self.organisms = tempPopulation

    def WeightedChoice (self):
        list = []
        for organism in self.organisms:
            list.append((organism.likelihood,organism))
        list = sorted((random.random() * x[0], x[1]) for x in list)
        return list[-1][1]


    def AverageFitness (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.fitness
        return float(sum)/len(self.organisms)

    def AverageLikelihoods (self):
        sum = 0
        for organism in self.organisms:
            sum += organism.likelihood
        return sum/len(self.organisms)

    def Solve (self):
        solution = None
        for i in range(0,self.maxPopulation):
            alleles = []
            #
            for j in range(0, len(self.coefficients)):
                alleles.append(randint(0, self.result))
            self.organisms.append(Organism(alleles,0,0))
        solution = self.Fitness()
        if solution:
            return solution.alleles
        iterations = 0
        while not solution and  iterations <3000:
            self.GenerateLikelihoods()
            self.CreateNewOrganisms()
            solution = self.Fitness()
            if solution:
                print 'SOLUTION FOUND IN %s ITERATIONS' % iterations
                return solution.alleles
            iterations += 1
        return  -1

if __name__ == "__main__":
    diophantine = CDiophantine ([1,2,3,4],30)
    #cProfile.run('diophantine.Solve()')
    print diophantine.Solve()

I tried to change breed and weighted random choice logic but with no results. This GA supposed to be work, i dont know, what's wrong.
I know that there are some GA libraries on Python, i'm trying to understand them at the moment - it seems that they are quite complex to me. Sorry for mistakes, english is not my native language. Thank you for your understanding.

NECROUPDATE:
Store chromosomes in Gray Code, not in integer.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

熊抱啵儿 2024-11-24 14:34:00

轻微的逻辑错误:parentTwo 是父亲的可能性比母亲的可能性稍大一些。偶数赔率将为 randint (1,100) <= 50,而不是 randint (1,100) <= 50。 50. 不会是造成这里问题的原因。

  1. 你们的人口规模相当小。对于大多数问题来说,40 个是很少的。这将导致它快速收敛。
  2. 精英主义会让你的人口聚合得更快,而不是更慢。
  3. 如果我没有正确阅读的话,您的 WeightedChoice 函数似乎效率相当低。我最近还没有足够多地使用Python来真正理解那里发生了什么,但看看它确实感觉效率很低。如果你可以改进这一点,它应该加快处理速度,这样你就可以增加人口规模(并且,据我计算你的算法,可能至少有 O(n^2),这将是 真的很重要)。

人口规模如此之小,200-300代就能解决问题并不奇怪。如果增加人口,就会减少所需的世代。

注意:我发现了一些几年前为解决类似问题而编写的旧代码。它是 C 语言,并使用锦标赛选择,但也许它可以给你一些想法:

/*Diophantine equation solving genetic algorithm
Copyright (C) 2009- by Joel Rein
Licensed under the terms of the MIT License*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define POP 100
//number of variables to solve for
#define VAR 4
//maximum value for a) result and b) variables
#define MAX 100 
#define MAX_GENS 500
//probability of crossover (otherwise just one parent will be used)
#define CROSSOVER 0.7
//probability of mutation (per gene)
#define MUTATION 0.4
//print out debug information each generation (recommended: if used, keep RUNS low)
#define DEBUG
//print result of each run individually
#define PRINT_RESULT
//how many times to run the GA
#define RUNS 1

int pop[POP][VAR], scores[POP], new[POP][VAR];
int coefficients[VAR];
int result=0;

int score(int index){
    int sum=0;
    for(int i=0;i<VAR;i++)
        sum+=coefficients[i]*pop[index][i];
    return abs(sum-result);
}

int tournament(int size){
    int best=rand()%POP;
    for(int i=1;i<size;i++){
        int comp=rand()%POP;
        if(scores[comp]<scores[best])
            best=comp;
    }
    return best;
}

void breed(int target){
    int a=tournament(3), b=tournament(3);
    //copy a
    for(int i=0;i<VAR;i++)
        new[target][i]=pop[a][i];
    //crossover
    if((float)rand()/RAND_MAX<CROSSOVER){
        int x=rand()%VAR;
        for(int i=x;i<VAR;i++)
            new[target][i]=pop[b][i];
    }
    //mutation
    for(int i=0;i<VAR;i++)
        if((float)rand()/RAND_MAX<MUTATION)
            new[target][i]=rand()%(result*2)-result;
}

void debug(int gen, int best){
#ifdef DEBUG
    printf("Gen: %3i Score: %3i --- ", gen, scores[best]);
    int sum=0;
    for(int i=0;i<VAR;i++){
        sum+=coefficients[i]*pop[best][i];
        printf("%3i*%3i+", coefficients[i], pop[best][i]);
    }
    printf("0= %3i (target: %i)\n", sum, result);
#endif
}

int ga(int run){
    srand(time(NULL)+run);
    //calculate a result for the equation. 
    //this mustn't be 0, else we get division-by-zero errors while initialising & mutating.
    while(!result)
        result=rand()%MAX;
    for(int i=0;i<VAR;i++)
        coefficients[i]=rand()%result;
    //initialise population
    for(int i=0;i<POP;i++)
        for(int j=0;j<VAR;j++)
            pop[i][j]=rand()%(result*2)-result;
    //main loop
    int gen, best;
    for(gen=0;gen<MAX_GENS;gen++){
        best=0;
        //evaluate population
        for(int i=0;i<POP;i++){
            scores[i]=score(i);
            if(scores[i]<scores[best])
                best=i;
        }
        debug(gen, best);
        if(scores[best]==0)
            break;
        //breed and replace
        for(int i=0;i<POP;i++)
            breed(i);
        for(int i=0;i<POP;i++)
            for(int j=0;j<VAR;j++)
                pop[i][j]=new[i][j];
    }
#ifdef PRINT_RESULT
    printf("Terminated after %4i generations with a score of %3i\n", gen, scores[best]); 
#else
    printf(".");
#endif
    return gen;
}

int main(){
    int total=0;
    for(int i=0;i<RUNS;i++)
        total+=ga(i);
    printf("\nAverage runtime: %i generations\n", total/RUNS);
}

Slight logic error: parentTwo is slightly more likely to be the father than the mother. Even odds would be randint (1,100) <= 50, not randint (1,100) < 50. Won't be what's causing the issue here.

  1. Your population size is fairly small. 40 is very few for most problems. That will cause it to converge quickly.
  2. Elitism will cause your population to converge faster, not slower.
  3. Your WeightedChoice function appears to be rather inefficient, if I'm reading it correctly. I haven't used Python recently enough to really understand what's going on there, but looking at it it certainly feels like something inefficient. If you can improve on that, it should speed up the processing so you can increase the population size (and, seeing as I'm figuring your algorithm there is probably at least O(n^2), that'll be really important).

With such a small population size, 200-300 generations is not surprising to solve the problem. If you increase the population, it should reduce the generations required.

Note: I found some old code that I wrote a few years ago for solving a similar problem. It's in C, and uses tournament selection, but perhaps it can give you a few ideas:

/*Diophantine equation solving genetic algorithm
Copyright (C) 2009- by Joel Rein
Licensed under the terms of the MIT License*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define POP 100
//number of variables to solve for
#define VAR 4
//maximum value for a) result and b) variables
#define MAX 100 
#define MAX_GENS 500
//probability of crossover (otherwise just one parent will be used)
#define CROSSOVER 0.7
//probability of mutation (per gene)
#define MUTATION 0.4
//print out debug information each generation (recommended: if used, keep RUNS low)
#define DEBUG
//print result of each run individually
#define PRINT_RESULT
//how many times to run the GA
#define RUNS 1

int pop[POP][VAR], scores[POP], new[POP][VAR];
int coefficients[VAR];
int result=0;

int score(int index){
    int sum=0;
    for(int i=0;i<VAR;i++)
        sum+=coefficients[i]*pop[index][i];
    return abs(sum-result);
}

int tournament(int size){
    int best=rand()%POP;
    for(int i=1;i<size;i++){
        int comp=rand()%POP;
        if(scores[comp]<scores[best])
            best=comp;
    }
    return best;
}

void breed(int target){
    int a=tournament(3), b=tournament(3);
    //copy a
    for(int i=0;i<VAR;i++)
        new[target][i]=pop[a][i];
    //crossover
    if((float)rand()/RAND_MAX<CROSSOVER){
        int x=rand()%VAR;
        for(int i=x;i<VAR;i++)
            new[target][i]=pop[b][i];
    }
    //mutation
    for(int i=0;i<VAR;i++)
        if((float)rand()/RAND_MAX<MUTATION)
            new[target][i]=rand()%(result*2)-result;
}

void debug(int gen, int best){
#ifdef DEBUG
    printf("Gen: %3i Score: %3i --- ", gen, scores[best]);
    int sum=0;
    for(int i=0;i<VAR;i++){
        sum+=coefficients[i]*pop[best][i];
        printf("%3i*%3i+", coefficients[i], pop[best][i]);
    }
    printf("0= %3i (target: %i)\n", sum, result);
#endif
}

int ga(int run){
    srand(time(NULL)+run);
    //calculate a result for the equation. 
    //this mustn't be 0, else we get division-by-zero errors while initialising & mutating.
    while(!result)
        result=rand()%MAX;
    for(int i=0;i<VAR;i++)
        coefficients[i]=rand()%result;
    //initialise population
    for(int i=0;i<POP;i++)
        for(int j=0;j<VAR;j++)
            pop[i][j]=rand()%(result*2)-result;
    //main loop
    int gen, best;
    for(gen=0;gen<MAX_GENS;gen++){
        best=0;
        //evaluate population
        for(int i=0;i<POP;i++){
            scores[i]=score(i);
            if(scores[i]<scores[best])
                best=i;
        }
        debug(gen, best);
        if(scores[best]==0)
            break;
        //breed and replace
        for(int i=0;i<POP;i++)
            breed(i);
        for(int i=0;i<POP;i++)
            for(int j=0;j<VAR;j++)
                pop[i][j]=new[i][j];
    }
#ifdef PRINT_RESULT
    printf("Terminated after %4i generations with a score of %3i\n", gen, scores[best]); 
#else
    printf(".");
#endif
    return gen;
}

int main(){
    int total=0;
    for(int i=0;i<RUNS;i++)
        total+=ga(i);
    printf("\nAverage runtime: %i generations\n", total/RUNS);
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文