00001 #include "../ECF_base.h"
00002 #include "../ECF_derived.h"
00003 #include "../ECF_macro.h"
00004
00005 #include "AlgXCS.h"
00006 #include "math.h"
00007 #include "time.h"
00008 #include "../Logger.h"
00009 #include <string>
00010
00011
00012
00013
00014 #pragma region Intro comments
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #pragma endregion
00026
00027
00028 bool cmp (ClassifierP a, ClassifierP b){
00029 return a->ind->fitness->getValue() > b->ind->fitness->getValue();
00030 }
00031 using namespace std;
00032
00033 XCS::XCS()
00034 {
00035 name_ = "XCS";
00036
00037
00038 selRandomOp = static_cast<SelectionOperatorP> (new SelRandomOp);
00039 selFitPropOp = static_cast<SelectionOperatorP> (new SelFitnessProportionalOp);
00040 selWorstOp = static_cast<SelectionOperatorP> (new SelWorstOp);
00041
00042 params = static_cast<XCSParamsP> ( new XCSParams(name_));
00043 }
00044
00045
00046
00047 void XCS::registerParameters(StateP state)
00048 {
00049 params->registerParams(state->getRegistry());
00050
00051
00052 ClassifierParamsP params = static_cast<ClassifierParamsP> (new ClassifierParams(0,0,0));
00053 state->addGenotype(params);
00054
00055 }
00056 bool XCS::initialize(StateP state)
00057 {
00058 if (!Classifier::checkState(state)) {
00059 throw ("");
00060 }
00061
00062 selRandomOp->initialize(state);
00063 selWorstOp->initialize(state);
00064 selFitPropOp->initialize(state);
00065
00066 params->readParams(state->getRegistry());
00067
00068 time = 0;
00069
00070 ClassifierParamsP clParams = static_cast<ClassifierParamsP> (new ClassifierParams(0,0,0));
00071 clParams->setGenotypeId(3);
00072 clParams->initialize(state);
00073 params->initF_ = clParams->F_;
00074
00075
00076 environment = boost::dynamic_pointer_cast<Environment> (evalOp_);
00077
00078 if (!environment->checkState(state)) {
00079 throw ("");
00080 }
00081
00082 try {
00083 if (!environment->initialize()){
00084 throw (std::string("failed to initialize"));
00085 }
00086 }catch (std::string text){
00087 ECF_LOG_ERROR(state, "Environment error: "+text);
00088 throw ("");
00089 }
00090
00091 return true;
00092 }
00093 void XCS::printPopulation(){
00094
00095 for (uint i = 0; i < populationSet.size(); i++)
00096 populationSet[i]->print();
00097 }
00098
00099 bool XCS::advanceGeneration(StateP state, DemeP deme) {
00100
00101 if (state->getGenerationNo() == 0){
00102
00103
00104 ClassifierP classP;
00105 PopulationP pop = state->getPopulation();
00106
00107 for (uint i = 0; i < pop->size(); i++){
00108 for (uint j = 0; j < pop->at(i)->size(); j++){
00109 classP = static_cast<ClassifierP>
00110 (new Classifier(params, time, pop->at(i)->at(j), state));
00111 populationSet.push_back(classP);
00112 }
00113 }
00114
00115 for (uint i = 0; i < deme->size(); i++)
00116 deme->at(i)->fitness->setValue(params->initF_);
00117 environment->reset();
00118 }
00119
00120 std::vector<ClassifierP> lastActionSet;
00121 double lastReward = 0;
00122 GenotypeP lastInput;
00123
00124
00125 do {
00126
00127 #ifdef XCS_STEP_DEBUG
00128 std::cout << "Press any key to continue..." << std::endl;
00129 std::cin.get();
00130 #endif
00131 time++;
00132 std::vector<ClassifierP> matchSet;
00133 std::vector<ClassifierP> actionSet;
00134
00135
00136 GenotypeP input = environment->getInput();
00137
00138 #ifdef XCS_DEBUG
00139 std::cout << "Input value: ";
00140 Classifier::printBitString(boost::dynamic_pointer_cast<BitString::BitString> (input));
00141 std::cout << std::endl;
00142 #endif
00143
00144 matchSet = generateMatchSet(state, deme, input);
00145
00146 #ifdef XCS_DEBUG
00147 std::cout << "Match set [M]: "<< std::endl;
00148 for (uint i = 0; i < matchSet.size(); i++)
00149 matchSet[i]->print();
00150 #endif
00151
00152
00153 std::map<int, double> PA = generatePA(matchSet);
00154
00155
00156 int actionId = selectActionId(state, PA);
00157 #ifdef XCS_DEBUG
00158 std::cout << "action id = " << actionId<< std::endl;
00159 #endif
00160
00161
00162 actionSet = generateActionSet(matchSet, actionId);
00163
00164 #ifdef XCS_DEBUG
00165 std::cout << "\nAction set [A]: "<< std::endl;
00166 for (uint i = 0; i < actionSet.size(); i++)
00167 actionSet[i]->print();
00168 #endif
00169
00170
00171 IndividualP ind = static_cast<IndividualP> (new Individual);
00172 ind->push_back(input);
00173 ind->push_back(actionSet[0]->getAction());
00174
00175
00176 evaluate(ind);
00177 double reward = ind->fitness->getValue();
00178
00179 #ifdef XCS_DEBUG
00180 std::cout << "Reward: " << reward << std::endl;
00181 #endif
00182
00183 if (!lastActionSet.empty()){
00184 double P = lastReward + params->gama_ * getMaxFromPA(PA).second;
00185
00186 updateActionSet(lastActionSet, deme, P, state);
00187 if (!environment->isExploit()) {
00188 runGA(lastActionSet, lastInput, deme, state);
00189 }
00190 }
00191 if (environment->isOver()) {
00192
00193 if (!environment->isExploit()) {
00194 updateActionSet(actionSet, deme, reward, state);
00195
00196 runGA(actionSet, input, deme, state);
00197 lastActionSet.clear();
00198 }
00199
00200 } else {
00201 lastActionSet = actionSet;
00202 lastReward = reward;
00203 lastInput = input;
00204 }
00205 } while (!environment->isOver());
00206
00207 environment->nextTrial();
00208
00209 #ifdef XCS_DEBUG
00210 std::cout << "Classifiers:" << std::endl;
00211 printPopulation();
00212 std::cout << " ===== advanceGeneration end ====="<< std::endl;
00213 #endif
00214
00215 return true;
00216 }
00217
00218 std::vector<ClassifierP> XCS::generateMatchSet(StateP state, DemeP deme, GenotypeP input) {
00219
00220
00221 std::vector<ClassifierP> matchSet;
00222
00223 while(matchSet.empty()){
00224
00225 for (uint i = 0; i < populationSet.size(); ++i){
00226 if (populationSet[i]->doesMatch(input))
00227 matchSet.push_back(populationSet[i]);
00228 }
00229
00230 uint noDiffActions = (uint) getActionsFromMs(matchSet).size();
00231 if (noDiffActions < params->mna_){
00232
00233 ClassifierP coverCl = cover(state, deme, input, matchSet);
00234 deleteFromPopulation(state, deme);
00235 matchSet.clear();
00236 }
00237 }
00238
00239 return matchSet;
00240 }
00241
00242 std::set<int> XCS::getActionsFromMs (std::vector<ClassifierP> matchSet){
00243
00244 std::set<int> actions;
00245 for (uint i = 0; i < matchSet.size(); ++i){
00246 actions.insert(matchSet[i]->getActionId());
00247 }
00248 return actions;
00249 }
00250
00251
00252
00253 ClassifierP XCS::cover (StateP state, DemeP deme, GenotypeP input, std::vector<ClassifierP> matchSet){
00254
00255 IndividualP newInd = static_cast<IndividualP> (new Individual(state));
00256
00257 ClassifierP newClassifier = static_cast<ClassifierP> (new
00258 Classifier (params,time, newInd, state));
00259
00260
00261 newClassifier->cover(getActionsFromMs(matchSet), input, state);
00262 newInd->index = (uint)deme->size();
00263
00264 populationSet.push_back(newClassifier);
00265 deme->push_back(newInd);
00266
00267 assert(deme->size() == populationSet.size());
00268
00269 return newClassifier;
00270 }
00271
00272
00273 std::map<int, double> XCS::generatePA(std::vector<ClassifierP> matchSet){
00274 std::map<int, double> PA;
00275 std::map<int, double> fsa;
00276
00277 for (uint i = 0; i < matchSet.size(); ++i){
00278
00279 ClassifierP cl = matchSet[i];
00280 double fitness = cl->getFitness();
00281 int action = cl->getActionId();
00282
00283 if(PA[action] == NULL) {
00284 PA[action] = cl->getPrediction() * fitness;
00285 fsa[action] = 0;
00286 } else
00287 PA[action] += cl->getPrediction() * fitness;
00288 fsa[action] += fitness;
00289 }
00290 for (std::map<int, double>::iterator it = PA.begin(); it != PA.end(); ++it){
00291 PA[it->first] /= (fsa[it->first] == 0 ? 1 : fsa[it->first]);
00292 }
00293 return PA;
00294 }
00295
00296
00297 int XCS:: selectActionId(StateP state, std::map<int, double> PA){
00298 double rnd = state->getRandomizer()->getRandomDouble();
00299 int actionId = PA.begin()->first;
00300
00301
00302 if (environment->isExploit())
00303 return getMaxFromPA(PA).first;
00304
00305 if (rnd < params->p_explore_) {
00306
00307 rnd = state->getRandomizer()->getRandomDouble();
00308 int i = 1;
00309 for (std::map<int, double>::const_iterator it = PA.begin(); it != PA.end(); ++it){
00310 if (i > PA.size() * rnd){
00311 actionId = it->first;
00312 break;
00313 }
00314 i++;
00315 }
00316 }else {
00317
00318 actionId = getMaxFromPA(PA).first;
00319 }
00320 return actionId;
00321 }
00322
00323 std::vector<ClassifierP> XCS::generateActionSet(std::vector<ClassifierP> matchSet, int actionId){
00324 std::vector<ClassifierP> actionSet;
00325 for (uint i = 0; i < matchSet.size(); i++)
00326 if (matchSet[i]->getActionId() == actionId)
00327 actionSet.push_back(matchSet[i]);
00328 return actionSet;
00329 }
00330
00331 void XCS::updateActionSet(std::vector<ClassifierP> actionSet, DemeP deme, double reward, StateP state){
00332
00333 ClassifierP cl;
00334 double sum = 0;
00335 std::vector<double> accuracy;
00336
00337 int numSum = 0;
00338 for (uint i = 0; i < actionSet.size(); ++i)
00339 numSum += actionSet[i]->getNumerosity();
00340
00341
00342 for (uint i = 0; i < actionSet.size(); ++i){
00343 cl = actionSet[i];
00344
00345 double exp = cl->getExperience() + 1;
00346 cl->setExperience(exp);
00347
00348 double eps = cl->getError();
00349 double p = cl->getPrediction();
00350 double as = cl->getActSetSize();
00351
00352 if (exp < 1 / params->beta_) {
00353 p += (reward - p) / exp;
00354 eps += (fabs(reward - p) - eps) / exp;
00355 as += (numSum - as) / exp;
00356 } else {
00357 p += params->beta_ * (reward - p);
00358 eps += params->beta_ * (fabs(reward - p) - eps);
00359 as += params->beta_ * (numSum - as);
00360 }
00361
00362 if (eps < params->eps0_)
00363 accuracy.push_back(1);
00364 else
00365 accuracy.push_back( params->alpha_ * pow(eps / params->eps0_, -params->accExp_) );
00366
00367 sum += accuracy[i] * cl->getNumerosity();
00368
00369 cl->setPrediction(p);
00370 cl->setError(eps);
00371 cl->setActSetSize(as);
00372 }
00373
00374
00375 for (uint i = 0; i < actionSet.size(); ++i){
00376
00377 cl = actionSet[i];
00378
00379 double F = cl->getFitness();
00380 F += params->beta_ * (accuracy[i] * cl->getNumerosity() / sum - F) ;
00381 cl->setFitness(F);
00382 }
00383
00384 actionSetSubsumption(&actionSet, deme, state);
00385
00386 }
00387
00388 double XCS::getAsTimeSum(std::vector<ClassifierP> as) {
00389
00390 double sumNum = 0, sumTsNum = 0;
00391 for (uint i = 0; i < as.size(); ++i){
00392 sumTsNum += as[i]->getTimeStamp() * as[i]->getNumerosity();
00393 sumNum += as[i]->getNumerosity();
00394 }
00395
00396 return sumTsNum / sumNum;
00397 }
00398
00399 void XCS::runGA(std::vector<ClassifierP> actionSet, GenotypeP genInput, DemeP deme, StateP state){
00400
00401 if (time - getAsTimeSum(actionSet) < params->thresholdGA_) return;
00402
00403 std::vector<IndividualP> tournament, vActionSet;
00404
00405 for (uint i = 0; i < actionSet.size(); i++) {
00406 if (!actionSet[i]->valid) continue;
00407 vActionSet.push_back(actionSet[i]->ind);
00408 actionSet[i]->setTimeStamp(time);
00409 }
00410 if (vActionSet.size() < 1) return;
00411
00412 IndividualP parent[2];
00413 parent[0] = selFitPropOp->select(vActionSet);
00414 parent[1] = selFitPropOp->select(vActionSet);
00415
00416 ClassifierP clParent[2];
00417 clParent[0] = populationSet[parent[0]->index];
00418 clParent[1] = populationSet[parent[1]->index];
00419
00420 assert(clParent[0]->getActionId() == clParent[1]->getActionId());
00421 ClassifierP clChild[2];
00422
00423 for (int i = 0; i < 2; ++i) {
00424
00425 clChild[i] = static_cast<ClassifierP> (new Classifier(clParent[i]));
00426
00427 clChild[i]->setNumerosity(1);
00428 clChild[i]->setExperience(0);
00429
00430 double f = clChild[i]->getFitness();
00431
00432 if (state->getRandomizer()->getRandomDouble() < params->pCrossover_) {
00433 mate(parent[0], parent[1], clChild[i]->ind);
00434
00435 clChild[i]->setAction(clParent[0]->getAction());
00436
00437 assert(clChild[i]->getActionId() == clParent[i]->getActionId());
00438
00439 clChild[i]->setPrediction((clParent[0]->getPrediction() + clParent[1]->getPrediction()) / 2);
00440 clChild[i]->setError((clParent[0]->getError() + clParent[1]->getError()) / 2);
00441
00442 f = (clParent[0]->getFitness() + clParent[1]->getFitness()) / 2;
00443 }
00444
00445 clChild[i]->setFitness( 0.1 * f);
00446
00447 clChild[i]->mutateRule(genInput, state);
00448 clChild[i]->mutateAction(state);
00449
00450 if (clParent[0]->doesSubsume(clChild[i])) {
00451 clParent[0]->setNumerosity(clParent[0]->getNumerosity() + 1);
00452 } else if (clParent[1]->doesSubsume(clChild[i])) {
00453 clParent[1]->setNumerosity(clParent[1]->getNumerosity() + 1);
00454 } else {
00455 clChild[i]->ind->index = (uint)deme->size();
00456 populationSet.push_back(clChild[i]);
00457 deme->push_back(clChild[i]->ind);
00458 }
00459 assert(deme->size() == populationSet.size());
00460
00461 deleteFromPopulation(state, deme);
00462 }
00463 }
00464
00465 std::pair<int, double> XCS::getMaxFromPA(std::map<int, double> PA){
00466
00467 if (PA.empty()) return std::make_pair(-1, -1.);
00468
00469 std::pair<int, double> maxPA = *PA.begin();
00470 for (std::map<int, double>::iterator it = PA.begin(); it != PA.end(); ++it){
00471 if (it->second > maxPA.second)
00472 maxPA = *it;
00473 }
00474 return maxPA;
00475 }
00476
00477 void XCS::deleteFromPopulation(StateP state, DemeP deme) {
00478
00479 int sumNum = 0;
00480 double sumFit = 0;
00481
00482 for ( uint i = 0; i < populationSet.size(); ++i) {
00483 sumNum += populationSet[i]->getNumerosity();
00484 sumFit += populationSet[i]->getFitness();
00485 }
00486 if (sumNum <= (int) params->popSize_) return;
00487
00488 double avFitInPop = sumFit / sumNum;
00489 double sumVote = 0;
00490
00491 for ( uint i = 0; i < populationSet.size(); ++i) {
00492 sumVote += populationSet[i]->getDeletionVote(avFitInPop);
00493 }
00494 double choicePoint = state->getRandomizer()->getRandomDouble() * sumVote;
00495 sumVote = 0;
00496 for ( uint i = 0; i < populationSet.size(); ++i) {
00497 ClassifierP cl = populationSet[i];
00498 sumVote += cl->getDeletionVote(avFitInPop);
00499 if (sumVote > choicePoint) {
00500 int n = cl->getNumerosity();
00501 if (n > 1){
00502 cl->setNumerosity(n-1);
00503 } else {
00504 removeFromPopSet(cl, deme);
00505 }
00506 return;
00507 }
00508 }
00509 assert(deme->size() == populationSet.size());
00510
00511 }
00512
00513 void XCS::removeFromPopSet(ClassifierP cl, DemeP deme){
00514
00515 IndividualP lastInd = deme->at(deme->size()-1);
00516 ClassifierP lastCl = populationSet[populationSet.size()-1];
00517
00518 lastInd->index = cl->ind->index;
00519
00520 populationSet[cl->ind->index] = lastCl;
00521 populationSet.erase(populationSet.begin() + populationSet.size() - 1);
00522
00523 deme->at(cl->ind->index) = lastInd;
00524 deme->erase(deme->begin() + deme->size() - 1);
00525
00526 assert(deme->size() == populationSet.size());
00527 cl->valid = false;
00528 }
00529
00530 void XCS::actionSetSubsumption(std::vector<ClassifierP> *actionSet, DemeP deme, StateP state){
00531 ClassifierP cl;
00532 bool clSet = false;
00533
00534 for (uint i = 0; i < actionSet->size(); ++i){
00535
00536 ClassifierP c = actionSet->at(i);
00537 if (c->couldSubsume()){
00538
00539 int clDcb = c->numOfDCBits();
00540 double rnd = state->getRandomizer()->getRandomDouble();
00541 if (!clSet || clDcb > cl->numOfDCBits() || (clDcb == cl->numOfDCBits() && rnd < 0.5 )) {
00542 cl = c;
00543 clSet = true;
00544 }
00545 }
00546 }
00547 if (clSet) {
00548 for (uint i = 0; i < actionSet->size(); ++i){
00549 ClassifierP c = actionSet->at(i);
00550 if (cl->isMoreGeneral(c)){
00551 cl->setNumerosity(cl->getNumerosity() + c->getNumerosity());
00552 actionSet->erase(actionSet->begin() + i);
00553 removeFromPopSet(c,deme);
00554 }
00555 }
00556 }
00557 }