#region License Information
/* HeuristicLab
* Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion License Information
using System;
using System.Collections.Generic;
using System.Linq;
using HEAL.Attic;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.IntegerVectorEncoding;
using HeuristicLab.Encodings.RealVectorEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Problems.Modifiers {
[StorableType("867E0908-9DD4-4924-BB31-10B81B006BE4")]
[Item("ModelBasedEvaluationRemoverProblemModifier", " A problem modifier that provides extended Analysis by creating running models (models trained on the evaluations of previous iterations) and analyzing their performance over time")]
public abstract class ModelBasedEvaluationRemoverProblemModifier : ProblemModifier {
// Monitor object locked by ModifiedEvaluate, ModifiedAnalyze and InitializeDataCollection
// while they touch the fields below.
private readonly object locker = new object();

#region Properties
// All evaluations performed so far: one row per evaluation, with input columns named by
// InputVariableName and objective columns named by TargetVariableName.
[Storable]
protected ModifiableDataset data;

// Maps ToIdentifier(inputs) to the quality vectors already stored in data for those inputs.
[Storable]
protected Dictionary<string, List<double[]>> evaluationsLookUp;

// (inputs, qualities) of every evaluation really performed since the last ModifiedAnalyze call.
[Storable]
protected List<Tuple<double[], double[]>> evaluatedThisIteration;

// (inputs, qualities, model estimates) of the individuals passed to the last ModifiedAnalyze call.
[Storable]
protected List<Tuple<double[], double[], double[]>> lastPopulation;

// One regression solution per objective; rebuilt by every ModifiedAnalyze call (null before the first).
[Storable]
protected List<IRegressionSolution> solutions;

// Counters, plots and models produced by this modifier; published as the "ModelingResults" result.
[Storable]
protected ResultCollection modelingResults;

// Number of ModifiedAnalyze calls completed so far.
[Storable]
protected int iteration;
#endregion
#region constructors
/// <summary>
/// Constructor marked with <c>[StorableConstructor]</c>; it only forwards the flag to the base class.
/// </summary>
[StorableConstructor]
protected ModelBasedEvaluationRemoverProblemModifier(StorableConstructorFlag _)
  : base(_) {
}
/// <summary>
/// Cloning constructor: items go through the <paramref name="cloner"/>, plain collections are
/// copied element by element so the clone never shares arrays with <paramref name="original"/>.
/// </summary>
/// <param name="original">The instance to copy from.</param>
/// <param name="cloner">The cloner used for the dataset, the models and the result collection.</param>
protected ModelBasedEvaluationRemoverProblemModifier(ModelBasedEvaluationRemoverProblemModifier original, Cloner cloner)
  : base(original, cloner) {
  // Fresh copy of a value array.
  double[] Copy(double[] values) => values.ToArray();

  data = cloner.Clone(original?.data);
  evaluationsLookUp = original?.evaluationsLookUp.ToDictionary(
    entry => entry.Key,
    entry => entry.Value.Select(Copy).ToList());
  evaluatedThisIteration = original?.evaluatedThisIteration
    .Select(evaluation => Tuple.Create(Copy(evaluation.Item1), Copy(evaluation.Item2)))
    .ToList();
  lastPopulation = original?.lastPopulation
    .Select(member => Tuple.Create(Copy(member.Item1), Copy(member.Item2), Copy(member.Item3)))
    .ToList();
  // solutions is the only collection that is additionally null-checked itself.
  solutions = original?.solutions?.Select(cloner.Clone).ToList();
  iteration = original?.iteration ?? 0;
  modelingResults = cloner.Clone(original?.modelingResults);
}
/// <summary>Creates a modifier whose data structures are set up by InitializeDataCollection.</summary>
protected ModelBasedEvaluationRemoverProblemModifier() => InitializeDataCollection();
#endregion
#region ProblemModifier
/// <summary>
/// Rebuilds one running model per objective from all evaluations collected so far, records the
/// evaluation statistics of the finished iteration, stores the current population together with
/// the model estimates in <c>lastPopulation</c> and finally forwards to the base implementation.
/// </summary>
/// <param name="individuals">The current population.</param>
/// <param name="qualities">Quality vectors, one per individual (same order as <paramref name="individuals"/>).</param>
/// <param name="results">Result collection that receives the "ModelingResults" entry.</param>
/// <param name="random">Random number generator handed on to <c>BuildRunningModel</c> and the base class.</param>
public override void ModifiedAnalyze(Individual[] individuals, double[][] qualities, ResultCollection results, IRandom random) {
  // Reads one of the IntValue counters kept in modelingResults as a double for plotting.
  double Counter(string name) => ((IntValue)modelingResults[name].Value).Value;

  lock (locker) {
    // An empty population has no objectives to model; do not fail on it.
    var objectiveCount = qualities.Length == 0 ? 0 : qualities[0].Length;

    solutions = new List<IRegressionSolution>(objectiveCount);
    for (var i = 0; i < objectiveCount; i++) {
      // model building and prediction: train on every collected row, leave the test partition empty
      var pd = new RegressionProblemData(data, data.VariableNames.Where(v => v.Contains("X")), TargetVariableName(i));
      pd.TrainingPartition.Start = 0;
      pd.TrainingPartition.End = pd.TestPartition.Start = data.Rows;
      pd.TestPartition.End = data.Rows;
      solutions.Add(BuildRunningModel(pd, random, i));
    }

    // Evaluations of this iteration whose inputs are part of the population handed to the analyzer.
    var survivors = new HashSet<string>(individuals.Select(ToIdentifier));
    var survivedEvaluations = evaluatedThisIteration.Count(x => survivors.Contains(ToIdentifier(x.Item1)));

    // NOTE: the scatter plot reads the first two objective values of every evaluation.
    AddOrExtendScatterPlot(modelingResults,
      "Real Evaluations",
      "objective 1",
      "objective 2",
      "iteration" + iteration,
      evaluatedThisIteration.Select(x => new Point2D<double>(x.Item2[0], x.Item2[1])).ToArray()
    );
    AddOrExtendDataTable(modelingResults, "Removal Plot", new[] {
      Tuple.Create("Total Removed Evaluations", Counter("Removed Evaluations"), false),
      Tuple.Create("Total Performed Evaluations", Counter("Performed Evaluations"), false),
      Tuple.Create("Removed Evaluations", Counter("Removed Evaluations (current generation)"), false),
      Tuple.Create("Performed Evaluations", Counter("Performed Evaluations (current generation)"), false),
      Tuple.Create("Survived Performed Evaluations", (double)survivedEvaluations, false)
    });

    // The per-generation counters start from zero again for the next iteration.
    modelingResults.AddOrUpdateResult("Removed Evaluations (current generation)", new IntValue(0));
    modelingResults.AddOrUpdateResult("Performed Evaluations (current generation)", new IntValue(0));
    foreach (var regressionSolution in solutions) {
      modelingResults.AddOrUpdateResult("model_" + regressionSolution.ProblemData.TargetVariable, regressionSolution);
    }

    iteration++;
    lastPopulation = individuals.Zip(qualities, (individual, quality) => {
      // Extract the inputs once and reuse them for the stored tuple and for every model's estimate.
      var inputs = ExtractInputs(individual);
      var row = ToDataset(inputs);
      var estimates = solutions.Select(sol => sol.Model.GetEstimatedValues(row, new[] { 0 }).Single()).ToArray();
      return Tuple.Create(inputs, quality, estimates);
    }).ToList();

    // ModifiedEvaluate appends to this list under the same lock, so it must be cleared under it too.
    evaluatedThisIteration.Clear();
  }

  results.AddOrUpdateResult("ModelingResults", modelingResults);
  base.ModifiedAnalyze(individuals, qualities, results, random);
}
/// <summary>
/// Evaluates an individual unless <c>RemoveEvaluation</c> decides to skip it. Skipped individuals
/// receive <c>double.MinValue</c> for maximized and <c>double.MaxValue</c> for minimized objectives;
/// performed evaluations are counted, added to the dataset and remembered for the next analysis.
/// </summary>
/// <param name="individual">The individual to evaluate.</param>
/// <param name="random">Random number generator handed on to <c>RemoveEvaluation</c> and the base class.</param>
/// <returns>The quality vector of the individual.</returns>
public override double[] ModifiedEvaluate(Individual individual, IRandom random) {
  // Adds one to the IntValue counter stored under the given result name.
  void Increment(string counterName) {
    ((IntValue)modelingResults[counterName].Value).Value++;
  }

  var skip = RemoveEvaluation(individual, Maximization.CloneAsArray(), random);
  if (!skip) {
    var quality = base.ModifiedEvaluate(individual, random);
    lock (locker) {
      Increment("Performed Evaluations");
      Increment("Performed Evaluations (current generation)");
      ExtendDatasetWithoutDuplicates(new[] { individual }, new[] { quality });
      // Keep a private copy of the qualities for the per-iteration bookkeeping.
      evaluatedThisIteration.Add(Tuple.Create(ExtractInputs(individual), quality.ToArray()));
    }
    return quality;
  }

  lock (locker) {
    Increment("Removed Evaluations");
    Increment("Removed Evaluations (current generation)");
  }
  return Maximization.Select(maximize => maximize ? double.MinValue : double.MaxValue).ToArray();
}
#endregion
/// <summary>
/// Builds the regression solution for a single objective. ModifiedAnalyze calls this once per
/// objective and stores the returned solutions in <c>solutions</c>.
/// </summary>
/// <param name="pd">
/// Problem data over the collected evaluation dataset; as set up by ModifiedAnalyze the training
/// partition spans all rows, the test partition is empty and the target is TargetVariableName(objectiveNumber).
/// </param>
/// <param name="random">Random number generator passed through from ModifiedAnalyze.</param>
/// <param name="objectiveNumber">Zero-based index of the objective the model is built for.</param>
/// <returns>The regression solution for that objective.</returns>
protected abstract IRegressionSolution BuildRunningModel(RegressionProblemData pd, IRandom random, int objectiveNumber);
/// <summary>
/// Decides whether the real evaluation of an individual is skipped. ModifiedEvaluate calls this
/// before evaluating; when it returns true the individual is not evaluated and receives
/// <c>double.MinValue</c> (maximized objectives) or <c>double.MaxValue</c> (minimized objectives) instead.
/// </summary>
/// <param name="individual">The individual that is about to be evaluated.</param>
/// <param name="maximization">Array copy of <c>Maximization</c>, one flag per objective.</param>
/// <param name="random">Random number generator passed through from ModifiedEvaluate.</param>
/// <returns>True to skip the evaluation, false to perform it.</returns>
protected abstract bool RemoveEvaluation(Individual individual, bool[] maximization, IRandom random);
#region AnalysisHelpers
/// <summary>
/// Appends <paramref name="points"/> to the row <paramref name="rowName"/> of the scatter plot stored
/// under <paramref name="resultName"/>; plot and row are created when they do not exist yet.
/// </summary>
/// <param name="results">Collection that holds (or receives) the scatter plot.</param>
/// <param name="resultName">Name of the result and of a newly created plot.</param>
/// <param name="xLabel">X axis title, applied only when the plot is created.</param>
/// <param name="yLabel">Y axis title, applied only when the plot is created.</param>
/// <param name="rowName">Name of the row to create or extend.</param>
/// <param name="points">Points to add.</param>
private static void AddOrExtendScatterPlot(ResultCollection results, string resultName, string xLabel, string yLabel, string rowName, IList<Point2D<double>> points) {
  ScatterPlot plot;
  if (results.ContainsKey(resultName)) {
    plot = (ScatterPlot)results[resultName].Value;
  } else {
    plot = new ScatterPlot(resultName, "");
    results.Add(new Result(resultName, plot));
    plot.VisualProperties.XAxisTitle = xLabel;
    plot.VisualProperties.YAxisTitle = yLabel;
  }

  // Only build a new row when one is needed instead of creating and discarding it.
  if (plot.Rows.ContainsKey(rowName)) {
    plot.Rows[rowName].Points.AddRange(points);
  } else {
    plot.Rows.Add(new ScatterPlotDataRow(rowName, "", points));
  }
}
/// <summary>
/// Appends one value to each named row of the data table stored under <paramref name="resultName"/>;
/// the table is created (x axis "Iteration", y axis "Evaluations") when it does not exist yet.
/// </summary>
/// <param name="results">Collection that holds (or receives) the data table.</param>
/// <param name="resultName">Name of the result and of a newly created table.</param>
/// <param name="values">Tuples of (row name, value to append, use secondary y axis).</param>
private static void AddOrExtendDataTable(ResultCollection results, string resultName, IReadOnlyList<Tuple<string, double, bool>> values) {
  DataTable plot;
  if (results.ContainsKey(resultName)) {
    plot = (DataTable)results[resultName].Value;
  } else {
    plot = new DataTable(resultName);
    results.Add(new Result(resultName, plot));
    plot.VisualProperties.XAxisTitle = "Iteration";
    plot.VisualProperties.YAxisTitle = "Evaluations";
  }

  foreach (var value in values) {
    AddOrExtendRow(plot, value.Item1, value.Item2, value.Item3);
  }
}
/// <summary>
/// Appends <paramref name="d"/> to the row <paramref name="rowName"/> of <paramref name="plot"/>,
/// creating the row first when the table does not contain it.
/// </summary>
/// <param name="plot">The data table to extend.</param>
/// <param name="rowName">Name of the row to create or extend.</param>
/// <param name="d">The value to append.</param>
/// <param name="secondary">Whether the row is drawn against the second y axis.</param>
private static void AddOrExtendRow(DataTable plot, string rowName, double d, bool secondary = false) {
  DataRow target;
  if (!plot.Rows.ContainsKey(rowName)) {
    target = new DataRow(rowName);
    plot.Rows.Add(target);
  } else {
    target = plot.Rows[rowName];
  }

  target.Values.Add(d);
  // The axis flag is (re)applied on every call, not only on creation.
  target.VisualProperties.SecondYAxis = secondary;
}
#endregion
#region DataHandling
/// <summary>
/// Resets all bookkeeping: empty dataset, empty per-iteration lists, iteration 0, an empty
/// evaluation lookup and a fresh result collection with the four evaluation counters set to zero.
/// </summary>
private void InitializeDataCollection() {
  lock (locker) {
    evaluatedThisIteration = new List<Tuple<double[], double[]>>();
    lastPopulation = new List<Tuple<double[], double[], double[]>>();
    iteration = 0;

    modelingResults = new ResultCollection();
    modelingResults.AddOrUpdateResult("Removed Evaluations", new IntValue(0));
    modelingResults.AddOrUpdateResult("Performed Evaluations", new IntValue(0));
    modelingResults.AddOrUpdateResult("Removed Evaluations (current generation)", new IntValue(0));
    modelingResults.AddOrUpdateResult("Performed Evaluations (current generation)", new IntValue(0));

    data = new ModifiableDataset();

    // An existing lookup instance is emptied in place rather than replaced.
    if (evaluationsLookUp == null) {
      evaluationsLookUp = new Dictionary<string, List<double[]>>();
    } else {
      evaluationsLookUp.Clear();
    }
  }
}
/// <summary>
/// Adds the given evaluations as rows to <c>data</c>. Evaluations with unusable quality values
/// and evaluations whose inputs and qualities are already stored are skipped.
/// </summary>
/// <param name="individuals">The evaluated individuals.</param>
/// <param name="qualities">The quality vectors, one per individual (same order).</param>
/// <exception cref="ArgumentException">
/// The number of inputs plus qualities does not match the number of columns of the dataset.
/// </exception>
private void ExtendDatasetWithoutDuplicates(IReadOnlyList<Individual> individuals, IReadOnlyList<double[]> qualities) {
  if (individuals.Count == 0) return;

  // The columns are created lazily from the first evaluation while the dataset has no rows.
  if (data.Rows == 0) {
    for (var i = 0; i < ExtractInputs(individuals[0]).Length; i++) {
      var v = InputVariableName(i);
      if (!data.DoubleVariables.Contains(v))
        data.AddVariable(v, new List<double>());
    }
    for (var i = 0; i < qualities[0].Length; i++) {
      var v = TargetVariableName(i);
      if (!data.DoubleVariables.Contains(v))
        data.AddVariable(v, new List<double>());
    }
  }

  for (var i = 0; i < individuals.Count; i++) {
    var ins = ExtractInputs(individuals[i]);
    var outs = qualities[i];

    // Skip evaluations with NaN, infinite or out-of-range quality values.
    if (outs.Any(x => double.IsNaN(x) || double.IsInfinity(x) || double.MaxValue / 100 < x || double.MinValue / 100 > x || x > 100000)) continue;

    // Compare quality vectors element-wise; Equals on arrays only compares references and
    // therefore never recognised a duplicate.
    var id = ToIdentifier(ins);
    var known = evaluationsLookUp.TryGetValue(id, out var storedOuts);
    if (known && storedOuts.Any(o => o.SequenceEqual(outs))) continue;

    if (ins.Length + outs.Length != data.DoubleVariables.Count()) throw new ArgumentException("length of individuals and outputs does not match existing data");
    data.AddRow(ins.Concat(outs).Select(x => (object)x));

    if (known) {
      storedOuts.Add(outs);
    } else {
      evaluationsLookUp.Add(id, new List<double[]> { outs });
    }
  }
}
/// <summary>
/// Wraps one input vector into a single-row dataset whose columns are named by InputVariableName.
/// </summary>
/// <param name="ins">The input values; element i becomes the only value of column InputVariableName(i).</param>
/// <returns>A dataset with one column per input value and exactly one row.</returns>
protected static Dataset ToDataset(double[] ins) {
  var names = ins.Select((value, index) => InputVariableName(index));
  var columns = ins.Select(value => new List<double> { value });
  return new Dataset(names, columns);
}
/// <summary>
/// Converts the encoded solution of a single-encoding individual into a double array.
/// </summary>
/// <param name="individual">An individual holding a real vector or an integer vector.</param>
/// <returns>A new array with the vector's components as doubles.</returns>
/// <exception cref="ArgumentException">
/// The individual is not a single-encoding individual, or its value is neither a real nor an integer vector.
/// </exception>
protected static double[] ExtractInputs(Individual individual) {
  var single = individual as SingleEncodingIndividual;
  if (single is null) throw new ArgumentException("Multi encodings are not supported with this problem modifier");

  var encoded = single[single.Name];
  if (encoded is RealVector reals) return reals.CloneAsArray();
  if (encoded is IntegerVector integers) return integers.Select(component => (double)component).ToArray();

  throw new ArgumentException("Only Integer and Real Vector Individuals can be transformed to input values");
}
#endregion DataHandling
#region Naming
/// <summary>Builds the lookup key of an input vector: its values joined by ';'.</summary>
/// <param name="inputs">The input values.</param>
/// <returns>The values' string representations separated by semicolons.</returns>
public static string ToIdentifier(double[] inputs) => string.Join<double>(";", inputs);
/// <summary>Builds the lookup key of an individual from its extracted input values.</summary>
/// <param name="i">The individual; see ExtractInputs for the supported encodings.</param>
/// <returns>The input values' string representations separated by semicolons.</returns>
public static string ToIdentifier(Individual i) => ToIdentifier(ExtractInputs(i));
/// <summary>Name of the dataset column that stores the given objective: "Y" followed by its number.</summary>
/// <param name="targetNumber">Number of the objective.</param>
/// <returns>The column name, e.g. "Y0".</returns>
public static string TargetVariableName(int targetNumber) => $"Y{targetNumber}";
/// <summary>Name of the dataset column that stores the given input: "X" followed by its number.</summary>
/// <param name="inputNumber">Number of the input dimension.</param>
/// <returns>The column name, e.g. "X0".</returns>
public static string InputVariableName(int inputNumber) => $"X{inputNumber}";
#endregion Naming
}
}