#region License Information
/* HeuristicLab
* Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion License Information
using System;
using System.Collections.Generic;
using System.Linq;
using HEAL.Attic;
using HeuristicLab.Algorithms.DataAnalysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.IntegerVectorEncoding;
using HeuristicLab.Encodings.RealVectorEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Problems.Modifiers {
  /// <summary>
  /// Problem modifier that records evaluated solutions in a dataset and, on every analysis step,
  /// builds one regression model per objective. Each model is trained on the rows that were present
  /// at the previous analysis step; the rows added since then form the test partition.
  /// </summary>
  [StorableType("A0E33EDB-04F6-48B6-BB10-7E3753841AEA")]
  [Item("AnalysisRunningPredictionQualityProblemModifier", " A problem modifier that provides extended Analysis by creating running models (models trained on the evaluations of previous iterations) and analyzing their performance over time")]
  public class AnalysisRunningPredictionQualityProblemModifier : ProblemModifier {
    #region Properties
    // Collected evaluations: one column per input ("X<i>") and per objective ("Y<i>"), one row per stored evaluation.
    [Storable]
    private ModifiableDataset data;
    // Maps an input identifier (see ToIdentifier) to all quality vectors already stored for these inputs.
    [Storable]
    private Dictionary<string, List<double[]>> evaluationsLookUp;
    // (inputs, qualities) pairs evaluated since the last analysis step.
    [Storable]
    private List<Tuple<double[], double[]>> evaluatedThisIteration;
    // (inputs, qualities) pairs handed to the most recent analysis step.
    [Storable]
    private List<Tuple<double[], double[]>> lastPopulation;
    // Number of analysis steps performed since Initialize.
    [Storable]
    private int iteration;
    // Number of dataset rows at the end of the previous analysis step; used as the train/test boundary.
    [Storable]
    private int trainingLength;

    public const string ModelBuilderParameterName = "ModelBuilder";
    public IValueParameter<IAlgorithm> ModelBuilderParameter => (IValueParameter<IAlgorithm>)Parameters[ModelBuilderParameterName];
    public IAlgorithm ModelBuilder => ModelBuilderParameter.Value;
    #endregion

    [StorableConstructor]
    protected AnalysisRunningPredictionQualityProblemModifier(StorableConstructorFlag _) : base(_) { }

    /// <summary>
    /// Cloning constructor. Copies the collected state; every collection that has not been
    /// created yet on <paramref name="original"/> (e.g. before Initialize) stays null on the clone.
    /// </summary>
    protected AnalysisRunningPredictionQualityProblemModifier(AnalysisRunningPredictionQualityProblemModifier original, Cloner cloner) : base(original, cloner) {
      data = cloner.Clone(original?.data);
      evaluationsLookUp = original?.evaluationsLookUp?.ToDictionary(e => e.Key, e => e.Value.Select(o => o.ToArray()).ToList());
      iteration = original?.iteration ?? 0;
      trainingLength = original?.trainingLength ?? 0;
      evaluatedThisIteration = CopyEvaluations(original?.evaluatedThisIteration);
      lastPopulation = CopyEvaluations(original?.lastPopulation);
      // Only add the parameter when it is not already present on the clone;
      // an unconditional Add would register the same key a second time.
      EnsureModelBuilderParameter();
    }

    protected AnalysisRunningPredictionQualityProblemModifier() {
      evaluationsLookUp = new Dictionary<string, List<double[]>>();
      EnsureModelBuilderParameter();
    }

    /// <summary>Registers the model builder parameter unless a parameter with that name already exists.</summary>
    private void EnsureModelBuilderParameter() {
      if (Parameters.ContainsKey(ModelBuilderParameterName)) return;
      Parameters.Add(new ValueParameter<IAlgorithm>(ModelBuilderParameterName, "The model builder", new GaussianProcessRegression()));
    }

    /// <summary>Copies a list of (inputs, qualities) pairs including the arrays; returns null for a null list.</summary>
    private static List<Tuple<double[], double[]>> CopyEvaluations(List<Tuple<double[], double[]>> source) {
      return source?.Select(x => Tuple.Create(x.Item1.ToArray(), x.Item2.ToArray())).ToList();
    }

    /// <summary>Resets all collected evaluations and counters.</summary>
    public override void Initialize() {
      data = new ModifiableDataset();
      if (evaluationsLookUp == null) evaluationsLookUp = new Dictionary<string, List<double[]>>();
      evaluationsLookUp.Clear();
      iteration = 0;
      trainingLength = 0;
      evaluatedThisIteration = new List<Tuple<double[], double[]>>();
      lastPopulation = new List<Tuple<double[], double[]>>();
    }

    /// <summary>
    /// Builds one running model per objective and stores them in <paramref name="results"/> under
    /// "Running Models", then advances the train/test boundary to the current dataset size.
    /// </summary>
    public override void ModifiedAnalyze(Individual[] individuals, double[][] qualities, ResultCollection results, IRandom random) {
      var objectives = qualities.First().Length;
      var models = new ResultCollection(objectives);
      for (var i = 0; i < objectives; i++) {
        var pd = new RegressionProblemData(data, data.VariableNames.Where(v => v.Contains("X")), TargetVariableName(i));
        // Train on everything known at the previous analysis step, test on the rows added since.
        pd.TrainingPartition.Start = 0;
        pd.TrainingPartition.End = pd.TestPartition.Start = trainingLength;
        pd.TestPartition.End = data.Rows;
        models.AddOrUpdateResult(TargetVariableName(i), BuildRunningModel(pd, random));
      }
      results.AddOrUpdateResult("Running Models", models);
      trainingLength = data.Rows;
      lastPopulation = individuals.Zip(qualities, (i, q) => Tuple.Create(ExtractInputs(i), q)).ToList();
      evaluatedThisIteration.Clear();
      iteration++;
      base.ModifiedAnalyze(individuals, qualities, results, random);
    }

    /// <summary>Evaluates via the base implementation and records the result in the dataset.</summary>
    /// <returns>The qualities returned by the base implementation.</returns>
    public override double[] ModifiedEvaluate(Individual individual, IRandom random) {
      var q = base.ModifiedEvaluate(individual, random);
      // NOTE(review): the lock presumably serializes dataset updates from parallel evaluations - confirm.
      lock (data) {
        ExtendDatasetWithoutDuplicates(new[] { individual }, new[] { q });
        evaluatedThisIteration.Add(Tuple.Create(ExtractInputs(individual), q.ToArray()));
      }
      return q;
    }

    /// <summary>
    /// Runs the configured model builder on <paramref name="pd"/> with a fixed seed drawn from <paramref name="random"/>.
    /// </summary>
    /// <returns>The single regression solution found in the builder's results, or null when the
    /// training partition is empty or model building fails.</returns>
    private IRegressionSolution BuildRunningModel(RegressionProblemData pd, IRandom random) {
      if (pd.TrainingPartition.Size <= 0) return null;
      try {
        ModelBuilder.Problem = new RegressionProblem() { ProblemData = pd };
        if (ModelBuilder.Parameters.ContainsKey("Seed") && (ModelBuilder.Parameters["Seed"] is IValueParameter<IntValue> seedParam)) seedParam.Value.Value = random.Next();
        if (ModelBuilder.Parameters.ContainsKey("SetSeedRandomly") && (ModelBuilder.Parameters["SetSeedRandomly"] is IValueParameter<BoolValue> setSeedParam)) setSeedParam.Value.Value = false;
        ModelBuilder.Start();
        var res = ModelBuilder.Results.Select(x => x.Value).OfType<IRegressionSolution>().Single();
        ModelBuilder.Prepare();
        ModelBuilder.Runs.Clear();
        return res;
      } catch (Exception) {
        // Best effort: a failed model is reported as "no model" instead of aborting the analysis.
        return null;
      }
    }

    #region DataHandling
    /// <summary>
    /// Appends the given evaluations as rows to the dataset. Evaluations with NaN, infinite or
    /// extremely large qualities are skipped, as are evaluations whose inputs and qualities
    /// are both already stored.
    /// </summary>
    /// <exception cref="ArgumentException">The number of inputs plus qualities differs from the number of dataset columns.</exception>
    private void ExtendDatasetWithoutDuplicates(IReadOnlyList<Individual> individuals, IReadOnlyList<double[]> qualities) {
      if (individuals.Count == 0) return;
      if (data.Rows == 0) {
        // First rows: make sure a column exists for every input and every objective.
        for (var i = 0; i < ExtractInputs(individuals[0]).Length; i++) {
          var v = InputVariableName(i);
          if (!data.DoubleVariables.Contains(v))
            data.AddVariable(v, new List<double>());
        }
        for (var i = 0; i < qualities[0].Length; i++) {
          var v = TargetVariableName(i);
          if (!data.DoubleVariables.Contains(v))
            data.AddVariable(v, new List<double>());
        }
      }
      for (var i = 0; i < individuals.Count; i++) {
        var ins = ExtractInputs(individuals[i]);
        var id = ToIdentifier(ins);
        var outs = qualities[i];
        if (outs.Any(x => double.IsNaN(x) || double.IsInfinity(x) || double.MaxValue / 100 < x || double.MinValue / 100 > x)) continue;
        // Compare quality vectors element-wise; Equals on arrays would only compare references
        // and therefore never detect a repeated evaluation.
        if (evaluationsLookUp.TryGetValue(id, out var known) && known.Any(o => o.SequenceEqual(outs))) continue;
        if (ins.Length + outs.Length != data.DoubleVariables.Count()) throw new ArgumentException("length of individuals and outputs does not match existing data");
        data.AddRow(ins.Concat(outs).Select(x => (object)x));
        if (known == null) evaluationsLookUp.Add(id, new List<double[]>() { outs });
        else known.Add(outs);
      }
    }

    /// <summary>Converts a single-encoding real or integer vector individual to an array of doubles.</summary>
    /// <exception cref="ArgumentException">The individual is not a single-encoding individual or holds another vector type.</exception>
    private static double[] ExtractInputs(Individual individual) {
      if (!(individual is SingleEncodingIndividual si)) throw new ArgumentException("Multi encodings are not supported with this problem modifier");
      switch (si[si.Name]) {
        case RealVector rv:
          return rv.CloneAsArray();
        case IntegerVector iv:
          return iv.Select(i => (double)i).ToArray();
        default:
          throw new ArgumentException("Only Integer and Real Vector Individuals can be transformed to input values");
      }
    }
    #endregion DataHandling

    #region Naming
    /// <summary>Joins the input values with ';' to form a lookup key.</summary>
    public static string ToIdentifier(IEnumerable<double> inputs) {
      return string.Join(";", inputs);
    }
    /// <summary>Builds the lookup key for the inputs extracted from <paramref name="i"/>.</summary>
    public static string ToIdentifier(Individual i) {
      return ToIdentifier(ExtractInputs(i));
    }
    /// <summary>Name of the dataset column holding objective <paramref name="targetNumber"/>.</summary>
    public static string TargetVariableName(int targetNumber) {
      return "Y" + targetNumber;
    }
    /// <summary>Name of the dataset column holding input <paramref name="inputNumber"/>.</summary>
    public static string InputVariableName(int inputNumber) {
      return "X" + inputNumber;
    }
    #endregion Naming

    public override IDeepCloneable Clone(Cloner cloner) {
      return new AnalysisRunningPredictionQualityProblemModifier(this, cloner);
    }
  }
}