#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion License Information

using System;
using System.Collections.Generic;
using System.Linq;
using HEAL.Attic;
using HeuristicLab.Algorithms.DataAnalysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.IntegerVectorEncoding;
using HeuristicLab.Encodings.RealVectorEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Problems.Modifiers {
  /// <summary>
  /// Problem modifier that records every evaluated individual in a dataset and, on each
  /// analysis step, trains one regression model per objective on the rows known before
  /// the current iteration, using the rows added since then as the test partition.
  /// </summary>
  [StorableType("A0E33EDB-04F6-48B6-BB10-7E3753841AEA")]
  [Item("AnalysisRunningPredictionQualityProblemModifier", " A problem modifier that provides extended Analysis by creating running models (models trained on the evaluations of previous iterations) and analyzing their performance over time")]
  public class AnalysisRunningPredictionQualityProblemModifier : ProblemModifier {
    #region Properties
    // One row per accepted evaluation: input columns X0..Xn followed by target columns Y0..Ym.
    [Storable]
    private ModifiableDataset data;
    // Maps the identifier of an input vector to all quality vectors already stored for it.
    [Storable]
    private Dictionary<string, List<double[]>> evaluationsLookUp;
    // (inputs, qualities) pairs evaluated since the last call to ModifiedAnalyze.
    [Storable]
    private List<Tuple<double[], double[]>> evaluatedThisIteration;
    // (inputs, qualities) pairs of the population passed to the last ModifiedAnalyze call.
    [Storable]
    private List<Tuple<double[], double[]>> lastPopulation;
    // Number of completed ModifiedAnalyze calls since Initialize.
    [Storable]
    private int iteration;
    // Number of dataset rows that existed at the end of the previous ModifiedAnalyze call.
    [Storable]
    private int trainingLength;

    public const string ModelBuilderParameterName = "ModelBuilder";
    public IValueParameter<IAlgorithm> ModelBuilderParameter => (IValueParameter<IAlgorithm>)Parameters[ModelBuilderParameterName];
    public IAlgorithm ModelBuilder => ModelBuilderParameter.Value;
    #endregion

    [StorableConstructor]
    protected AnalysisRunningPredictionQualityProblemModifier(StorableConstructorFlag _) : base(_) { }

    /// <summary>
    /// Cloning constructor. Copies the recorded evaluations so that the clone does not share
    /// arrays or lists with <paramref name="original"/>.
    /// </summary>
    protected AnalysisRunningPredictionQualityProblemModifier(AnalysisRunningPredictionQualityProblemModifier original, Cloner cloner) : base(original, cloner) {
      data = cloner.Clone(original?.data);
      // The collections are null until Initialize() ran (or on older instances), so every copy is null-safe.
      evaluationsLookUp = original?.evaluationsLookUp?.ToDictionary(e => e.Key, e => e.Value.Select(o => o.ToArray()).ToList());
      iteration = original?.iteration ?? 0;
      trainingLength = original?.trainingLength ?? 0;
      evaluatedThisIteration = CopyEvaluations(original?.evaluatedThisIteration);
      lastPopulation = CopyEvaluations(original?.lastPopulation);
      // Only add the parameter when it is missing (e.g. the original was created before the
      // parameter was registered in the default constructor); adding it unconditionally
      // would register the same parameter name twice.
      if (!Parameters.ContainsKey(ModelBuilderParameterName))
        Parameters.Add(new ValueParameter<IAlgorithm>(ModelBuilderParameterName, "The model builder", new GaussianProcessRegression()));
    }

    /// <summary>
    /// Creates an empty modifier and registers the model builder parameter.
    /// </summary>
    protected AnalysisRunningPredictionQualityProblemModifier() {
      evaluationsLookUp = new Dictionary<string, List<double[]>>();
      evaluatedThisIteration = new List<Tuple<double[], double[]>>();
      lastPopulation = new List<Tuple<double[], double[]>>();
      Parameters.Add(new ValueParameter<IAlgorithm>(ModelBuilderParameterName, "The model builder", new GaussianProcessRegression()));
    }

    /// <summary>
    /// Discards all recorded evaluations and resets the iteration counters.
    /// </summary>
    public override void Initialize() {
      data = new ModifiableDataset();
      if (evaluationsLookUp == null) evaluationsLookUp = new Dictionary<string, List<double[]>>();
      evaluationsLookUp.Clear();
      iteration = 0;
      trainingLength = 0;
      evaluatedThisIteration = new List<Tuple<double[], double[]>>();
      lastPopulation = new List<Tuple<double[], double[]>>();
    }

    /// <summary>
    /// Builds one running model per objective and stores them in <paramref name="results"/>
    /// under "Running Models". Training rows are those known at the previous call; test rows
    /// are the ones added since then.
    /// </summary>
    /// <param name="individuals">The current population.</param>
    /// <param name="qualities">The quality vector of each individual; must contain at least one entry.</param>
    /// <param name="results">The result collection to update.</param>
    /// <param name="random">Random number generator used to seed the model builder.</param>
    public override void ModifiedAnalyze(Individual[] individuals, double[][] qualities, ResultCollection results, IRandom random) {
      var objectives = qualities.First().Length;
      var models = new ResultCollection(objectives);
      for (var i = 0; i < objectives; i++) {
        var pd = new RegressionProblemData(data, data.VariableNames.Where(v => v.Contains("X")), TargetVariableName(i));
        pd.TrainingPartition.Start = 0;
        pd.TrainingPartition.End = pd.TestPartition.Start = trainingLength;
        pd.TestPartition.End = data.Rows;
        models.AddOrUpdateResult(TargetVariableName(i), BuildRunningModel(pd, random));
      }
      results.AddOrUpdateResult("Running Models", models);

      // Everything recorded so far becomes training data for the next call.
      trainingLength = data.Rows;
      lastPopulation = individuals.Zip(qualities, (i, q) => Tuple.Create(ExtractInputs(i), q)).ToList();
      evaluatedThisIteration.Clear();
      iteration++;
      base.ModifiedAnalyze(individuals, qualities, results, random);
    }

    /// <summary>
    /// Evaluates the individual via the base implementation and records the result in the dataset.
    /// </summary>
    /// <returns>The qualities returned by the base implementation, unchanged.</returns>
    public override double[] ModifiedEvaluate(Individual individual, IRandom random) {
      var q = base.ModifiedEvaluate(individual, random);
      lock (data) {
        ExtendDatasetWithoutDuplicates(new[] { individual }, new[] { q });
        evaluatedThisIteration.Add(Tuple.Create(ExtractInputs(individual), q.ToArray()));
      }
      return q;
    }

    /// <summary>
    /// Runs the configured model builder on <paramref name="pd"/> and returns the single
    /// regression solution found in its results.
    /// </summary>
    /// <returns>The solution, or null when there is no training data or the model builder fails.</returns>
    private IRegressionSolution BuildRunningModel(RegressionProblemData pd, IRandom random) {
      if (pd.TrainingPartition.Size <= 0) return null;
      try {
        ModelBuilder.Problem = new RegressionProblem() { ProblemData = pd };
        // Make the run reproducible from the supplied random source where the algorithm supports seeding.
        if (ModelBuilder.Parameters.ContainsKey("Seed") && (ModelBuilder.Parameters["Seed"] is IValueParameter<IntValue> seedParam))
          seedParam.Value.Value = random.Next();
        if (ModelBuilder.Parameters.ContainsKey("SetSeedRandomly") && (ModelBuilder.Parameters["SetSeedRandomly"] is IValueParameter<BoolValue> setSeedParam))
          setSeedParam.Value.Value = false;
        ModelBuilder.Start();
        var res = ModelBuilder.Results.Select(x => x.Value).OfType<IRegressionSolution>().Single();
        ModelBuilder.Prepare();
        ModelBuilder.Runs.Clear();
        return res;
      } catch (Exception) {
        // Deliberately best-effort: a failed model must not abort the analysis.
        return null;
      }
    }

    /// <summary>
    /// Creates a copy of the given (inputs, qualities) pairs with freshly allocated arrays.
    /// </summary>
    /// <returns>The copy, or null when <paramref name="source"/> is null.</returns>
    private static List<Tuple<double[], double[]>> CopyEvaluations(IEnumerable<Tuple<double[], double[]>> source) {
      return source?.Select(x => Tuple.Create(x.Item1.ToArray(), x.Item2.ToArray())).ToList();
    }

    #region DataHandling
    /// <summary>
    /// Appends one dataset row per individual, skipping evaluations with NaN, infinite or
    /// extreme qualities and evaluations whose inputs and qualities were already recorded.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// Thrown when the number of inputs plus qualities differs from the number of dataset columns.
    /// </exception>
    private void ExtendDatasetWithoutDuplicates(IReadOnlyList<Individual> individuals, IReadOnlyList<double[]> qualities) {
      if (data.Rows == 0) {
        // First evaluation: create the input and target columns.
        for (var i = 0; i < ExtractInputs(individuals[0]).Length; i++) {
          var v = InputVariableName(i);
          if (!data.DoubleVariables.Contains(v)) data.AddVariable(v, new List<double>());
        }
        for (var i = 0; i < qualities[0].Length; i++) {
          var v = TargetVariableName(i);
          if (!data.DoubleVariables.Contains(v)) data.AddVariable(v, new List<double>());
        }
      }
      for (var i = 0; i < individuals.Count; i++) {
        var ins = ExtractInputs(individuals[i]);
        var id = ToIdentifier(ins);
        var outs = qualities[i];
        if (outs.Any(x => double.IsNaN(x) || double.IsInfinity(x) || double.MaxValue / 100 < x || double.MinValue / 100 > x)) continue;
        // Arrays must be compared element-wise; object.Equals on arrays only compares references
        // and would never recognise a repeated evaluation.
        if (evaluationsLookUp.TryGetValue(id, out var known) && known.Any(o => o.SequenceEqual(outs))) continue;
        if (ins.Length + outs.Length != data.DoubleVariables.Count()) throw new ArgumentException("length of individuals and outputs does not match existing data");
        data.AddRow(ins.Concat(outs).Select(x => (object)x));
        if (known == null) evaluationsLookUp.Add(id, new List<double[]>() { outs });
        else known.Add(outs);
      }
    }

    /// <summary>
    /// Converts the encoded solution of a single-encoding individual to a double array.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// Thrown for multi-encoding individuals and for encodings other than real or integer vectors.
    /// </exception>
    private static double[] ExtractInputs(Individual individual) {
      if (!(individual is SingleEncodingIndividual si)) throw new ArgumentException("Multi encodings are not supported with this problem modifier");
      switch (si[si.Name]) {
        case RealVector rv:
          return rv.CloneAsArray();
        case IntegerVector iv:
          return iv.Select(i => (double)i).ToArray();
        default:
          throw new ArgumentException("Only Integer and Real Vector Individuals can be transformed to input values");
      }
    }
    #endregion DataHandling

    #region Naming
    /// <summary>Joins the input values with ';' to form a lookup key.</summary>
    public static string ToIdentifier(IEnumerable<double> inputs) {
      return string.Join(";", inputs);
    }

    /// <summary>Builds the lookup key for the inputs encoded in the given individual.</summary>
    public static string ToIdentifier(Individual i) {
      return string.Join(";", ExtractInputs(i));
    }

    /// <summary>Returns the dataset column name of the objective with the given index ("Y" + index).</summary>
    public static string TargetVariableName(int targetNumber) {
      return "Y" + targetNumber;
    }

    /// <summary>Returns the dataset column name of the input with the given index ("X" + index).</summary>
    public static string InputVariableName(int inputNumber) {
      return "X" + inputNumber;
    }
    #endregion Naming

    public override IDeepCloneable Clone(Cloner cloner) {
      return new AnalysisRunningPredictionQualityProblemModifier(this, cloner);
    }
  }
}