#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion License Information

using System;
using System.Collections.Generic;
using System.Linq;
using HEAL.Attic;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.IntegerVectorEncoding;
using HeuristicLab.Encodings.RealVectorEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Problems.Modifiers {
  /// <summary>
  /// Problem modifier that lets a subclass veto individual evaluations via
  /// <see cref="RemoveEvaluation"/>. A vetoed evaluation is not forwarded to the base
  /// modifier; instead <c>double.MinValue</c> (maximized objectives) or
  /// <c>double.MaxValue</c> (all other objectives) is returned. Performed evaluations are
  /// counted and appended to a dataset, from which <see cref="ModifiedAnalyze"/> builds one
  /// regression model per objective through <see cref="BuildRunningModel"/>.
  /// </summary>
  [StorableType("867E0908-9DD4-4924-BB31-10B81B006BE4")]
  [Item("ModelBasedEvaluationRemoverProblemModifier", " A problem modifier that provides extended Analysis by creating running models (models trained on the evaluations of previous iterations) and analyzing their performance over time")]
  public abstract class ModelBasedEvaluationRemoverProblemModifier : ProblemModifier {
    // Names of the counter results kept in modelingResults.
    private const string RemovedTotalResultName = "Removed Evaluations";
    private const string PerformedTotalResultName = "Performed Evaluations";
    private const string RemovedCurrentResultName = "Removed Evaluations (current generation)";
    private const string PerformedCurrentResultName = "Performed Evaluations (current generation)";

    // Qualities above this bound are not added to the training data.
    private const double MaxAcceptedQuality = 100000;

    // Guards every read-modify-write of the bookkeeping fields below.
    private readonly object locker = new object();

    #region Properties
    /// <summary>Training data: one row per recorded evaluation, columns X0..Xn followed by Y0..Ym.</summary>
    [Storable]
    protected ModifiableDataset data;

    /// <summary>Maps an input identifier (see <see cref="ToIdentifier(double[])"/>) to all quality vectors recorded for it.</summary>
    [Storable]
    protected Dictionary<string, List<double[]>> evaluationsLookUp;

    /// <summary>(inputs, qualities) of every evaluation performed since the last call to <see cref="ModifiedAnalyze"/>.</summary>
    [Storable]
    protected List<Tuple<double[], double[]>> evaluatedThisIteration;

    /// <summary>(inputs, qualities, model predictions) of the population passed to the last <see cref="ModifiedAnalyze"/> call.</summary>
    [Storable]
    protected List<Tuple<double[], double[], double[]>> lastPopulation;

    /// <summary>The most recently built regression solutions, one per objective.</summary>
    [Storable]
    protected List<IRegressionSolution> solutions;

    /// <summary>Counters, plots and models published under the "ModelingResults" result.</summary>
    [Storable]
    protected ResultCollection modelingResults;

    /// <summary>Number of completed <see cref="ModifiedAnalyze"/> calls.</summary>
    [Storable]
    protected int iteration;
    #endregion

    #region constructors
    [StorableConstructor]
    protected ModelBasedEvaluationRemoverProblemModifier(StorableConstructorFlag _) : base(_) { }

    /// <summary>
    /// Cloning constructor. Arrays held in the bookkeeping collections are copied so that the
    /// clone does not share mutable state with <paramref name="original"/>.
    /// </summary>
    protected ModelBasedEvaluationRemoverProblemModifier(ModelBasedEvaluationRemoverProblemModifier original, Cloner cloner) : base(original, cloner) {
      data = cloner.Clone(original?.data);
      // Null-conditional on every collection: a field may legitimately be null (e.g. solutions before the first analysis).
      evaluationsLookUp = original?.evaluationsLookUp?.ToDictionary(e => e.Key, e => e.Value.Select(o => o.ToArray()).ToList());
      evaluatedThisIteration = original?.evaluatedThisIteration?.Select(x => Tuple.Create(x.Item1.ToArray(), x.Item2.ToArray())).ToList();
      lastPopulation = original?.lastPopulation?.Select(x => Tuple.Create(x.Item1.ToArray(), x.Item2.ToArray(), x.Item3.ToArray())).ToList();
      solutions = original?.solutions?.Select(s => cloner.Clone(s)).ToList();
      iteration = original?.iteration ?? 0;
      modelingResults = cloner.Clone(original?.modelingResults);
    }

    protected ModelBasedEvaluationRemoverProblemModifier() {
      InitializeDataCollection();
    }
    #endregion

    #region ProblemModifier
    /// <summary>
    /// Builds one running model per objective from the recorded data, updates the
    /// "Real Evaluations" scatter plot and the "Removal Plot" data table, resets the
    /// per-generation counters, stores the population together with the model predictions
    /// and finally forwards to the base implementation.
    /// </summary>
    /// <param name="individuals">The current population.</param>
    /// <param name="qualities">One quality vector per individual; the first entry determines the number of objectives.</param>
    /// <param name="results">Result collection that receives the "ModelingResults" entry.</param>
    /// <param name="random">Random number generator handed to <see cref="BuildRunningModel"/>.</param>
    public override void ModifiedAnalyze(Individual[] individuals, double[][] qualities, ResultCollection results, IRandom random) {
      // All bookkeeping is mutated under the same lock that ModifiedEvaluate uses, so a
      // concurrent evaluation can neither observe a half-updated state nor add to
      // evaluatedThisIteration while it is being read or cleared.
      lock (locker) {
        solutions = new List<IRegressionSolution>();
        for (var i = 0; i < qualities.First().Length; i++) {
          // model building and prediction
          var pd = new RegressionProblemData(data, data.VariableNames.Where(v => v.Contains("X")), TargetVariableName(i));
          // Train on every recorded row; the test partition is left empty.
          pd.TrainingPartition.Start = 0;
          pd.TrainingPartition.End = pd.TestPartition.Start = data.Rows;
          pd.TestPartition.End = data.Rows;
          solutions.Add(BuildRunningModel(pd, random, i));
        }

        var survivors = new HashSet<string>(individuals.Select(ind => ToIdentifier(ind)));
        var newSurvivors = evaluatedThisIteration.Where(x => survivors.Contains(ToIdentifier(x.Item1))).ToArray();

        AddOrExtendScatterPlot(modelingResults, "Real Evaluations", "objective 1", "objective 2", "iteration" + iteration,
          evaluatedThisIteration.Select(x => new Point2D<double>(x.Item2[0], x.Item2[1])).ToArray());

        AddOrExtendDataTable(modelingResults, "Removal Plot", new[] {
          Tuple.Create("Total Removed Evaluations", (double)CounterValue(RemovedTotalResultName), false),
          Tuple.Create("Total Performed Evaluations", (double)CounterValue(PerformedTotalResultName), false),
          Tuple.Create("Removed Evaluations", (double)CounterValue(RemovedCurrentResultName), false),
          Tuple.Create("Performed Evaluations", (double)CounterValue(PerformedCurrentResultName), false),
          Tuple.Create("Survived Performed Evaluations", (double)newSurvivors.Length, false)
        });

        modelingResults.AddOrUpdateResult(RemovedCurrentResultName, new IntValue(0));
        modelingResults.AddOrUpdateResult(PerformedCurrentResultName, new IntValue(0));

        foreach (var regressionSolution in solutions) {
          modelingResults.AddOrUpdateResult("model_" + regressionSolution.ProblemData.TargetVariable, regressionSolution);
        }

        iteration++;
        lastPopulation = individuals.Zip(qualities, (i, q) => Tuple.Create(
          ExtractInputs(i),
          q,
          solutions.Select(sol => sol.Model.GetEstimatedValues(ToDataset(ExtractInputs(i)), new[] { 0 }).Single()).ToArray()
        )).ToList();

        evaluatedThisIteration.Clear();
      }

      results.AddOrUpdateResult("ModelingResults", modelingResults);
      base.ModifiedAnalyze(individuals, qualities, results, random);
    }

    /// <summary>
    /// Evaluates <paramref name="individual"/> unless <see cref="RemoveEvaluation"/> vetoes it.
    /// </summary>
    /// <returns>
    /// The qualities computed by the base implementation, or - for a removed evaluation -
    /// <c>double.MinValue</c> for each maximized objective and <c>double.MaxValue</c> otherwise.
    /// </returns>
    public override double[] ModifiedEvaluate(Individual individual, IRandom random) {
      if (RemoveEvaluation(individual, Maximization.CloneAsArray(), random)) {
        lock (locker) {
          IncrementCounter(RemovedTotalResultName);
          IncrementCounter(RemovedCurrentResultName);
        }
        return Maximization.Select(x => x ? double.MinValue : double.MaxValue).ToArray();
      }

      // The actual evaluation runs outside the lock; only the bookkeeping is serialized.
      var q = base.ModifiedEvaluate(individual, random);
      lock (locker) {
        IncrementCounter(PerformedTotalResultName);
        IncrementCounter(PerformedCurrentResultName);
        ExtendDatasetWithoutDuplicates(new[] { individual }, new[] { q });
        evaluatedThisIteration.Add(Tuple.Create(ExtractInputs(individual), q.ToArray()));
      }
      return q;
    }
    #endregion

    /// <summary>Builds the regression solution for objective <paramref name="objectiveNumber"/> from <paramref name="pd"/>.</summary>
    protected abstract IRegressionSolution BuildRunningModel(RegressionProblemData pd, IRandom random, int objectiveNumber);

    /// <summary>Decides whether the evaluation of <paramref name="individual"/> is skipped (<c>true</c>) or performed (<c>false</c>).</summary>
    protected abstract bool RemoveEvaluation(Individual individual, bool[] maximization, IRandom random);

    #region AnalysisHelpers
    /// <summary>Reads the integer counter stored under <paramref name="resultName"/> in <see cref="modelingResults"/>.</summary>
    private int CounterValue(string resultName) {
      return ((IntValue)modelingResults[resultName].Value).Value;
    }

    /// <summary>Increments the integer counter stored under <paramref name="resultName"/>; callers hold <see cref="locker"/>.</summary>
    private void IncrementCounter(string resultName) {
      ((IntValue)modelingResults[resultName].Value).Value++;
    }

    /// <summary>
    /// Adds <paramref name="points"/> as row <paramref name="rowName"/> to the scatter plot
    /// <paramref name="resultName"/>, creating the plot (with the given axis titles) if it does
    /// not exist yet and appending to the row if it already exists.
    /// </summary>
    private static void AddOrExtendScatterPlot(ResultCollection results, string resultName, string xLabel, string yLabel, string rowName, IList<Point2D<double>> points) {
      ScatterPlot plot;
      if (results.ContainsKey(resultName)) {
        plot = (ScatterPlot)results[resultName].Value;
      } else {
        plot = new ScatterPlot(resultName, "");
        results.Add(new Result(resultName, plot));
        plot.VisualProperties.XAxisTitle = xLabel;
        plot.VisualProperties.YAxisTitle = yLabel;
      }

      if (plot.Rows.ContainsKey(rowName)) {
        plot.Rows[rowName].Points.AddRange(points);
      } else {
        plot.Rows.Add(new ScatterPlotDataRow(rowName, "", points));
      }
    }

    /// <summary>
    /// Appends one value per (row name, value, use secondary axis) tuple to the data table
    /// <paramref name="resultName"/>, creating the table if it does not exist yet.
    /// </summary>
    private static void AddOrExtendDataTable(ResultCollection results, string resultName, IReadOnlyList<Tuple<string, double, bool>> values) {
      DataTable plot;
      if (results.ContainsKey(resultName)) {
        plot = (DataTable)results[resultName].Value;
      } else {
        plot = new DataTable(resultName);
        results.Add(new Result(resultName, plot));
        plot.VisualProperties.XAxisTitle = "Iteration";
        plot.VisualProperties.YAxisTitle = "Evaluations";
      }

      foreach (var tuple in values) {
        AddOrExtendRow(plot, tuple.Item1, tuple.Item2, tuple.Item3);
      }
    }

    /// <summary>Appends <paramref name="d"/> to row <paramref name="rowName"/> of <paramref name="plot"/>, creating the row if necessary.</summary>
    private static void AddOrExtendRow(DataTable plot, string rowName, double d, bool secondary = false) {
      DataRow row;
      if (plot.Rows.ContainsKey(rowName)) {
        row = plot.Rows[rowName];
      } else {
        row = new DataRow(rowName);
        plot.Rows.Add(row);
      }
      row.Values.Add(d);
      row.VisualProperties.SecondYAxis = secondary;
    }
    #endregion

    #region DataHandling
    /// <summary>Resets all bookkeeping: empty dataset and look-up, zeroed counters, iteration 0.</summary>
    private void InitializeDataCollection() {
      lock (locker) {
        evaluatedThisIteration = new List<Tuple<double[], double[]>>();
        lastPopulation = new List<Tuple<double[], double[], double[]>>();
        modelingResults = new ResultCollection();
        iteration = 0;
        modelingResults.AddOrUpdateResult(RemovedTotalResultName, new IntValue(0));
        modelingResults.AddOrUpdateResult(PerformedTotalResultName, new IntValue(0));
        modelingResults.AddOrUpdateResult(RemovedCurrentResultName, new IntValue(0));
        modelingResults.AddOrUpdateResult(PerformedCurrentResultName, new IntValue(0));
        data = new ModifiableDataset();
        if (evaluationsLookUp == null) {
          evaluationsLookUp = new Dictionary<string, List<double[]>>();
        } else {
          evaluationsLookUp.Clear();
        }
      }
    }

    /// <summary>
    /// Returns true for quality values that are not recorded as training data: NaN, infinities,
    /// values below <c>double.MinValue / 100</c> and values above <see cref="MaxAcceptedQuality"/>.
    /// </summary>
    private static bool IsUnusableQuality(double x) {
      return double.IsNaN(x) || double.IsInfinity(x) || x < double.MinValue / 100 || x > MaxAcceptedQuality;
    }

    /// <summary>
    /// Appends each (individual, qualities) pair as a row to <see cref="data"/> unless the
    /// qualities contain an unusable value or the same inputs were already recorded with
    /// element-wise identical qualities. On an empty dataset the X/Y columns are created first.
    /// </summary>
    /// <exception cref="ArgumentException">The number of inputs plus outputs differs from the number of dataset columns.</exception>
    private void ExtendDatasetWithoutDuplicates(IReadOnlyList<Individual> individuals, IReadOnlyList<double[]> qualities) {
      if (individuals.Count == 0) return;

      if (data.Rows == 0) {
        for (var i = 0; i < ExtractInputs(individuals[0]).Length; i++) {
          var v = InputVariableName(i);
          if (!data.DoubleVariables.Contains(v)) data.AddVariable(v, new List<double>());
        }
        for (var i = 0; i < qualities[0].Length; i++) {
          var v = TargetVariableName(i);
          if (!data.DoubleVariables.Contains(v)) data.AddVariable(v, new List<double>());
        }
      }

      for (var i = 0; i < individuals.Count; i++) {
        var ins = ExtractInputs(individuals[i]);
        var id = ToIdentifier(ins);
        var outs = qualities[i];
        if (outs.Any(IsUnusableQuality)) continue;

        // Arrays must be compared element-wise: Equals(o, outs) on double[] is reference
        // equality and would never recognise a repeated evaluation.
        var alreadySeen = evaluationsLookUp.TryGetValue(id, out var recorded);
        if (alreadySeen && recorded.Any(o => o.SequenceEqual(outs))) continue;

        if (ins.Length + outs.Length != data.DoubleVariables.Count())
          throw new ArgumentException("length of individuals and outputs does not match existing data");

        data.AddRow(ins.Concat(outs).Select(x => (object)x));
        if (alreadySeen) {
          recorded.Add(outs);
        } else {
          evaluationsLookUp.Add(id, new List<double[]> { outs });
        }
      }
    }

    /// <summary>Wraps <paramref name="ins"/> into a single-row dataset with columns X0..Xn.</summary>
    protected static Dataset ToDataset(double[] ins) {
      return new Dataset(ins.Select((d, i1) => InputVariableName(i1)), ins.Select(d => new List<double>() { d }));
    }

    /// <summary>
    /// Converts the encoded solution of <paramref name="individual"/> to a double array.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// The individual is not a <see cref="SingleEncodingIndividual"/>, or its encoding is neither
    /// a <see cref="RealVector"/> nor an <see cref="IntegerVector"/>.
    /// </exception>
    protected static double[] ExtractInputs(Individual individual) {
      if (!(individual is SingleEncodingIndividual si))
        throw new ArgumentException("Multi encodings are not supported with this problem modifier");
      var e = si[si.Name];
      switch (e) {
        case RealVector rv:
          return rv.CloneAsArray();
        case IntegerVector iv:
          return iv.Select(i => (double)i).ToArray();
        default:
          throw new ArgumentException("Only Integer and Real Vector Individuals can be transformed to input values");
      }
    }
    #endregion DataHandling

    #region Naming
    /// <summary>Joins <paramref name="inputs"/> with ';' to form the key used in <see cref="evaluationsLookUp"/>.</summary>
    public static string ToIdentifier(double[] inputs) {
      return string.Join(";", inputs);
    }

    /// <summary>Identifier of the inputs extracted from <paramref name="i"/>; see <see cref="ToIdentifier(double[])"/>.</summary>
    public static string ToIdentifier(Individual i) {
      return ToIdentifier(ExtractInputs(i));
    }

    /// <summary>Dataset column name of objective <paramref name="targetNumber"/> ("Y" + index).</summary>
    public static string TargetVariableName(int targetNumber) {
      return "Y" + targetNumber;
    }

    /// <summary>Dataset column name of input <paramref name="inputNumber"/> ("X" + index).</summary>
    public static string InputVariableName(int inputNumber) {
      return "X" + inputNumber;
    }
    #endregion Naming
  }
}