#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using HeuristicLab.Algorithms.DataAnalysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.RealVectorEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.SAPBA {
  /// <summary>
  /// Static helpers and extension methods of the SAPBA plugin: index-of-extremum
  /// helpers, single-point model queries, synchronous execution of sub-algorithms,
  /// regression model building and conversion of samples into a <see cref="Dataset"/>.
  /// </summary>
  internal static class SapbaUtilities {
    //Extension methods for convenience

    /// <summary>
    /// Returns the zero-based position of the element for which <paramref name="func"/>
    /// yields the largest value.
    /// </summary>
    /// <param name="values">The sequence to scan (enumerated once).</param>
    /// <param name="func">Maps an element to the key that is maximized.</param>
    /// <returns>
    /// The index of the first element with the largest key. 0 is returned for an empty
    /// sequence and when no key compares greater than negative infinity (e.g. all NaN).
    /// </returns>
    public static int ArgMax<T>(this IEnumerable<T> values, Func<T, double> func) {
      // Seed with -infinity rather than double.MinValue: with MinValue as seed a key equal
      // to MinValue could never win against a preceding -infinity key.
      var max = double.NegativeInfinity;
      var maxIdx = 0;
      var idx = 0;
      foreach (var v in values) {
        var d = func.Invoke(v);
        if (d > max) {
          max = d;
          maxIdx = idx;
        }
        idx++;
      }
      return maxIdx;
    }

    /// <summary>
    /// Returns the zero-based position of the element for which <paramref name="func"/>
    /// yields the smallest value. Implemented as <see cref="ArgMax{T}"/> over the negated key,
    /// so the same conventions apply (first match wins, 0 for an empty sequence).
    /// </summary>
    /// <param name="values">The sequence to scan (enumerated once).</param>
    /// <param name="func">Maps an element to the key that is minimized.</param>
    /// <returns>The index of the first element with the smallest key.</returns>
    public static int ArgMin<T>(this IEnumerable<T> values, Func<T, double> func) {
      return ArgMax(values, x => -func.Invoke(x));
    }

    /// <summary>
    /// Evaluates <paramref name="model"/> at the single point <paramref name="r"/>.
    /// </summary>
    /// <param name="model">The regression model to query.</param>
    /// <param name="r">The input vector.</param>
    /// <returns>The first (and only requested) estimated value.</returns>
    public static double GetEstimation(this IRegressionModel model, RealVector r) {
      // The target value 0.0 is only a placeholder needed to build a one-row dataset.
      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
      return model.GetEstimatedValues(dataset, new[] { 0 }).First();
    }

    /// <summary>
    /// Queries the estimated variance of <paramref name="model"/> at the single point
    /// <paramref name="r"/>.
    /// </summary>
    /// <param name="model">The confidence regression model to query.</param>
    /// <param name="r">The input vector.</param>
    /// <returns>The first (and only requested) estimated variance.</returns>
    public static double GetVariance(this IConfidenceRegressionModel model, RealVector r) {
      // The target value 0.0 is only a placeholder needed to build a one-row dataset.
      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
      return model.GetEstimatedVariances(dataset, new[] { 0 }).First();
    }

    /// <summary>
    /// Reads a value from a dataset that uses the "input" + index column naming of
    /// <see cref="GetDataSet"/>.
    /// </summary>
    /// <param name="dataset">The dataset to read from.</param>
    /// <param name="i">Forwarded as the second argument of IDataset.GetDoubleValue (presumably the row index).</param>
    /// <param name="j">Index of the input column; the variable looked up is "input" + j.</param>
    /// <returns>The value returned by the dataset.</returns>
    public static double GetDoubleValue(this IDataset dataset, int i, int j) {
      return dataset.GetDoubleValue("input" + j, i);
    }

    //Sub-Algorithms

    /// <summary>
    /// Prepares and starts <paramref name="alg"/> and blocks the calling thread until the
    /// algorithm raises its Stopped event.
    /// </summary>
    /// <remarks>
    /// If the algorithm exposes both a "SetSeedRandomly" and a "Seed" parameter, random seeding
    /// is switched off and the seed is set to <paramref name="random"/>.
    /// NOTE(review): the wait is released only by the Stopped event; an algorithm that ends a
    /// run without raising Stopped would block this call forever - confirm against IAlgorithm.
    /// </remarks>
    /// <param name="alg">The algorithm to execute.</param>
    /// <param name="random">The seed assigned to the algorithm (if it has seed parameters).</param>
    /// <returns>The result collection of the algorithm after it stopped.</returns>
    /// <exception cref="ArgumentException">The seed parameters do not hold a BoolValue / IntValue.</exception>
    /// <exception cref="Exception">The last exception reported through ExceptionOccurred is rethrown.</exception>
    public static ResultCollection SyncRunSubAlgorithm(IAlgorithm alg, int random) {
      if (alg.Parameters.ContainsKey("SetSeedRandomly") && alg.Parameters.ContainsKey("Seed")) {
        var setSeed = alg.Parameters["SetSeedRandomly"].ActualValue as BoolValue;
        var seed = alg.Parameters["Seed"].ActualValue as IntValue;
        if (seed == null || setSeed == null) throw new ArgumentException("wrong SeedParametertypes");
        setSeed.Value = false;
        seed.Value = random;
      }

      Exception ex = null;
      // The wait handle owns an OS resource, so it is disposed once the run is over.
      using (var trigger = new AutoResetEvent(false)) {
        EventHandler<EventArgs<Exception>> exhandler = (sender, e) => ex = e.Value;
        EventHandler stoppedHandler = (sender, e) => trigger.Set();
        alg.ExceptionOccurred += exhandler;
        alg.Stopped += stoppedHandler;
        try {
          alg.Prepare();
          alg.Start();
          trigger.WaitOne();
        } finally {
          // Always detach, even if Prepare/Start throw, so the handlers (and the disposed
          // wait handle they capture) do not stay attached to the algorithm.
          alg.ExceptionOccurred -= exhandler;
          alg.Stopped -= stoppedHandler;
        }
      }

      // Rethrow the recorded exception with its original stack trace intact.
      if (ex != null) System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ex).Throw();
      return alg.Results;
    }

    /// <summary>
    /// Trains a regression model on <paramref name="samples"/> by running
    /// <paramref name="regressionAlgorithm"/> synchronously.
    /// </summary>
    /// <remarks>
    /// All rows are used for training; the test partition is empty. The algorithm is re-run
    /// (with a new seed each time) until it yields a regression solution, at most 100 times.
    /// If the algorithm is a <see cref="GaussianProcessRegression"/> and
    /// <paramref name="oldSolution"/> is a Gaussian process solution, a second model is built
    /// from the old mean and covariance functions and is used instead when no solution was
    /// found or when its training MSE is lower. The runs of the algorithm are cleared before
    /// returning.
    /// </remarks>
    /// <param name="samples">Pairs of input vector and observed target value.</param>
    /// <param name="regressionAlgorithm">The algorithm used for training; a RegressionProblem is created if it has none.</param>
    /// <param name="random">Source of the seeds handed to the algorithm.</param>
    /// <param name="removeDuplicates">Whether near-identical inputs are merged before training.</param>
    /// <param name="oldSolution">Optional previous solution used for the Gaussian process fallback.</param>
    /// <returns>The trained regression solution.</returns>
    /// <exception cref="ArgumentException">
    /// No solution could be obtained, or the old Gaussian process functions have free parameters.
    /// </exception>
    public static IRegressionSolution BuildModel(IReadOnlyList<Tuple<RealVector, double>> samples, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IRandom random, bool removeDuplicates = true, IRegressionSolution oldSolution = null) {
      var dataset = GetDataSet(samples, removeDuplicates);
      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
      problemdata.TrainingPartition.Start = 0;
      problemdata.TrainingPartition.End = dataset.Rows;
      problemdata.TestPartition.Start = dataset.Rows;
      problemdata.TestPartition.End = dataset.Rows;

      if (regressionAlgorithm.Problem == null) regressionAlgorithm.Problem = new RegressionProblem();
      var problem = regressionAlgorithm.Problem;
      problem.ProblemDataParameter.Value = problemdata;

      var i = 0;
      IRegressionSolution solution = null;
      while (solution == null && i++ < 100) {
        var results = SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue));
        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
      }

      //special treatment for GaussianProcessRegression
      var gp = regressionAlgorithm as GaussianProcessRegression;
      var oldGaussian = oldSolution as GaussianProcessRegressionSolution;
      if (gp != null && oldGaussian != null) {
        const double noise = 0.0;
        var n = samples.First().Item1.Length;
        var mean = (IMeanFunction)oldGaussian.Model.MeanFunction.Clone();
        var cov = (ICovarianceFunction)oldGaussian.Model.CovarianceFunction.Clone();
        // The hyperparameter vector below contains the noise term only, which is valid
        // only if neither function has free parameters.
        if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
        double[] hyp = { noise };
        try {
          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov);
          model.FixParameters();
          var sol = new GaussianProcessRegressionSolution(model, problemdata);
          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) solution = sol;
        } catch (ArgumentException) {
          // Deliberately ignored: the model built from the old functions is only an optional
          // alternative; if it cannot be constructed the freshly trained solution is kept.
        }
      }

      if (solution == null) throw new ArgumentException("The algorithm didn't return a model");
      regressionAlgorithm.Runs.Clear();
      return solution;
    }

    //RegressionModel extensions

    /// <summary>
    /// Maximum Euclidean distance at which two input vectors are treated as duplicates.
    /// </summary>
    public const double DuplicateResolution = 0.000001;

    /// <summary>
    /// Converts samples into a dataset with the columns "input0" ... "input(d-1)" followed by
    /// "output"; every sample becomes one row.
    /// </summary>
    /// <param name="samples">Pairs of input vector and target value; must contain at least one sample.</param>
    /// <param name="removeDuplicates">
    /// Whether inputs closer than <see cref="DuplicateResolution"/> are merged into one row
    /// carrying the average target value.
    /// </param>
    /// <returns>The dataset built from the samples.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="samples"/> is empty.</exception>
    public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples, bool removeDuplicates) {
      if (samples == null) throw new ArgumentNullException("samples");
      // The dimensionality is taken from the first sample, so at least one is required.
      if (samples.Count == 0) throw new ArgumentException("At least one sample is required to build a dataset.", "samples");

      if (removeDuplicates) samples = RemoveDuplicates(samples); //TODO duplicate removal leads to incorrect uncertainty values in models

      var dimensions = samples[0].Item1.Length + 1;
      var data = new double[samples.Count, dimensions];
      var names = new string[dimensions - 1];
      for (var i = 0; i < names.Length; i++) names[i] = "input" + i;

      for (var j = 0; j < samples.Count; j++) {
        for (var i = 0; i < names.Length; i++) data[j, i] = samples[j].Item1[i];
        // The last column holds the target value.
        data[j, dimensions - 1] = samples[j].Item2;
      }

      return new Dataset(names.Concat(new[] { "output" }).ToArray(), data);
    }

    /// <summary>
    /// Merges samples whose input lies within <see cref="DuplicateResolution"/> of an already
    /// collected input. A merged entry keeps the input vector of its first occurrence and the
    /// arithmetic mean of all merged target values.
    /// </summary>
    /// <param name="samples">The samples to filter.</param>
    /// <returns>The de-duplicated samples; a merged entry is moved to the end of the list.</returns>
    private static IReadOnlyList<Tuple<RealVector, double>> RemoveDuplicates(IReadOnlyList<Tuple<RealVector, double>> samples) {
      // Item1: representative input, Item2: sum of target values, Item3: number of merged samples.
      var res = new List<Tuple<RealVector, double, int>>();
      foreach (var sample in samples) {
        if (res.Count == 0) {
          res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
          continue;
        }

        // Find the nearest collected input and decide whether the sample duplicates it.
        var index = res.ArgMin(x => Euclidian(sample.Item1, x.Item1));
        var d = Euclidian(res[index].Item1, sample.Item1);
        if (d > DuplicateResolution) {
          res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
        } else {
          var t = res[index];
          res.RemoveAt(index);
          res.Add(new Tuple<RealVector, double, int>(t.Item1, t.Item2 + sample.Item2, t.Item3 + 1));
        }
      }
      return res.Select(x => new Tuple<RealVector, double>(x.Item1, x.Item2 / x.Item3)).ToArray();
    }

    /// <summary>
    /// Computes the Euclidean distance between two vectors. The vectors are paired
    /// element-wise with Zip, so surplus elements of the longer one are ignored.
    /// </summary>
    /// <param name="a">First vector.</param>
    /// <param name="b">Second vector.</param>
    /// <returns>The square root of the sum of squared element differences.</returns>
    private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) {
      return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d));
    }
  }
}