#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.Instances;

namespace HeuristicLab.Problems.GeneticProgramming.GlucosePrediction {
  [Item("Blood Glucose Forecast", "See MedGEC Workshop at GECCO 2016")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 999)]
  [StorableClass]
  public sealed class Problem : SymbolicExpressionTreeProblem, IRegressionProblem,
    IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {

    #region parameter names
    private const string ProblemDataParameterName = "ProblemData";
    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter {
      get { return ProblemDataParameter; }
    }
    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    #endregion

    #region Properties
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData {
      get { return ProblemData; }
    }
    #endregion

    public event EventHandler ProblemDataChanged;

    public override bool Maximization {
      get { return true; }
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new Problem(this, cloner);
    }
    #endregion

    public Problem() : base() {
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName,
        "The data for the glucose prediction problem", new RegressionProblemData()));

      var g = new SimpleSymbolicExpressionGrammar(); // empty grammar is replaced in UpdateGrammar()
      base.Encoding = new SymbolicExpressionTreeEncoding(g, 100, 17);

      UpdateGrammar();
      RegisterEventHandlers();
    }

    public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
      var problemData = ProblemData;
      var target = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
      var allPredicted = Interpreter.Apply(tree.Root.GetSubtree(0).GetSubtree(0), problemData.Dataset, problemData.AllIndices).ToArray();
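      // the tree is evaluated over all dataset rows; only the training rows enter the fitness calculation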
      var predicted = problemData.TrainingIndices.Select(r => allPredicted[r]);
      // var predicted1 = Interpreter.Apply(tree.Root.GetSubtree(0).GetSubtree(0).GetSubtree(1), problemData.Dataset, rows);
      // var predicted2 = Interpreter.Apply(tree.Root.GetSubtree(0).GetSubtree(0).GetSubtree(2), problemData.Dataset, rows);

      var pred0_rsq = Rsq(predicted, target);
      // var pred1_rsq = Rsq(predicted1, target);
      // var pred2_rsq = Rsq(predicted2, target);

      return pred0_rsq; // + pred1_rsq + pred2_rsq;
    }

    private double Rsq(IEnumerable<double> predicted, IEnumerable<double> target) {
      // only take predictions for which the target is not NaN
      var selectedTuples = target.Zip(predicted, Tuple.Create).Where(t => !double.IsNaN(t.Item1)).ToArray();
      target = selectedTuples.Select(t => t.Item1);
      predicted = selectedTuples.Select(t => t.Item2);

      OnlineCalculatorError errorState;
      var r = OnlinePearsonsRCalculator.Calculate(target, predicted, out errorState);
      if (errorState != OnlineCalculatorError.None) r = 0;
      return r * r;
    }

    public override void Analyze(ISymbolicExpressionTree[] trees, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(trees, qualities, results, random);

      if (!results.ContainsKey("Solution")) {
        results.Add(new Result("Solution", typeof(IRegressionSolution)));
      }
      if (!results.ContainsKey("ScaledTree")) {
        results.Add(new Result("ScaledTree", typeof(ISymbolicExpressionTree)));
      }
      // if (!results.ContainsKey("Terms")) {
      //   results.Add(new Result("Terms", typeof(DataTable)));
      // }

      // determine the best tree of the current generation
      var bestTree = trees.First();
      var bestQuality = qualities.First();
      for (int i = 1; i < trees.Length; i++) {
        if (qualities[i] > bestQuality) {
          bestQuality = qualities[i];
          bestTree = trees[i];
        }
      }

      bestTree = (ISymbolicExpressionTree)bestTree.Clone();
      var expressionNode = bestTree.Root.GetSubtree(0).GetSubtree(0);

      // scale
      var problemData = ProblemData;
      var rows = problemData.AllIndices.ToArray();
      var target = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
      var predicted = Interpreter.Apply(expressionNode.GetSubtree(0), problemData.Dataset, rows).ToArray();
      var filteredPredicted = rows.Where(r => !double.IsNaN(target[r])).Select(r => predicted[r]).ToArray();
      var filteredTarget = target.Where(t => !double.IsNaN(t)).ToArray();

      OnlineCalculatorError error;
      double alpha;
      double beta;
      OnlineLinearScalingParameterCalculator.Calculate(filteredPredicted, filteredTarget, out alpha, out beta, out error);

      // wrap the original expression into (originalTree * beta) + alpha
      var prod = new SimpleSymbol("*", "*", 2, 2).CreateTreeNode();
      var sum = new SimpleSymbol("+", "+", 2, 2).CreateTreeNode();
      var constAlpha = (ConstantTreeNode)(new Constant()).CreateTreeNode();
      constAlpha.Value = alpha;
      var constBeta = (ConstantTreeNode)(new Constant()).CreateTreeNode();
      constBeta.Value = beta;

      var originalTree = expressionNode.GetSubtree(0);
      expressionNode.RemoveSubtree(0);
      expressionNode.AddSubtree(sum);
      sum.AddSubtree(prod);
      sum.AddSubtree(constAlpha);
      prod.AddSubtree(originalTree);
      prod.AddSubtree(constBeta);

      var model = new Model(bestTree, problemData.TargetVariable, problemData.AllowedInputVariables.ToArray());
      model.Name = "Scaled Model";
      model.Description = "Scaled Model";
      results["Solution"].Value = model.CreateRegressionSolution(problemData);
      results["ScaledTree"].Value = bestTree;
    }

    #region events
    private void RegisterEventHandlers() {
      ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged);
      if (ProblemDataParameter.Value != null)
        ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
    }

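    // when a new problem data instance is assigned, subscribe to its Changed event and reset the problem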
    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      ProblemDataParameter.Value.Changed += new EventHandler(ProblemData_Changed);
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnReset();
    }

    private void OnProblemDataChanged() {
      UpdateGrammar();

      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }

    private void UpdateGrammar() {
      // whenever ProblemData is changed we create a new grammar with the necessary symbols
      var g = new Grammar();
      Encoding.Grammar = g;
    }
    #endregion

    #region Import & Export
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }
    #endregion
  }
}