using HEAL.Attic;
using HeuristicLab.Algorithms.OESRALPS.Evaluators;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Random;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace HeuristicLab.Algorithms.OESRALPS.Analyzers {
  /// <summary>
  /// Collects, across all layer result collections, the best validation solution and the best
  /// training solution, publishes the best validation solution to the results and appends the
  /// current values to two charts ("Best solution chart" and "Error Measure of best solution Chart").
  /// </summary>
  /// <typeparam name="S">Type of the symbolic data analysis solution.</typeparam>
  /// <typeparam name="T">Type of the single-objective evaluator.</typeparam>
  /// <typeparam name="U">Type of the problem data.</typeparam>
  // NOTE(review): the generic type arguments on the class header, the base class and the
  // ISymbolicDataAnalysisSingleObjectiveEvaluator constraint are assumed to be <S, T, U>, <T, U> and <U>
  // (derived from the where-clauses and from how the members are used) - confirm against the project.
  [Item("SymbolicDataAnalysisSingleObjectiveValidationLayerBestSolutionAnalyzer", "An operator that analyzes the validation best symbolic data analysis solution for single objective symbolic data analysis problems.")]
  [StorableType("CD82C026-CF68-40D7-A898-77EA61972DE9")]
  public abstract class SymbolicDataAnalysisSingleObjectiveValidationLayerBestSolutionAnalyzer<S, T, U> : SymbolicDataAnalysisSingleObjectiveLayerValidationAnalyzer<T, U>, IIterationBasedOperator
    where S : class, ISymbolicDataAnalysisSolution
    where T : class, ISymbolicDataAnalysisSingleObjectiveEvaluator<U>
    where U : class, IDataAnalysisProblemData {
    // Names of parameters / result entries. The "Best validation ..." and "Best training ..." names
    // are also the keys that are looked up inside every layer's result collection.
    private const string ValidationBestSolutionParameterName = "Best validation solution";
    private const string ValidationBestSolutionQualityParameterName = "Best validation solution quality";
    private const string ValidationBestSolutionGenerationParameterName = "Best validation solution generation";
    private const string TrainingBestSolutionParameterName = "Best training solution";
    private const string TrainingBestSolutionQualityParameterName = "Best training solution quality";
    private const string TrainingBestSolutionGenerationParameterName = "Best training solution generation";
    private const string UpdateAlwaysParameterName = "Always update best solution";
    private const string IterationsParameterName = "Generations";
    private const string MaximumIterationsParameterName = "Maximum Iterations";

    protected const string LayerResultsParameterName = "LayerResults";
    protected const string EstimationLimitsParameterName = "EstimationLimits";
    protected const string LayerOfBestValidationSolutionParameterName = "Layer (Validation)";
    protected const string LayerOfBestTrainingSolutionParameterName = "Layer (Training)";
    protected const string LayerOfBestSolutionDescription = "The layer in which the best solution has been found.";

    private const string ValidationBestSolutionTableParameterName = "Best solution chart";
    private const string OpenLayersParameterName = "OpenLayers";

    // Row names of the error measure chart.
    private const string ErrorMeasureChart = "Error Measure of best solution Chart";
    private const string ValidationMAETestSetQuality = "Validation MAE (test)";
    private const string ValidationMAETrainingSetQuality = "Validation MAE (training)";
    private const string ValidationMSETestSetQuality = "Validation MSE (test)";
    private const string ValidationMSETrainingSetQuality = "Validation MSE (training)";
    private const string ValidationRMSETestSetQuality = "Validation RMSE (test)";
    private const string ValidationRMSETrainingSetQuality = "Validation RMSE (training)";
    private const string ValidationPearsonR2TestSetQuality = "Validation PearsonR2 (test)";
    private const string ValidationPearsonR2TrainingSetQuality = "Validation PearsonR2 (training)";
    private const string TrainingMAETestSetQuality = "Training MAE (test)";
    private const string TrainingMAETrainingSetQuality = "Training MAE (training)";
    private const string TrainingMSETestSetQuality = "Training MSE (test)";
    private const string TrainingMSETrainingSetQuality = "Training MSE (training)";
    private const string TrainingRMSETestSetQuality = "Training RMSE (test)";
    private const string TrainingRMSETrainingSetQuality = "Training RMSE (training)";
    private const string TrainingPearsonR2TestSetQuality = "Training PearsonR2 (test)";
    private const string TrainingPearsonR2TrainingSetQuality = "Training PearsonR2 (training)";
    private const string PreviousWindowBestValidationSolutionQuality = "Quality on previous window (Best Validation Solution)";
    private const string NextWindowBestValidationSolutionQuality = "Quality on next window (Best Validation Solution)";

    // Set by ValidationPartition_ValueChanged; makes the next Apply() re-evaluate the stored best
    // validation solution on the current validation partition. Not marked [Storable].
    private bool reevaluateValidationResult = false;

    #region parameter properties
    public ILookupParameter<S> ValidationBestSolutionParameter {
      get { return (ILookupParameter<S>)Parameters[ValidationBestSolutionParameterName]; }
    }
    public ILookupParameter<DoubleValue> ValidationBestSolutionQualityParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters[ValidationBestSolutionQualityParameterName]; }
    }
    public ILookupParameter<IntValue> ValidationBestSolutionGenerationParameter {
      get { return (ILookupParameter<IntValue>)Parameters[ValidationBestSolutionGenerationParameterName]; }
    }
    public IFixedValueParameter<BoolValue> UpdateAlwaysParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateAlwaysParameterName]; }
    }
    public ILookupParameter<IntValue> IterationsParameter {
      get { return (ILookupParameter<IntValue>)Parameters[IterationsParameterName]; }
    }
    public IValueLookupParameter<IntValue> MaximumIterationsParameter {
      get { return (IValueLookupParameter<IntValue>)Parameters[MaximumIterationsParameterName]; }
    }
    public IScopeTreeLookupParameter<ResultCollection> LayerResultsParameterParameter {
      get { return (IScopeTreeLookupParameter<ResultCollection>)Parameters[LayerResultsParameterName]; }
    }
    // NOTE(review): DoubleLimit is assumed as the value type of the estimation limits - confirm.
    public IValueLookupParameter<DoubleLimit> EstimationLimitsParameter {
      get { return (IValueLookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
    }
    public ILookupParameter<IntValue> OpenLayersParameter {
      get { return (ILookupParameter<IntValue>)Parameters[OpenLayersParameterName]; }
    }
    #endregion

    #region properties
    public S ValidationBestSolution {
      get { return ValidationBestSolutionParameter.ActualValue; }
      set { ValidationBestSolutionParameter.ActualValue = value; }
    }
    public DoubleValue ValidationBestSolutionQuality {
      get { return ValidationBestSolutionQualityParameter.ActualValue; }
      set { ValidationBestSolutionQualityParameter.ActualValue = value; }
    }
    public BoolValue UpdateAlways {
      get { return UpdateAlwaysParameter.Value; }
    }
    #endregion

    [StorableConstructor]
    protected SymbolicDataAnalysisSingleObjectiveValidationLayerBestSolutionAnalyzer(StorableConstructorFlag _) : base(_) { }

    protected SymbolicDataAnalysisSingleObjectiveValidationLayerBestSolutionAnalyzer(SymbolicDataAnalysisSingleObjectiveValidationLayerBestSolutionAnalyzer<S, T, U> original, Cloner cloner)
      : base(original, cloner) { }

    /// <summary>
    /// Registers all parameters of the analyzer. "Always update best solution" defaults to true and is hidden.
    /// </summary>
    public SymbolicDataAnalysisSingleObjectiveValidationLayerBestSolutionAnalyzer()
      : base() {
      Parameters.Add(new LookupParameter<S>(ValidationBestSolutionParameterName, "The validation best symbolic data analyis solution."));
      Parameters.Add(new LookupParameter<DoubleValue>(ValidationBestSolutionQualityParameterName, "The quality of the validation best symbolic data analysis solution."));
      Parameters.Add(new LookupParameter<IntValue>(ValidationBestSolutionGenerationParameterName, "The generation in which the best validation solution was found."));
      Parameters.Add(new FixedValueParameter<BoolValue>(UpdateAlwaysParameterName, "Determines if the best validation solution should always be updated regardless of its quality.", new BoolValue(true)));
      Parameters.Add(new LookupParameter<IntValue>(IterationsParameterName, "The number of performed iterations."));
      Parameters.Add(new ValueLookupParameter<IntValue>(MaximumIterationsParameterName, "The maximum number of performed iterations.") { Hidden = true });
      Parameters.Add(new ValueLookupParameter<DoubleLimit>(EstimationLimitsParameterName, "The lower and upper limit for the estimated values produced by the symbolic regression model."));
      Parameters.Add(new ScopeTreeLookupParameter<ResultCollection>(LayerResultsParameterName, "Results of all Layers.") { Depth = 1 });
      Parameters.Add(new LookupParameter<IntValue>(OpenLayersParameterName, "Open Layers"));
      UpdateAlwaysParameter.Hidden = true;
    }

    /// <summary>Creates a solution object of type <typeparamref name="S"/> for the given tree and quality.</summary>
    protected abstract S CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality);

    /// <summary>
    /// Determines the best validation and best training solution over all layer results, updates the
    /// "Best validation ..." results and appends one data point per chart row.
    /// Does nothing except calling the base implementation when there are no rows to evaluate.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when no layer result collection contains the best validation (or best training) entries.
    /// </exception>
    public override IOperation Apply() {
      IEnumerable<int> rows = GenerateRowsToEvaluate();
      if (!rows.Any()) return base.Apply();

      #region find best validation and best training tree
      // Obtain result collections from layers
      LayerResultsParameterParameter.Depth = 1;

      // TODO
      // Check if TrainingBestSolutionAnalyzer added to LayerAnalyzer
      // if false, add it and restart
      // else this analyzer is at the wrong scope, must be set as Analyzer not Layer analyzer
      LayerSolution bestValidationSolution = FindBestLayerSolution(
        ValidationBestSolutionParameterName,
        ValidationBestSolutionQualityParameterName,
        ValidationBestSolutionGenerationParameterName);
      LayerSolution bestTrainingSolution = FindBestLayerSolution(
        TrainingBestSolutionParameterName,
        TrainingBestSolutionQualityParameterName,
        TrainingBestSolutionGenerationParameterName);
      #endregion

      var results = ResultCollection;

      #region Add Parameter
      if (!results.ContainsKey(ValidationBestSolutionParameterName)) {
        results.Add(new Result(ValidationBestSolutionParameterName, ValidationBestSolutionParameter.Description, typeof(S)));
        // Subscribe once (when the result entry is first created). The partition is treated as
        // optional everywhere else in this class, so it must not be dereferenced blindly here.
        if (ValidationPartitionParameter.ActualValue != null)
          ValidationPartitionParameter.ActualValue.ValueChanged += ValidationPartition_ValueChanged;
      }
      if (!results.ContainsKey(ValidationBestSolutionQualityParameterName))
        results.Add(new Result(ValidationBestSolutionQualityParameterName, ValidationBestSolutionQualityParameter.Description, typeof(DoubleValue)));
      if (!results.ContainsKey(ValidationBestSolutionGenerationParameterName))
        results.Add(new Result(ValidationBestSolutionGenerationParameterName, ValidationBestSolutionGenerationParameter.Description, typeof(IntValue)));
      if (!results.ContainsKey(LayerOfBestValidationSolutionParameterName))
        results.Add(new Result(LayerOfBestValidationSolutionParameterName, LayerOfBestSolutionDescription, typeof(IntValue)));

      if (!results.ContainsKey(ValidationBestSolutionTableParameterName))
        results.Add(new Result(ValidationBestSolutionTableParameterName, new DataTable(ValidationBestSolutionTableParameterName)));
      var validationBestSolutionTable = (DataTable)results[ValidationBestSolutionTableParameterName].Value;
      // Layer numbers and the number of open layers are plotted on the second y-axis.
      EnsureRow(validationBestSolutionTable, ValidationBestSolutionQualityParameterName);
      EnsureRow(validationBestSolutionTable, LayerOfBestValidationSolutionParameterName, true);
      EnsureRow(validationBestSolutionTable, OpenLayersParameterName, true);
      EnsureRow(validationBestSolutionTable, TrainingBestSolutionQualityParameterName);
      EnsureRow(validationBestSolutionTable, LayerOfBestTrainingSolutionParameterName, true);
      EnsureRow(validationBestSolutionTable, PreviousWindowBestValidationSolutionQuality);
      EnsureRow(validationBestSolutionTable, NextWindowBestValidationSolutionQuality);

      if (!results.ContainsKey(ErrorMeasureChart))
        results.Add(new Result(ErrorMeasureChart, new DataTable(ErrorMeasureChart) { VisualProperties = { YAxisLogScale = true } }));
      var errorMeasureTable = (DataTable)results[ErrorMeasureChart].Value;
      // R² rows are plotted on the second y-axis; the order of creation is kept as-is.
      EnsureRow(errorMeasureTable, ValidationMAETestSetQuality);
      EnsureRow(errorMeasureTable, ValidationMAETrainingSetQuality);
      EnsureRow(errorMeasureTable, ValidationMSETestSetQuality);
      EnsureRow(errorMeasureTable, ValidationMSETrainingSetQuality);
      EnsureRow(errorMeasureTable, ValidationRMSETestSetQuality);
      EnsureRow(errorMeasureTable, ValidationRMSETrainingSetQuality);
      EnsureRow(errorMeasureTable, ValidationPearsonR2TestSetQuality, true);
      EnsureRow(errorMeasureTable, ValidationPearsonR2TrainingSetQuality, true);
      EnsureRow(errorMeasureTable, TrainingMSETestSetQuality);
      EnsureRow(errorMeasureTable, TrainingMSETrainingSetQuality);
      EnsureRow(errorMeasureTable, TrainingRMSETestSetQuality);
      EnsureRow(errorMeasureTable, TrainingRMSETrainingSetQuality);
      EnsureRow(errorMeasureTable, TrainingMAETestSetQuality);
      EnsureRow(errorMeasureTable, TrainingMAETrainingSetQuality);
      EnsureRow(errorMeasureTable, TrainingPearsonR2TestSetQuality, true);
      EnsureRow(errorMeasureTable, TrainingPearsonR2TrainingSetQuality, true);
      #endregion

      #region Reevaluate current best solution
      if (reevaluateValidationResult
          && ValidationBestSolutionQuality != null
          && ValidationPartitionParameter.ActualValue != null) {
        var evaluator = EvaluatorParameter.ActualValue;
        var problemData = ProblemDataParameter.ActualValue;
        IExecutionContext childContext = (IExecutionContext)ExecutionContext.CreateChildOperation(evaluator);
        var currentValidationBestSolution = (S)results[ValidationBestSolutionParameterName].Value;
        var validationPartition = ValidationPartitionParameter.ActualValue;

        // Evaluate on every row of the validation partition [Start, End).
        ValidationBestSolutionQuality.Value = evaluator.Evaluate(
          childContext,
          currentValidationBestSolution.Model.SymbolicExpressionTree,
          problemData,
          Enumerable.Range(validationPartition.Start, validationPartition.End - validationPartition.Start));

        results[ValidationBestSolutionQualityParameterName].Value = ValidationBestSolutionQuality;
        results[ValidationBestSolutionGenerationParameterName].Value = IterationsParameter.ActualValue;
        reevaluateValidationResult = false;
      }
      #endregion

      if (bestValidationSolution.Solution != null
          && (UpdateAlways.Value
              || ValidationBestSolutionQuality == null
              || IsBetter(bestValidationSolution.Quality.Value, ValidationBestSolutionQuality.Value, Maximization.Value))) {
        ValidationBestSolution = CreateSolution(bestValidationSolution.Solution.Model.SymbolicExpressionTree, bestValidationSolution.Quality.Value);
        ValidationBestSolutionQuality = bestValidationSolution.Quality;

        results[ValidationBestSolutionParameter.Name].Value = ValidationBestSolution;
        results[ValidationBestSolutionQualityParameter.Name].Value = ValidationBestSolutionQuality;
        // TODO Ask Affenzeller if current iteration should be shown or the iteration when the solution was created
        results[ValidationBestSolutionGenerationParameter.Name].Value = bestValidationSolution.Generation;
        results[LayerOfBestValidationSolutionParameterName].Value = bestValidationSolution.Layer;
      }

      #region create charts
      // Layer indices are stored zero-based and charted one-based.
      validationBestSolutionTable.Rows[OpenLayersParameterName].Values.Add(OpenLayersParameter.ActualValue.Value);
      validationBestSolutionTable.Rows[ValidationBestSolutionQualityParameterName].Values.Add(ValidationBestSolutionQualityParameter.ActualValue.Value);
      validationBestSolutionTable.Rows[LayerOfBestValidationSolutionParameterName].Values.Add(((IntValue)results[LayerOfBestValidationSolutionParameterName].Value).Value + 1);
      validationBestSolutionTable.Rows[TrainingBestSolutionQualityParameterName].Values.Add(bestTrainingSolution.Quality.Value);
      validationBestSolutionTable.Rows[LayerOfBestTrainingSolutionParameterName].Values.Add(bestTrainingSolution.Layer.Value + 1);
      results[ValidationBestSolutionTableParameterName].Value = validationBestSolutionTable;

      // The error measures exist only on regression solutions; skip the chart update instead of
      // dereferencing a failed 'as' cast when S is some other solution type.
      var bestValidationRegressionSolution = ValidationBestSolution as IRegressionSolution;
      var bestTrainingRegressionSolution = bestTrainingSolution.Solution as IRegressionSolution;
      if (bestValidationRegressionSolution != null && bestTrainingRegressionSolution != null) {
        errorMeasureTable.Rows[ValidationMSETestSetQuality].Values.Add(bestValidationRegressionSolution.TestMeanSquaredError);
        errorMeasureTable.Rows[ValidationMSETrainingSetQuality].Values.Add(bestValidationRegressionSolution.TrainingMeanSquaredError);
        errorMeasureTable.Rows[ValidationRMSETestSetQuality].Values.Add(bestValidationRegressionSolution.TestRootMeanSquaredError);
        errorMeasureTable.Rows[ValidationRMSETrainingSetQuality].Values.Add(bestValidationRegressionSolution.TrainingRootMeanSquaredError);
        errorMeasureTable.Rows[ValidationMAETestSetQuality].Values.Add(bestValidationRegressionSolution.TestMeanAbsoluteError);
        errorMeasureTable.Rows[ValidationMAETrainingSetQuality].Values.Add(bestValidationRegressionSolution.TrainingMeanAbsoluteError);
        errorMeasureTable.Rows[ValidationPearsonR2TestSetQuality].Values.Add(bestValidationRegressionSolution.TestRSquared);
        errorMeasureTable.Rows[ValidationPearsonR2TrainingSetQuality].Values.Add(bestValidationRegressionSolution.TrainingRSquared);

        errorMeasureTable.Rows[TrainingMSETestSetQuality].Values.Add(bestTrainingRegressionSolution.TestMeanSquaredError);
        errorMeasureTable.Rows[TrainingMSETrainingSetQuality].Values.Add(bestTrainingRegressionSolution.TrainingMeanSquaredError);
        errorMeasureTable.Rows[TrainingRMSETestSetQuality].Values.Add(bestTrainingRegressionSolution.TestRootMeanSquaredError);
        errorMeasureTable.Rows[TrainingRMSETrainingSetQuality].Values.Add(bestTrainingRegressionSolution.TrainingRootMeanSquaredError);
        errorMeasureTable.Rows[TrainingMAETestSetQuality].Values.Add(bestTrainingRegressionSolution.TestMeanAbsoluteError);
        errorMeasureTable.Rows[TrainingMAETrainingSetQuality].Values.Add(bestTrainingRegressionSolution.TrainingMeanAbsoluteError);
        errorMeasureTable.Rows[TrainingPearsonR2TestSetQuality].Values.Add(bestTrainingRegressionSolution.TestRSquared);
        errorMeasureTable.Rows[TrainingPearsonR2TrainingSetQuality].Values.Add(bestTrainingRegressionSolution.TrainingRSquared);
      }
      results[ErrorMeasureChart].Value = errorMeasureTable;
      #endregion

      return base.Apply();
    }

    /// <summary>
    /// Picks, among all layer result collections that contain the three given entries, the one with
    /// the best quality (largest when maximizing, smallest otherwise; ties go to the lowest layer).
    /// </summary>
    /// <exception cref="InvalidOperationException">Thrown when no layer result contains all three entries.</exception>
    private LayerSolution FindBestLayerSolution(string solutionKey, string qualityKey, string generationKey) {
      var candidates = LayerResultsParameterParameter.ActualValue
        // Remember the position BEFORE filtering so that Layer is the real layer index even when
        // some layers do not (yet) contain the entries.
        .Select((layerResult, layerIndex) => new { Result = layerResult, Index = layerIndex })
        .Where(layer => layer.Result.ContainsKey(solutionKey)
                     && layer.Result.ContainsKey(qualityKey)
                     && layer.Result.ContainsKey(generationKey))
        .Select(layer => new LayerSolution() {
          Solution = (S)layer.Result[solutionKey].Value,
          Quality = (DoubleValue)layer.Result[qualityKey].Value,
          Generation = (IntValue)layer.Result[generationKey].Value,
          Layer = new IntValue(layer.Index) // TODO Ask Affenzeller if layers should be zero indexed
        })
        .ToArray();

      if (candidates.Length == 0)
        throw new InvalidOperationException(string.Format(
          "No layer result contains the entries '{0}', '{1}' and '{2}'. Make sure the corresponding best solution analyzer is registered as layer analyzer and that this analyzer is not used as layer analyzer itself.",
          solutionKey, qualityKey, generationKey));

      return Maximization.Value
        ? candidates.OrderByDescending(candidate => candidate.Quality.Value).First()
        : candidates.OrderBy(candidate => candidate.Quality.Value).First();
    }

    /// <summary>Adds a row named <paramref name="rowName"/> to <paramref name="table"/> unless it already exists.</summary>
    private static void EnsureRow(DataTable table, string rowName, bool secondYAxis = false) {
      if (table.Rows.ContainsKey(rowName)) return;
      var row = new DataRow(rowName);
      if (secondYAxis) row.VisualProperties.SecondYAxis = true;
      table.Rows.Add(row);
    }

    // Requests a re-evaluation of the stored best validation solution on the next Apply().
    private void ValidationPartition_ValueChanged(object sender, EventArgs e) {
      reevaluateValidationResult = true;
    }

    // Strict comparison: equal qualities are never "better".
    private bool IsBetter(double lhs, double rhs, bool maximization) {
      if (maximization) return lhs > rhs;
      else return lhs < rhs;
    }

    /// <summary>
    /// Samples row indices from the validation partition. Falls back to the base implementation when
    /// the validation or the test partition is not available.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when the validation partition ends before it starts.</exception>
    protected override IEnumerable<int> GenerateRowsToEvaluate() {
      if (ValidationPartitionParameter.ActualValue == null
          || TestPartitionParameter.ActualValue == null)
        return base.GenerateRowsToEvaluate();

      int seed = RandomParameter.ActualValue.Next();
      int samplesStart = ValidationPartitionParameter.ActualValue.Start;
      int samplesEnd = ValidationPartitionParameter.ActualValue.End;
      int testPartitionStart = TestPartitionParameter.ActualValue.Start;

      if (samplesEnd < samplesStart) throw new ArgumentException("Start value is larger than end value.");
      int count = (int)((samplesEnd - samplesStart) * RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value);
      if (count == 0) count = 1; // always evaluate at least one row

      int datasetRows = ProblemDataParameter.ActualValue.Dataset.Rows;
      // Only rows that lie before the test partition and inside the dataset are kept.
      return RandomEnumerable.SampleRandomNumbers(seed, samplesStart, samplesEnd, count)
        .Where(i => i < testPartitionStart && i < datasetRows);
    }

    /// <summary>Best solution of one layer together with its quality, generation and (zero-based) layer index.</summary>
    [StorableType("CD82C026-CF61-40D7-A898-77EA61992DE9")]
    class LayerSolution {
      [Storable]
      public S Solution { get; set; }
      [Storable]
      public DoubleValue Quality { get; set; }
      [Storable]
      public IntValue Generation { get; set; }
      [Storable]
      public IntValue Layer { get; set; }
    }
  }
}