#region License Information
/* HeuristicLab
 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using Tensorflow;
using Tensorflow.NumPy;
using static Tensorflow.Binding;
using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Converts a symbolic expression tree into a TensorFlow computation (eager tensors or a
  /// lazily evaluated graph with placeholders) so that the tree's numeric parameters
  /// (constants, variable weights and optional linear scaling terms) become trainable
  /// <see cref="ResourceVariable"/>s that can be tuned by TensorFlow optimizers.
  /// Rows are laid out along tensor axis 0; vector variables occupy axis 1.
  /// </summary>
  public class TreeToTensorConverter {

    // float32 is used instead of float64 to keep memory/throughput cost down.
    //private static readonly TF_DataType DataType = tf.float64;
    private static readonly TF_DataType DataType = tf.float32;

    /// <summary>
    /// Prepares the input tensors (or placeholders), the trainable variables and the target
    /// tensor for <paramref name="tree"/>.
    /// </summary>
    /// <returns>false (with all out-parameters null) if the tree references an unsupported
    /// symbol or variable type; true otherwise.</returns>
    public static bool TryPrepareTree(
      ISymbolicExpressionTree tree,
      IRegressionProblemData problemData, List<int> rows,
      bool updateVariableWeights, bool applyLinearScaling,
      bool eagerEvaluation,
      out Dictionary<string, Tensor> inputFeatures, out Tensor target,
      out Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables) {
      try {
        var converter = new TreeToTensorConverter(
          problemData, rows,
          updateVariableWeights, applyLinearScaling,
          eagerEvaluation
        );

        // The execution mode must be set before any tensor/variable is created.
        if (eagerEvaluation)
          tf.enable_eager_execution();
        else
          tf.compat.v1.disable_eager_execution();

        converter.PrepareNode(tree.Root.GetSubtree(0));

        inputFeatures = converter.inputFeatures;
        // Flatten the (rows, 1) target to rank 1 so it matches the flattened prediction.
        target = tf.reshape(InputFeatureToTensor(problemData.TargetVariable, problemData, rows), new Shape(-1));
        variables = converter.variables;
        return true;
      } catch (NotSupportedException) {
        inputFeatures = null;
        target = null;
        variables = null;
        return false;
      }
    }

    /// <summary>
    /// Evaluates <paramref name="tree"/> against previously prepared
    /// <paramref name="inputFeatures"/> and <paramref name="variables"/>
    /// (see <see cref="TryPrepareTree"/>).
    /// </summary>
    /// <returns>false (with a null prediction) if the tree contains an unsupported symbol.</returns>
    public static bool TryEvaluate(
      ISymbolicExpressionTree tree,
      Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
      bool eagerEvaluation,
      out Tensor prediction) {
      try {
        var converter = new TreeToTensorConverter(
          inputFeatures, variables,
          makeVariableWeightsVariable, addLinearScalingTerms,
          eagerEvaluation
        );

        if (eagerEvaluation)
          tf.enable_eager_execution();
        else
          tf.compat.v1.disable_eager_execution();

        prediction = converter.EvaluateNode(tree.Root.GetSubtree(0));
        return true;
      } catch (NotSupportedException) {
        prediction = null;
        return false;
      }
    }

    // NOTE(review): a commented-out graph-mode variant ("TryConvert") duplicating
    // TryEvaluate with eagerEvaluation:false was removed; use TryEvaluate directly.

    private readonly IDataAnalysisProblemData problemData;
    private readonly List<int> rows;

    // variable name -> input tensor (eager) or placeholder (graph mode)
    private readonly Dictionary<string, Tensor> inputFeatures = new Dictionary<string, Tensor>();
    // tree node -> its trainable parameters (constants: [c]; weights: [w]; StartSymbol: [beta, alpha])
    private readonly Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables = new Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]>();

    private readonly bool makeVariableWeightsVariable;
    private readonly bool addLinearScalingTerms;
    private readonly bool eagerEvaluation;

    // Constructor used by TryPrepareTree (needs dataset access to build inputs).
    private TreeToTensorConverter(
      IDataAnalysisProblemData problemData, List<int> rows,
      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
      bool eagerEvaluation
    ) {
      this.problemData = problemData;
      this.rows = rows;
      this.makeVariableWeightsVariable = makeVariableWeightsVariable;
      this.addLinearScalingTerms = addLinearScalingTerms;
      this.eagerEvaluation = eagerEvaluation;
    }

    // Constructor used by TryEvaluate (inputs and variables are already prepared).
    private TreeToTensorConverter(
      Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
      bool eagerEvaluation
    ) {
      this.inputFeatures = inputFeatures;
      this.variables = variables;
      this.makeVariableWeightsVariable = makeVariableWeightsVariable;
      this.addLinearScalingTerms = addLinearScalingTerms;
      this.eagerEvaluation = eagerEvaluation;
    }

    /// <summary>
    /// Materializes the values of a dataset variable as a float32 tensor:
    /// scalar variables become (rows, 1); vector variables become (rows, -1),
    /// i.e. the per-row vector length is inferred.
    /// </summary>
    private static Tensor InputFeatureToTensor(string var, IDataAnalysisProblemData problemData, List<int> rows) {
      if (problemData.Dataset.VariableHasType<double>(var)) {
        var data = problemData.Dataset.GetDoubleValues(var, rows).Select(x => (float)x).ToArray();
        return tf.convert_to_tensor(np.array(data).reshape(new Shape(rows.Count, 1)), DataType);
      } else if (problemData.Dataset.VariableHasType<DoubleVector>(var)) {
        var data = problemData.Dataset.GetDoubleVectorValues(var, rows).SelectMany(x => x.Select(y => (float)y)).ToArray();
        return tf.convert_to_tensor(np.array(data).reshape(new Shape(rows.Count, -1)), DataType);
      } else
        throw new NotSupportedException($"Type of the variable is not supported: {var}");
    }

    /// <summary>
    /// Creates a graph-mode placeholder for a dataset variable with the same shape
    /// conventions as <see cref="InputFeatureToTensor"/>.
    /// </summary>
    private static Tensor InputFeatureToPlaceholder(string var, IDataAnalysisProblemData problemData, List<int> rows) {
      if (problemData.Dataset.VariableHasType<double>(var)) {
        return tf.placeholder(DataType, new Shape(rows.Count, 1), name: var);
      } else if (problemData.Dataset.VariableHasType<DoubleVector>(var)) {
        // Assumes all rows share one vector length; only the first row is checked.
        //var vectorLength = problemData.Dataset.GetDoubleVectorValues(var, rows).Select(v => v.Count).Distinct().Single();
        var vectorLength = problemData.Dataset.GetDoubleVectorValue(var, rows[0]).Count;
        return tf.placeholder(DataType, new Shape(rows.Count, vectorLength), name: var);
      } else
        throw new NotSupportedException($"Type of the variable is not supported: {var}");
    }

    /// <summary>
    /// Recursively walks the tree and registers trainable variables (constants, optional
    /// variable weights, optional linear scaling terms) and input tensors/placeholders.
    /// </summary>
    private void PrepareNode(ISymbolicExpressionTreeNode node) {
      if (node.Symbol is Constant) {
        var constantNode = (ConstantTreeNode)node;
        var value = (float)constantNode.Value;
        var value_arr = np.array(value).reshape(new Shape(1, 1));
        var c = tf.Variable(value_arr, name: $"c_{variables.Count}", dtype: DataType);
        variables.Add(node, new[] { c });
      } else if (node.Symbol is Variable) {
        var varNode = (VariableTreeNodeBase)node;
        if (makeVariableWeightsVariable) {
          var w_arr = np.array((float)varNode.Weight).reshape(new Shape(1, 1));
          var w = tf.Variable(w_arr, name: $"w_{varNode.VariableName}", dtype: DataType);
          variables.Add(node, new[] { w });
        }
        // Each dataset variable gets exactly one shared input tensor/placeholder,
        // regardless of how many tree nodes reference it.
        if (!inputFeatures.ContainsKey(varNode.VariableName)) {
          inputFeatures.Add(
            varNode.VariableName,
            eagerEvaluation
              ? InputFeatureToTensor(varNode.VariableName, problemData, rows)
              : InputFeatureToPlaceholder(varNode.VariableName, problemData, rows));
        }
      } else if (node.Symbol is StartSymbol) {
        if (addLinearScalingTerms) {
          // prediction * alpha + beta; stored as [beta, alpha] (see EvaluateNode).
          var alpha_arr = np.array((float)1.0).reshape(new Shape(1, 1));
          var alpha = tf.Variable(alpha_arr, name: "alpha", dtype: DataType);
          var beta_arr = np.array((float)0.0).reshape(new Shape(1, 1));
          var beta = tf.Variable(beta_arr, name: "beta", dtype: DataType);
          variables.Add(node, new[] { beta, alpha });
        }
      }

      foreach (var subTree in node.Subtrees) {
        PrepareNode(subTree);
      }
    }

    /// <summary>
    /// Recursively builds the tensor expression for a tree node. Throws
    /// <see cref="NotSupportedException"/> for symbols without a tensor mapping.
    /// </summary>
    private Tensor EvaluateNode(ISymbolicExpressionTreeNode node) {
      if (node.Symbol is Constant) {
        return variables[node][0];
      }

      if (node.Symbol is Variable/* || node.Symbol is BinaryFactorVariable*/) {
        var varNode = node as VariableTreeNodeBase;
        var par = inputFeatures[varNode.VariableName]; // eager or placeholder
        if (makeVariableWeightsVariable) {
          var w = variables[node][0];
          return w * par;
        } else {
          return varNode.Weight * par;
        }
      }

      // TODO: FactorVariable/BinaryFactorVariable support (one weight per factor level)
      // is not implemented yet.

      if (node.Symbol is Addition) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        if (terms.Count == 1) return terms[0];
        return terms.Aggregate((a, b) => a + b);
      }

      if (node.Symbol is Subtraction) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        // Unary minus by convention: (- x) == -x.
        if (terms.Count == 1) return -terms[0];
        return terms.Aggregate((a, b) => a - b);
      }

      if (node.Symbol is Multiplication) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        if (terms.Count == 1) return terms[0];
        return terms.Aggregate((a, b) => a * b);
      }

      if (node.Symbol is Division) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        // Unary division by convention: (/ x) == 1/x.
        if (terms.Count == 1) return (float)1.0 / terms[0];
        return terms.Aggregate((a, b) => a / b);
      }

      if (node.Symbol is Absolute) {
        var x1 = EvaluateNode(node.GetSubtree(0));
        return tf.abs(x1);
      }

      if (node.Symbol is AnalyticQuotient) {
        var x1 = EvaluateNode(node.GetSubtree(0));
        var x2 = EvaluateNode(node.GetSubtree(1));
        return x1 / tf.pow((float)1.0 + x2 * x2, (float)0.5);
      }

      if (node.Symbol is Logarithm) {
        return tf.log(EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Exponential) {
        return tf.pow((float)Math.E, EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Square) {
        return tf.square(EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is SquareRoot) {
        return tf.sqrt(EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Cube) {
        return tf.pow(EvaluateNode(node.GetSubtree(0)), (float)3.0);
      }

      if (node.Symbol is CubeRoot) {
        return tf.pow(EvaluateNode(node.GetSubtree(0)), (float)1.0 / (float)3.0);
        // TODO: tf.pow yields NaN for negative bases; the interpreter semantics are
        // f: x < 0 ? -Math.Pow(-x, 1.0 / 3) : Math.Pow(x, 1.0 / 3)
      }

      if (node.Symbol is Sine) {
        return tf.sin(EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Cosine) {
        return tf.cos(EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Tangent) {
        return tf.tan(EvaluateNode(node.GetSubtree(0)));
      }

      // Fix: IsCompatible accepts HyperbolicTangent, but no tensor mapping existed,
      // so compatible trees failed at evaluation time.
      if (node.Symbol is HyperbolicTangent) {
        return tf.tanh(EvaluateNode(node.GetSubtree(0)));
      }

      // TODO: IsCompatible also accepts Erf and Norm, which still have no mapping here;
      // such trees pass the compatibility check but make TryEvaluate return false.

      // Vector aggregations reduce along axis 1 (the vector axis) and keep the
      // (rows, 1) column shape so results compose with scalar sub-expressions.
      if (node.Symbol is Mean) {
        return tf.reduce_mean(EvaluateNode(node.GetSubtree(0)), axis: new[] { 1 }, keepdims: true);
      }

      if (node.Symbol is StandardDeviation) {
        return tf.reduce_std(EvaluateNode(node.GetSubtree(0)), axis: new[] { 1 }, keepdims: true);
      }

      if (node.Symbol is Variance) {
        return tf.reduce_variance(EvaluateNode(node.GetSubtree(0)), axis: new[] { 1 }, keepdims: true);
      }

      if (node.Symbol is Sum) {
        return tf.reduce_sum(EvaluateNode(node.GetSubtree(0)), axis: new[] { 1 }, keepdims: true);
      }

      if (node.Symbol is SubVector) {
        var tensor = EvaluateNode(node.GetSubtree(0));
        int rows = (int)tensor.shape[0], vectorLength = (int)tensor.shape[1];
        var windowedNode = (IWindowedSymbolTreeNode)node;
        int startIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Offset, vectorLength);
        int endIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Length, vectorLength);
        // The window may wrap around the vector end, so it is assembled from slices.
        var slices = SymbolicDataAnalysisExpressionTreeVectorInterpreter.GetVectorSlices(startIdx, endIdx, vectorLength);
        var segments = new List<Tensor>();
        foreach (var (start, count) in slices) {
          segments.Add(tensor[new Slice(), new Slice(start, start + count)]);
        }
        return tf.concat(segments, axis: 1);
      }

      if (node.Symbol is StartSymbol) {
        Tensor prediction = EvaluateNode(node.GetSubtree(0));
        // Fix: was "rank != 2 && shape[1] != 1", which silently accepted (and then
        // flattened) rank-2 multi-column predictions. The prediction must be a
        // (rows, 1) tensor; "||" also short-circuits before shape[1] on wrong ranks.
        if (prediction.rank != 2 || prediction.shape[1] != 1)
          throw new InvalidOperationException("Prediction must be a rank 2 tensor with a single column (single value per row).");
        prediction = tf.reshape(prediction, new Shape(-1));
        if (addLinearScalingTerms) {
          var vars = variables[node];
          Tensor alpha = vars[1], beta = vars[0];
          return prediction * alpha + beta;
        } else {
          return prediction;
        }
      }

      throw new NotSupportedException($"Node symbol {node.Symbol} is not supported.");
    }

    /// <summary>
    /// Checks whether every symbol in the tree has (or is expected to have) a tensor
    /// mapping in <see cref="EvaluateNode"/>.
    /// </summary>
    public static bool IsCompatible(ISymbolicExpressionTree tree) {
      var containsUnknownSymbol = (
        from n in tree.Root.GetSubtree(0).IterateNodesPrefix()
        where
          !(n.Symbol is Variable) &&
          //!(n.Symbol is BinaryFactorVariable) &&
          //!(n.Symbol is FactorVariable) &&
          !(n.Symbol is Constant) &&
          !(n.Symbol is Addition) &&
          !(n.Symbol is Subtraction) &&
          !(n.Symbol is Multiplication) &&
          !(n.Symbol is Division) &&
          !(n.Symbol is Logarithm) &&
          !(n.Symbol is Exponential) &&
          !(n.Symbol is SquareRoot) &&
          !(n.Symbol is Square) &&
          !(n.Symbol is Sine) &&
          !(n.Symbol is Cosine) &&
          !(n.Symbol is Tangent) &&
          !(n.Symbol is HyperbolicTangent) &&
          !(n.Symbol is Erf) &&
          !(n.Symbol is Norm) &&
          !(n.Symbol is StartSymbol) &&
          !(n.Symbol is Absolute) &&
          !(n.Symbol is AnalyticQuotient) &&
          !(n.Symbol is Cube) &&
          !(n.Symbol is CubeRoot) &&
          !(n.Symbol is Mean) &&
          !(n.Symbol is StandardDeviation) &&
          !(n.Symbol is Variance) &&
          !(n.Symbol is Sum) &&
          !(n.Symbol is SubVector)
        select n).Any();
      return !containsUnknownSymbol;
    }
  }
}