#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using Tensorflow;
using Tensorflow.NumPy;
using static Tensorflow.Binding;
using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Converts a symbolic expression tree into a TensorFlow.NET computation — eager tensors or a
  /// graph with placeholders — so that the tree's numeric parameters (constants, variable weights
  /// and optional linear-scaling terms) become trainable <see cref="ResourceVariable"/>s that can
  /// be optimized by gradient descent.
  /// </summary>
  public class TreeToTensorConverter {
    // All tensors are created as float32; float64 support is currently disabled.
    //private static readonly TF_DataType DataType = tf.float64;
    private static readonly TF_DataType DataType = tf.float32;

    /// <summary>
    /// Prepares everything needed to fit <paramref name="tree"/> on <paramref name="problemData"/>:
    /// one input tensor (eager mode) or placeholder (graph mode) per referenced variable, the
    /// flattened target tensor, and the trainable variables created for the tree's parameters.
    /// </summary>
    /// <returns>false if the tree contains a symbol the converter does not support.</returns>
    public static bool TryPrepareTree(
      ISymbolicExpressionTree tree,
      IRegressionProblemData problemData, List<int> rows,
      bool updateVariableWeights, bool applyLinearScaling,
      bool eagerEvaluation,
      out Dictionary<string, Tensor> inputFeatures, out Tensor target,
      out Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables) {

      try {
        var converter = new TreeToTensorConverter(
          problemData, rows,
          updateVariableWeights, applyLinearScaling,
          eagerEvaluation
        );

        if (eagerEvaluation)
          tf.enable_eager_execution();
        else
          tf.compat.v1.disable_eager_execution();

        converter.PrepareNode(tree.Root.GetSubtree(0));

        inputFeatures = converter.inputFeatures;
        // Flatten to rank 1 so the target is directly comparable to the flattened prediction
        // produced by EvaluateNode for the StartSymbol.
        target = tf.reshape(InputFeatureToTensor(problemData.TargetVariable, problemData, rows), new Shape(-1));
        variables = converter.variables;

        return true;
      } catch (NotSupportedException) {
        inputFeatures = null;
        target = null;
        variables = null;
        return false;
      }
    }

    /// <summary>
    /// Evaluates <paramref name="tree"/> using input features and variables previously created by
    /// <see cref="TryPrepareTree"/> and returns the resulting prediction tensor.
    /// </summary>
    /// <returns>false if the tree contains a symbol the converter does not support.</returns>
    public static bool TryEvaluate(
      ISymbolicExpressionTree tree,
      Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
      bool eagerEvaluation,
      out Tensor prediction) {

      try {
        var converter = new TreeToTensorConverter(
          inputFeatures, variables,
          makeVariableWeightsVariable, addLinearScalingTerms,
          eagerEvaluation
        );

        if (eagerEvaluation)
          tf.enable_eager_execution();
        else
          tf.compat.v1.disable_eager_execution();

        prediction = converter.EvaluateNode(tree.Root.GetSubtree(0));

        return true;
      } catch (NotSupportedException) {
        prediction = null;
        return false;
      }
    }

    //public static bool TryConvert/*Lazy/Graph*/(
    //  ISymbolicExpressionTree tree,
    //  Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
    //  bool makeVariableWeightsVariable, bool addLinearScalingTerms,
    //  out Tensor prediction) {
    //  try {
    //    var converter = new TreeToTensorConverter(
    //      inputFeatures, variables,
    //      makeVariableWeightsVariable, addLinearScalingTerms,
    //      eagerEvaluation: false
    //    );
    //    tf.compat.v1.disable_eager_execution();
    //    prediction = converter.EvaluateNode(tree.Root.GetSubtree(0));
    //    return true;
    //  } catch (NotSupportedException) {
    //    prediction = null;
    //    return false;
    //  }
    //}

    private readonly IDataAnalysisProblemData problemData;
    private readonly List<int> rows;

    // Maps variable names to their input tensor (eager) or placeholder (graph mode).
    private readonly Dictionary<string, Tensor> inputFeatures = new Dictionary<string, Tensor>();
    // Maps tree nodes to the trainable TF variables created for them
    // (constants: [c]; weighted variables: [w]; StartSymbol with scaling: [beta, alpha]).
    private readonly Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables = new Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]>();

    private readonly bool makeVariableWeightsVariable;
    private readonly bool addLinearScalingTerms;
    private readonly bool eagerEvaluation;

    // Constructor used by TryPrepareTree: input features are created from the dataset.
    private TreeToTensorConverter(
      IDataAnalysisProblemData problemData, List<int> rows,
      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
      bool eagerEvaluation
    ) {
      this.problemData = problemData;
      this.rows = rows;
      this.makeVariableWeightsVariable = makeVariableWeightsVariable;
      this.addLinearScalingTerms = addLinearScalingTerms;
      this.eagerEvaluation = eagerEvaluation;
    }

    // Constructor used by TryEvaluate: input features and variables are supplied by the caller.
    private TreeToTensorConverter(
      Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
      bool eagerEvaluation
    ) {
      this.inputFeatures = inputFeatures;
      this.variables = variables;
      this.makeVariableWeightsVariable = makeVariableWeightsVariable;
      this.addLinearScalingTerms = addLinearScalingTerms;
      this.eagerEvaluation = eagerEvaluation;
    }

    /// <summary>
    /// Materializes the dataset values of <paramref name="variableName"/> for the given rows as a
    /// float32 tensor of shape (rows, 1) for scalar variables or (rows, vectorLength) for vector
    /// variables.
    /// </summary>
    /// <exception cref="NotSupportedException">The variable is neither double nor double-vector typed.</exception>
    private static Tensor InputFeatureToTensor(string variableName, IDataAnalysisProblemData problemData, List<int> rows) {
      if (problemData.Dataset.VariableHasType<double>(variableName)) {
        var data = problemData.Dataset.GetDoubleValues(variableName, rows).Select(x => (float)x).ToArray();
        return tf.convert_to_tensor(np.array(data).reshape(new Shape(rows.Count, 1)), DataType);
      } else if (problemData.Dataset.VariableHasType<DoubleVector>(variableName)) {
        var data = problemData.Dataset.GetDoubleVectorValues(variableName, rows).SelectMany(x => x.Select(y => (float)y)).ToArray();
        return tf.convert_to_tensor(np.array(data).reshape(new Shape(rows.Count, -1)), DataType);
      } else throw new NotSupportedException($"Type of the variable is not supported: {variableName}");
    }

    /// <summary>
    /// Creates a graph-mode placeholder for <paramref name="variableName"/> with the same shape
    /// that <see cref="InputFeatureToTensor"/> would produce.
    /// </summary>
    /// <exception cref="NotSupportedException">The variable is neither double nor double-vector typed.</exception>
    private static Tensor InputFeatureToPlaceholder(string variableName, IDataAnalysisProblemData problemData, List<int> rows) {
      if (problemData.Dataset.VariableHasType<double>(variableName)) {
        return tf.placeholder(DataType, new Shape(rows.Count, 1), name: variableName);
      } else if (problemData.Dataset.VariableHasType<DoubleVector>(variableName)) {
        // Assumes all rows share the vector length of the first row — TODO confirm for ragged data.
        //var vectorLength = problemData.Dataset.GetDoubleVectorValues(variableName, rows).Select(v => v.Count).Distinct().Single();
        var vectorLength = problemData.Dataset.GetDoubleVectorValue(variableName, rows[0]).Count;
        return tf.placeholder(DataType, new Shape(rows.Count, vectorLength), name: variableName);
      } else throw new NotSupportedException($"Type of the variable is not supported: {variableName}");
    }

    /// <summary>
    /// Recursively creates the trainable variables (constants, variable weights, linear-scaling
    /// terms) and registers the input features needed to later evaluate <paramref name="node"/>.
    /// </summary>
    private void PrepareNode(ISymbolicExpressionTreeNode node) {
      if (node.Symbol is Constant) {
        var constantNode = (ConstantTreeNode)node;
        var value = (float)constantNode.Value;
        var value_arr = np.array(value).reshape(new Shape(1, 1));
        var c = tf.Variable(value_arr, name: $"c_{variables.Count}", dtype: DataType);
        variables.Add(node, new[] { c });
      } else if (node.Symbol is Variable) {
        var varNode = (VariableTreeNodeBase)node;
        if (makeVariableWeightsVariable) {
          var w_arr = np.array((float)varNode.Weight).reshape(new Shape(1, 1));
          var w = tf.Variable(w_arr, name: $"w_{varNode.VariableName}", dtype: DataType);
          variables.Add(node, new[] { w });
        }
        // Create each input feature only once, even when a variable occurs multiple times.
        if (!inputFeatures.ContainsKey(varNode.VariableName)) {
          inputFeatures.Add(
            varNode.VariableName,
            eagerEvaluation
              ? InputFeatureToTensor(varNode.VariableName, problemData, rows)
              : InputFeatureToPlaceholder(varNode.VariableName, problemData, rows));
        }
      } else if (node.Symbol is StartSymbol) {
        if (addLinearScalingTerms) {
          var alpha_arr = np.array((float)1.0).reshape(new Shape(1, 1));
          var alpha = tf.Variable(alpha_arr, name: "alpha", dtype: DataType);
          var beta_arr = np.array((float)0.0).reshape(new Shape(1, 1));
          var beta = tf.Variable(beta_arr, name: "beta", dtype: DataType);
          // Order matters: EvaluateNode reads beta at index 0 and alpha at index 1.
          variables.Add(node, new[] { beta, alpha });
        }
      }

      foreach (var subTree in node.Subtrees) {
        PrepareNode(subTree);
      }
    }

    /// <summary>
    /// Recursively translates <paramref name="node"/> into TF operations. Scalar sub-expressions
    /// have shape (rows, 1); vector sub-expressions have shape (rows, vectorLength); aggregation
    /// symbols reduce along axis 1 back to (rows, 1).
    /// </summary>
    /// <exception cref="NotSupportedException">The node's symbol is not supported.</exception>
    private Tensor EvaluateNode(ISymbolicExpressionTreeNode node) {
      if (node.Symbol is Constant) {
        return variables[node][0];
      }

      if (node.Symbol is Variable/* || node.Symbol is BinaryFactorVariable*/) {
        var varNode = node as VariableTreeNodeBase;
        var par = inputFeatures[varNode.VariableName]; // eager or placeholder
        if (makeVariableWeightsVariable) {
          var w = variables[node][0];
          return w * par;
        } else {
          return varNode.Weight * par;
        }
      }

      //if (node.Symbol is FactorVariable) {
      //  var factorVarNode = node as FactorVariableTreeNode;
      //  var products = new List<Tensor>();
      //  foreach (var variableValue in factorVarNode.Symbol.GetVariableValues(factorVarNode.VariableName)) {
      //    //var par = FindOrCreateParameter(parameters, factorVarNode.VariableName, variableValue);
      //    var par = tf.placeholder(DataType, new TensorShape(numRows, 1), name: factorVarNode.VariableName);
      //    parameters.Add(par, factorVarNode.VariableName);
      //    var value = factorVarNode.GetValue(variableValue);
      //    //initialConstants.Add(value);
      //    var wVar = (RefVariable)tf.VariableV1(value, name: $"f_{factorVarNode.VariableName}_{variables.Count}", dtype: DataType, shape: new[] { 1, 1 });
      //    //var wVar = tf.Variable(value, name: $"f_{factorVarNode.VariableName}_{variables.Count}"/*, shape: new[] { 1, 1 }*/);
      //    variables.Add(wVar);
      //    products.add(wVar * par);
      //  }
      //  return products.Aggregate((a, b) => a + b);
      //}

      if (node.Symbol is Addition) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        if (terms.Count == 1) return terms[0];
        return terms.Aggregate((a, b) => a + b);
      }

      if (node.Symbol is Subtraction) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        // Unary minus semantics for a single operand.
        if (terms.Count == 1) return -terms[0];
        return terms.Aggregate((a, b) => a - b);
      }

      if (node.Symbol is Multiplication) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        if (terms.Count == 1) return terms[0];
        return terms.Aggregate((a, b) => a * b);
      }

      if (node.Symbol is Division) {
        var terms = node.Subtrees.Select(EvaluateNode).ToList();
        // Reciprocal semantics for a single operand.
        if (terms.Count == 1) return (float)1.0 / terms[0];
        return terms.Aggregate((a, b) => a / b);
      }

      if (node.Symbol is Absolute) {
        var x1 = EvaluateNode(node.GetSubtree(0));
        return tf.abs(x1);
      }

      if (node.Symbol is AnalyticQuotient) {
        var x1 = EvaluateNode(node.GetSubtree(0));
        var x2 = EvaluateNode(node.GetSubtree(1));
        // x1 / sqrt(1 + x2^2)
        return x1 / tf.pow((float)1.0 + x2 * x2, (float)0.5);
      }

      if (node.Symbol is Logarithm) {
        return tf.log(
          EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Exponential) {
        return tf.pow(
          (float)Math.E,
          EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Square) {
        return tf.square(
          EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is SquareRoot) {
        return tf.sqrt(
          EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Cube) {
        return tf.pow(
          EvaluateNode(node.GetSubtree(0)), (float)3.0);
      }

      if (node.Symbol is CubeRoot) {
        return tf.pow(
          EvaluateNode(node.GetSubtree(0)), (float)1.0 / (float)3.0);
        // TODO: handle negative arguments like the HL interpreter does:
        // f: x < 0 ? -Math.Pow(-x, 1.0 / 3) : Math.Pow(x, 1.0 / 3),
        // g: { var cbrt_x = x < 0 ? -Math.Pow(-x, 1.0 / 3) : Math.Pow(x, 1.0 / 3); return 1.0 / (3 * cbrt_x * cbrt_x); }
      }

      if (node.Symbol is Sine) {
        return tf.sin(
          EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Cosine) {
        return tf.cos(
          EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Tangent) {
        return tf.tan(
          EvaluateNode(node.GetSubtree(0)));
      }

      // Added for consistency with IsCompatible, which accepts HyperbolicTangent; previously such
      // trees failed here with NotSupportedException despite passing the compatibility check.
      if (node.Symbol is HyperbolicTangent) {
        return tf.tanh(
          EvaluateNode(node.GetSubtree(0)));
      }

      if (node.Symbol is Mean) {
        return tf.reduce_mean(
          EvaluateNode(node.GetSubtree(0)),
          axis: new[] { 1 },
          keepdims: true);
      }

      if (node.Symbol is StandardDeviation) {
        return tf.reduce_std(
          EvaluateNode(node.GetSubtree(0)),
          axis: new[] { 1 },
          keepdims: true
        );
      }

      if (node.Symbol is Variance) {
        return tf.reduce_variance(
          EvaluateNode(node.GetSubtree(0)),
          axis: new[] { 1 },
          keepdims: true
        );
      }

      if (node.Symbol is Sum) {
        return tf.reduce_sum(
          EvaluateNode(node.GetSubtree(0)),
          axis: new[] { 1 },
          keepdims: true);
      }

      if (node.Symbol is SubVector) {
        var tensor = EvaluateNode(node.GetSubtree(0));
        int rows = (int)tensor.shape[0], vectorLength = (int)tensor.shape[1];
        var windowedNode = (IWindowedSymbolTreeNode)node;
        // Translate the node's relative offset/length into absolute column indices; slices may
        // wrap around the vector end, hence potentially multiple segments.
        int startIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Offset, vectorLength);
        int endIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Length, vectorLength);
        var slices = SymbolicDataAnalysisExpressionTreeVectorInterpreter.GetVectorSlices(startIdx, endIdx, vectorLength);

        var segments = new List<Tensor>();
        foreach (var (start, count) in slices) {
          segments.Add(tensor[new Slice(), new Slice(start, start + count)]);
        }
        return tf.concat(segments, axis: 1);
      }

      if (node.Symbol is StartSymbol) {
        Tensor prediction = EvaluateNode(node.GetSubtree(0));
        // BUG FIX: was `rank != 2 && shape[1] != 1`, which let any rank-2 tensor through
        // regardless of column count (a multi-column prediction would then be flattened
        // incorrectly by the reshape below). Short-circuiting `||` also guards the shape[1]
        // access for tensors of rank < 2.
        if (prediction.rank != 2 || prediction.shape[1] != 1)
          throw new InvalidOperationException("Prediction must be a rank 2 tensor with a single column (single value per row).");

        prediction = tf.reshape(prediction, new Shape(-1));
        if (addLinearScalingTerms) {
          var vars = variables[node];
          Tensor alpha = vars[1], beta = vars[0];
          return prediction * alpha + beta;
        } else {
          return prediction;
        }
      }

      throw new NotSupportedException($"Node symbol {node.Symbol} is not supported.");
    }

    /// <summary>
    /// Checks whether all symbols of <paramref name="tree"/> are convertible to TF operations.
    /// NOTE(review): Erf and Norm are accepted here but not implemented in EvaluateNode, so
    /// TryPrepareTree/TryEvaluate will still return false for trees containing them — confirm
    /// whether they should be implemented or removed from this list.
    /// </summary>
    public static bool IsCompatible(ISymbolicExpressionTree tree) {
      var containsUnknownSymbol = (
        from n in tree.Root.GetSubtree(0).IterateNodesPrefix()
        where
          !(n.Symbol is Variable) &&
          //!(n.Symbol is BinaryFactorVariable) &&
          //!(n.Symbol is FactorVariable) &&
          !(n.Symbol is Constant) &&
          !(n.Symbol is Addition) &&
          !(n.Symbol is Subtraction) &&
          !(n.Symbol is Multiplication) &&
          !(n.Symbol is Division) &&
          !(n.Symbol is Logarithm) &&
          !(n.Symbol is Exponential) &&
          !(n.Symbol is SquareRoot) &&
          !(n.Symbol is Square) &&
          !(n.Symbol is Sine) &&
          !(n.Symbol is Cosine) &&
          !(n.Symbol is Tangent) &&
          !(n.Symbol is HyperbolicTangent) &&
          !(n.Symbol is Erf) &&
          !(n.Symbol is Norm) &&
          !(n.Symbol is StartSymbol) &&
          !(n.Symbol is Absolute) &&
          !(n.Symbol is AnalyticQuotient) &&
          !(n.Symbol is Cube) &&
          !(n.Symbol is CubeRoot) &&
          !(n.Symbol is Mean) &&
          !(n.Symbol is StandardDeviation) &&
          !(n.Symbol is Variance) &&
          !(n.Symbol is Sum) &&
          !(n.Symbol is SubVector)
        select n).Any();
      return !containsUnknownSymbol;
    }
  }
}