#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.Serialization;
using AutoDiff;
using HeuristicLab.Common;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
public class VectorUnrollingTreeToAutoDiffTermConverter {
public delegate double ParametricFunction(double[] vars, double[] @params);
public delegate Tuple ParametricFunctionGradient(double[] vars, double[] @params);
#region helper class
public class DataForVariable {
public readonly string variableName;
public readonly string variableValue; // for factor vars
public readonly int lag;
public readonly int index; // for vectors
public DataForVariable(string varName, string varValue, int lag, int index) {
this.variableName = varName;
this.variableValue = varValue;
this.lag = lag;
this.index = index;
}
public override bool Equals(object obj) {
var other = obj as DataForVariable;
if (other == null) return false;
return other.variableName.Equals(this.variableName) &&
other.variableValue.Equals(this.variableValue) &&
other.lag == this.lag &&
other.index == this.index;
}
public override int GetHashCode() {
return variableName.GetHashCode() ^ variableValue.GetHashCode() ^ lag ^ index;
}
}
#endregion
#region derivations of functions
// create function factory for arctangent
private static readonly Func arctan = UnaryFunc.Factory(
eval: Math.Atan,
diff: x => 1 / (1 + x * x));
private static readonly Func sin = UnaryFunc.Factory(
eval: Math.Sin,
diff: Math.Cos);
private static readonly Func cos = UnaryFunc.Factory(
eval: Math.Cos,
diff: x => -Math.Sin(x));
private static readonly Func tan = UnaryFunc.Factory(
eval: Math.Tan,
diff: x => 1 + Math.Tan(x) * Math.Tan(x));
private static readonly Func tanh = UnaryFunc.Factory(
eval: Math.Tanh,
diff: x => 1 - Math.Tanh(x) * Math.Tanh(x));
private static readonly Func erf = UnaryFunc.Factory(
eval: alglib.errorfunction,
diff: x => 2.0 * Math.Exp(-(x * x)) / Math.Sqrt(Math.PI));
private static readonly Func norm = UnaryFunc.Factory(
eval: alglib.normaldistribution,
diff: x => -(Math.Exp(-(x * x)) * Math.Sqrt(Math.Exp(x * x)) * x) / Math.Sqrt(2 * Math.PI));
private static readonly Func abs = UnaryFunc.Factory(
eval: Math.Abs,
diff: x => Math.Sign(x)
);
private static readonly Func cbrt = UnaryFunc.Factory(
eval: x => x < 0 ? -Math.Pow(-x, 1.0 / 3) : Math.Pow(x, 1.0 / 3),
diff: x => { var cbrt_x = x < 0 ? -Math.Pow(-x, 1.0 / 3) : Math.Pow(x, 1.0 / 3); return 1.0 / (3 * cbrt_x * cbrt_x); }
);
#endregion
public static bool TryConvertToAutoDiff(ISymbolicExpressionTree tree,
IDictionary evaluationTrace,
bool makeVariableWeightsVariable, bool addLinearScalingTerms,
out List parameters, out double[] initialConstants,
out ParametricFunction func,
out ParametricFunctionGradient func_grad) {
// use a transformator object which holds the state (variable list, parameter list, ...) for recursive transformation of the tree
var transformator = new VectorUnrollingTreeToAutoDiffTermConverter(evaluationTrace,
makeVariableWeightsVariable, addLinearScalingTerms);
Term term;
try {
term = transformator.ConvertToAutoDiff(tree.Root.GetSubtree(0)).Single();
var parameterEntries = transformator.parameters.ToArray(); // guarantee same order for keys and values
var compiledTerm = term.Compile(transformator.variables.ToArray(),
parameterEntries.Select(kvp => kvp.Value).ToArray());
parameters = new List(parameterEntries.Select(kvp => kvp.Key));
initialConstants = transformator.initialConstants.ToArray();
func = (vars, @params) => compiledTerm.Evaluate(vars, @params);
func_grad = (vars, @params) => compiledTerm.Differentiate(vars, @params);
return true;
} catch (ConversionException) {
func = null;
func_grad = null;
parameters = null;
initialConstants = null;
}
return false;
}
private readonly IDictionary evaluationTrace;
// state for recursive transformation of trees
private readonly List initialConstants;
private readonly Dictionary parameters;
private readonly List variables;
private readonly bool makeVariableWeightsVariable;
private readonly bool addLinearScalingTerms;
private VectorUnrollingTreeToAutoDiffTermConverter(IDictionary evaluationTrace,
bool makeVariableWeightsVariable, bool addLinearScalingTerms) {
this.evaluationTrace = evaluationTrace;
this.makeVariableWeightsVariable = makeVariableWeightsVariable;
this.addLinearScalingTerms = addLinearScalingTerms;
this.initialConstants = new List();
this.parameters = new Dictionary();
this.variables = new List();
}
private static IEnumerable> Broadcast(IList[] source) {
var maxLength = source.Max(x => x.Count);
if (source.Any(x => x.Count != maxLength && x.Count != 1))
throw new InvalidOperationException("Length must match to maxLength or one");
return source.Select(x => x.Count == maxLength ? x : Enumerable.Repeat(x[0], maxLength));
}
public static IEnumerable> Transpose(IEnumerable> source) {
var enumerators = source.Select(x => x.GetEnumerator()).ToArray();
try {
while (enumerators.All(x => x.MoveNext())) {
yield return enumerators.Select(x => x.Current).ToArray();
}
} finally {
foreach (var enumerator in enumerators)
enumerator.Dispose();
}
}
private IList ConvertToAutoDiff(ISymbolicExpressionTreeNode node) {
IList BinaryOp(Func binaryOp, Func singleElementOp, params IList[] terms) {
if (terms.Length == 1) return terms[0].Select(singleElementOp).ToList();
var broadcastedTerms = Broadcast(terms);
var transposedTerms = Transpose(broadcastedTerms);
return transposedTerms.Select(term => term.Aggregate(binaryOp)).ToList();
}
IList UnaryOp(Func unaryOp, IList term) {
return term.Select(unaryOp).ToList();
}
var evaluationResult = evaluationTrace[node];
if (node.Symbol is Constant) { // assume scalar constant
initialConstants.Add(((ConstantTreeNode)node).Value);
var var = new AutoDiff.Variable();
variables.Add(var);
return new Term[] { var };
}
if (node.Symbol is Variable || node.Symbol is BinaryFactorVariable) {
var varNode = node as VariableTreeNodeBase;
var factorVarNode = node as BinaryFactorVariableTreeNode;
// factor variable values are only 0 or 1 and set in x accordingly
var varValue = factorVarNode != null ? factorVarNode.VariableValue : string.Empty;
var pars = evaluationResult.IsVector
? Enumerable.Range(0, evaluationResult.Vector.Count).Select(i => FindOrCreateParameter(parameters, varNode.VariableName, varValue, index: i))
: FindOrCreateParameter(parameters, varNode.VariableName, varValue).ToEnumerable();
if (makeVariableWeightsVariable) {
initialConstants.Add(varNode.Weight);
var w = new AutoDiff.Variable();
variables.Add(w);
return pars.Select(par => AutoDiff.TermBuilder.Product(w, par)).ToList();
} else {
return pars.Select(par => varNode.Weight * par).ToList();
}
}
if (node.Symbol is FactorVariable) {
var factorVarNode = node as FactorVariableTreeNode;
var products = new List();
foreach (var variableValue in factorVarNode.Symbol.GetVariableValues(factorVarNode.VariableName)) {
var par = FindOrCreateParameter(parameters, factorVarNode.VariableName, variableValue);
initialConstants.Add(factorVarNode.GetValue(variableValue));
var wVar = new AutoDiff.Variable();
variables.Add(wVar);
products.Add(AutoDiff.TermBuilder.Product(wVar, par));
}
return new[] { AutoDiff.TermBuilder.Sum(products) };
}
//if (node.Symbol is LaggedVariable) {
// var varNode = node as LaggedVariableTreeNode;
// var par = FindOrCreateParameter(parameters, varNode.VariableName, string.Empty, varNode.Lag);
// if (makeVariableWeightsVariable) {
// initialConstants.Add(varNode.Weight);
// var w = new AutoDiff.Variable();
// variables.Add(w);
// return AutoDiff.TermBuilder.Product(w, par);
// } else {
// return varNode.Weight * par;
// }
//}
if (node.Symbol is Addition) {
var terms = node.Subtrees.Select(ConvertToAutoDiff).ToArray();
return BinaryOp((a, b) => a + b, a => a, terms);
}
if (node.Symbol is Subtraction) {
var terms = node.Subtrees.Select(ConvertToAutoDiff).ToArray();
return BinaryOp((a, b) => a - b, a => -a, terms);
}
if (node.Symbol is Multiplication) {
var terms = node.Subtrees.Select(ConvertToAutoDiff).ToArray();
return BinaryOp((a, b) => a * b, a => a, terms);
}
if (node.Symbol is Division) {
var terms = node.Subtrees.Select(ConvertToAutoDiff).ToArray();
return BinaryOp((a, b) => a / b, a => 1.0 / a, terms);
}
if (node.Symbol is Absolute) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(abs, term);
}
//if (node.Symbol is AnalyticQuotient) {
// var x1 = ConvertToAutoDiff(node.GetSubtree(0));
// var x2 = ConvertToAutoDiff(node.GetSubtree(1));
// return x1 / (TermBuilder.Power(1 + x2 * x2, 0.5));
//}
if (node.Symbol is Logarithm) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(TermBuilder.Log, term);
}
if (node.Symbol is Exponential) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(TermBuilder.Exp, term);
}
if (node.Symbol is Square) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(t => TermBuilder.Power(t, 2.0), term);
}
if (node.Symbol is SquareRoot) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(t => TermBuilder.Power(t, 0.5), term);
}
if (node.Symbol is Cube) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(t => TermBuilder.Power(t, 3.0), term);
}
if (node.Symbol is CubeRoot) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(cbrt, term);
}
if (node.Symbol is Sine) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(sin, term);
}
if (node.Symbol is Cosine) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(cos, term);
}
if (node.Symbol is Tangent) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(tan, term);
}
if (node.Symbol is HyperbolicTangent) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(tanh, term);
}
if (node.Symbol is Erf) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(erf, term);
}
if (node.Symbol is Norm) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return UnaryOp(norm, term);
}
if (node.Symbol is StartSymbol) {
if (addLinearScalingTerms) {
// scaling variables α, β are given at the beginning of the parameter vector
var alpha = new AutoDiff.Variable();
var beta = new AutoDiff.Variable();
variables.Add(beta);
variables.Add(alpha);
var t = ConvertToAutoDiff(node.GetSubtree(0));
if (t.Count > 1) throw new InvalidOperationException("Tree Result must be scalar value");
return new[] { t[0] * alpha + beta };
} else return ConvertToAutoDiff(node.GetSubtree(0));
}
if (node.Symbol is Sum) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return new[] { TermBuilder.Sum(term) };
}
if (node.Symbol is Mean) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return new[] { TermBuilder.Sum(term) / term.Count };
}
if (node.Symbol is StandardDeviation) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
var mean = TermBuilder.Sum(term) / term.Count;
var ssd = TermBuilder.Sum(term.Select(t => TermBuilder.Power(t - mean, 2.0)));
return new[] { TermBuilder.Power(ssd / term.Count, 0.5) };
}
if (node.Symbol is Length) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
return new[] { TermBuilder.Constant(term.Count) };
}
//if (node.Symbol is Min) {
//}
//if (node.Symbol is Max) {
//}
if (node.Symbol is Variance) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
var mean = TermBuilder.Sum(term) / term.Count;
var ssd = TermBuilder.Sum(term.Select(t => TermBuilder.Power(t - mean, 2.0)));
return new[] { ssd / term.Count };
}
//if (node.Symbol is Skewness) {
//}
//if (node.Symbol is Kurtosis) {
//}
//if (node.Symbol is EuclideanDistance) {
//}
//if (node.Symbol is Covariance) {
//}
if (node.Symbol is SubVector) {
var term = node.Subtrees.Select(ConvertToAutoDiff).Single();
var windowedNode = (IWindowedSymbolTreeNode)node;
int startIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Offset, term.Count);
int endIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Length, term.Count);
var slices = SymbolicDataAnalysisExpressionTreeVectorInterpreter.GetVectorSlices(startIdx, endIdx, term.Count);
var selectedTerms = new List(capacity: slices.Sum(s => s.Item2));
foreach (var (start, count) in slices) {
for (int i = start; i < start + count; i++){
selectedTerms.Add(term[i]);
}
}
return selectedTerms;
}
throw new ConversionException();
}
// for each factor variable value we need a parameter which represents a binary indicator for that variable & value combination
// each binary indicator is only necessary once. So we only create a parameter if this combination is not yet available
private static Term FindOrCreateParameter(Dictionary parameters,
string varName, string varValue = "", int lag = 0, int index = -1) {
var data = new DataForVariable(varName, varValue, lag, index);
AutoDiff.Variable par = null;
if (!parameters.TryGetValue(data, out par)) {
// not found -> create new parameter and entries in names and values lists
par = new AutoDiff.Variable();
parameters.Add(data, par);
}
return par;
}
public static bool IsCompatible(ISymbolicExpressionTree tree) {
var containsUnknownSymbol = (
from n in tree.Root.GetSubtree(0).IterateNodesPrefix()
where
!(n.Symbol is Variable) &&
!(n.Symbol is BinaryFactorVariable) &&
//!(n.Symbol is FactorVariable) &&
//!(n.Symbol is LaggedVariable) &&
!(n.Symbol is Constant) &&
!(n.Symbol is Addition) &&
!(n.Symbol is Subtraction) &&
!(n.Symbol is Multiplication) &&
!(n.Symbol is Division) &&
!(n.Symbol is Logarithm) &&
!(n.Symbol is Exponential) &&
!(n.Symbol is SquareRoot) &&
!(n.Symbol is Square) &&
!(n.Symbol is Sine) &&
!(n.Symbol is Cosine) &&
!(n.Symbol is Tangent) &&
!(n.Symbol is HyperbolicTangent) &&
!(n.Symbol is Erf) &&
!(n.Symbol is Norm) &&
!(n.Symbol is StartSymbol) &&
!(n.Symbol is Absolute) &&
//!(n.Symbol is AnalyticQuotient) &&
!(n.Symbol is Cube) &&
!(n.Symbol is CubeRoot) &&
!(n.Symbol is Sum) &&
!(n.Symbol is Mean) &&
!(n.Symbol is StandardDeviation) &&
!(n.Symbol is Length) &&
//!(n.Symbol is Min) &&
//!(n.Symbol is Max) &&
!(n.Symbol is Variance) &&
//!(n.Symbol is Skewness) &&
//!(n.Symbol is Kurtosis) &&
//!(n.Symbol is EuclideanDistance) &&
//!(n.Symbol is Covariance) &&
!(n.Symbol is SubVector)
select n).Any();
return !containsUnknownSymbol;
}
#region exception class
[Serializable]
public class ConversionException : Exception {
public ConversionException() {
}
public ConversionException(string message) : base(message) {
}
public ConversionException(string message, Exception inner) : base(message, inner) {
}
protected ConversionException(
SerializationInfo info,
StreamingContext context) : base(info, context) {
}
}
#endregion
}
}