using System;
using System.IO;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using HeuristicLab.Problems.DataAnalysis;
using System.Data;
using System.Diagnostics;
using System.Threading;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;

namespace HeuristicLab.Algorithms.DataAnalysis.FastFunctionExtraction {
  /// <summary>
  /// Console driver that runs <c>FastFunctionExtraction.Fit</c> on every file of a data
  /// directory and appends the training/test errors of the resulting models to a CSV file.
  /// </summary>
  class Program {
    // Culture used both for parsing the input values and for formatting the result rows,
    // so that the decimal separator does not depend on the machine's current culture.
    private static readonly CultureInfo DataCulture = new CultureInfo("en-US");

    /// <summary>
    /// Writes the CSV header to the results file (overwriting any previous content) and then
    /// runs FFX on every file found in the data directory, printing progress and the elapsed
    /// time per dataset to the console.
    /// </summary>
    private static void TestPennML() {
      // NOTE(review): FullName carries no trailing directory separator, so the "../" below is
      // glued onto the last path segment ("<dir>../branches/..."). Left byte-identical because
      // the intended location cannot be determined from this file - confirm and switch to
      // Path.Combine if this is unintended.
      string projectDirectoryPath = Directory.GetParent(Environment.CurrentDirectory).Parent.FullName + "../branches/3022-FastFunctionExtraction/TestFFX/";
      string pennMLDataPath = projectDirectoryPath + "/pennML_data/";
      string targetPath = projectDirectoryPath + "/results/results.txt";

      var dInfo = new DirectoryInfo(pennMLDataPath);

      // Start a fresh results file; the per-dataset rows are appended by SaveAccuracyInFile.
      using (var headerWriter = new StreamWriter(targetPath)) {
        headerWriter.WriteLine("dataset,algorithm,num_bases,train_mse,train_mae,test_mse,test_mae,runtime");
      }

      // Enumerate the directory once so that the displayed total and the iterated files agree.
      FileInfo[] dataFiles = dInfo.GetFiles();
      int len = dataFiles.Length;
      int idx = 0;

      foreach (var fInfo in dataFiles) {
        // Uncomment to restrict the run to a single dataset while debugging:
        //if (fInfo.Name != "1089_USCrime.csv") continue;
        Console.Write($"{++idx}/{len}\t{fInfo.Name,-35}:");
        RunFFXOnDataset(fInfo, targetPath, out var elapsedSeconds);
        Console.WriteLine($"{elapsedSeconds + " sec",-15}");
      }
    }

    /// <summary>
    /// Appends one CSV row per solution to <paramref name="filePath"/>. The columns are:
    /// dataset name (file name without extension), the literal "hl_ffx", number of bases,
    /// training MSE, training MAE, test MSE, test MAE and runtime.
    /// </summary>
    /// <param name="regressionSolutions">Solutions whose error measures are written.</param>
    /// <param name="numBases">Entry <c>i</c> is written next to solution <c>i</c>; must hold at least as many entries as there are solutions.</param>
    /// <param name="runtime">Runtime value written to every row.</param>
    /// <param name="filePath">File to append to; nothing is written when null or empty.</param>
    /// <param name="problemName">File name of the dataset; its extension is stripped for the first column.</param>
    private static void SaveAccuracyInFile(ISymbolicRegressionSolution[] regressionSolutions, int[] numBases, double runtime, string filePath, string problemName) {
      if (string.IsNullOrEmpty(filePath)) return;

      // Strip the extension whatever its length instead of blindly cutting four characters.
      string datasetName = Path.GetFileNameWithoutExtension(problemName);

      using (var sw = new StreamWriter(filePath, true)) {
        for (int i = 0; i < regressionSolutions.Length; i++) {
          var solution = regressionSolutions[i];
          string outputStr = String.Join(",", new[] {
            datasetName,
            "hl_ffx",
            numBases[i].ToString(),
            solution.TrainingMeanSquaredError.ToString(DataCulture),
            solution.TrainingMeanAbsoluteError.ToString(DataCulture),
            solution.TestMeanSquaredError.ToString(DataCulture),
            solution.TestMeanAbsoluteError.ToString(DataCulture),
            runtime.ToString(DataCulture)
          });
          // Write the row verbatim. Passing a second argument would select the
          // WriteLine(format, arg) overload and interpret braces in the row as format items.
          sw.WriteLine(outputStr);
        }
      }

      Thread.Sleep(100); // to prevent race conditions with stream writers
      // NOTE(review): the writer is disposed deterministically above, so this delay is
      // probably no longer needed - kept to preserve the original timing; confirm and remove.
    }

    /// <summary>
    /// Loads the dataset in <paramref name="fInfo"/>, fits FFX models on it, wraps each model
    /// in a <see cref="SymbolicRegressionSolution"/> and appends their accuracies to
    /// <paramref name="outFilePath"/>.
    /// </summary>
    /// <param name="fInfo">Data file to process.</param>
    /// <param name="outFilePath">Results file passed on to <c>SaveAccuracyInFile</c>.</param>
    /// <param name="elapsedSeconds">Wall-clock seconds spent inside the <c>Fit</c> call only (parsing and solution creation are not timed).</param>
    private static void RunFFXOnDataset(FileInfo fInfo, string outFilePath, out double elapsedSeconds) {
      var data = ParseProblemDataFromFile(fInfo.FullName);

      // NOTE(review): the meaning of 0.95 and of the five boolean switches is defined by
      // FastFunctionExtraction.Fit, which is not visible here - see that method's signature.
      var stopwatch = Stopwatch.StartNew();
      var regressionModels = FastFunctionExtraction.Fit(data, 0.95, out var numBases, true, true, true, true, true, maxNumBases: 10).ToArray();
      stopwatch.Stop();
      elapsedSeconds = stopwatch.Elapsed.TotalSeconds;

      var regressionSolutions = regressionModels.Select(model => new SymbolicRegressionSolution(model, data)).ToArray();
      SaveAccuracyInFile(regressionSolutions, numBases.ToArray(), elapsedSeconds, outFilePath, fInfo.Name);
    }

    /// <summary>
    /// Reads a separator-delimited text file into regression problem data. The first line
    /// holds the variable names; the last variable is used as the target and all variables
    /// with a different name as allowed inputs. Reading stops at the first empty or
    /// whitespace-only line or at the end of the file.
    /// </summary>
    /// <param name="filepath">Path of the file to read.</param>
    /// <param name="separator">Character separating the fields of a line.</param>
    /// <returns>Problem data built from the parsed columns.</returns>
    /// <exception cref="InvalidDataException">The file has no header line, or a data row does not have exactly one field per variable.</exception>
    /// <exception cref="FormatException">A field cannot be converted to a double.</exception>
    private static IRegressionProblemData ParseProblemDataFromFile(string filepath, char separator = ';') {
      string[] variables;
      List<double>[] vals;

      using (var reader = new StreamReader(filepath)) {
        string header = reader.ReadLine();
        if (String.IsNullOrWhiteSpace(header)) {
          throw new InvalidDataException($"File '{filepath}' does not contain a header line.");
        }

        variables = header.Split(separator);
        // One value column per variable.
        vals = Enumerable.Range(0, variables.Length).Select(_ => new List<double>()).ToArray();

        int lineNumber = 1;
        string line = reader.ReadLine();
        while (!String.IsNullOrWhiteSpace(line)) {
          lineNumber++;
          string[] fields = line.Split(separator);
          if (fields.Length != variables.Length) {
            throw new InvalidDataException($"File '{filepath}', line {lineNumber}: expected {variables.Length} values but found {fields.Length}.");
          }

          for (int col = 0; col < fields.Length; col++) {
            vals[col].Add(Convert.ToDouble(fields[col], DataCulture));
          }
          line = reader.ReadLine();
        }
      }

      var targetVar = variables.Last();
      var allowedInputVars = variables.Where(val => val != targetVar);

      IDataset dataset = new Dataset(variables, vals);
      return new RegressionProblemData(dataset, allowedInputVars, targetVar);
    }

    /// <summary>
    /// Entry point: runs the benchmark and waits for Enter before exiting.
    /// </summary>
    static void Main() {
      TestPennML();
      Console.WriteLine("Done.");
      Console.ReadLine();
    }
  }
}