#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HEAL.Attic;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Wraps an <see cref="IPreprocessingData"/> instance and can overlay a filtered copy of it.
/// While a filter is set, cell values, dimensions, variable names and statistics are read from the
/// filtered copy (see <see cref="ActiveData"/>), and modifying operations are rejected with an
/// <see cref="InvalidOperationException"/>. Without a filter, everything is forwarded to the wrapped data.
/// </summary>
[Item("FilteredPreprocessingData", "Represents filtered data used for preprocessing.")]
[StorableType("26BAE57C-A102-483D-8A09-AEC7132FD837")]
public sealed class FilteredPreprocessingData : NamedItem, IFilteredPreprocessingData {
  // The wrapped, unfiltered data; all modifications are applied to this instance.
  [Storable]
  private readonly IPreprocessingData originalData;

  // Filtered copy of originalData; null whenever no filter is set.
  [Storable]
  private IPreprocessingData filteredData;

  /// <summary>
  /// Gets the data that read operations work on: the filtered copy while a filter is set,
  /// otherwise the wrapped original data.
  /// </summary>
  public IPreprocessingData ActiveData {
    get { return IsFiltered ? filteredData : originalData; }
  }

  #region Constructor, Cloning & Persistence
  /// <summary>
  /// Creates an unfiltered wrapper around <paramref name="preprocessingData"/>.
  /// </summary>
  /// <param name="preprocessingData">The data to wrap; it is stored by reference, not copied.</param>
  public FilteredPreprocessingData(IPreprocessingData preprocessingData)
    : base() {
    originalData = preprocessingData;
    filteredData = null;
  }

  // Cloning constructor.
  // NOTE(review): the wrapped data instances are shared with the original by reference instead of being
  // cloned through the cloner, so a clone and its source operate on the same data — confirm this is intended.
  private FilteredPreprocessingData(FilteredPreprocessingData original, Cloner cloner)
    : base(original, cloner) {
    originalData = original.originalData;
    filteredData = original.filteredData;
  }

  /// <summary>Creates a clone of this instance using the cloning constructor.</summary>
  public override IDeepCloneable Clone(Cloner cloner) {
    return new FilteredPreprocessingData(this, cloner);
  }

  [StorableConstructor]
  private FilteredPreprocessingData(StorableConstructorFlag _) : base(_) { }
  #endregion

  // Shared guard for all modifying operations: they are only allowed while no filter is set.
  // The message format matches the messages used throughout this class.
  private void ThrowIfFiltered(string operation) {
    if (IsFiltered)
      throw new InvalidOperationException(operation + " not possible while data is filtered");
  }

  #region Cells
  /// <summary>Returns whether the given cell of the active data is empty.</summary>
  public bool IsCellEmpty(int columnIndex, int rowIndex) {
    return ActiveData.IsCellEmpty(columnIndex, rowIndex);
  }

  /// <summary>Returns the value of the given cell of the active data.</summary>
  public T GetCell<T>(int columnIndex, int rowIndex) {
    return ActiveData.GetCell<T>(columnIndex, rowIndex);
  }

  /// <summary>Sets the value of the given cell of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void SetCell<T>(int columnIndex, int rowIndex, T value) {
    ThrowIfFiltered("SetCell");
    originalData.SetCell<T>(columnIndex, rowIndex, value);
  }

  /// <summary>Returns the string representation of the given cell of the active data.</summary>
  public string GetCellAsString(int columnIndex, int rowIndex) {
    return ActiveData.GetCellAsString(columnIndex, rowIndex);
  }

  /// <summary>Returns the values of the given column of the active data.</summary>
  public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
    return ActiveData.GetValues<T>(columnIndex, considerSelection);
  }

  /// <summary>Replaces the values of the given column of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void SetValues<T>(int columnIndex, IList<T> values) {
    ThrowIfFiltered("SetValues");
    originalData.SetValues<T>(columnIndex, values);
  }

  /// <summary>Sets a cell of the original data from its string representation.</summary>
  /// <returns>The result reported by the wrapped data.</returns>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public bool SetValue(string value, int columnIndex, int rowIndex) {
    ThrowIfFiltered("SetValue");
    return originalData.SetValue(value, columnIndex, rowIndex);
  }

  /// <summary>Gets the number of columns of the active data.</summary>
  public int Columns {
    get { return ActiveData.Columns; }
  }

  /// <summary>Gets the number of rows of the active data.</summary>
  public int Rows {
    get { return ActiveData.Rows; }
  }
  #endregion

  #region Rows
  /// <summary>Inserts a row into the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void InsertRow(int rowIndex) {
    ThrowIfFiltered("InsertRow");
    originalData.InsertRow(rowIndex);
  }

  /// <summary>Deletes a row from the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void DeleteRow(int rowIndex) {
    ThrowIfFiltered("DeleteRow");
    originalData.DeleteRow(rowIndex);
  }

  /// <summary>Deletes the rows with the given indices from the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void DeleteRowsWithIndices(IEnumerable<int> rows) {
    ThrowIfFiltered("DeleteRowsWithIndices");
    originalData.DeleteRowsWithIndices(rows);
  }

  /// <summary>Inserts a column into the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void InsertColumn<T>(string variableName, int columnIndex) {
    ThrowIfFiltered("InsertColumn");
    originalData.InsertColumn<T>(variableName, columnIndex);
  }

  /// <summary>Deletes a column from the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void DeleteColumn(int columnIndex) {
    ThrowIfFiltered("DeleteColumn");
    originalData.DeleteColumn(columnIndex);
  }

  /// <summary>Renames a column of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void RenameColumn(int columnIndex, string name) {
    ThrowIfFiltered("RenameColumn");
    originalData.RenameColumn(columnIndex, name);
  }

  /// <summary>Renames the columns of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void RenameColumns(IList<string> names) {
    ThrowIfFiltered("RenameColumns");
    originalData.RenameColumns(names);
  }

  /// <summary>Forwards the string-column check to the original data.</summary>
  public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
    return originalData.AreAllStringColumns(columnIndices);
  }
  #endregion

  #region Variables
  /// <summary>Gets the variable names of the active data.</summary>
  public IEnumerable<string> VariableNames {
    get { return ActiveData.VariableNames; }
  }

  /// <summary>Returns the double variable names as reported by the original data.</summary>
  public IEnumerable<string> GetDoubleVariableNames() {
    return originalData.GetDoubleVariableNames();
  }

  /// <summary>Returns the name of the given column of the active data.</summary>
  public string GetVariableName(int columnIndex) {
    return ActiveData.GetVariableName(columnIndex);
  }

  /// <summary>Returns the column index of the given variable in the active data.</summary>
  public int GetColumnIndex(string variableName) {
    return ActiveData.GetColumnIndex(variableName);
  }

  /// <summary>Returns whether the given column of the original data has type <typeparamref name="T"/>.</summary>
  public bool VariableHasType<T>(int columnIndex) {
    return originalData.VariableHasType<T>(columnIndex);
  }

  /// <summary>Returns the type of the given column of the active data.</summary>
  public Type GetVariableType(int columnIndex) {
    return ActiveData.GetVariableType(columnIndex);
  }

  /// <summary>Gets the input variables of the active data.</summary>
  public IList<string> InputVariables {
    get { return ActiveData.InputVariables; }
  }

  /// <summary>Gets the target variable of the active data.</summary>
  public string TargetVariable {
    get { return ActiveData.TargetVariable; }
  } // optional
  #endregion

  #region Partitions
  /// <summary>Gets the training partition of the original data.</summary>
  public IntRange TrainingPartition {
    get { return originalData.TrainingPartition; }
  }

  /// <summary>Gets the test partition of the original data.</summary>
  public IntRange TestPartition {
    get { return originalData.TestPartition; }
  }
  #endregion

  #region Transformations
  /// <summary>Gets the transformations of the original data.</summary>
  public IList<ITransformation> Transformations {
    get { return originalData.Transformations; }
  }
  #endregion

  #region Validation
  /// <summary>Forwards validation of <paramref name="value"/> for the given column to the original data.</summary>
  public bool Validate(string value, out string errorMessage, int columnIndex) {
    return originalData.Validate(value, out errorMessage, columnIndex);
  }
  #endregion

  #region Import & Export
  /// <summary>Imports problem data into the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void Import(IDataAnalysisProblemData problemData) {
    ThrowIfFiltered("Import");
    originalData.Import(problemData);
  }

  /// <summary>Exports the original (unfiltered) data to a dataset.</summary>
  public Dataset ExportToDataset() {
    return originalData.ExportToDataset();
  }
  #endregion

  #region Selection
  /// <summary>Gets or sets the selection stored in the original data.</summary>
  public IDictionary<int, IList<int>> Selection {
    get { return originalData.Selection; }
    set { originalData.Selection = value; }
  }

  /// <summary>Clears the selection of the original data.</summary>
  public void ClearSelection() {
    originalData.ClearSelection();
  }

  /// <summary>Forwards subscriptions to the original data's SelectionChanged event.</summary>
  public event EventHandler SelectionChanged {
    add { originalData.SelectionChanged += value; }
    remove { originalData.SelectionChanged -= value; }
  }
  #endregion

  #region Transactions
  /// <summary>Forwards subscriptions to the original data's Changed event.</summary>
  public event DataPreprocessingChangedEventHandler Changed {
    add { originalData.Changed += value; }
    remove { originalData.Changed -= value; }
  }

  /// <summary>Gets whether undo is available; always false while a filter is set.</summary>
  public bool IsUndoAvailable {
    get { return !IsFiltered && originalData.IsUndoAvailable; }
  }

  /// <summary>Performs an undo on the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void Undo() {
    ThrowIfFiltered("Undo");
    originalData.Undo();
  }

  /// <summary>Runs <paramref name="action"/> inside a transaction of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any) {
    ThrowIfFiltered("Transaction");
    originalData.InTransaction(action, type);
  }

  /// <summary>Begins a transaction on the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void BeginTransaction(DataPreprocessingChangedEventType type) {
    ThrowIfFiltered("Transaction");
    originalData.BeginTransaction(type);
  }

  /// <summary>Ends a transaction on the original data. Unlike the other transaction members this is not guarded by the filter state.</summary>
  public void EndTransaction() {
    originalData.EndTransaction();
  }
  #endregion

  #region Statistics
  // All statistics are forwarded unchanged to the active data.
  public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMin<T>(columnIndex, considerSelection, emptyValue);
  }

  public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMax<T>(columnIndex, considerSelection, emptyValue);
  }

  public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue);
  }

  public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    return ActiveData.GetMedian<T>(columnIndex, considerSelection, emptyValue);
  }

  public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
    return ActiveData.GetMode<T>(columnIndex, considerSelection, emptyValue);
  }

  public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetStandardDeviation<T>(columnIndex, considerSelection, emptyValue);
  }

  public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetVariance<T>(columnIndex, considerSelection, emptyValue);
  }

  public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    return ActiveData.GetQuantile<T>(alpha, columnIndex, considerSelection, emptyValue);
  }

  public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
    return ActiveData.GetDistinctValues<T>(columnIndex, considerSelection);
  }

  public int GetMissingValueCount() {
    return ActiveData.GetMissingValueCount();
  }

  public int GetMissingValueCount(int columnIndex) {
    return ActiveData.GetMissingValueCount(columnIndex);
  }

  public int GetRowMissingValueCount(int rowIndex) {
    return ActiveData.GetRowMissingValueCount(rowIndex);
  }
  #endregion

  #region Filters
  /// <summary>
  /// Builds a filtered copy of the original data that keeps only the rows flagged in
  /// <paramref name="remainingRows"/>, makes it the active data and raises <see cref="FilterChanged"/>.
  /// Columns that are neither double, string nor DateTime are left untouched in the copy.
  /// </summary>
  /// <param name="remainingRows">One flag per row index; rows whose flag is true are kept.</param>
  /// <exception cref="ArgumentNullException"><paramref name="remainingRows"/> is null.</exception>
  public void SetFilter(bool[] remainingRows) {
    if (remainingRows == null)
      throw new ArgumentNullException("remainingRows");

    // Materialize once; the index list is reused for every column.
    var remainingIndices = Enumerable.Range(0, remainingRows.Length).Where(x => remainingRows[x]).ToList();

    // Work on a local copy and publish it only after filtering succeeded, so a failure
    // cannot leave this instance flagged as filtered with unfiltered data.
    var filtered = (IPreprocessingData)originalData.Clone();
    filtered.InTransaction(() => {
      foreach (var v in filtered.VariableNames) {
        var ci = filtered.GetColumnIndex(v);
        if (filtered.VariableHasType<double>(ci)) {
          KeepRows<double>(filtered, ci, remainingIndices);
        } else if (filtered.VariableHasType<string>(ci)) {
          KeepRows<string>(filtered, ci, remainingIndices);
        } else if (filtered.VariableHasType<DateTime>(ci)) {
          KeepRows<DateTime>(filtered, ci, remainingIndices);
        }
      }
    });

    filteredData = filtered;
    OnFilterChanged();
  }

  // Replaces the values of one column with only the entries at the given row indices, in the given order.
  private static void KeepRows<T>(IPreprocessingData data, int columnIndex, IList<int> rowIndices) {
    var values = data.GetValues<T>(columnIndex);
    var keptValues = rowIndices.Select(x => values[x]).ToList();
    data.SetValues<T>(columnIndex, keptValues);
  }

  /// <summary>
  /// Writes the column values of the filtered copy back into the original data within one
  /// transaction and then removes the filter (which raises <see cref="FilterChanged"/>).
  /// </summary>
  /// <exception cref="InvalidOperationException">No filter is currently set.</exception>
  /// <exception cref="ArgumentException">A column is neither double, string nor DateTime.</exception>
  public void PersistFilter() {
    if (!IsFiltered)
      throw new InvalidOperationException("PersistFilter not possible while data is not filtered");

    // Writes go directly to originalData, bypassing the guarded public setters of this wrapper.
    originalData.InTransaction(() => {
      for (int i = 0; i < filteredData.Columns; ++i) {
        if (filteredData.VariableHasType<double>(i)) {
          originalData.SetValues<double>(i, filteredData.GetValues<double>(i));
        } else if (filteredData.VariableHasType<string>(i)) {
          originalData.SetValues<string>(i, filteredData.GetValues<string>(i));
        } else if (filteredData.VariableHasType<DateTime>(i)) {
          originalData.SetValues<DateTime>(i, filteredData.GetValues<DateTime>(i));
        } else {
          throw new ArgumentException("Data types of columns do not match");
        }
      }
    });
    ResetFilter();
  }

  /// <summary>Discards the filtered copy and raises <see cref="FilterChanged"/>.</summary>
  public void ResetFilter() {
    filteredData = null;
    OnFilterChanged();
  }

  /// <summary>Gets whether a filter is currently set.</summary>
  public bool IsFiltered {
    get { return filteredData != null; }
  }

  /// <summary>Raised after a filter has been set or reset.</summary>
  public event EventHandler FilterChanged;

  private void OnFilterChanged() {
    // Copy to a local so a concurrent unsubscribe cannot null the delegate between check and call.
    var handler = FilterChanged;
    if (handler != null) {
      handler(this, EventArgs.Empty);
    }
  }
  #endregion
}
}