relational.helper
Class DataSetHelper

java.lang.Object
  extended by relational.helper.DataSetHelper

public class DataSetHelper
extends java.lang.Object

Contains helper methods (e.g. preprocessing)


Constructor Summary
DataSetHelper()
           
 
Method Summary
static weka.core.FastVector add(weka.core.FastVector v1, weka.core.FastVector v2)
          Merge vectors
static int[] computeDistribution(weka.core.Instances instances)
          Computes distribution of instances (regarding the categories)
static double[] computePriors(weka.core.FastVector items, java.util.HashMap labels, java.lang.String type)
          Computes the prior from the training set
static double[] computePriors(weka.core.FastVector items, int iLabel, java.lang.String type)
          Computes the prior from the training set (for binary classification)
static java.util.HashMap countNeighborsClassesMultiLabelTest(weka.core.FastVector items, int klasse, java.util.HashMap initial, java.util.HashMap weightsMap, java.util.HashMap nodes)
          Counts the frequency (weighted by their probabilities) of the classes of neighbors in the test set (for binary classification)
static java.util.HashMap countNeighborsClassesMultiLabelTrain(weka.core.FastVector items, java.util.HashMap initial, java.util.HashMap weightsMap, java.util.HashMap nodes, int labelSize)
          Counts the frequency of the classes of neighbors in the train set (for binary classification)
static java.util.HashMap countNeighborsClassesTest(weka.core.FastVector items, java.util.HashMap initial, java.util.HashMap weightsMap, java.util.HashMap nodes, int labelSize)
          Counts the frequency (weighted by their probabilities) of the classes of neighbors in the test set
static java.util.HashMap countNeighborsClassesTrain(weka.core.FastVector items, java.util.HashMap initial, java.util.HashMap weightsMap, java.util.HashMap nodes, int labelsSize)
          Counts the frequency of the classes of neighbors in the train set
static weka.classifiers.Classifier[] loadClassifier(java.lang.String filename, int numClassifier)
          Loads classifier
static weka.core.FastVector loadFastVector(java.lang.String filename)
          Loads a fastvector
static java.util.HashMap loadHashmap(java.lang.String filename)
          Loads hashmap
static java.util.Hashtable loadHashtable(java.lang.String filename)
          Loads hashtable
static double[][] loadMatrix(java.lang.String filename)
          Loads matrix
static java.util.Vector loadVector(java.lang.String filename)
          Loads a vector
static void main(java.lang.String[] args)
           
static java.util.HashMap normalizeWeights(BipartGraph graph, weka.core.FastVector test, java.util.HashMap weights)
          Normalizes the weights of the graph, so that the values are between 0 and 1
static java.util.HashMap normalizeWeights(edu.uci.ics.jung.graph.impl.SparseGraph graph, weka.core.FastVector test, java.util.HashMap weights)
          Normalizes the weights of the graph, so that the values are between 0 and 1
static void saveClassifier(java.lang.String filename, weka.classifiers.Classifier[] cl)
          serialize a classifier
static void saveHashMap(java.lang.String filename, java.util.HashMap h)
          serialize hashmap
static void saveMatrix(java.lang.String filename, double[][] m)
          serialize matrix
static void saveTrainTest(weka.core.FastVector traintest, java.lang.String outputPath, boolean multilabel, int numFolds, java.lang.String type)
          Writes the training and test sets to an XML file
static void saveVector(java.lang.String filename, weka.core.FastVector items)
          serialize vector
static void saveVector(java.lang.String filename, java.util.Vector items)
          serialize vector
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

DataSetHelper

public DataSetHelper()
Method Detail

countNeighborsClassesTrain

public static java.util.HashMap countNeighborsClassesTrain(weka.core.FastVector items,
                                                           java.util.HashMap initial,
                                                           java.util.HashMap weightsMap,
                                                           java.util.HashMap nodes,
                                                           int labelsSize)
Counts the frequency of the classes of neighbors in the train set

Parameters:
items - data set
initial - initialization of test set
weightsMap - weights of edges
nodes - nodes of the graph
labelsSize - number of categories
Returns:
the frequency of neighboring classes

countNeighborsClassesTest

public static java.util.HashMap countNeighborsClassesTest(weka.core.FastVector items,
                                                          java.util.HashMap initial,
                                                          java.util.HashMap weightsMap,
                                                          java.util.HashMap nodes,
                                                          int labelSize)
Counts the frequency (weighted by their probabilities) of the classes of neighbors in the test set

Parameters:
items - data set
initial - initialization of test set
weightsMap - weights of edges
nodes - nodes of the graph
Returns:
the frequency of neighboring classes

countNeighborsClassesMultiLabelTrain

public static java.util.HashMap countNeighborsClassesMultiLabelTrain(weka.core.FastVector items,
                                                                     java.util.HashMap initial,
                                                                     java.util.HashMap weightsMap,
                                                                     java.util.HashMap nodes,
                                                                     int labelSize)
Counts the frequency of the classes of neighbors in the train set (for binary classification)

Parameters:
items - data set
initial - initialization of test set
weightsMap - weights of edges
nodes - nodes of the graph
Returns:
the frequency of neighboring classes

countNeighborsClassesMultiLabelTest

public static java.util.HashMap countNeighborsClassesMultiLabelTest(weka.core.FastVector items,
                                                                    int klasse,
                                                                    java.util.HashMap initial,
                                                                    java.util.HashMap weightsMap,
                                                                    java.util.HashMap nodes)
Counts the frequency (weighted by their probabilities) of the classes of neighbors in the test set (for binary classification)

Parameters:
items - data set
initial - initialization of test set
weightsMap - weights of edges
nodes - nodes of the graph
Returns:
the frequency of neighboring classes

loadFastVector

public static weka.core.FastVector loadFastVector(java.lang.String filename)
Loads a fastvector

Parameters:
filename -
Returns:
loaded fastvector

loadVector

public static java.util.Vector loadVector(java.lang.String filename)
Loads a vector

Parameters:
filename -
Returns:
loaded vector

loadClassifier

public static weka.classifiers.Classifier[] loadClassifier(java.lang.String filename,
                                                           int numClassifier)
Loads classifier

Parameters:
filename -
numClassifier - number of classifiers to be loaded
Returns:
the loaded classifiers

loadHashmap

public static java.util.HashMap loadHashmap(java.lang.String filename)
Loads hashmap

Parameters:
filename -
Returns:
loaded hashmap

loadMatrix

public static double[][] loadMatrix(java.lang.String filename)
Loads matrix

Parameters:
filename -
Returns:
loaded matrix

loadHashtable

public static java.util.Hashtable loadHashtable(java.lang.String filename)
Loads hashtable

Parameters:
filename -
Returns:
loaded hashtable

computeDistribution

public static int[] computeDistribution(weka.core.Instances instances)
Computes distribution of instances (regarding the categories)

Parameters:
instances - data instances
Returns:
distribution of instances (regarding the categories)

computePriors

public static double[] computePriors(weka.core.FastVector items,
                                     int iLabel,
                                     java.lang.String type)
Computes the prior from the training set (for binary classification)

Parameters:
items - train set
type - indicates the type of problem (heterogenous or homogenous)
Returns:
the priors computed from the training set

computePriors

public static double[] computePriors(weka.core.FastVector items,
                                     java.util.HashMap labels,
                                     java.lang.String type)
Computes the prior from the training set

Parameters:
items - train set
labels - categories
type - indicates the type of problem (heterogenous or homogenous)
Returns:
the priors computed from the training set

saveTrainTest

public static void saveTrainTest(weka.core.FastVector traintest,
                                 java.lang.String outputPath,
                                 boolean multilabel,
                                 int numFolds,
                                 java.lang.String type)
                          throws java.io.IOException
Writes the training and test sets to an XML file

Parameters:
traintest - train and test set
outputPath - location where to save
multilabel - indicates whether a multilabel problem has to be handled
numFolds - number of folds (cross-validation)
type - indicates the type of problem (heterogenous or homogenous)
Throws:
java.io.IOException

saveVector

public static void saveVector(java.lang.String filename,
                              weka.core.FastVector items)
serialize vector

Parameters:
filename -
items - vector

saveVector

public static void saveVector(java.lang.String filename,
                              java.util.Vector items)
serialize vector

Parameters:
filename -
items - vector

saveClassifier

public static void saveClassifier(java.lang.String filename,
                                  weka.classifiers.Classifier[] cl)
serialize a classifier

Parameters:
filename -
cl - classifier

saveHashMap

public static void saveHashMap(java.lang.String filename,
                               java.util.HashMap h)
serialize hashmap

Parameters:
filename -
h - hashmap

saveMatrix

public static void saveMatrix(java.lang.String filename,
                              double[][] m)
serialize matrix

Parameters:
filename -
m - matrix

add

public static weka.core.FastVector add(weka.core.FastVector v1,
                                       weka.core.FastVector v2)
Merge vectors

Parameters:
v1 - vector
v2 - vector
Returns:
merged vector

normalizeWeights

public static java.util.HashMap normalizeWeights(BipartGraph graph,
                                                 weka.core.FastVector test,
                                                 java.util.HashMap weights)
Normalizes the weights of the graph, so that the values are between 0 and 1

Parameters:
graph - graph (jung)
test - test set
weights - weights of the edges in the graph
Returns:
normalized weights

normalizeWeights

public static java.util.HashMap normalizeWeights(edu.uci.ics.jung.graph.impl.SparseGraph graph,
                                                 weka.core.FastVector test,
                                                 java.util.HashMap weights)
Normalizes the weights of the graph, so that the values are between 0 and 1

Parameters:
graph - graph (jung)
test - test set
weights - weights of the edges in the graph
Returns:
normalized weights

main

public static void main(java.lang.String[] args)
                 throws java.lang.Exception
Throws:
java.lang.Exception