入门从文件加载数据集
该鸢尾花数据集是一种广泛使用的数据进行演示设置。我们将加载它,检查它并稍微修改它以供以后使用。
import java.io.File;
import java.net.URL;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.core.converters.CSVLoader;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.RenameAttribute;
import weka.classifiers.evaluation.Evaluation;
import weka.classifiers.rules.ZeroR;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.lazy.IBk;
import weka.classifiers.trees.J48;
import weka.classifiers.meta.AdaBoostM1;
public class IrisExperiments {
public static void main(String args[]) throws Exception
{
//First we open stream to a data set as provided on http://archive.ics.uci.edu
CSVLoader loader = new CSVLoader();
loader.setSource(new URL("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data").openStream());
Instances data = loader.getDataSet();
//This file has 149 examples with 5 attributes
//In order:
// sepal length in cm
// sepal width in cm
// petal length in cm
// petal width in cm
// class ( Iris Setosa , Iris Versicolour, Iris Virginica)
//Let's briefly inspect the data
System.out.println("This file has " + data.numInstances()+" examples.");
System.out.println("The first example looks like this: ");
for(int i = 0; i < data.instance(0).numAttributes();i++ ){
System.out.println(data.instance(0).attribute(i));
}
// NOTE that the last attribute is Nominal
// It is convention to have a nominal variable at the last index as target variable
// Let's tidy up the data a little bit
// Nothing too serious just to show how we can manipulate the data with filters
RenameAttribute renamer = new RenameAttribute();
renamer.setOptions(weka.core.Utils.splitOptions("-R last -replace Iris-type"));
renamer.setInputFormat(data);
data = Filter.useFilter(data, renamer);
System.out.println("We changed the name of the target class.");
System.out.println("And now it looks like this:");
System.out.println(data.instance(0).attribute(4));
//Now we do this for all the attributes
renamer.setOptions(weka.core.Utils.splitOptions("-R 1 -replace sepal-length"));
renamer.setInputFormat(data);
data = Filter.useFilter(data, renamer);
renamer.setOptions(weka.core.Utils.splitOptions("-R 2 -replace sepal-width"));
renamer.setInputFormat(data);
data = Filter.useFilter(data, renamer);
renamer.setOptions(weka.core.Utils.splitOptions("-R 3 -replace petal-length"));
renamer.setInputFormat(data);
data = Filter.useFilter(data, renamer);
renamer.setOptions(weka.core.Utils.splitOptions("-R 4 -replace petal-width"));
renamer.setInputFormat(data);
data = Filter.useFilter(data, renamer);
//Lastly we save our newly created file to disk
ArffSaver saver = new ArffSaver();
saver.setInstances(data);
saver.setFile(new File("IrisSet.arff"));
saver.writeBatch();
}
}