使用 CLI 和 Java API 使用 openNLP 进行句子检测
使用 CLI:
$ opennlp SentenceDetector ./en-sent.bin < ./input.txt > output.txt
使用 API:
import static java.nio.file.Files.readAllBytes;
import static java.nio.file.Paths.get;
import java.io.IOException;
import java.util.Objects;
public class FileUtils {
/**
* Get file data as string
*
* @param fileName
* @return
*/
public static String getFileDataAsString(String fileName) {
Objects.nonNull(fileName);
try {
String data = new String(readAllBytes(get(fileName)));
return data;
} catch (IOException e) {
System.out.println(e.getMessage());
return null;
}
}
}
class sentecedetectorutil:
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Objects;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
public class SentenceDetectorUtil {
private SentenceModel model = null;
SentenceDetectorME sentenceDetector = null;
public SentenceDetectorUtil(String modelFile) {
Objects.nonNull(modelFile);
initSentenceModel(modelFile);
initSentenceDetectorME();
}
private void initSentenceDetectorME() {
sentenceDetector = new SentenceDetectorME(model);
}
private SentenceModel initSentenceModel(String file) {
InputStream modelIn;
try {
modelIn = new FileInputStream(file);
} catch (FileNotFoundException e) {
System.out.println(e.getMessage());
return null;
}
try {
model = new SentenceModel(modelIn);
} catch (IOException e) {
e.printStackTrace();
} finally {
if (modelIn != null) {
try {
modelIn.close();
} catch (IOException e) {
}
}
}
return model;
}
public String[] getSentencesFromFile(String inputFile) {
String data = FileUtils.getFileDataAsString(inputFile);
return sentenceDetector.sentDetect(data);
}
public String[] getSentences(String data) {
return sentenceDetector.sentDetect(data);
}
}
}
主要类:
public class Main {
public static void main(String args[]) {
SentenceDetectorUtil util = new SentenceDetectorUtil(
"path//to//your//en-sent.bin");
String data = "Welcome to Stackoverflow Documentation.This is the first example in OenNLP.";
String[] sentences = util.getSentences(data);
for (String s : sentences)
System.out.println(s +"\n");
}
}
输出将是:
欢迎使用 Stackoverflow 文档。
这是 OpenNLP 中的第一个示例。