|
|
@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
package xyz.wbsite.ai;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.io.IoUtil;
import opennlp.tools.doccat.*;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;

import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public class Opennlp_Example {
|
|
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
// 读取训练数据
|
|
|
|
|
|
|
|
InputStreamFactory dataIn = () -> IoUtil.toStream("""
|
|
|
|
|
|
|
|
好 这部电影真是太棒了!
|
|
|
|
|
|
|
|
好 这部电影演的太棒了!
|
|
|
|
|
|
|
|
好 这部电影演的太好了!
|
|
|
|
|
|
|
|
好 这部电影主演演的太好了!
|
|
|
|
|
|
|
|
好 这部电影非常棒
|
|
|
|
|
|
|
|
好 这部电影很棒
|
|
|
|
|
|
|
|
好 电影演的太好了
|
|
|
|
|
|
|
|
好 电影演的好了
|
|
|
|
|
|
|
|
好 好莱坞的最新电影大受欢迎
|
|
|
|
|
|
|
|
差 这电影一点也不好看
|
|
|
|
|
|
|
|
差 这电影演的太差了
|
|
|
|
|
|
|
|
差 这电影演的差到极点
|
|
|
|
|
|
|
|
差 这电影真差
|
|
|
|
|
|
|
|
差 这电影太差
|
|
|
|
|
|
|
|
差 这电影不能看,太垃圾
|
|
|
|
|
|
|
|
差 这电影不好看
|
|
|
|
|
|
|
|
差 这电影不能看
|
|
|
|
|
|
|
|
差 这电影真看不下去
|
|
|
|
|
|
|
|
差 这电影真的不好看
|
|
|
|
|
|
|
|
""", Charset.defaultCharset());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ObjectStream<String> lineStream = new PlainTextByLineStream(dataIn, "UTF-8");
|
|
|
|
|
|
|
|
ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TrainingParameters params = new TrainingParameters();
|
|
|
|
|
|
|
|
params.put(TrainingParameters.ITERATIONS_PARAM, 100);
|
|
|
|
|
|
|
|
params.put(TrainingParameters.CUTOFF_PARAM, 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 训练模型
|
|
|
|
|
|
|
|
DoccatModel model = DocumentCategorizerME.train("en", sampleStream, params, new DoccatFactory());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 保存模型
|
|
|
|
|
|
|
|
model.serialize(new File("test-model.bin"));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 关闭流
|
|
|
|
|
|
|
|
sampleStream.close();
|
|
|
|
|
|
|
|
lineStream.close();
|
|
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
// 加载模型
|
|
|
|
|
|
|
|
DoccatModel model = new DoccatModel(new File("test-model.bin"));
|
|
|
|
|
|
|
|
DocumentCategorizerME categorizer = new DocumentCategorizerME(model);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 待分类的文本
|
|
|
|
|
|
|
|
String text = "这电影真的不好看啊";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 进行分类
|
|
|
|
|
|
|
|
double[] outcomes = categorizer.categorize(new String[]{text});
|
|
|
|
|
|
|
|
String category = categorizer.getBestCategory(outcomes);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 输出分类结果
|
|
|
|
|
|
|
|
System.out.println("Text: " + text);
|
|
|
|
|
|
|
|
System.out.println("Category: " + category);
|
|
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|