上传备份

master
王兵 5 months ago
parent a62a5d3968
commit bd63f12893

@ -60,6 +60,12 @@
<version>portable-1.8.4</version>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
<version>1.9.3</version> <!-- TODO: check for the latest version -->
</dependency>
<!-- 日志框架 -->
<dependency>
<groupId>ch.qos.logback</groupId>

@ -0,0 +1,80 @@
package xyz.wbsite.ai;
import cn.hutool.core.io.IoUtil;
import opennlp.tools.doccat.*;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
public class Opennlp_Example {
public static void main(String[] args) {
try {
// 读取训练数据
InputStreamFactory dataIn = () -> IoUtil.toStream("""
""", Charset.defaultCharset());
ObjectStream<String> lineStream = new PlainTextByLineStream(dataIn, "UTF-8");
ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream);
TrainingParameters params = new TrainingParameters();
params.put(TrainingParameters.ITERATIONS_PARAM, 100);
params.put(TrainingParameters.CUTOFF_PARAM, 0);
// 训练模型
DoccatModel model = DocumentCategorizerME.train("en", sampleStream, params, new DoccatFactory());
// 保存模型
model.serialize(new File("test-model.bin"));
// 关闭流
sampleStream.close();
lineStream.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
try {
// 加载模型
DoccatModel model = new DoccatModel(new File("test-model.bin"));
DocumentCategorizerME categorizer = new DocumentCategorizerME(model);
// 待分类的文本
String text = "这电影真的不好看啊";
// 进行分类
double[] outcomes = categorizer.categorize(new String[]{text});
String category = categorizer.getBestCategory(outcomes);
// 输出分类结果
System.out.println("Text: " + text);
System.out.println("Category: " + category);
} catch (IOException e) {
e.printStackTrace();
}
}
}
Loading…
Cancel
Save

Powered by TurnKey Linux.