commit a62a5d396893c1e475a77fa595902c1b3a48e284 Author: wangbing Date: Thu Apr 3 23:34:40 2025 +0800 上传备份 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..13e058c --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +/.idea +*.iml +/.settings +/bin +/gen +/build +/gradle +/classes +.classpath +.project +*.gradle +gradlew +local.properties +node_modules/ +data/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..a7a6747 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +## 自然语言学习 \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..583c05e --- /dev/null +++ b/pom.xml @@ -0,0 +1,84 @@ + + + 4.0.0 + xyz.wbsite + starter-nlp + 0.0.1-SNAPSHOT + jar + + + 17 + + 1.0.0-beta2 + + + + + + central + Central Repository + default + https://maven.aliyun.com/repository/public + + + + + + central + Central Repository + https://maven.aliyun.com/repository/public + default + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 17 + 17 + + + + + + + + + cn.hutool + hutool-all + 5.8.24 + + + + + com.hankcs + hanlp + portable-1.8.4 + + + + + ch.qos.logback + logback-classic + 1.2.11 + + + org.slf4j + slf4j-api + 1.7.32 + + + + + + dev.langchain4j + langchain4j + ${langchain4j.version} + + + + \ No newline at end of file diff --git a/src/main/java/xyz/wbsite/ai/HanLP_Example.java b/src/main/java/xyz/wbsite/ai/HanLP_Example.java new file mode 100644 index 0000000..2b3bce7 --- /dev/null +++ b/src/main/java/xyz/wbsite/ai/HanLP_Example.java @@ -0,0 +1,44 @@ +package xyz.wbsite.ai; + +import com.hankcs.hanlp.HanLP; +import com.hankcs.hanlp.corpus.tag.Nature; +import com.hankcs.hanlp.seg.common.Term; +import com.hankcs.hanlp.tokenizer.StandardTokenizer; + +import java.util.List; + +public class HanLP_Example { + public static void main(String[] args) { + +// HanLPTokenizer hanLPTokenizer = new HanLPTokenizer(); +// String[] segment = hanLPTokenizer.segment("我喜欢吃苹果"); +// +// List 我喜欢吃苹果1 = HanLP.extractKeyword("我喜欢吃苹果", 2); +// +// HanLPEngine hanLPEngine = new HanLPEngine(); +// Result parse = hanLPEngine.parse("我喜欢吃苹果"); +// +// +// for (Word word : parse) { +// System.out.println(word); +// } + + List strings = HanLP.extractKeyword("身份证去哪里办理", 1); + List string = HanLP.extractKeyword("需要带什么材料", 1); + + // 分词 + List> lists = StandardTokenizer.seg2sentence("那么还需要哪些材料"); + for (List list : lists) { + for (Term term : list) { + System.out.println(term); + // 检查词性是否为主语相关的词性(例如:主谓宾中的主语通常是名词或代词) +// if (term.nature.equals(Nature.n)) { // 名词 +// System.out.println("主语: " + term); +// } +// if (term.nature.equals(Nature.r)) { // 代词,例如“他”、“她”等 +// System.out.println("主语: " + term); +// } + } + } + } +}