diff --git a/pom.xml b/pom.xml
index 1a1cd80..47c2ad6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -77,11 +77,11 @@
langchain4j-embeddings
${langchain4j.version}
-
- dev.langchain4j
- langchain4j-embeddings-all-minilm-l6-v2
- ${langchain4j.version}
-
+
+
+
+
+
dev.langchain4j
@@ -102,6 +102,13 @@
1.0.0-beta2
+
+
+ com.hankcs
+ hanlp
+ portable-1.8.4
+
+
ch.qos.logback
diff --git a/src/main/java/xyz/wbsite/ai/HanLP_Example.java b/src/main/java/xyz/wbsite/ai/HanLP_Example.java
new file mode 100644
index 0000000..2b3bce7
--- /dev/null
+++ b/src/main/java/xyz/wbsite/ai/HanLP_Example.java
@@ -0,0 +1,44 @@
+package xyz.wbsite.ai;
+
+import com.hankcs.hanlp.HanLP;
+import com.hankcs.hanlp.corpus.tag.Nature;
+import com.hankcs.hanlp.seg.common.Term;
+import com.hankcs.hanlp.tokenizer.StandardTokenizer;
+
+import java.util.List;
+
+public class HanLP_Example {
+    public static void main(String[] args) {
+        // Demo entry point: HanLP keyword extraction followed by sentence-wise tokenization.
+//        HanLPTokenizer hanLPTokenizer = new HanLPTokenizer();
+//        String[] segment = hanLPTokenizer.segment("我喜欢吃苹果");
+//
+//        List<String> keywords = HanLP.extractKeyword("我喜欢吃苹果", 2);
+//
+//        HanLPEngine hanLPEngine = new HanLPEngine();
+//        Result parse = hanLPEngine.parse("我喜欢吃苹果");
+//
+//
+//        for (Word word : parse) {
+//            System.out.println(word);
+//        }
+        // Extract the single top keyword from each question (the results are not used further here).
+        List<String> strings = HanLP.extractKeyword("身份证去哪里办理", 1);
+        List<String> string = HanLP.extractKeyword("需要带什么材料", 1);
+
+        // Tokenize the text; the result holds one list of terms per sentence.
+        List<List<Term>> lists = StandardTokenizer.seg2sentence("那么还需要哪些材料");
+        for (List<Term> list : lists) {
+            for (Term term : list) {
+                System.out.println(term);
+                // Check whether the part of speech is subject-like (in subject-verb-object sentences the subject is usually a noun or pronoun).
+//                if (term.nature.equals(Nature.n)) { // noun
+//                    System.out.println("主语: " + term);
+//                }
+//                if (term.nature.equals(Nature.r)) { // pronoun, e.g. "he", "she"
+//                    System.out.println("主语: " + term);
+//                }
+            }
+        }
+    }
+}
diff --git a/src/main/java/xyz/wbsite/ai/Helper.java b/src/main/java/xyz/wbsite/ai/Helper.java
index fd7ed29..56a6773 100644
--- a/src/main/java/xyz/wbsite/ai/Helper.java
+++ b/src/main/java/xyz/wbsite/ai/Helper.java
@@ -3,6 +3,7 @@ package xyz.wbsite.ai;
import cn.hutool.core.collection.CollUtil;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.model.openai.OpenAiChatModel;
+import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
import dev.langchain4j.model.openai.OpenAiStreamingChatModel;
import java.util.List;
@@ -25,6 +26,14 @@ public class Helper {
.logResponses(true)
.build();
+ private static OpenAiEmbeddingModel openAiEmbeddingModel = OpenAiEmbeddingModel.builder() // Shared embedding model client, built once by this static initializer
+ .baseUrl("http://192.168.88.106:11434/v1") // hard-coded LAN endpoint; NOTE(review): presumably a local Ollama server (port 11434) - confirm
+ .apiKey("1") // placeholder value; NOTE(review): presumably the endpoint does not validate it - confirm
+ .modelName("bge-m3") // name of the embedding model requested from the endpoint
+ .logRequests(true) // enable request logging
+ .logResponses(true) // enable response logging
+ .build();
+
private static OpenAiChatModel toolChatModel = OpenAiChatModel.builder()
.baseUrl("http://192.168.88.106:11434/v1")
.apiKey("1")
@@ -53,6 +62,10 @@ public class Helper {
return gemmaModel;
}
+ public static OpenAiEmbeddingModel getOpenAiEmbeddingModel() { // Accessor for the shared static embedding model instance
+ return openAiEmbeddingModel;
+ }
+
public static Document getDocument() {
return Document.from("人往往在做梦的时候会打呼噜");
}
diff --git a/src/main/java/xyz/wbsite/ai/Qdrant_Embedding_Example.java b/src/main/java/xyz/wbsite/ai/Qdrant_Embedding_Example.java
index 21a3383..326aa70 100644
--- a/src/main/java/xyz/wbsite/ai/Qdrant_Embedding_Example.java
+++ b/src/main/java/xyz/wbsite/ai/Qdrant_Embedding_Example.java
@@ -3,7 +3,7 @@ package xyz.wbsite.ai;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
-import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModel;
+import dev.langchain4j.model.embedding.onnx.bgesmallenv15q.BgeSmallEnV15QuantizedEmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
@@ -22,7 +22,7 @@ public class Qdrant_Embedding_Example {
.collectionName("langchain4j-" + randomUUID())
.build();
- EmbeddingModel embeddingModel = new AllMiniLmL6V2EmbeddingModel();
+ EmbeddingModel embeddingModel = new BgeSmallEnV15QuantizedEmbeddingModel();
TextSegment segment1 = TextSegment.from("I've been to France twice.");
Embedding embedding1 = embeddingModel.embed(segment1).content();
embeddingStore.add(embedding1, segment1);
diff --git a/src/main/java/xyz/wbsite/ai/Text_Compare_Example.java b/src/main/java/xyz/wbsite/ai/Text_Compare_Example.java
index ea2aed9..318d597 100644
--- a/src/main/java/xyz/wbsite/ai/Text_Compare_Example.java
+++ b/src/main/java/xyz/wbsite/ai/Text_Compare_Example.java
@@ -2,7 +2,6 @@ package xyz.wbsite.ai;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.embedding.EmbeddingModel;
-import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.bgesmallenv15q.BgeSmallEnV15QuantizedEmbeddingModel;
import dev.langchain4j.store.embedding.CosineSimilarity;
@@ -13,11 +12,12 @@ public class Text_Compare_Example {
public static void main(String[] args) {
// 初始化嵌入模型
- EmbeddingModel embeddingModel = new AllMiniLmL6V2EmbeddingModel();
+// EmbeddingModel embeddingModel = new BgeSmallEnV15QuantizedEmbeddingModel();
+ EmbeddingModel embeddingModel = Helper.getOpenAiEmbeddingModel();
// 将文本转换为向量
- Embedding embedding1 = embeddingModel.embed("工伤医疗费的申领").content();
- Embedding embedding2 = embeddingModel.embed("预告登记的转移").content();
+ Embedding embedding1 = embeddingModel.embed("身份证办理").content();
+ Embedding embedding2 = embeddingModel.embed("身份证首次办理").content();
double between = CosineSimilarity.between(embedding1, embedding2);
System.out.println("余弦相似度: " + between); // 值越接近1越相似