上传备份

master
王兵 5 months ago
parent 8ff8a139aa
commit 69940f7d95

@ -77,11 +77,11 @@
<artifactId>langchain4j-embeddings</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>dev.langchain4j</groupId>-->
<!-- <artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>-->
<!-- <version>${langchain4j.version}</version>-->
<!-- </dependency>-->
<dependency>
<groupId>dev.langchain4j</groupId>
@ -102,6 +102,13 @@
<version>1.0.0-beta2</version>
</dependency>
<!-- 自然语言处理工具包 (natural language processing toolkit) -->
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>portable-1.8.4</version>
</dependency>
<!-- 日志框架 -->
<dependency>
<groupId>ch.qos.logback</groupId>

@ -0,0 +1,44 @@
package xyz.wbsite.ai;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import java.util.List;
/**
 * Demonstrates the HanLP toolkit: keyword extraction from short Chinese
 * queries and standard word segmentation with part-of-speech tags.
 *
 * <p>Run as a plain {@code main} program; all results are printed to
 * standard output. Requires the {@code com.hankcs:hanlp} dependency.
 */
public class HanLP_Example {
    public static void main(String[] args) {
        // Extract the single most relevant keyword from each query.
        // (The original discarded these results into unused locals.)
        List<String> keywordsWhere = HanLP.extractKeyword("身份证去哪里办理", 1);
        List<String> keywordsWhat = HanLP.extractKeyword("需要带什么材料", 1);
        System.out.println("keywords(where): " + keywordsWhere);
        System.out.println("keywords(what): " + keywordsWhat);

        // Segment the text into sentences, each a list of terms
        // (a term carries the word plus its part-of-speech tag).
        List<List<Term>> sentences = StandardTokenizer.seg2sentence("那么还需要哪些材料");
        for (List<Term> sentence : sentences) {
            for (Term term : sentence) {
                System.out.println(term);
                // NOTE(review): subject detection via term.nature
                // (Nature.n for nouns, Nature.r for pronouns) was present as
                // commented-out code in the original; reinstate if needed.
            }
        }
    }
}

@ -3,6 +3,7 @@ package xyz.wbsite.ai;
import cn.hutool.core.collection.CollUtil;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.model.openai.OpenAiChatModel;
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
import dev.langchain4j.model.openai.OpenAiStreamingChatModel;
import java.util.List;
@ -25,6 +26,14 @@ public class Helper {
.logResponses(true)
.build();
private static OpenAiEmbeddingModel openAiEmbeddingModel = OpenAiEmbeddingModel.builder()
.baseUrl("http://192.168.88.106:11434/v1")
.apiKey("1")
.modelName("bge-m3")
.logRequests(true)
.logResponses(true)
.build();
private static OpenAiChatModel toolChatModel = OpenAiChatModel.builder()
.baseUrl("http://192.168.88.106:11434/v1")
.apiKey("1")
@ -53,6 +62,10 @@ public class Helper {
return gemmaModel;
}
public static OpenAiEmbeddingModel getOpenAiEmbeddingModel() {
return openAiEmbeddingModel;
}
public static Document getDocument() {
return Document.from("人往往在做梦的时候会打呼噜");
}

@ -3,7 +3,7 @@ package xyz.wbsite.ai;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.bgesmallenv15q.BgeSmallEnV15QuantizedEmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
@ -22,7 +22,7 @@ public class Qdrant_Embedding_Example {
.collectionName("langchain4j-" + randomUUID())
.build();
EmbeddingModel embeddingModel = new AllMiniLmL6V2EmbeddingModel();
EmbeddingModel embeddingModel = new BgeSmallEnV15QuantizedEmbeddingModel();
TextSegment segment1 = TextSegment.from("I've been to France twice.");
Embedding embedding1 = embeddingModel.embed(segment1).content();
embeddingStore.add(embedding1, segment1);

@ -2,7 +2,6 @@ package xyz.wbsite.ai;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.bgesmallenv15q.BgeSmallEnV15QuantizedEmbeddingModel;
import dev.langchain4j.store.embedding.CosineSimilarity;
@ -13,11 +12,12 @@ public class Text_Compare_Example {
public static void main(String[] args) {
// 初始化嵌入模型
EmbeddingModel embeddingModel = new AllMiniLmL6V2EmbeddingModel();
// EmbeddingModel embeddingModel = new BgeSmallEnV15QuantizedEmbeddingModel();
EmbeddingModel embeddingModel = Helper.getOpenAiEmbeddingModel();
// 将文本转换为向量
Embedding embedding1 = embeddingModel.embed("工伤医疗费的申领").content();
Embedding embedding2 = embeddingModel.embed("预告登记的转移").content();
Embedding embedding1 = embeddingModel.embed("身份证办理").content();
Embedding embedding2 = embeddingModel.embed("身份证首次办理").content();
double between = CosineSimilarity.between(embedding1, embedding2);
System.out.println("余弦相似度: " + between); // 值越接近1越相似

Loading…
Cancel
Save

Powered by TurnKey Linux.