Milvus 基本操作

1、maven 依赖

<!-- Milvus Java SDK. The transitive slf4j-api and log4j-slf4j-impl artifacts are excluded,
     presumably so the host application's own logging binding is used (confirm). -->
<dependency>
    <groupId>io.milvus</groupId>
    <artifactId>milvus-sdk-java</artifactId>
    <version>2.3.3</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
        </exclusion>
    </exclusions>
</dependency>

2、MivusService:封装了 Milvus 的基本操作

@Service
@Slf4j
public class MivusService {@AutowiredMilvusServiceClient milvusClient;private String clientId;/*** 同步搜索milvus* @param collectionName 表名* @param vectors 查询向量* @param topK 最相似的向量个数* @return*/public List<Long> search(String collectionName, List<List<Float>> vectors, Integer topK) {Assert.notNull(collectionName, "collectionName  is null");Assert.notNull(vectors, "vectors is null");Assert.notEmpty(vectors, "vectors is empty");Assert.notNull(topK, "topK is null");int nprobeVectorSize = vectors.get(0).size();String paramsInJson = "{\"nprobe\": " + nprobeVectorSize + "}";SearchParam searchParam =SearchParam.newBuilder().withCollectionName(collectionName).withParams(paramsInJson).withMetricType(MetricType.L2).withVectors(vectors).withVectorFieldName("embeddings").withTopK(topK).build();R<SearchResults> searchResultsR = milvusClient.search(searchParam);SearchResults searchResultsRData = searchResultsR.getData();List<Long> topksList = searchResultsRData.getResults().getIds().getIntId().getDataList();return topksList;}/*** 同步搜索milvus* @param collectionName 表名* @param vectors 查询向量* @param topK 最相似的向量个数* @return*/public List<Long> search1(String collectionName, List<List<Float>> vectors, Integer topK) {Assert.notNull(collectionName, "collectionName  is null");Assert.notNull(vectors, "vectors is null");Assert.notEmpty(vectors, "vectors is empty");Assert.notNull(topK, "topK is null");int nprobeVectorSize = vectors.get(0).size();String paramsInJson = "{\"nprobe\": " + nprobeVectorSize + "}";SearchParam searchParam =SearchParam.newBuilder().withCollectionName(collectionName).withParams(paramsInJson).withMetricType(MetricType.IP).withVectors(vectors).withVectorFieldName("embedding").withTopK(topK).build();R<SearchResults> searchResultsR = milvusClient.search(searchParam);SearchResults searchResultsRData = searchResultsR.getData();List<Long> topksList = searchResultsRData.getResults().getIds().getIntId().getDataList();return topksList;}/*** 同步搜索milvus,增加过滤条件搜索** @param collectionName 
表名* @param vectors 查询向量* @param topK 最相似的向量个数* @param exp 过滤条件:status=1* @return*/public List<Long> search2(String collectionName, List<List<Float>> vectors, Integer topK, String exp) {Assert.notNull(collectionName, "collectionName  is null");Assert.notNull(vectors, "vectors is null");Assert.notEmpty(vectors, "vectors is empty");Assert.notNull(topK, "topK is null");Assert.notNull(exp, "exp is null");int nprobeVectorSize = vectors.get(0).size();String paramsInJson = "{\"nprobe\": " + nprobeVectorSize + "}";SearchParam searchParam =SearchParam.newBuilder().withCollectionName(collectionName).withParams(paramsInJson).withMetricType(MetricType.IP).withVectors(vectors).withExpr(exp).withVectorFieldName("embedding").withTopK(topK).build();R<SearchResults> searchResultsR = milvusClient.search(searchParam);SearchResults searchResultsRData = searchResultsR.getData();List<Long> topksList = searchResultsRData.getResults().getIds().getIntId().getDataList();return topksList;}/*** 异步搜索milvus** @param collectionName 表名* @param vectors 查询向量* @param partitionList 最相似的向量个数* @param topK* @return*/public List<Long> searchAsync(String collectionName, List<List<Float>> vectors,List<String> partitionList, Integer topK) throws ExecutionException, InterruptedException {Assert.notNull(collectionName, "collectionName  is null");Assert.notNull(vectors, "vectors is null");Assert.notEmpty(vectors, "vectors is empty");Assert.notNull(partitionList, "partitionList is null");Assert.notEmpty(partitionList, "partitionList is empty");Assert.notNull(topK, "topK is null");int nprobeVectorSize = vectors.get(0).size();String paramsInJson = "{\"nprobe\": " + nprobeVectorSize + "}";SearchParam searchParam =SearchParam.newBuilder().withCollectionName(collectionName).withParams(paramsInJson).withVectors(vectors).withTopK(topK).withPartitionNames(partitionList).build();ListenableFuture<R<SearchResults>> listenableFuture = milvusClient.searchAsync(searchParam);List<Long> resultIdsList = 
listenableFuture.get().getData().getResults().getTopksList();return resultIdsList;}/*** 获取分区集合* @param collectionName 表名* @return*/public List<String> getPartitionsList(String collectionName) {Assert.notNull(collectionName, "collectionName  is null");ShowPartitionsParam searchParam = ShowPartitionsParam.newBuilder().withCollectionName(collectionName).build();List<ByteString> byteStrings = milvusClient.showPartitions(searchParam).getData().getPartitionNamesList().asByteStringList();List<String> partitionList = Lists.newLinkedList();byteStrings.forEach(s -> {partitionList.add(s.toStringUtf8());});return partitionList;}public void loadCollection(String collectionName) {LoadCollectionParam loadCollectionParam = LoadCollectionParam.newBuilder().withCollectionName(collectionName).build();R<RpcStatus> response = milvusClient.loadCollection(loadCollectionParam);log.info("loadCollection {} is {}", collectionName, response.getData().getMsg());}public void releaseCollection(String collectionName) {ReleaseCollectionParam param = ReleaseCollectionParam.newBuilder().withCollectionName(collectionName).build();R<RpcStatus> response = milvusClient.releaseCollection(param);log.info("releaseCollection {} is {}", collectionName, response.getData().getMsg());}public void loadPartitions(String collectionName, List<String> partitionsName) {LoadPartitionsParam build = LoadPartitionsParam.newBuilder().withCollectionName(collectionName).withPartitionNames(partitionsName).build();R<RpcStatus> rpcStatusR = milvusClient.loadPartitions(build);log.info("loadPartitions {} is {}", partitionsName, rpcStatusR.getData().getMsg());}public void releasePartitions(String collectionName, List<String> partitionsName) {ReleasePartitionsParam build = ReleasePartitionsParam.newBuilder().withCollectionName(collectionName).withPartitionNames(partitionsName).build();R<RpcStatus> rpcStatusR = milvusClient.releasePartitions(build);log.info("releasePartition {} is {}", collectionName, 
rpcStatusR.getData().getMsg());}public boolean isExitCollection(String collectionName) {HasCollectionParam hasCollectionParam = HasCollectionParam.newBuilder().withCollectionName(collectionName).build();R<Boolean> response = milvusClient.hasCollection(hasCollectionParam);Boolean isExists = response.getData();log.info("collection {} is exists: {}", collectionName, isExists);return isExists;}public Boolean creatCollection(String collectionName) {// 主键字段FieldType fieldType1 = FieldType.newBuilder().withName(Content.Field.ID).withDescription("primary key").withDataType(DataType.Int64).withPrimaryKey(true).withAutoID(true).build();// 文本字段FieldType fieldType2 = FieldType.newBuilder().withName(Content.Field.CONTENT).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();// 向量字段FieldType fieldType3 = FieldType.newBuilder().withName(Content.Field.CONTENT_VECTOR).withDataType(DataType.FloatVector).withDimension(Content.FEATURE_DIM).build();// 创建collectionCreateCollectionParam createCollectionReq = CreateCollectionParam.newBuilder().withCollectionName(collectionName).withDescription("Schema of Content").withShardsNum(Content.SHARDS_NUM).addFieldType(fieldType1).addFieldType(fieldType2).addFieldType(fieldType3).build();R<RpcStatus> response = milvusClient.createCollection(createCollectionReq);log.info("collection: {} is created ? 
status: = {}", collectionName, response.getData().getMsg());return response.getData().getMsg().equals("Success");}public Boolean dropCollection(String collectionName) {DropCollectionParam book = DropCollectionParam.newBuilder().withCollectionName(collectionName).build();R<RpcStatus> response = milvusClient.dropCollection(book);return response.getData().getMsg().equals("Success");}public void createPartition(String collectionName, String partitionName) {CreatePartitionParam param = CreatePartitionParam.newBuilder().withCollectionName(collectionName).withPartitionName(partitionName).build();R<RpcStatus> partition = milvusClient.createPartition(param);String msg = partition.getData().getMsg();log.info("create partition: {} in collection: {} is: {}", partition, collectionName, msg);}public Boolean createIndex(String collectionName) {// IndexTypefinal IndexType INDEX_TYPE = IndexType.IVF_FLAT;// ExtraParam 建议值为 4 × sqrt(n), 其中 n 指 segment 最多包含的 entity 条数。final String INDEX_PARAM = "{\"nlist\":16384}";long startIndexTime = System.currentTimeMillis();R<RpcStatus> response = milvusClient.createIndex(CreateIndexParam.newBuilder().withCollectionName(collectionName).withIndexName(Content.CONTENT_INDEX).withFieldName(Content.Field.CONTENT_VECTOR).withMetricType(MetricType.L2).withIndexType(INDEX_TYPE).withExtraParam(INDEX_PARAM).withSyncMode(Boolean.TRUE).withSyncWaitingInterval(500L).withSyncWaitingTimeout(30L).build());long endIndexTime = System.currentTimeMillis();log.info("Succeed in " + (endIndexTime - startIndexTime) / 1000.00 + " seconds!");log.info("createIndex --->>> {} ", response.toString());GetIndexBuildProgressParam build = GetIndexBuildProgressParam.newBuilder().withCollectionName(collectionName).build();R<GetIndexBuildProgressResponse> idnexResp = milvusClient.getIndexBuildProgress(build);log.info("getIndexBuildProgress --->>> {}", idnexResp.getStatus());return response.getData().getMsg().equals("Success");}public ReplyMsg insert(String collectionName, 
List<InsertParam.Field> fields) {InsertParam insertParam = InsertParam.newBuilder().withCollectionName(collectionName).withFields(fields).build();R<MutationResult> mutationResultR = milvusClient.insert(insertParam);log.info("Flushing...");long startFlushTime = System.currentTimeMillis();milvusClient.flush(FlushParam.newBuilder().withCollectionNames(Collections.singletonList(collectionName)).withSyncFlush(true).withSyncFlushWaitingInterval(50L).withSyncFlushWaitingTimeout(30L).build());long endFlushTime = System.currentTimeMillis();log.info("Succeed in " + (endFlushTime - startFlushTime) / 1000.00 + " seconds!");if (mutationResultR.getStatus() == 0){long insertCnt = mutationResultR.getData().getInsertCnt();log.info("Successfully! Total number of entities inserted: {} ", insertCnt);return ReplyMsg.ofSuccess("success", insertCnt);}log.error("InsertRequest failed!");return ReplyMsg.ofErrorMsg("InsertRequest failed!");}public List<List<SearchResultVo>> searchTopKSimilarity(SearchParamVo searchParamVo) {log.info("------search TopK Similarity------");SearchParam searchParam = SearchParam.newBuilder().withCollectionName(searchParamVo.getCollectionName()).withMetricType(MetricType.L2).withOutFields(searchParamVo.getOutputFields()).withTopK(searchParamVo.getTopK()).withVectors(searchParamVo.getQueryVectors()).withVectorFieldName(Content.Field.CONTENT_VECTOR).withParams(searchParamVo.getParams()).build();R<SearchResults> respSearch = milvusClient.search(searchParam);if (respSearch.getData() == null) {return null;}log.info("------ process query results ------");SearchResultsWrapper wrapper = new SearchResultsWrapper(respSearch.getData().getResults());List<List<SearchResultVo>> result = new ArrayList<>();for (int i = 0; i < searchParamVo.getQueryVectors().size(); ++i) {List<SearchResultsWrapper.IDScore> scores = wrapper.getIDScore(i);List<QueryResultsWrapper.RowRecord> rowRecords = wrapper.getRowRecords();List<SearchResultVo> list = new ArrayList<>();for (int j = 0; j < 
scores.size(); ++j) {SearchResultsWrapper.IDScore score = scores.get(j);QueryResultsWrapper.RowRecord rowRecord = rowRecords.get(j);long longID = score.getLongID();float distance = score.getScore();String content = (String) rowRecord.get(searchParamVo.getOutputFields().get(0));log.info("Top " + j + " ID:" + longID + " Distance:" + distance);log.info("Content: " + content);list.add(SearchResultVo.builder().id(longID).score(distance).conent(content).build());}result.add(list);}log.info("Successfully!");return result;}public Boolean creatCollectionERP(String collectionName) {// 主键字段FieldType fieldType1 = FieldType.newBuilder().withName(Content.Field.ID).withDescription("primary key").withDataType(DataType.Int64).withPrimaryKey(true).withAutoID(true).build();// 文本字段FieldType fieldType2 = FieldType.newBuilder().withName(Content.Field.CONTENT).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();// 向量字段FieldType fieldType3 = FieldType.newBuilder().withName(Content.Field.CONTENT_VECTOR).withDataType(DataType.FloatVector).withDimension(Content.FEATURE_DIM).build();FieldType fieldType4 = FieldType.newBuilder().withName(Content.Field.CONTENT_ANSWER).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType5 = FieldType.newBuilder().withName(Content.Field.TITLE).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType6 = FieldType.newBuilder().withName(Content.Field.PARAM).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType7 = FieldType.newBuilder().withName(Content.Field.TYPE).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();// 创建collectionCreateCollectionParam createCollectionReq = CreateCollectionParam.newBuilder().withCollectionName(collectionName).withDescription("Schema of Content 
ERP").withShardsNum(Content.SHARDS_NUM).addFieldType(fieldType1).addFieldType(fieldType2).addFieldType(fieldType3).addFieldType(fieldType4).addFieldType(fieldType5).addFieldType(fieldType6).addFieldType(fieldType7).build();R<RpcStatus> response = milvusClient.createCollection(createCollectionReq);log.info("collection: {} is created ? status: = {}", collectionName, response.getData().getMsg());return response.getData().getMsg().equals("Success");}public Boolean creatCollectionERPCLIP(String collectionName) {// 主键字段FieldType fieldType1 = FieldType.newBuilder().withName(Content.Field.ID).withDescription("primary key").withDataType(DataType.Int64).withPrimaryKey(true).withAutoID(true).build();// 文本字段FieldType fieldType2 = FieldType.newBuilder().withName(Content.Field.CONTENT).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();// 向量字段FieldType fieldType3 = FieldType.newBuilder().withName(Content.Field.CONTENT_VECTOR).withDataType(DataType.FloatVector).withDimension(Content.FEATURE_DIM_CLIP).build();FieldType fieldType4 = FieldType.newBuilder().withName(Content.Field.CONTENT_ANSWER).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType5 = FieldType.newBuilder().withName(Content.Field.TITLE).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType6 = FieldType.newBuilder().withName(Content.Field.PARAM).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType7 = FieldType.newBuilder().withName(Content.Field.TYPE).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType8 = FieldType.newBuilder().withName(Content.Field.LABEL).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();// 创建collectionCreateCollectionParam createCollectionReq = CreateCollectionParam.newBuilder().withCollectionName(collectionName).withDescription("Schema of Content 
ERP").withShardsNum(Content.SHARDS_NUM).addFieldType(fieldType1).addFieldType(fieldType2).addFieldType(fieldType3).addFieldType(fieldType4).addFieldType(fieldType5).addFieldType(fieldType6).addFieldType(fieldType7).addFieldType(fieldType8).build();R<RpcStatus> response = milvusClient.createCollection(createCollectionReq);log.info("collection: {} is created ? status: = {}", collectionName, response.getData().getMsg());return response.getData().getMsg().equals("Success");}public Boolean creatCollectionERPNLP(String collectionName) {// 主键字段FieldType fieldType1 = FieldType.newBuilder().withName(Content.Field.ID).withDescription("primary key").withDataType(DataType.Int64).withPrimaryKey(true).withAutoID(true).build();// 文本字段FieldType fieldType2 = FieldType.newBuilder().withName(Content.Field.CONTENT).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();// 向量字段FieldType fieldType3 = FieldType.newBuilder().withName(Content.Field.CONTENT_VECTOR).withDataType(DataType.FloatVector).withDimension(Content.FEATURE_DIM_CLIP).build();FieldType fieldType4 = FieldType.newBuilder().withName(Content.Field.CONTENT_ANSWER).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType5 = FieldType.newBuilder().withName(Content.Field.TITLE).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType6 = FieldType.newBuilder().withName(Content.Field.PARAM).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType7 = FieldType.newBuilder().withName(Content.Field.TYPE).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();FieldType fieldType8 = FieldType.newBuilder().withName(Content.Field.LABEL).withDataType(DataType.VarChar).withMaxLength(Content.MAX_LENGTH).build();// 创建collectionCreateCollectionParam createCollectionReq = CreateCollectionParam.newBuilder().withCollectionName(collectionName).withDescription("Schema of Content 
ERP").withShardsNum(Content.SHARDS_NUM).addFieldType(fieldType1).addFieldType(fieldType2).addFieldType(fieldType3).addFieldType(fieldType4).addFieldType(fieldType5).addFieldType(fieldType6).addFieldType(fieldType7).addFieldType(fieldType8).build();R<RpcStatus> response = milvusClient.createCollection(createCollectionReq);log.info("collection: {} is created ? status: = {}", collectionName, response.getData().getMsg());return response.getData().getMsg().equals("Success");}public List<List<SearchERPResultVo>> searchERPTopKSimilarity(SearchERPParamVo searchParamVo) {log.info("------search ERP TopK Similarity------");SearchParam searchParam = SearchParam.newBuilder().withCollectionName(searchParamVo.getCollectionName()).withMetricType(MetricType.L2).withOutFields(searchParamVo.getOutputFields()).withTopK(searchParamVo.getTopK()).withVectors(searchParamVo.getQueryVectors()).withVectorFieldName(Content.Field.CONTENT_VECTOR).withParams(searchParamVo.getParams()).build();R<SearchResults> respSearch = milvusClient.search(searchParam);if (respSearch.getData() == null) {return null;}log.info("------ process query results ------");SearchResultsWrapper wrapper = new SearchResultsWrapper(respSearch.getData().getResults());List<List<SearchERPResultVo>> result = new ArrayList<>();for (int i = 0; i < searchParamVo.getQueryVectors().size(); ++i) {List<SearchResultsWrapper.IDScore> scores = wrapper.getIDScore(i);List<QueryResultsWrapper.RowRecord> rowRecords = wrapper.getRowRecords();List<SearchERPResultVo> list = new ArrayList<>();for (int j = 0; j < scores.size(); ++j) {SearchResultsWrapper.IDScore score = scores.get(j);QueryResultsWrapper.RowRecord rowRecord = rowRecords.get(j);long longID = score.getLongID();float distance = score.getScore();String content = (String) rowRecord.get(searchParamVo.getOutputFields().get(0));String contentAnswer = (String) rowRecord.get(searchParamVo.getOutputFields().get(1));String title = (String) 
rowRecord.get(searchParamVo.getOutputFields().get(2));log.info("Top " + j + " ID:" + longID + " Distance:" + distance);log.info("Content: " + content);list.add(SearchERPResultVo.builder().id(longID).score(distance).content(content).contentAnswer(contentAnswer).title(title).build());}result.add(list);}log.info("Successfully!");return result;}public List<List<SearchNLPResultVo>> searchNLPTopKSimilarity(SearchNLPParamVo searchParamVo) {log.info("------search ERP TopK Similarity------");SearchParam searchParam = SearchParam.newBuilder().withCollectionName(searchParamVo.getCollectionName()).withMetricType(MetricType.L2).withOutFields(searchParamVo.getOutputFields()).withTopK(searchParamVo.getTopK()).withVectors(searchParamVo.getQueryVectors()).withVectorFieldName(Content.Field.CONTENT_VECTOR).withParams(searchParamVo.getParams()).withExpr(searchParamVo.getExpr()).build();R<SearchResults> respSearch = milvusClient.search(searchParam);if (respSearch.getData() == null) {return null;}log.info("------ process query results ------");SearchResultsWrapper wrapper = new SearchResultsWrapper(respSearch.getData().getResults());List<List<SearchNLPResultVo>> result = new ArrayList<>();for (int i = 0; i < searchParamVo.getQueryVectors().size(); ++i) {List<SearchResultsWrapper.IDScore> scores = wrapper.getIDScore(i);List<QueryResultsWrapper.RowRecord> rowRecords = wrapper.getRowRecords();List<SearchNLPResultVo> list = new ArrayList<>();for (int j = 0; j < scores.size(); ++j) {SearchResultsWrapper.IDScore score = scores.get(j);QueryResultsWrapper.RowRecord rowRecord = rowRecords.get(j);long longID = score.getLongID();float distance = score.getScore();String content = (String) rowRecord.get(searchParamVo.getOutputFields().get(0));String contentAnswer = (String) rowRecord.get(searchParamVo.getOutputFields().get(1));String title = (String) rowRecord.get(searchParamVo.getOutputFields().get(2));log.info("Top " + j + " ID:" + longID + " Distance:" + distance);log.info("Content: " + 
content);list.add(SearchNLPResultVo.builder().id(longID).score(distance).content(content).contentAnswer(contentAnswer).title(title).build());}result.add(list);}log.info("Successfully!");return result;}
}

3、测试用例 

MilvusServiceERPNLPTest

@SpringBootTest(classes = {DataChatgptApplication.class}, webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
public class MilvusServiceERPNLPTest {@AutowiredMivusService milvusService;@AutowiredMilvusClient milvusClient;@Testvoid isExitCollection() {boolean mediumArticles = milvusService.isExitCollection(Content.COLLECTION_NAME_NLP);Assertions.assertTrue(mediumArticles);}@Testvoid creatCollection() {Boolean created = milvusService.creatCollectionERPNLP(Content.COLLECTION_NAME_NLP);Assertions.assertTrue(created);}@Testvoid createIndex(){Boolean index = milvusService.createIndex(Content.COLLECTION_NAME_NLP);Assertions.assertTrue(index);}@Testpublic void insertVector(){List<String> sentenceList = new ArrayList<>();sentenceList.add("网址是多少");List<String> contentAnswerList = new ArrayList<>();contentAnswerList.add("/home.ashx");List<String> titleList = new ArrayList<>();titleList.add("网址");List<String> paramList = new ArrayList<>();paramList.add("");List<String> typeList = new ArrayList<>();typeList.add("0");List<String> labelList = new ArrayList<>();labelList.add("操作直达");PaddleNewTextVo paddleNewTextVo = null;try {paddleNewTextVo = getVectorsLists(sentenceList);if (paddleNewTextVo == null) {// 获取不到再重试下paddleNewTextVo = getVectorsLists(sentenceList);}List<List<Double>> vectors = paddleNewTextVo.getVector();List<List<Float>> floatVectors = new ArrayList<>();for (List<Double> innerList : vectors) {List<Float> floatInnerList = new ArrayList<>();for (Double value : innerList) {floatInnerList.add(value.floatValue());}floatVectors.add(floatInnerList);}// 2.准备插入向量数据库List<InsertParam.Field> fields = new ArrayList<>();fields.add(new InsertParam.Field(Content.Field.CONTENT, sentenceList));fields.add(new InsertParam.Field(Content.Field.CONTENT_VECTOR, floatVectors));fields.add(new InsertParam.Field(Content.Field.CONTENT_ANSWER, contentAnswerList));fields.add(new InsertParam.Field(Content.Field.TITLE, titleList));fields.add(new InsertParam.Field(Content.Field.PARAM, paramList));fields.add(new InsertParam.Field(Content.Field.TYPE, typeList));fields.add(new 
InsertParam.Field(Content.Field.LABEL, labelList));// 3.执行操作milvusService.insert(Content.COLLECTION_NAME_NLP, fields);} catch (ApiException e) {System.out.println(e.getMessage());} catch (IOException e) {throw new RuntimeException(e);}}private static PaddleNewTextVo getVectorsLists(List<String> sentenceList) throws IOException {String url = "http://192.168.1.243:6001/";  //paddleURL obj = new URL(url);HttpURLConnection con = (HttpURLConnection) obj.openConnection();// 设置超时时间con.setConnectTimeout(50000);con.setReadTimeout(200000);con.setRequestMethod("POST");con.setRequestProperty("Content-Type", "application/json");con.setDoOutput(true);ObjectMapper objectParmMapper = new ObjectMapper();// 创建一个Map结构表示您的数据Map<String, List<Map<String, String>>> dataMap = new HashMap<>();dataMap.put("data", sentenceList.stream().map(sentence -> Collections.singletonMap("text", sentence)).collect(Collectors.toList()));String jsonData = null;try {// 将Map转换为JSON字符串jsonData = objectParmMapper.writeValueAsString(dataMap);} catch (JsonProcessingException e) {System.err.println("Error converting to JSON: " + e.getMessage());}String data = jsonData;try(OutputStream os = con.getOutputStream()) {byte[] input = data.getBytes("utf-8");os.write(input, 0, input.length);}int responseCode = con.getResponseCode();System.out.println("Response Code: " + responseCode);PaddleNewTextVo paddleNewTextVo = null;if (responseCode == HttpURLConnection.HTTP_OK) { // 200表示成功BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));String inputLine;StringBuilder content = new StringBuilder();while ((inputLine = in.readLine()) != null) {content.append(inputLine);}in.close();try {String contentStr = content.toString();// 直接解析JSON字符串到PaddleTextVo实例paddleNewTextVo = JSON.parseObject(contentStr, PaddleNewTextVo.class);} catch (Exception e) {System.err.println("Error parsing JSON: " + e.getMessage());}} else {System.out.println("Error Response Code: " + responseCode);BufferedReader errorReader = 
new BufferedReader(new InputStreamReader(con.getErrorStream()));String errorMessage;while ((errorMessage = errorReader.readLine()) != null) {System.out.println("Error Message: " + errorMessage);}errorReader.close();}return paddleNewTextVo;}@Testvoid searchTest(){// 0.加载向量集合milvusService.loadCollection(Content.COLLECTION_NAME_NLP);try {List<String> sentenceList = new ArrayList<>();sentenceList.add("XX列表");String label = "操作直达";// 1.获得向量// List<List<Float>> vectors = getVectorsLists(sentenceList);List<List<Float>> vectors = new ArrayList<>();SearchNLPParamVo searchParamVo = SearchNLPParamVo.builder().collectionName(Content.COLLECTION_NAME_NLP).queryVectors(vectors).expr("label == '" + label + "'").topK(3).build();// 2.在向量数据库中进行搜索内容知识List<List<SearchNLPResultVo>> lists = milvusService.searchNLPTopKSimilarity(searchParamVo);lists.forEach(searchResultVos -> {searchResultVos.forEach(searchResultVo -> {System.out.println(searchResultVo.getContent());System.out.println(searchResultVo.getContentAnswer());System.out.println(searchResultVo.getTitle());System.out.println(searchResultVo.getLabel());});});} catch (ApiException e) {System.out.println(e.getMessage());} /*catch (IOException e) {throw new RuntimeException(e);}
*/}@Testpublic void insertTextVector() throws IOException {List<String> titleList = new ArrayList<>();List<String> sentenceList = new ArrayList<>();List<String> contentAnswerList = new ArrayList<>();List<String> paramList = new ArrayList<>();List<String> typeList = new ArrayList<>();String filePath = "src/main/resources/data/text.txt";try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_8))) {// 使用4个竖线(||||)作为分隔符String line;while ((line = reader.readLine()) != null) {String[] parts = line.split("\\|\\|\\|\\|");if (parts.length >= 3) {titleList.add(parts[0].trim());sentenceList.add(parts[1].trim());contentAnswerList.add(parts[2].trim());paramList.add("");typeList.add("2");} else {System.out.println("Warning: Invalid format on line: " + line);}}// 打印或处理列表内容System.out.println("Title List: " + titleList);System.out.println("Sentence List: " + sentenceList);System.out.println("Content Answer List: " + contentAnswerList);} catch (IOException e) {System.err.println("Error reading file: " + e.getMessage());}try {// 1.获得向量TextEmbeddingParam param = TextEmbeddingParam.builder().model(TextEmbedding.Models.TEXT_EMBEDDING_V1).texts(sentenceList).build();TextEmbedding textEmbedding = new TextEmbedding();TextEmbeddingResult result = textEmbedding.call(param);List<List<Float>> vectors = new ArrayList<>();for (int i = 0; i < result.getOutput().getEmbeddings().size(); i++) {List<Double> vector = result.getOutput().getEmbeddings().get(i).getEmbedding();List<Float> floatVector = vector.stream().map(Double::floatValue).collect(Collectors.toList());vectors.add(floatVector);}// 2.准备插入向量数据库List<InsertParam.Field> fields = new ArrayList<>();fields.add(new InsertParam.Field(Content.Field.CONTENT, sentenceList));fields.add(new InsertParam.Field(Content.Field.CONTENT_VECTOR, vectors));fields.add(new InsertParam.Field(Content.Field.CONTENT_ANSWER, contentAnswerList));fields.add(new InsertParam.Field(Content.Field.TITLE, 
titleList));fields.add(new InsertParam.Field(Content.Field.PARAM, paramList));fields.add(new InsertParam.Field(Content.Field.TYPE, typeList));// 3.执行操作milvusService.insert(Content.COLLECTION_NAME_NLP, fields);} catch (ApiException | NoApiKeyException e) {System.out.println(e.getMessage());}}@Testvoid ChatBasedContentTest() throws NoApiKeyException, InputRequiredException, InterruptedException {// 0.加载向量集合milvusService.loadCollection(Content.COLLECTION_NAME_NLP);try {String question = "查询订单";List<String> sentenceList = new ArrayList<>();sentenceList.add(question);// 1.获得向量TextEmbeddingParam param = TextEmbeddingParam.builder().model(TextEmbedding.Models.TEXT_EMBEDDING_V1).texts(sentenceList).build();TextEmbedding textEmbedding = new TextEmbedding();TextEmbeddingResult result = textEmbedding.call(param);List<Double> vector = result.getOutput().getEmbeddings().get(0).getEmbedding();List<Float> floatVector = vector.stream().map(Double::floatValue).collect(Collectors.toList());List<List<Float>> vectors = Collections.singletonList(floatVector);SearchERPParamVo searchParamVo = SearchERPParamVo.builder().collectionName(Content.COLLECTION_NAME_NLP).queryVectors(vectors).topK(3).build();// 2.在向量数据库中进行搜索内容知识StringBuffer buffer = new StringBuffer();List<List<SearchERPResultVo>> lists = milvusService.searchERPTopKSimilarity(searchParamVo);lists.forEach(searchResultVos -> {searchResultVos.forEach(searchResultVo -> {buffer.append("问题: " + searchResultVo.getContent());buffer.append("答案: " + searchResultVo.getContentAnswer());});});// 3.进行对话String prompt = "请你充分理解下面的内容,然后回答问题, 要求仅返回答案[]中内容:";String content = buffer.toString();String resultQwen = streamCallWithCallback(prompt + content + question);// System.out.println(resultQwen);} catch (ApiException | NoApiKeyException e) {System.out.println(e.getMessage());}}public static String streamCallWithCallback(String content)throws NoApiKeyException, ApiException, InputRequiredException,InterruptedException 
{Constants.apiKey="sk-2106098eed1f43c9bde754f3e87038a2";Generation gen = new Generation();Message userMsg = Message.builder().role(Role.USER.getValue()).content(content).build();QwenParam param = QwenParam.builder().model(Generation.Models.QWEN_PLUS).resultFormat(QwenParam.ResultFormat.MESSAGE).messages(Arrays.asList(userMsg)).topP(0.8).incrementalOutput(true) // get streaming output incrementally.build();Semaphore semaphore = new Semaphore(0);StringBuilder fullContent = new StringBuilder();gen.streamCall(param, new ResultCallback<GenerationResult>() {@Overridepublic void onEvent(GenerationResult message) {fullContent.append(message.getOutput().getChoices().get(0).getMessage().getContent());System.out.println(message);}@Overridepublic void onError(Exception err){System.out.println(String.format("Exception: %s", err.getMessage()));semaphore.release();}@Overridepublic void onComplete(){System.out.println("Completed");semaphore.release();}});semaphore.acquire();System.out.println("Full content: \n" + fullContent.toString());return fullContent.toString();}@Testvoid loadData() throws IOException {// Read the dataset fileString content = readFileToString("src/main/resources/data/medium_articles_2020_dpr.json");// Load datasetJSONObject dataset = JSON.parseObject(content);List<JSONObject> rows = getRows(dataset.getJSONArray("rows"), 2);System.out.println(rows);}public String readFileToString(String filePath) throws IOException {return new String(Files.readAllBytes(Paths.get(filePath)), StandardCharsets.UTF_8);}public static List<JSONObject> getRows(JSONArray dataset, int counts) {List<JSONObject> rows = new ArrayList<>();for (int i = 0; i < counts; i++) {JSONObject row = dataset.getJSONObject(i);List<Float> vectors = row.getJSONArray("title_vector").toJavaList(Float.class);Long reading_time = row.getLong("reading_time");Long claps = row.getLong("claps");Long responses = row.getLong("responses");row.put("title_vector", vectors);row.put("reading_time", 
reading_time);row.put("claps", claps);row.put("responses", responses);row.remove("id");rows.add(row);}return rows;}@Testvoid getFileds() throws IOException {String content = readFileToString("src/main/resources/data/medium_articles_2020_dpr.json");// Load datasetJSONObject dataset = JSON.parseObject(content);List<InsertParam.Field> field = getFields(dataset.getJSONArray("rows"), 1);System.out.println(field);}public static List<InsertParam.Field> getFields(JSONArray dataset, int counts) {List<InsertParam.Field> fields = new ArrayList<>();List<String> titles = new ArrayList<>();List<List<Float>> title_vectors = new ArrayList<>();List<String> links = new ArrayList<>();List<Long> reading_times = new ArrayList<>();List<String> publications = new ArrayList<>();List<Long> claps_list = new ArrayList<>();List<Long> responses_list = new ArrayList<>();for (int i = 0; i < counts; i++) {JSONObject row = dataset.getJSONObject(i);titles.add(row.getString("title"));title_vectors.add(row.getJSONArray("title_vector").toJavaList(Float.class));links.add(row.getString("link"));reading_times.add(row.getLong("reading_time"));publications.add(row.getString("publication"));claps_list.add(row.getLong("claps"));responses_list.add(row.getLong("responses"));}fields.add(new InsertParam.Field("title", titles));fields.add(new InsertParam.Field("title_vector", title_vectors));fields.add(new InsertParam.Field("link", links));fields.add(new InsertParam.Field("reading_time", reading_times));fields.add(new InsertParam.Field("publication", publications));fields.add(new InsertParam.Field("claps", claps_list));fields.add(new InsertParam.Field("responses", responses_list));return fields;}@Testvoid searchTopKSimilarity() throws IOException {// Search dataString content = readFileToString("src/main/resources/data/medium_articles_2020_dpr.json");// Load datasetJSONObject dataset = JSON.parseObject(content);List<JSONObject> rows = getRows(dataset.getJSONArray("rows"), 10);// You should include the following 
in the main functionList<List<Float>> queryVectors = new ArrayList<>();List<Float> queryVector = rows.get(0).getJSONArray("title_vector").toJavaList(Float.class);queryVectors.add(queryVector);// Prepare the outputFieldsList<String> outputFields = new ArrayList<>();outputFields.add("title");outputFields.add("link");// Search vectors in a collectionSearchParam searchParam = SearchParam.newBuilder().withCollectionName("medium_articles").withVectorFieldName("title_vector").withVectors(queryVectors).withExpr("claps > 30 and reading_time < 10").withTopK(3).withMetricType(MetricType.L2).withParams("{\"nprobe\":10,\"offset\":2, \"limit\":3}").withConsistencyLevel(ConsistencyLevelEnum.BOUNDED).withOutFields(outputFields).build();R<SearchResults> response = milvusClient.search(searchParam);SearchResultsWrapper wrapper = new SearchResultsWrapper(response.getData().getResults());System.out.println("Search results");for (int i = 0; i < queryVectors.size(); ++i) {List<SearchResultsWrapper.IDScore> scores = wrapper.getIDScore(i);List<QueryResultsWrapper.RowRecord> rowRecords = wrapper.getRowRecords();for (int j = 0; j < scores.size(); ++j) {SearchResultsWrapper.IDScore score = scores.get(j);QueryResultsWrapper.RowRecord rowRecord = rowRecords.get(j);System.out.println("Top " + j + " ID:" + score.getLongID() + " Distance:" + score.getScore());System.out.println("Title: " + rowRecord.get("title"));System.out.println("Link: " + rowRecord.get("link"));}}}}

4、查询

// First, query the vector store for corpus entries semantically close to the user's message.
List<Question> questionList = mivusService.searchNewPaddleQuestion(req.getMessage(), "1", appType);

*** 根据问题进行向量查询,采用Paddle服务 采用新的文本分类方法* @param question 用户的问题文本* @return 相关的初始问题知识列表*/public List<Question> searchNewPaddleQuestion(String question, String type, String appType) {// 0.加载向量集合String collection = Content.COLLECTION_NAME_NLP;if (appType.equals("1")) {collection = Content.COLLECTION_NAME_NLP_APP;}loadCollection(collection);List<Question> resultList = new LinkedList<>();PaddleNewTextVo paddleNewTextVo = null;try {List<String> sentenceList = new ArrayList<>();sentenceList.add(question);// 1.获得向量paddleNewTextVo = getNewNLPVectorsLists(sentenceList);log.info("实时向量值 : {}", paddleNewTextVo.getPredictedList());List<List<Double>> vectors = paddleNewTextVo.getVector();List<List<Float>> floatVectors = new ArrayList<>();for (List<Double> innerList : vectors) {List<Float> floatInnerList = new ArrayList<>();for (Double value : innerList) {floatInnerList.add(value.floatValue());}floatVectors.add(floatInnerList);}List<Integer> predictedList = paddleNewTextVo.getPredictedList();List<String> labelStrings = new ArrayList<>();HashSet<Integer> setType = new HashSet();int topK = 3;if(!predictedList.isEmpty()) {// 去重for (Integer number : predictedList) {setType.add(number);if (number == 2) {// 如何是 2topK = 1;}}for (Integer label : setType) {labelStrings.add("'" + label + "'");}}String typeResult = "[" + String.join(", ", labelStrings) + "]";SearchNLPParamVo searchParamVo = SearchNLPParamVo.builder().collectionName(collection)//.expr("type == '" + type + "'").expr("type in ['0','1','2']")//.expr("type in " + typeResult + " ").queryVectors(floatVectors).topK(topK).build();// 2.在向量数据库中进行搜索内容知识List<List<SearchNLPResultVo>> lists = searchNLPERPTopKSimilarity(searchParamVo);lists.forEach(searchResultVos -> {searchResultVos.forEach(searchResultVo -> {log.info(searchResultVo.getContent());log.info(searchResultVo.getContentAnswer());Question question1 = new 
Question();question1.setQuestionId(Long.valueOf(searchResultVo.getId()));question1.setQuestion(searchResultVo.getContent());question1.setAnswer(searchResultVo.getContentAnswer());question1.setTitle(searchResultVo.getTitle());question1.setParam(searchResultVo.getParam());question1.setType(searchResultVo.getType());question1.setLabel(searchResultVo.getLabel());resultList.add(question1);});});} catch (ApiException | IOException e) {log.error(e.getMessage());}// 将查询到的结果转换为之前构造的 Question 的格式返回给前端return resultList;}

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/847078.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

STL容器--list

1. list的介绍及使用 1.1 list的介绍 1. list是可以在常数范围内在任意位置进行插入和删除的序列式容器,并且该容器可以前后双向迭代。 2. list的底层是双向链表结构,双向链表中每个元素存储在互不相关的独立节点中,在节点中通过指针指其前…

面试官:对于MQ中的消息丢失你是如何理解的?

相信很多的小伙伴在面试的时候,涉及到MQ的面试题,消息丢失是必问面试题之一。那么对于消息丢失你又是如何理解的呢? 下面我们一起来看一下。 本文以 Kafka 举例说明 一、什么是消息丢失? 消息丢失的定义是:在消息传递…

ActivityRecord、TaskRecord、ActivityStack以及Activity详解

adb shell dumpsys activity activities 输出涉及到了解 Android 活动管理器(Activity Manager)的当前状态,以及系统中运行的活动和任务的详细信息。这是系统中活动(Activities)、任务(Tasks)、…

【动手学深度学习】softmax回归从零开始实现的研究详情

目录 🌊1. 研究目的 🌊2. 研究准备 🌊3. 研究内容 🌍3.1 softmax回归的从零开始实现 🌍3.2 基础练习 🌊4. 研究体会 🌊1. 研究目的 理解softmax回归的原理和基本实现方式;学习…

Python打印当前目录下,所有文件名的首字母

代码如下: #!/usr/bin/env python3 """ 按顺序打印当前目录下,所有文件名的首字母(忽略大小写) """ import sys from pathlib import Path def main() -> None: ps = Path(__file__).parent.glob(…

代码随想录算法训练营第26天(py)| 回溯 | 39. 组合总和、40.组合总和II、131.分割回文串

39. 组合总和 力扣链接 给定一个无重复元素的数组 candidates 和一个目标数 target ,找出 candidates 中所有可以使数字和为 target 的组合。 candidates 中的数字可以无限制重复被选取。 说明: 所有数字(包括 target)都是正整数…

上传RKP 证书签名请求息上传到 Google 的后端服务器

上传证书签名请求 1.准备环境:OK pip3 install google-auth==2.13.0 requests==2.28 下载 device_info_uploader.py 。 没找到先跳过 选项 1:通过 GCP 帐户使用 device_info_uploader.py 运行脚本。 ./device_info_uploader.py --credentials /secure/s…

深入理解Python的包管理器:pip

深入理解Python的包管理器:pip 引言 Python作为一门流行的编程语言,拥有强大的生态系统,其中pip扮演着至关重要的角色。pip是Python的包管理工具,它允许用户安装、升级和管理Python包。本专栏旨在帮助读者深入了解pip的各个方面…

NFS服务p.2 用户的上传与下载,以及用户映射

如何进行上传和下载呢? 目录 如何进行上传和下载呢? 上传 访问时的账户映射对于上传文件和下载文件的影响? 在服务器里进行修改用户的权限 如何修改使用用户上传时的名字? 上传 上传的话,因为我们现在所在的nfs1…

端午佳节到,礼轻情意重,闪侠惠递帮你高效便宜寄快递

马上就是端午佳节了,我们通常会吃粽子,赛龙舟,但是这些礼物我们该怎么快速的送到我们亲朋好友的手中呢?小编这里非常推荐大家使用闪侠惠递来寄快递。不仅能高效便捷的把礼物送到你的手中,而且还能以非常便宜的价格呢…

03_初识Spring Cloud Gateway

文章目录 一、网关简介1.1 网关提出的背景1.2 网关在微服务中的位置1.3 网关的技术选型1.4 补充 二、Spring Cloud Gateway的简介2.1 核心概念:路由(Route)2.2 核心概念:断言(Predicate)2.3 核心概念…

聊聊Java中的动态代理机制

引言 动态代理是Java中一个非常强大的特性,它允许我们在运行时动态地创建代理对象。本文将深入探讨动态代理的工作原理、实现步骤以及在实际项目中的应用。 第一部分:代理模式基础 代理模式是一种结构型设计模式,它为其他对象提供一个代替…

力扣524. 通过删除字母匹配到字典里最长单词

给你一个字符串 s 和一个字符串数组 dictionary ,找出并返回 dictionary 中最长的字符串,该字符串可以通过删除 s 中的某些字符得到。 如果答案不止一个,返回长度最长且字母序最小的字符串。如果答案不存在,则返回空字符串。 示…

为什么人工智能用 Python?

为什么人工智能用 Python? 人工智能(AI)技术取得了飞速发展,从语音识别、图像处理到自然语言处理,而在众多编程语言中,Python 因其简洁易用、丰富的库和社区支持,成为了 AI 开发的首选语言。本…

linux系统——性能检测工具glances

在linux系统中,由python开发的glances工具是一个功能强大的性能检测工具 可以通过yum进行安装 安装glances后,进入命令界面 glances支持网站模式,将监控到的数据以网站形式显示出来 这里需要用python包管理命令 使用glances -w开放…

linux中最基础使用的命令

小白学习记录: 前情提要:Linux命令基础格式!查看 ls看目录的小技巧 进入指定目录 cd查看当前工作目录 pwd创建一个新的目录(文件夹) mkdir创建文件 touch查看文件内容 cat、more操作文件、文件夹- 复制 cp- 移动 mv- 删除【危险操作…

PostgreSQL 远程登录postgres用户不需要密码?免密登录权限设置

PostgreSQL 安装之后,发现登录postgres用户时不需要密码。原因是配置远程IP时,IP 地址对应的 method 设置成了 trust。 今天安全测试反馈,pgsql有弱口令问题,于是上去改了pgsql的密码,结果问题还没解决。查看了具体的问…

docker运行centos提示Operation not permitted

1、在docker中运行了centos7镜像 2、进入到centos容器中使用systemctl命令时提示 systemctl Failed to get D-Bus connection: Operation not permitted 3、解决办法 在运行centos镜像的时候加上--privileged参数 4、附上docker官网命令说明截图

ASP.NET 中的 默认应用程序配置源

默认应用程序配置源 从最高级到最低优先级 使用命令行配置提供程序的命令行参数。使用无前缀环境变量配置提供程序的无前缀环境变量,应用程序在环境中运行时的用户机密 Development 。appsettings.{Environment}.json使用JSON 配置提供程序。例如appsettings.Production.json…