Hibernate search入门
原理是运用Hibernate结合Lucene,为实体建立全文索引并实现全文检索
1、引入jar包配置(Hibernate和Lucene包)
<!-- Hibernate Search ORM integration (the Hibernate side of the Hibernate + Lucene combination described above). -->
<dependency> <groupId>org.hibernate</groupId> <artifactId>hibernate-search-orm</artifactId> <version>4.2.0.Final</version>
</dependency>
<!-- Lucene smartcn module; supplies the SmartChineseAnalyzer referenced by the entity and the test below. -->
<dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-smartcn</artifactId> <version>3.6.2</version>
</dependency>
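2、在Hibernate主配置文件中指定索引的存放方式和路径(例如 hibernate.search.default.directory_provider=filesystem、hibernate.search.default.indexBase=索引存放目录)
3、给实体类增加注解(注意指定表的索引以及索引设定的分词器)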
/**
 * JPA entity mapped to the PAGEINFO table and indexed by Hibernate Search.
 *
 * The index is named "PageInfo"; per the original note, the directory that holds it
 * is configured in the main Hibernate configuration file.
 * Backing fields and setters are not shown in this excerpt.
 */
@Entity
@Table(name = "PAGEINFO")
@Indexed(index = "PageInfo")
@Analyzer(impl = SmartChineseAnalyzer.class) // analyzer (tokenizer) applied to this entity's indexed fields
public class Pageinfo implements java.io.Serializable {

    private static final long serialVersionUID = 5454155825314635342L;

    /** Primary key; the id property is also used as the Lucene document id. */
    @Id
    @DocumentId
    @GenericGenerator(name = "custom-id", strategy = "uuid")
    @GeneratedValue(generator = "custom-id")
    @Column(name = "ID", unique = true, nullable = false, insertable = true, updatable = true, length = 32)
    public String getId() {
        return id;
    }

    /** Title column; indexed but not stored in the index. */
    @Field(store = Store.NO)
    @Column(name = "TITLE", unique = false, nullable = true, insertable = true, updatable = true, length = 255)
    public String getTitle() {
        return title;
    }

    /** Content column; indexed but not stored in the index. */
    @Field(store = Store.NO)
    @Column(name = "CONTENT", unique = false, nullable = true, insertable = true, updatable = true)
    public String getContent() {
        return content;
    }

    /** Source column; indexed but not stored in the index. */
    @Field(store = Store.NO)
    @Column(name = "SOURCE", unique = false, nullable = true, insertable = true, updatable = true)
    public String getSource() {
        return source;
    }

    /** Summary column; indexed but not stored in the index. */
    @Field(store = Store.NO)
    @Column(name = "SUMMARY", unique = false, nullable = true, insertable = true, updatable = true)
    public String getSummary() {
        return summary;
    }

    /**
     * Owning site configuration, joined through SITE_ID.
     *
     * Embedded into this entity's index with the "site_" prefix, so a field such as
     * site_name searches the name property of the associated entity.
     */
    @IndexedEmbedded(prefix = "site_", depth = 1)
    @ManyToOne(cascade = {}, fetch = FetchType.LAZY)
    @JoinColumn(name = "SITE_ID", nullable = false, insertable = false, updatable = false)
    public GrabageSiteconfig getGrabageSiteconfig() {
        return this.grabageSiteconfig;
    }
}
4、使用API做单元测试
public class SearchTest { private static SessionFactory sf; @BeforeClass public static void init() { sf = HibernateConfigTest.sf;//弄一个SessionFactory,不多说 } @Before //执行索引 public void index(){ Session session = sf.openSession(); FullTextSession fullTextSession = Search.getFullTextSession(session); //查出结果 List<Pageinfo> pageinfos = session.createCriteria(Pageinfo.class).list(); session.beginTransaction(); //依次建立索引 for (Iterator iterator = pageinfos.iterator(); iterator.hasNext();) { Pageinfo pageinfo = (Pageinfo) iterator.next(); fullTextSession.index(pageinfo); } session.getTransaction().commit(); session.close(); System.out.println("index over......"); } @Test public void searchTest() { Session session = sf.openSession(); FullTextSession fullTextSession = Search.getFullTextSession(session); //在字段content中检索 QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", new SmartChineseAnalyzer(Version.LUCENE_36)); Query luceneqQuery=null; try { //检索含有“大风”的信息 luceneqQuery = queryParser.parse("大风"); } catch (ParseException e) { e.printStackTrace(); } //执行检索,得到结果集 FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneqQuery, Pageinfo.class); List<Pageinfo> pageinfos = fullTextQuery.list(); //查看结果做验证 for (Iterator iterator = pageinfos.iterator(); iterator.hasNext();) { Pageinfo pageinfo = (Pageinfo) iterator.next(); System.out.println(pageinfo.getContent()); } }
}
更详细的资料:
1、注解说明:
注解说明:@Indexed:让实体支持索引。@Analyzer:设置分词器,我这里使用的是开源的IK中文分词器。@DocumentId:索引文档ID。@Field:索引字段,该注解的默认属性值为:store=Store.NO(是否将数据存储在索引中;经实验,无论store=Store.NO还是store=Store.YES都不会影响最终的搜索。如果store=Store.NO,值是从数据库中获取;如果store=Store.YES,值是直接从索引文档中获取)、index=Index.YES(是否索引)、analyze=Analyze.YES(是否分词)。标注了注解的实体在保存和更新的时候,会自动生成或修改索引。
2、查询索引
/**
 * Runs a keyword search over {@code Article} and returns one page of highlighted results.
 *
 * @param pageNum  page number; the first result offset is {@code (pageNum - 1) * pageSize}
 * @param pageSize maximum number of results fetched for the page
 * @param keyword  text matched against the title, content and description fields
 * @return a PageModel built from pageNum, pageSize and the number of results on this page,
 *         carrying the highlighted copies of those results
 */
public PageModel<Article> searchArticle(int pageNum, int pageSize, String keyword) {
    String[] searchedFields = {"title", "content", "description"};

    FullTextSession fullTextSession = Search.getFullTextSession(sessionFactory.getCurrentSession());
    QueryBuilder articleQueryBuilder = fullTextSession.getSearchFactory()
            .buildQueryBuilder()
            .forEntity(Article.class)
            .get();
    Query luceneQuery = articleQueryBuilder.keyword()
            .onFields(searchedFields)
            .matching(keyword)
            .createQuery();

    // Restrict the query to the requested page.
    int firstResult = (pageNum - 1) * pageSize;
    FullTextQuery pagedQuery = fullTextSession.createFullTextQuery(luceneQuery, Article.class);
    pagedQuery.setFirstResult(firstResult);
    pagedQuery.setMaxResults(pageSize);
    List<Article> pageOfArticles = pagedQuery.list();

    // Wrap the page data.
    // NOTE(review): the third argument is the size of this page, not the total hit count;
    // if PageModel expects a total, FullTextQuery#getResultSize() may be what is wanted - confirm.
    PageModel<Article> page = new PageModel<>(pageNum, pageSize, pageOfArticles.size());
    // Replace the raw entities with highlighted copies.
    page.setData(SearchUtils.hightLight(luceneQuery, pageOfArticles, searchedFields));
    return page;
}
3、数据高亮
/**
 * Builds highlighted copies of the given articles.
 *
 * For every article a new {@code Article} is created (so the originals are not modified) and,
 * for each named property, the original value is copied over. When the highlighter returns a
 * fragment for that value, the fragment (wrapped in {@code <b style="color:red">}) replaces it.
 * Only the named properties and the id are populated on the copies.
 *
 * @param query  the Lucene query whose terms are highlighted
 * @param data   the articles to copy and highlight
 * @param fields names of the Article properties to copy and highlight
 * @return a new list with one highlighted copy per input article, in the same order
 * @throws IllegalArgumentException if a name in {@code fields} is not a property of Article
 * @throws RuntimeException         if the highlighter fails on a value
 */
public static List<Article> hightLight(Query query, List<Article> data, String... fields) {
    Formatter formatter = new SimpleHTMLFormatter("<b style=\"color:red\">", "</b>");
    QueryScorer queryScorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    // Tokenize with the IK Chinese analyzer.
    Analyzer analyzer = new IKAnalyzer();

    // Resolve the accessors once instead of once per article and field.
    java.lang.reflect.Method[] getters = new java.lang.reflect.Method[fields.length];
    java.lang.reflect.Method[] setters = new java.lang.reflect.Method[fields.length];
    for (int i = 0; i < fields.length; i++) {
        java.beans.PropertyDescriptor descriptor = BeanUtils.getPropertyDescriptor(Article.class, fields[i]);
        if (descriptor == null) {
            throw new IllegalArgumentException("Article has no property named '" + fields[i] + "'");
        }
        getters[i] = descriptor.getReadMethod();
        setters[i] = descriptor.getWriteMethod();
    }

    List<Article> result = new ArrayList<Article>(data.size());
    for (Article source : data) {
        // Return a fresh object rather than mutating the one that was passed in.
        Article copy = new Article();
        for (int i = 0; i < fields.length; i++) {
            // Copy the raw value first so the property is set even when nothing is highlighted.
            Object fieldValue = ReflectionUtils.invokeMethod(getters[i], source);
            ReflectionUtils.invokeMethod(setters[i], copy, fieldValue);
            if (fieldValue == null) {
                // Nothing to highlight; avoids feeding the literal text "null" to the highlighter.
                continue;
            }
            String highlighted;
            try {
                highlighted = highlighter.getBestFragment(analyzer, fields[i], String.valueOf(fieldValue));
            } catch (Exception e) {
                throw new RuntimeException("高亮显示关键字失败", e);
            }
            // Overwrite the raw value only when a highlighted fragment was produced.
            if (highlighted != null) {
                ReflectionUtils.invokeMethod(setters[i], copy, highlighted);
            }
        }
        // Carry the id over so the copy still identifies the original article.
        copy.setId(source.getId());
        result.add(copy);
    }
    return result;
}
4、页面迭代显示
<%-- Renders one "article" block per element of the request-scoped pageModel's data:
     a title link to show.action, the post time, the description and the view/comment counts.
     NOTE(review): title and description are written with plain EL; this presumably lets the
     <b> highlight markup render as HTML - confirm the underlying text is safe to emit unescaped. --%>
<s:iterator value="#request.pageModel.data">
<div class="article"><div class="article_title_area"><span class="article_title"><a href="${pageContext.request.contextPath }/article/show.action?id=${id }">${title }</a></span><span class="article_date">发表时间:<s:date name="postTime" format="yyyy-MM-dd HH:mm:ss"/></span></div><div class="article_content">${description }</div><div class="article_count_info"><span>阅读(${viewCount })</span><span>评论(${comments.size() })</span></div>
</div>
</s:iterator>