Lucene系列:(9)搜索结果排序
程序员文章站
2022-07-09 10:26:23
...
1、什么是搜索结果排序
搜索结果是按某个或某些字段高低排序来显示的结果
2、影响网站排名的多种因素
head/meta/
网页的标签整洁
网页执行速度
采用div+css
。。。。。。
3、Lucene中的显示结果次序与相关度得分有关
ScoreDoc.score;
默认情况下,Lucene是按相关度得分排序的,得分高排在前,得分低排在后
如果相关度得分相同,按插入索引库的先后次序排序
4、Lucene中的手工设置相关度得分
IndexWriter indexWriter = new IndexWriter(LuceneUtils.getDirectory(),LuceneUtils.getAnalyzer(),LuceneUtils.getMaxFieldLength());
document.setBoost(20F);
indexWriter.addDocument(document);
indexWriter.close();
TestSort.java
package com.rk.lucene.f_sort;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;
import com.rk.lucene.entity.Article;
import com.rk.lucene.utils.LuceneUtils;
/**
* 在默认情况下,Lucene是按照相关度得份排序的
*/
public class TestSort {
@Test
public void testAdd() throws Exception{
List<Article> list = new ArrayList<Article>();
list.add(new Article(1, "疾风之刃", "《疾风之刃》是一款超动作3D动漫风网游。作为新一代动作游戏,《疾风之刃》呈现出极致华丽的动作表演,精心打磨出的打击感震撼人心。"));
list.add(new Article(2, "月光疾风", "月光疾风,日本动漫《火影忍者》中的人物,比较个人主义,性格温和。火之国木叶村的特别上忍,中忍考试正赛预选的考官,体质似乎很不好,有着严重的黑眼圈、脸色苍白且经常咳嗽,善用剑术。"));
list.add(new Article(3, "疾风航班中文版下载", "《疾风航班》是一款优质的动作模拟游戏。游戏中包括亚欧美洲,乃至飞往太空的5条航线,共计50个循序渐进的关卡,以及具有挑战性的Expert级别评定,每个关卡结束后还可进入商店对主角和飞机进..."));
list.add(new Article(4, "八神疾风", "八神疾风(CV:植田佳奈)是日本动漫《魔法少女奈叶A's》首次登场的女角色。暗之书事件中心人物,时空管理局魔导师,擅长贝尔卡式广域·远程魔法。"));
list.add(new Article(5, "逝去的疾风", "大战中飞得最快的日本飞机,恐怕要数“疾风”战斗机了,它由中岛飞机厂研制生产,制式型号为: 四式单(座)战(斗机),代号キ-84(读作 Ki-84)。"));
list.add(new Article(6, "疾风剑豪 亚索", "亚索是一个百折不屈的男人,还是一名身手敏捷的剑客,能够运用风的力量来斩杀敌人。这位曾经春风得意的战士因为诬告而身败名裂,并且*卷入了一场令人绝望的生存之..."));
list.add(new Article(7, "疾风知劲草", "疾风知劲草,谓在猛烈的大风中,可看出什么样的草是强劲的。比喻意志坚定,经得起考验。出自《东观汉记·王霸传》:“上谓霸曰:‘颍川从我者皆逝,而子独留,始验疾风知劲草。..."));
LuceneUtils.addAll(list);
}
/**
* 人工设置该document的得分
*/
@Test
public void testAdd2() throws Exception{
Article article = new Article(8, "疾风知劲草", "疾风知劲草,谓在猛烈的大风中,可看出什么样的草是强劲的。比喻意志坚定,经得起考验。出自《东观汉记·王霸传》:“上谓霸曰:‘颍川从我者皆逝,而子独留,始验疾风知劲草。...");
Document document = LuceneUtils.javabean2document(article);
IndexWriter indexWriter = new IndexWriter(LuceneUtils.getDirectory(), LuceneUtils.getAnalyzer(), LuceneUtils.getMaxFieldLength());
//人工设置该document的得分
document.setBoost(100F);
indexWriter.addDocument(document);
indexWriter.close();
}
@Test
public void testSearch() throws Exception{
List<Article> list = new ArrayList<Article>();
String keyword = "疾风";
QueryParser queryParser = new QueryParser(LuceneUtils.getVersion(),"content", LuceneUtils.getAnalyzer());
Query query = queryParser.parse(keyword);
IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory());
TopDocs topDocs = indexSearcher.search(query, 10000);
for(int i=0;i<topDocs.scoreDocs.length;i++){
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
int docIndex = scoreDoc.doc;
Document document = indexSearcher.doc(docIndex);
System.out.println("编号为"+document.get("id")+"号的文章得分是" + scoreDoc.score);
Article article = LuceneUtils.document2javabean(document, Article.class);
list.add(article);
}
indexSearcher.close();
for(Article article : list){
System.out.println(article);
}
}
}
输出结果:
编号为8号的文章得分是31.94228
编号为1号的文章得分是0.33273208
编号为7号的文章得分是0.29114056
编号为4号的文章得分是0.23527712
编号为5号的文章得分是0.23527712
编号为2号的文章得分是0.20586747
编号为3号的文章得分是0.20586747
编号: 8
标题: 疾风知劲草
内容: 疾风知劲草,谓在猛烈的大风中,可看出什么样的草是强劲的。比喻意志坚定,经得起考验。出自《东观汉记·王霸传》:“上谓霸曰:‘颍川从我者皆逝,而子独留,始验疾风知劲草。...
------------------------------------------------------------------
编号: 1
标题: 疾风之刃
内容: 《疾风之刃》是一款超动作3D动漫风网游。作为新一代动作游戏,《疾风之刃》呈现出极致华丽的动作表演,精心打磨出的打击感震撼人心。
------------------------------------------------------------------
编号: 7
标题: 疾风知劲草
内容: 疾风知劲草,谓在猛烈的大风中,可看出什么样的草是强劲的。比喻意志坚定,经得起考验。出自《东观汉记·王霸传》:“上谓霸曰:‘颍川从我者皆逝,而子独留,始验疾风知劲草。...
------------------------------------------------------------------
编号: 4
标题: 八神疾风
内容: 八神疾风(CV:植田佳奈)是日本动漫《魔法少女奈叶A's》首次登场的女角色。暗之书事件中心人物,时空管理局魔导师,擅长贝尔卡式广域·远程魔法。
------------------------------------------------------------------
编号: 5
标题: 逝去的疾风
内容: 大战中飞得最快的日本飞机,恐怕要数“疾风”战斗机了,它由中岛飞机厂研制生产,制式型号为: 四式单(座)战(斗机),代号キ-84(读作 Ki-84)。
------------------------------------------------------------------
编号: 2
标题: 月光疾风
内容: 月光疾风,日本动漫《火影忍者》中的人物,比较个人主义,性格温和。火之国木叶村的特别上忍,中忍考试正赛预选的考官,体质似乎很不好,有着严重的黑眼圈、脸色苍白且经常咳嗽,善用剑术。
------------------------------------------------------------------
编号: 3
标题: 疾风航班中文版下载
内容: 《疾风航班》是一款优质的动作模拟游戏。游戏中包括亚欧美洲,乃至飞往太空的5条航线,共计50个循序渐进的关卡,以及具有挑战性的Expert级别评定,每个关卡结束后还可进入商店对主角和飞机进...
------------------------------------------------------------------
5、Lucene中按单个字段和多个字段排序
(1)按单个字段排序
Sort sort = new Sort(new SortField("id",SortField.INT,true));
TopDocs topDocs = indexSearcher.search(query,null,1000000,sort);
(2)按多个字段排序
Sort sort = new Sort(new SortField("count",SortField.INT,true),new SortField("id",SortField.INT,true));
TopDocs topDocs = indexSearcher.search(query,null,1000000,sort);
在多字段排序中,只有第一个字段排序结果相同时,第二个字段排序才有作用(提倡用数值型排序)
TestSort2.java
package com.rk.lucene.f_sort;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;
import com.rk.lucene.entity.Article;
import com.rk.lucene.utils.LuceneUtils;
/**
* 在默认情况下,Lucene是按照相关度得份排序的
*/
public class TestSort2 {
@Test
public void testAdd() throws Exception{
List<Article> list = new ArrayList<Article>();
list.add(new Article(1, "疾风之刃", "《疾风之刃》是一款超动作3D动漫风网游。作为新一代动作游戏,《疾风之刃》呈现出极致华丽的动作表演,精心打磨出的打击感震撼人心。"));
list.add(new Article(2, "月光疾风", "月光疾风,日本动漫《火影忍者》中的人物,比较个人主义,性格温和。火之国木叶村的特别上忍,中忍考试正赛预选的考官,体质似乎很不好,有着严重的黑眼圈、脸色苍白且经常咳嗽,善用剑术。"));
list.add(new Article(3, "疾风航班中文版下载", "《疾风航班》是一款优质的动作模拟游戏。游戏中包括亚欧美洲,乃至飞往太空的5条航线,共计50个循序渐进的关卡,以及具有挑战性的Expert级别评定,每个关卡结束后还可进入商店对主角和飞机进..."));
list.add(new Article(4, "八神疾风", "八神疾风(CV:植田佳奈)是日本动漫《魔法少女奈叶A's》首次登场的女角色。暗之书事件中心人物,时空管理局魔导师,擅长贝尔卡式广域·远程魔法。"));
list.add(new Article(5, "逝去的疾风", "大战中飞得最快的日本飞机,恐怕要数“疾风”战斗机了,它由中岛飞机厂研制生产,制式型号为: 四式单(座)战(斗机),代号キ-84(读作 Ki-84)。"));
list.add(new Article(6, "疾风剑豪 亚索", "亚索是一个百折不屈的男人,还是一名身手敏捷的剑客,能够运用风的力量来斩杀敌人。这位曾经春风得意的战士因为诬告而身败名裂,并且*卷入了一场令人绝望的生存之..."));
list.add(new Article(7, "疾风知劲草", "疾风知劲草,谓在猛烈的大风中,可看出什么样的草是强劲的。比喻意志坚定,经得起考验。出自《东观汉记·王霸传》:“上谓霸曰:‘颍川从我者皆逝,而子独留,始验疾风知劲草。..."));
LuceneUtils.addAll(list);
}
@Test
public void testSearch() throws Exception{
List<Article> list = new ArrayList<Article>();
String keyword = "疾风";
QueryParser queryParser = new QueryParser(LuceneUtils.getVersion(),"content", LuceneUtils.getAnalyzer());
Query query = queryParser.parse(keyword);
IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory());
//按得分度高低排序
//TopDocs topDocs = indexSearcher.search(query, 10000);
//创建排序对象
//参数一:id表示依据document对象中的哪个字段排序,例如:id
//参数二:SortField.INT表示document对象中该字段的类型,以常量方式书写
//参数三:true表示降序,类似于order by id desc
//参数三:false表示升序,类似于order by id asc
//Sort sort = new Sort(new SortField("id", SortField.INT, false));//单个字段排序
//TopDocs topDocs = indexSearcher.search(query, null, 10000, sort);
//多个字段排序:先按content的内容得分排序,再按id的降序排序
Sort sort = new Sort(new SortField("content", SortField.SCORE, false),new SortField("id", SortField.INT, true));
TopDocs topDocs = indexSearcher.search(query, null, 10000, sort);
for(int i=0;i<topDocs.scoreDocs.length;i++){
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
int docIndex = scoreDoc.doc;
Document document = indexSearcher.doc(docIndex);
System.out.println("编号为"+document.get("id")+"号的文章得分是" + scoreDoc.score);
Article article = LuceneUtils.document2javabean(document, Article.class);
list.add(article);
}
indexSearcher.close();
for(Article article : list){
System.out.println(article);
}
}
}
LuceneUtils.java
package com.rk.lucene.utils;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import com.rk.lucene.entity.Page;
public class LuceneUtils {
private static Directory directory;
private static Version version;
private static Analyzer analyzer;
private static MaxFieldLength maxFieldLength;
private static final String LUCENE_DIRECTORY= "D:/rk/indexDB";
static{
try {
directory = FSDirectory.open(new File(LUCENE_DIRECTORY));
version = Version.LUCENE_30;
analyzer = new StandardAnalyzer(version);
maxFieldLength = MaxFieldLength.LIMITED;
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
//不让外部new当前帮助类的对象
private LuceneUtils(){}
public static <T> void pagination(Page<T> page,String field,String keyword,Class<T> clazz) throws Exception{
QueryParser queryParser = new QueryParser(getVersion(), field, getAnalyzer());
Query query = queryParser.parse(keyword);
IndexSearcher indexSearcher = new IndexSearcher(getDirectory());
TopDocs topDocs = indexSearcher.search(query, 200);
int totalHits = topDocs.totalHits;
int curPage = page.getCurPage();
int pageSize = page.getPageSize();
int quotient = totalHits / pageSize;
int remainder = totalHits % pageSize;
int totalPages = remainder==0 ? quotient : quotient+1;
int startIndex = (curPage-1) * pageSize;
int stopIndex = Math.min(startIndex + pageSize, totalHits);
List<T> list = page.getItems();
if(list == null){
list = new ArrayList<T>();
page.setItems(list);
}
list.clear();
for(int i=startIndex;i<stopIndex;i++){
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
int docIndex = scoreDoc.doc;
Document document = indexSearcher.doc(docIndex);
T t = document2javabean(document, clazz);
list.add(t);
}
page.setTotalPages(totalPages);
page.setTotalItems(totalHits);
indexSearcher.close();
}
public static <T> void add(T t) throws Exception{
Document document = javabean2document(t);
IndexWriter indexWriter = new IndexWriter(getDirectory(), getAnalyzer(), getMaxFieldLength());
indexWriter.addDocument(document);
indexWriter.close();
}
public static <T> void addAll(List<T> list) throws Exception{
IndexWriter indexWriter = new IndexWriter(getDirectory(), getAnalyzer(), getMaxFieldLength());
for(T t : list){
Document doc = javabean2document(t);
indexWriter.addDocument(doc);
}
indexWriter.close();
}
public static <T> void update(String field,String value,T t) throws Exception{
Document document = javabean2document(t);
IndexWriter indexWriter = new IndexWriter(getDirectory(), getAnalyzer(), getMaxFieldLength());
indexWriter.updateDocument(new Term(field,value), document);
indexWriter.close();
}
public static <T> void delete(String field,String value) throws Exception{
IndexWriter indexWriter = new IndexWriter(getDirectory(), getAnalyzer(), getMaxFieldLength());
indexWriter.deleteDocuments(new Term(field,value));
indexWriter.close();
}
/**
* 删除所有记录
*/
public static void deleteAll() throws Exception {
IndexWriter indexWriter = new IndexWriter(getDirectory(), getAnalyzer(), getMaxFieldLength());
indexWriter.deleteAll();
indexWriter.close();
}
/**
* 根据关键字进行搜索
*/
public static <T> List<T> search(String field,String keyword,int topN,Class<T> clazz) throws Exception{
List<T> list = new ArrayList<T>();
QueryParser queryParser = new QueryParser(getVersion(), field, getAnalyzer());
Query query = queryParser.parse(keyword);
IndexSearcher indexSearcher = new IndexSearcher(getDirectory());
TopDocs topDocs = indexSearcher.search(query, topN);
for(int i=0;i<topDocs.scoreDocs.length;i++){
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
int docIndex = scoreDoc.doc;
System.out.println("文档索引号" + docIndex + ",文档得分:" + scoreDoc.score);
Document document = indexSearcher.doc(docIndex);
T entity = (T) document2javabean(document, clazz);
list.add(entity);
}
indexSearcher.close();
return list;
}
/**
* 打印List
*/
public static <T> void printList(List<T> list){
if(list != null && list.size()>0){
for(T t : list){
System.out.println(t);
}
}
}
//将JavaBean转成Document对象
public static Document javabean2document(Object obj) throws Exception{
//创建Document对象
Document document = new Document();
//获取obj引用的对象字节码
Class clazz = obj.getClass();
//通过对象字节码获取私有的属性
java.lang.reflect.Field[] reflectFields = clazz.getDeclaredFields();
//迭代
for(java.lang.reflect.Field reflectField : reflectFields){
//反射
reflectField.setAccessible(true);
//获取字段名
String name = reflectField.getName();
//获取字段值
String value = reflectField.get(obj).toString();
//加入到Document对象中去,这时javabean的属性与document对象的属性相同
document.add(new Field(name, value, Store.YES, Index.ANALYZED));
}
//返回document对象
return document;
}
//将Document对象转换成JavaBean对象
public static <T> T document2javabean(Document document,Class<T> clazz) throws Exception{
T obj = clazz.newInstance();
java.lang.reflect.Field[] reflectFields = clazz.getDeclaredFields();
for(java.lang.reflect.Field reflectField : reflectFields){
reflectField.setAccessible(true);
String name = reflectField.getName();
String value = document.get(name);
BeanUtils.setProperty(obj, name, value);
}
return obj;
}
public static Directory getDirectory() {
return directory;
}
public static void setDirectory(Directory directory) {
LuceneUtils.directory = directory;
}
public static Version getVersion() {
return version;
}
public static void setVersion(Version version) {
LuceneUtils.version = version;
}
public static Analyzer getAnalyzer() {
return analyzer;
}
public static void setAnalyzer(Analyzer analyzer) {
LuceneUtils.analyzer = analyzer;
}
public static MaxFieldLength getMaxFieldLength() {
return maxFieldLength;
}
public static void setMaxFieldLength(MaxFieldLength maxFieldLength) {
LuceneUtils.maxFieldLength = maxFieldLength;
}
}
转载于:https://blog.51cto.com/lsieun/1852886