lucene+MongoDB+IKAnalyzer 做全文搜索
package sample3; import java.io.File; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.FSDirectory; import org.wltea.analyzer.lucene.IKAnalyzer; import com.mongodb.DB; import com.mongodb.DBCollection; import com.mongodb.DBCursor; import com.mongodb.Mongo; /** * 创建索引 * @author zhanghaijun * */ public class Demo1 { public static void main(String[] args) throws Exception { //先在数据库中拿到要创建索引的数据 Mongo mongo = new Mongo(); DB db = mongo.getDB("zhang"); DBCollection msg = db.getCollection("test3"); DBCursor cursor = msg.find(); //是否重新创建索引文件,false:在原有的基础上追加 boolean create = true; //IK中文分词器 Analyzer analyzer = new IKAnalyzer(); //创建索引 IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File("E:\lucene\index")), analyzer, create, IndexWriter.MaxFieldLength.UNLIMITED); boolean exist = cursor.hasNext(); while(exist){ //System.out.println(cursor.next().get("text").toString()); Document doc = new Document(); Field fieldText = new Field("text",cursor.next().get("text").toString(),Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(fieldText); exist = cursor.hasNext(); } cursor = null; //optimize()方法是对索引进行优化 indexWriter.optimize(); //最后关闭索引 indexWriter.close(); } }
package sample3; import java.io.File; import java.io.IOException; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.FSDirectory; import org.wltea.analyzer.lucene.IKAnalyzer; import org.wltea.analyzer.lucene.IKQueryParser; import org.wltea.analyzer.lucene.IKSimilarity; /** * 查找索引 */ public class Demo2 { public static void main(String[] args) throws Exception { // onlysearching, so read-only=true IndexReader reader =IndexReader.open(FSDirectory.open(new File("E:\lucene\index")), true); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new IKSimilarity()); //在索引器中使用IKSimilarity相似度评估器 //String[] keys = {"4","testtest"}; //关键字数组 //String[] fields = {"id","title"}; //搜索的字段 //BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST,BooleanClause.Occur.MUST}; //BooleanClause.Occur[]数组,它表示多个条件之间的关系 //使用 IKQueryParser类提供的parseMultiField方法构建多字段多条件查询 //Query query = IKQueryParser.parseMultiField(fields,keys, flags); //IKQueryParser多个字段搜索 Query query =IKQueryParser.parse("text","上海人"); //IK搜索单个字段 IKAnalyzer analyzer = new IKAnalyzer(); // Query query =MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, keys, fields, flags,analyzer); //用MultiFieldQueryParser得到query对象 // System.out.println("query"+query.toString()); //查询条件 TopScoreDocCollector topCollector = TopScoreDocCollector.create(searcher.maxDoc(), false); searcher.search(query,topCollector); ScoreDoc[] docs = topCollector.topDocs(3).scoreDocs; System.out.println(docs.length); reader.close(); //关闭索引 } }
结果:
Exception in thread "Main Thread" java.lang.IllegalArgumentException: numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count at org.apache.lucene.search.TopScoreDocCollector.create(TopScoreDocCollector.java:254) at org.apache.lucene.search.TopScoreDocCollector.create(TopScoreDocCollector.java:238) at sample3.Demo2.main(Demo2.java:35)
哪位朋友帮忙分析一下吧?我是照着网上的例子模仿写的,不知道哪里出了问题。
mongo中的一段数据 /* 0 */ { "_id" : ObjectId("500d1a96df1d4d3b58245f95"), "username" : "zhangsan7046", "text" : "我是上海人,我的工作是承续员" }
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(3)
searcher.search(query,topCollector);
这句代码用到的 topCollector 在创建时,传给 TopScoreDocCollector.create 的 numHits(这里是 searcher.maxDoc())不能小于等于 0
也很奇怪这个地方
起码这里有问题吧
创建索引的时候,得调用:indexWriter.addDocument(doc); 否则文档不会被写入索引。