Solr 3.3.0 - 增加搜索中的字段权重 - 提高字段优先级

发布于 2024-12-09 12:57:59 字数 10428 浏览 1 评论 0原文

我正在使用 solr 3.3.0 索引这些文档:

<book id="bk101">
        <keywords>----</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>abcde</artist>
    </book>
    <book id="bk102">
        <keywords>----</keywords>
        <title>----</title>
        <owner>abcde</owner>
        <artist>----</artist>
    </book>
    <book id="bk103">
        <keywords>----</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>abc</artist>
    </book>
    <book id="bk104">
        <keywords>----</keywords>
        <title>----</title>
        <owner>abc</owner>
        <artist>----</artist>
    </book>
    <book id="bk105">
        <keywords>abcde</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>
    <book id="bk106">
        <keywords>abc</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>
    <book id="bk107">
        <keywords>----</keywords>
        <title>abcde</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>
    <book id="bk108">
        <keywords>----</keywords>
        <title>abc</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>

Schema.xml

<types>
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" splitOnNumerics="0" />
        <filter class="solr.LowerCaseFilterFactory" />
        <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" />
        <filter class="solr.ISOLatin1AccentFilterFactory" />
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front" />
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="back" />
    </fieldType>

    <fieldType name="text_original" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
        <tokenizer class="solr.KeywordTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
    </fieldType>

    <fieldType name="uuid" class="solr.UUIDField" indexed="true" />
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" />
    <fieldType name="float" class="solr.FloatField" omitNorms="true" />
    <fieldType name="date" class="solr.DateField" />
</types>

<fields>
    <field name="id" type="uuid" indexed="true" stored="true" default="NEW" />
    <field name="book_id" type="string" indexed="true" stored="true" />
    <field name="keywords" type="text" indexed="true" stored="true" />
    <field name="keywords_ex" type="text_original" indexed="true" stored="true" />
    <field name="title" type="text" indexed="true" stored="true" />
    <field name="title_ex" type="text_original" indexed="true" stored="true" />
    <field name="owner" type="text" indexed="true" stored="true" />
    <field name="owner_ex" type="text_original" indexed="true" stored="true" />
    <field name="artist" type="text" indexed="true" stored="true" />
    <field name="artist_ex" type="text_original" indexed="true" stored="true" />

    <copyField source="title" dest="title_ex" />
    <copyField source="keywords" dest="keywords_ex" />
    <copyField source="owner" dest="owner_ex" />
    <copyField source="artist" dest="artist_ex" />

</fields>

如您所见,如果我搜索 'abc',solr 会返回所有文档(我正在使用 solr.EdgeNGramFilterFactory 进行索引);我的目标是让结果文档按以下字段优先级排序:

  1. title_ex
  2. title
  3. keywords_ex
  4. keywords
  5. owner_ex
  6. artist_ex
  7. owner
  8. artist

我尝试了“^”lucene 语法和 dismax,但响应列表不是我所预期的。

`http://localhost:8080/solr33a/core0/select?q=abc&defType=edismax&qf=title_ex^10%20title^8.0%20keywords_ex^6%20keywords^5.5%20owner_ex^1.2%20artist_ex^0.8%20owner^0.5%20artist^0.2&fl=*,score`

结果:

<doc>
            <float name="score">2.3862944</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk108</str>
            <str name="id">2cc5d478-6901-4777-abc9-680fd959ef90</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">abc</str>
            <str name="title_ex">abc</str>
        </doc>
        <doc>
            <float name="score">1.4317766</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk106</str>
            <str name="id">e12683a2-faff-4d86-8107-7406491f4f89</str>
            <str name="keywords">abc</str>
            <str name="keywords_ex">abc</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.3288517</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk107</str>
            <str name="id">a6a4a014-ce94-4257-a215-c1a64aa41cf5</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">abcde</str>
            <str name="title_ex">abcde</str>
        </doc>
        <doc>
            <float name="score">0.28635535</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk104</str>
            <str name="id">9b909c65-e56a-4407-a789-53a570a7ae40</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">abc</str>
            <str name="owner_ex">abc</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.22608554</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk105</str>
            <str name="id">dead87cc-f93b-4562-af32-4d9fb2613c7f</str>
            <str name="keywords">abcde</str>
            <str name="keywords_ex">abcde</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.19090356</float>
            <str name="artist">abc</str>
            <str name="artist_ex">abc</str>
            <str name="book_id">bk103</str>
            <str name="id">306a252c-a0b5-474d-b55d-a25740d063b4</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.020553231</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk102</str>
            <str name="id">a684de0c-b286-4d9e-bd68-d5305afeee76</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">abcde</str>
            <str name="owner_ex">abcde</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.008221293</float>
            <str name="artist">abcde</str>
            <str name="artist_ex">abcde</str>
            <str name="book_id">bk101</str>
            <str name="id">30a0f9de-1224-49d2-90aa-41f57af4956c</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>

I'm indexing with solr 3.3.0 these docs:

<book id="bk101">
        <keywords>----</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>abcde</artist>
    </book>
    <book id="bk102">
        <keywords>----</keywords>
        <title>----</title>
        <owner>abcde</owner>
        <artist>----</artist>
    </book>
    <book id="bk103">
        <keywords>----</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>abc</artist>
    </book>
    <book id="bk104">
        <keywords>----</keywords>
        <title>----</title>
        <owner>abc</owner>
        <artist>----</artist>
    </book>
    <book id="bk105">
        <keywords>abcde</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>
    <book id="bk106">
        <keywords>abc</keywords>
        <title>----</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>
    <book id="bk107">
        <keywords>----</keywords>
        <title>abcde</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>
    <book id="bk108">
        <keywords>----</keywords>
        <title>abc</title>
        <owner>----</owner>
        <artist>----</artist>
    </book>

Schema.xml

<types>
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0" splitOnNumerics="0" />
        <filter class="solr.LowerCaseFilterFactory" />
        <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true" />
        <filter class="solr.ISOLatin1AccentFilterFactory" />
        <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="front" />
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15" side="back" />
    </fieldType>

    <fieldType name="text_original" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
        <tokenizer class="solr.KeywordTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
    </fieldType>

    <fieldType name="uuid" class="solr.UUIDField" indexed="true" />
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" />
    <fieldType name="float" class="solr.FloatField" omitNorms="true" />
    <fieldType name="date" class="solr.DateField" />
</types>

<fields>
    <field name="id" type="uuid" indexed="true" stored="true" default="NEW" />
    <field name="book_id" type="string" indexed="true" stored="true" />
    <field name="keywords" type="text" indexed="true" stored="true" />
    <field name="keywords_ex" type="text_original" indexed="true" stored="true" />
    <field name="title" type="text" indexed="true" stored="true" />
    <field name="title_ex" type="text_original" indexed="true" stored="true" />
    <field name="owner" type="text" indexed="true" stored="true" />
    <field name="owner_ex" type="text_original" indexed="true" stored="true" />
    <field name="artist" type="text" indexed="true" stored="true" />
    <field name="artist_ex" type="text_original" indexed="true" stored="true" />

    <copyField source="title" dest="title_ex" />
    <copyField source="keywords" dest="keywords_ex" />
    <copyField source="owner" dest="owner_ex" />
    <copyField source="artist" dest="artist_ex" />

</fields>

As you can see, if I search for 'abc', Solr returns all docs (I'm using solr.EdgeNGramFilterFactory for indexing); my goal is to get the resulting documents sorted by field priority, in this order:

  1. title_ex
  2. title
  3. keywords_ex
  4. keywords
  5. owner_ex
  6. artist_ex
  7. owner
  8. artist

I tried the "^" Lucene syntax and dismax, but the response list isn't the one I expected.

`http://localhost:8080/solr33a/core0/select?q=abc&defType=edismax&qf=title_ex^10%20title^8.0%20keywords_ex^6%20keywords^5.5%20owner_ex^1.2%20artist_ex^0.8%20owner^0.5%20artist^0.2&fl=*,score`

Result:

<doc>
            <float name="score">2.3862944</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk108</str>
            <str name="id">2cc5d478-6901-4777-abc9-680fd959ef90</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">abc</str>
            <str name="title_ex">abc</str>
        </doc>
        <doc>
            <float name="score">1.4317766</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk106</str>
            <str name="id">e12683a2-faff-4d86-8107-7406491f4f89</str>
            <str name="keywords">abc</str>
            <str name="keywords_ex">abc</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.3288517</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk107</str>
            <str name="id">a6a4a014-ce94-4257-a215-c1a64aa41cf5</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">abcde</str>
            <str name="title_ex">abcde</str>
        </doc>
        <doc>
            <float name="score">0.28635535</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk104</str>
            <str name="id">9b909c65-e56a-4407-a789-53a570a7ae40</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">abc</str>
            <str name="owner_ex">abc</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.22608554</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk105</str>
            <str name="id">dead87cc-f93b-4562-af32-4d9fb2613c7f</str>
            <str name="keywords">abcde</str>
            <str name="keywords_ex">abcde</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.19090356</float>
            <str name="artist">abc</str>
            <str name="artist_ex">abc</str>
            <str name="book_id">bk103</str>
            <str name="id">306a252c-a0b5-474d-b55d-a25740d063b4</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.020553231</float>
            <str name="artist">----</str>
            <str name="artist_ex">----</str>
            <str name="book_id">bk102</str>
            <str name="id">a684de0c-b286-4d9e-bd68-d5305afeee76</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">abcde</str>
            <str name="owner_ex">abcde</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>
        <doc>
            <float name="score">0.008221293</float>
            <str name="artist">abcde</str>
            <str name="artist_ex">abcde</str>
            <str name="book_id">bk101</str>
            <str name="id">30a0f9de-1224-49d2-90aa-41f57af4956c</str>
            <str name="keywords">----</str>
            <str name="keywords_ex">----</str>
            <str name="owner">----</str>
            <str name="owner_ex">----</str>
            <str name="title">----</str>
            <str name="title_ex">----</str>
        </doc>

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

终止放荡 2024-12-16 12:57:59

当您想要使用可变权重跨多个字段进行搜索时,请将您的请求处理程序配置为使用 edismax 查询解析器。
dismax 允许您跨字段搜索并为每个字段添加权重。

例子
标题匹配的权重为 1,而作者匹配的权重为 0.8,因此标题匹配的文档将出现在顶部。

<requestHandler name="search" class="solr.SearchHandler" default="true">
 <lst name="defaults">
   <str name="echoParams">explicit</str>
   <str name="defType">edismax</str>
   <str name="qf">
      title^1 author^0.8
   </str>
   <str name="q.alt">*:*</str>
   <str name="rows">10</str>
   <str name="fl">*,score</str>
 </lst>
</requestHandler>

Since you want to search across multiple fields with different weights, configure your request handler to use the edismax query parser.
dismax allows you to search across fields and assign a weight to each field.

Example
A title match has weight 1, while a match on author has weight 0.8, so documents with title matches appear at the top.

<requestHandler name="search" class="solr.SearchHandler" default="true">
 <lst name="defaults">
   <str name="echoParams">explicit</str>
   <str name="defType">edismax</str>
   <str name="qf">
      title^1 author^0.8
   </str>
   <str name="q.alt">*:*</str>
   <str name="rows">10</str>
   <str name="fl">*,score</str>
 </lst>
</requestHandler>
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文