結合源碼分析Solr&Lucene查詢打分的工作流程

來源:互聯網
上載者:User

solr中的搜尋打分是在QueryComponent中進行的。

在prepare中根據查詢的參數,QueryParser對查詢語句進行分詞,並產生Query對象樹。

    QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
    Query q = parser.getQuery();
    if (q == null) {
      // normalize a null query to a query that matches nothing
      q = new BooleanQuery();
    }

在process方法中,進行搜尋打分的過程

調用SolrIndexSearcher進行查詢,

    SolrIndexSearcher searcher = req.getSearcher();
    // normal search result
    searcher.search(result, cmd);

該調用最終會進入 search(Query query, Filter filter, Collector results)。

SolrIndexSearcher繼承lucene的IndexSearcher類,

最終調用IndexSearcher的search(Query query, Filter filter, Collector results):

    public void search(Query query, Filter filter, Collector results)
        throws IOException {
      // 在這個方法中,會先建立Weight樹,計算TermWeight
      search(leafContexts, createNormalizedWeight(wrapFilter(query, filter)), results);
    }

    protected void search(List<AtomicReaderContext> leaves, Weight weight, Collector collector)
        throws IOException {
      .........
      // 根據weight樹,構造Score對象樹,以及SumScore對象樹,為合并倒排表做準備
      Scorer scorer = weight.scorer(ctx, !collector.acceptsDocsOutOfOrder(), true, ctx.reader().getLiveDocs());
      if (scorer != null) {
        try {
          // 根據SumScorer對象樹,進行文檔的合并,收集文檔結果集合,並進行打分排名
          scorer.score(collector);
        } catch (CollectionTerminatedException e) {
          // collection was terminated prematurely
          // continue with the following leaf
        }
      }
    }

1、先看一下Weight對象樹的產生,

這一部分包括query的打分計算,參見紅色部分

IndexSearcher.createNormalizedWeight(Query query):

    // 重寫Query對象樹
    query = rewrite(query);
    // 建立weight對象樹,遞迴計算idf
    Weight weight = query.createWeight(this);
    // 計算Weight分數
    float v = weight.getValueForNormalization();
    // 計算queryNorm
    float norm = getSimilarity().queryNorm(v);
    if (Float.isInfinite(norm) || Float.isNaN(norm)) {
      norm = 1.0f;
    }
    // 將queryNorm的計算打分,遞迴調用weight
    weight.normalize(norm, 1.0f);

根據Query對象樹,遞迴地調用query對象節點的createWeight方法。比如BooleanQuery對應的是BooleanWeight對象,每個BooleanWeight包含weight對象數組,最終葉子節點為TermWeight對象:

    public TermWeight(IndexSearcher searcher, TermContext termStates)
        throws IOException {
      assert termStates != null : "TermContext must not be null";
      this.termStates = termStates;
      this.similarity = searcher.getSimilarity();
      // 計算idf
      this.stats = similarity.computeWeight(
          getBoost(),
          searcher.collectionStatistics(term.field()),
          searcher.termStatistics(term, termStates));
    }

    public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      final Explanation idf = termStats.length == 1
          ? idfExplain(collectionStats, termStats[0])
          : idfExplain(collectionStats, termStats);
      return new IDFStats(collectionStats.field(), idf, queryBoost);
    }

    public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
      final long df = termStats.docFreq();
      final long max = collectionStats.maxDoc();
      final float idf = idf(df, max);
      return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
    }

計算Weight分數:

    public float getValueForNormalization() throws IOException {
      float sum = 0.0f;
      for (int i = 0; i < weights.size(); i++) {
        // call sumOfSquaredWeights for all clauses in case of side effects
        float s = weights.get(i).getValueForNormalization();
        // sum sub weights
        if (!clauses.get(i).isProhibited())
          // only add to sum for non-prohibited clauses
          sum += s;
      }
      sum *= getBoost() * getBoost();  // boost each sub-weight
      return sum;
    }

2、根據weight樹,構造Score對象樹,以及SumScore對象樹,為合并倒排表做準備

    Scorer scorer = weight.scorer(ctx, !collector.acceptsDocsOutOfOrder(), true, ctx.reader().getLiveDocs());

BooleanWeight遞迴調用子節點weight.scorer建立score對象(以下為節選):

    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
        boolean topScorer, Bits acceptDocs)
        throws IOException {
      List<Scorer> required = new ArrayList<Scorer>();
      List<Scorer> prohibited = new ArrayList<Scorer>();
      List<Scorer> optional = new ArrayList<Scorer>();
      Iterator<BooleanClause> cIter = clauses.iterator();
      for (Weight w : weights) {
        BooleanClause c = cIter.next();
        Scorer subScorer = w.scorer(context, true, false, acceptDocs);
        required.add(subScorer);
        return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
      }

在建立BooleanScore2的過程中,計算coord(BooleanQuery$BooleanWeight.coord):

    public float coord(int overlap, int maxOverlap) {
      return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
    }

最終調用TermWeight.scorer方法,建立score對象:

    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
        boolean topScorer, Bits acceptDocs) throws IOException {
      assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
      final TermsEnum termsEnum = getTermsEnum(context);
      if (termsEnum == null) {
        return null;
      }
      // Term對應的docs
      DocsEnum docs = termsEnum.docs(acceptDocs, null);
      assert docs != null;
      // TermScorer負責doc的打分
      return new TermScorer(this, docs, similarity.simScorer(stats, context));
    }

    TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer) {
      super(weight);
      this.docScorer = docScorer;
      this.docsEnum = td;
    }

 

3、根據SumScorer對象樹,進行文檔的合并,收集文檔結果集合,並進行打分排名

    scorer.score(collector);

    public void score(Collector collector) throws IOException {
      assert docID() == -1; // not started
      collector.setScorer(this);
      int doc;
      // 在nextDoc的過程中合并document,合并倒排表是按照樹的結構進行,先合并子樹,子樹與子樹合并,一直到根
      while ((doc = nextDoc()) != NO_MORE_DOCS) {
        // 收集doc,並打分,根據文檔的打分,放入優先順序隊列(最小堆)中
        collector.collect(doc);
      }
    }

整個Score以及SumScorer對象樹的打分計算,最終會落到葉子節點TermScorer上。

TermScorer:

    @Override
    public float score() throws IOException {
      assert docID() != NO_MORE_DOCS;
      return docScorer.score(docsEnum.docID(), docsEnum.freq());
    }

打分計算公式:tf * norm * weightValue = tf * norm * queryNorm * idf^2 * t.getBoost()

TFIDFSimilarity$TFIDFSimScorer:

    @Override
    public float score(int doc, float freq) {
      // weight是在建立weight階段的query分詞的打分
      // 這一部分計算打分公式的tf部分,再乘以weight
      final float raw = tf(freq) * weightValue; // compute tf(f)*weight, weight=queryNorm * idf^2 * t.getBoost()
      return norms == null ? raw : raw * decodeNormValue(norms.get(doc)); // normalize for field, norm部分
    }

 

 

 

 

 

 

 

 

 

 

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.