1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.phrase;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
final class CandidateScorer {
private final WordScorer scorer;
private final int maxNumCorrections;
private final int gramSize;
public CandidateScorer(WordScorer scorer, int maxNumCorrections, int gramSize) {
this.scorer = scorer;
this.maxNumCorrections = maxNumCorrections;
this.gramSize = gramSize;
}
public Correction[] findBestCandiates(CandidateSet[] sets, float errorFraction, double cutoffScore) throws IOException {
if (sets.length == 0) {
return Correction.EMPTY;
}
PriorityQueue<Correction> corrections = new PriorityQueue<Correction>(maxNumCorrections) {
@Override
protected boolean lessThan(Correction a, Correction b) {
return a.compareTo(b) < 0;
}
};
int numMissspellings = 1;
if (errorFraction >= 1.0) {
numMissspellings = (int) errorFraction;
} else {
numMissspellings = Math.round(errorFraction * sets.length);
}
findCandidates(sets, new Candidate[sets.length], 0, Math.max(1, numMissspellings), corrections, cutoffScore, 0.0);
Correction[] result = new Correction[corrections.size()];
for (int i = result.length - 1; i >= 0; i--) {
result[i] = corrections.pop();
}
assert corrections.size() == 0;
return result;
}
public void findCandidates(CandidateSet[] candidates, Candidate[] path, int ord, int numMissspellingsLeft,
PriorityQueue<Correction> corrections, double cutoffScore, final double pathScore) throws IOException {
CandidateSet current = candidates[ord];
if (ord == candidates.length - 1) {
path[ord] = current.originalTerm;
updateTop(candidates, path, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
if (numMissspellingsLeft > 0) {
for (int i = 0; i < current.candidates.length; i++) {
path[ord] = current.candidates[i];
updateTop(candidates, path, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
}
}
} else {
if (numMissspellingsLeft > 0) {
path[ord] = current.originalTerm;
findCandidates(candidates, path, ord + 1, numMissspellingsLeft, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
for (int i = 0; i < current.candidates.length; i++) {
path[ord] = current.candidates[i];
findCandidates(candidates, path, ord + 1, numMissspellingsLeft - 1, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
}
} else {
path[ord] = current.originalTerm;
findCandidates(candidates, path, ord + 1, 0, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
}
}
}
private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue<Correction> corrections, double cutoffScore, double score)
throws IOException {
score = Math.exp(score);
assert Math.abs(score - score(path, candidates)) < 0.00001;
if (score > cutoffScore) {
if (corrections.size() < maxNumCorrections) {
Candidate[] c = new Candidate[candidates.length];
System.arraycopy(path, 0, c, 0, path.length);
corrections.add(new Correction(score, c));
} else if (corrections.top().compareTo(score, path) < 0) {
Correction top = corrections.top();
System.arraycopy(path, 0, top.candidates, 0, path.length);
top.score = score;
corrections.updateTop();
}
}
}
public double score(Candidate[] path, CandidateSet[] candidates) throws IOException {
double score = 0.0d;
for (int i = 0; i < candidates.length; i++) {
score += scorer.score(path, candidates, i, gramSize);
}
return Math.exp(score);
}
}
|