summaryrefslogtreecommitdiff
path: root/core/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDAndVersionLookup.java
blob: 0cf8a520094a33f8cbf1eec1ec09dc6a3beb083b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package org.elasticsearch.common.lucene.uid;

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.lucene.uid.Versions.DocIdAndVersion;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.mapper.internal.VersionFieldMapper;


/**
 * Utility class to do efficient primary-key (only 1 doc contains the
 * given term) lookups by segment, re-using the enums.  This class is
 * not thread safe, so it is the caller's job to create and use one
 * instance of this per thread.  Do not use this if a term may appear
 * in more than one document!  Only a single match is ever returned:
 * the last live matching document when the version is read from doc
 * values (or is not stored at all), and the first live document with
 * an 8-byte payload when the version is read from payloads.
 */
final class PerThreadIDAndVersionLookup {
    // TODO: do we really need to store all this stuff? some of it might not speed up anything.
    // we keep it around for now, to reduce the amount of e.g. hash lookups by field and stuff

    /** terms enum for uid field; null when the segment has no terms for that field */
    private final TermsEnum termsEnum;
    /** _version data; null when the uid terms are missing or the reader returned no doc values for the version field */
    private final NumericDocValues versions;
    /** Whether the uid terms report payloads, i.e. versions may be stored as payloads instead of docvalues */
    private final boolean hasPayloads;
    /** Reused for iteration (when the term exists) */
    private PostingsEnum docsEnum;
    /** Only used for back compat, to lookup a version from payload */
    private PostingsEnum posEnum;

    /**
     * Initialize lookup for the provided segment.
     *
     * If the segment has no fields, or no terms for the uid field, the
     * lookup is left empty and {@link #lookup} always returns null.
     *
     * @param reader the segment reader to resolve uid terms and version doc values from
     * @throws IOException if reading the segment's fields, terms or doc values fails
     */
    public PerThreadIDAndVersionLookup(LeafReader reader) throws IOException {
        TermsEnum termsEnum = null;
        NumericDocValues versions = null;
        boolean hasPayloads = false;

        Fields fields = reader.fields();
        if (fields != null) {
            Terms terms = fields.terms(UidFieldMapper.NAME);
            if (terms != null) {
                hasPayloads = terms.hasPayloads();
                termsEnum = terms.iterator();
                assert termsEnum != null;
                versions = reader.getNumericDocValues(VersionFieldMapper.NAME);
            }
        }

        this.versions = versions;
        this.termsEnum = termsEnum;
        this.hasPayloads = hasPayloads;
    }

    /**
     * Look up the document and version for the given id in this segment.
     *
     * @param id       the uid term to seek to
     * @param liveDocs bits marking live documents; when null no document is filtered out
     * @param context  passed through unchanged into the returned {@link DocIdAndVersion}
     * @return the matching doc id and version, or null if id is not found
     *         (including when this segment has no uid terms at all)
     * @throws IOException if reading the postings fails
     */
    public DocIdAndVersion lookup(BytesRef id, Bits liveDocs, LeafReaderContext context) throws IOException {
        // The constructor leaves termsEnum null for segments without uid terms;
        // such a segment cannot contain the id, so report "not found" instead of failing.
        if (termsEnum == null || termsEnum.seekExact(id) == false) {
            return null;
        }

        if (versions != null || hasPayloads == false) {
            // No live document matched here means none will match in the payload
            // pass either (it walks the same postings with the same liveDocs filter),
            // so the result of this branch is final.
            return lookupWithoutPayloads(liveDocs, context);
        }

        // ... but used to be stored as payloads; in this case we must use PostingsEnum
        return lookupFromPayloads(liveDocs, context);
    }

    /** Resolve the last live doc for the current term, reading the version from doc values when available. */
    private DocIdAndVersion lookupWithoutPayloads(Bits liveDocs, LeafReaderContext context) throws IOException {
        // Use NDV to retrieve the version, in which case we only need PostingsEnum:

        // there may be more than one matching docID, in the case of nested docs, so we want the last one:
        docsEnum = termsEnum.postings(docsEnum, 0);
        int docID = DocIdSetIterator.NO_MORE_DOCS;
        for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
            if (liveDocs != null && liveDocs.get(d) == false) {
                continue;
            }
            docID = d;
        }

        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
            // every matching document is deleted
            return null;
        }
        if (versions != null) {
            return new DocIdAndVersion(docID, versions.get(docID), context);
        }
        // _uid found, but no doc values and no payloads
        return new DocIdAndVersion(docID, Versions.NOT_SET, context);
    }

    /** Resolve the first live doc for the current term that carries an 8-byte version payload. */
    private DocIdAndVersion lookupFromPayloads(Bits liveDocs, LeafReaderContext context) throws IOException {
        posEnum = termsEnum.postings(posEnum, PostingsEnum.PAYLOADS);
        assert posEnum != null; // terms has payloads
        for (int d = posEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = posEnum.nextDoc()) {
            if (liveDocs != null && liveDocs.get(d) == false) {
                continue;
            }
            // the payload is only readable after advancing to the position
            posEnum.nextPosition();
            final BytesRef payload = posEnum.getPayload();
            if (payload != null && payload.length == 8) {
                // TODO: does this break the nested docs case?  we are not returning the last matching docID here?
                return new DocIdAndVersion(d, Numbers.bytesToLong(payload), context);
            }
        }
        return null;
    }
}