Preface

In Lucene, a Term is the atomic unit of indexing and search. In ES, both keyword and text fields are analyzed into Terms, which Lucene then indexes and persists to disk. The Term-related index files are the Term Dictionary (.tim) and the Term Index (.tip), plus the associated postings files (.doc, .pos, .pay). This post focuses on how the Term-related data structures already built in memory are written out to disk.

This post follows the code at Lucene tag releases/lucene/9.1.0, commit 5b522487ba8e0f1002b50a136817ca037aec9686, and only covers writing the docIDs of the posting list; freq, position and payload are ignored.
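To see these files appear on disk, here is a minimal indexing sketch of my own (not from the post; it assumes Lucene 9.1.0 on the classpath and an arbitrary /tmp/term-files-demo directory, with the compound-file format disabled so the individual files stay visible):

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

public class TermFilesDemo {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer())
        .setUseCompoundFile(false); // keep .tim/.tip/.doc as separate files instead of a .cfs
    try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/term-files-demo"));
        IndexWriter writer = new IndexWriter(dir, config)) {
      Document doc = new Document();
      doc.add(new TextField("body", "lucene stores terms in a block tree", Field.Store.NO));
      writer.addDocument(doc);
      // Flushing the segment runs the writer code analyzed below and produces files like
      // _0.tim / _0.tip (term dictionary + index) and _0.doc / _0.pos (postings).
      writer.commit();
    }
  }
}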

Call Chain

TermsWriter.write -> Lucene90PostingsWriter.writeTerm (startTerm / startDoc / finishDoc / finishTerm), then TermsWriter.pushTerm -> writeBlocks -> writeBlock -> Lucene90PostingsWriter.encodeTerm; finally TermsWriter.finish builds the root FST index and saves it to the .tip file.

Source Code Analysis

The focus is the write path of Lucene90BlockTreeTermsWriter; the core function is TermsWriter.write.

  1. Lucene90BlockTreeTermsWriter.write -> TermsWriter.write
// Lucene90BlockTreeTermsWriter.TermsWriter.write
// TermsEnum represents all terms associated with this field
public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException {

// First write the postings for this term. postingsWriter is an instance of Lucene90PostingsWriter; this covers the docIDs and the skip list data.
BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms);
if (state != null) {

assert state.docFreq != 0;
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS
|| state.totalTermFreq >= state.docFreq
: "postingsWriter=" + postingsWriter;
pushTerm(text);

PendingTerm term = new PendingTerm(text, state);
pending.add(term);
// if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" +
// pending.size());

sumDocFreq += state.docFreq;
sumTotalTermFreq += state.totalTermFreq;
numTerms++;
if (firstPendingTerm == null) {
firstPendingTerm = term;
}
lastPendingTerm = term;
}
}
  2. PushPostingsWriterBase.writeTerm defines the overall writeTerm flow; Lucene90PostingsWriter extends PushPostingsWriterBase
 public final BlockTermState writeTerm(
BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen, NormsProducer norms)
throws IOException {
// norms are not covered in this post
NumericDocValues normValues;
if (fieldInfo.hasNorms() == false) {
normValues = null;
} else {
normValues = norms.getNorms(fieldInfo);
}
// Per-term initialization, including lastDocID and the skip list; the concrete method invoked is Lucene90PostingsWriter.startTerm
startTerm(normValues);
// Get the posting-list iterator for this term; this post only follows the docIDs in the postings
postingsEnum = termsEnum.postings(postingsEnum, enumFlags);
assert postingsEnum != null;

int docFreq = 0;
long totalTermFreq = 0;
while (true) {
int docID = postingsEnum.nextDoc();
if (docID == PostingsEnum.NO_MORE_DOCS) {
break;
}
docFreq++;
docsSeen.set(docID);
int freq;
if (writeFreqs) {
freq = postingsEnum.freq();
totalTermFreq += freq;
} else {
freq = -1;
}
// Core postings function: Lucene90PostingsWriter.startDoc
startDoc(docID, freq);

if (writePositions) {
for (int i = 0; i < freq; i++) {
int pos = postingsEnum.nextPosition();
BytesRef payload = writePayloads ? postingsEnum.getPayload() : null;
int startOffset;
int endOffset;
if (writeOffsets) {
startOffset = postingsEnum.startOffset();
endOffset = postingsEnum.endOffset();
} else {
startOffset = -1;
endOffset = -1;
}
addPosition(pos, payload, startOffset, endOffset);
}
}

finishDoc();
}

if (docFreq == 0) {
return null;
} else {
BlockTermState state = newTermState();
state.docFreq = docFreq;
state.totalTermFreq = writeFreqs ? totalTermFreq : -1;
finishTerm(state);
return state;
}
}
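The loop above is the write-time mirror of the normal read API. For reference, here is a small reader-side sketch of my own (not part of the writer code) that walks one term's postings with the same nextDoc()/freq() pattern, assuming an already-open LeafReader:

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

static void dumpPostings(LeafReader reader, String field, String term) throws IOException {
  Terms terms = reader.terms(field);
  if (terms == null) {
    return; // field is not indexed in this segment
  }
  TermsEnum termsEnum = terms.iterator();
  if (termsEnum.seekExact(new BytesRef(term)) == false) {
    return; // term does not exist
  }
  PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
  for (int doc = postings.nextDoc();
      doc != DocIdSetIterator.NO_MORE_DOCS;
      doc = postings.nextDoc()) {
    System.out.println("doc=" + doc + " freq=" + postings.freq());
  }
}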
  3. Lucene90PostingsWriter.startDoc
 public void startDoc(int docID, int termDocFreq) throws IOException {
// A full posting block has just been completed: buffer its skip data before starting the new block
if (lastBlockDocID != -1 && docBufferUpto == 0) {
skipWriter.bufferSkip(
lastBlockDocID,
competitiveFreqNormAccumulator,
docCount,
lastBlockPosFP,
lastBlockPayFP,
lastBlockPosBufferUpto,
lastBlockPayloadByteUpto);
competitiveFreqNormAccumulator.clear();
}

// Postings store docID deltas, and docIDs must be strictly increasing, which implies no duplicates (a doc appears at most once in a term's postings)
final int docDelta = docID - lastDocID;

if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
throw new CorruptIndexException(
"docs out of order (" + docID + " <= " + lastDocID + " )", docOut);
}

docDeltaBuffer[docBufferUpto] = docDelta;
if (writeFreqs) {
freqBuffer[docBufferUpto] = termDocFreq;
}
// per-block doc count
docBufferUpto++;
// overall doc count for this term
docCount++;

// Once BLOCK_SIZE (128) docs are buffered, encode and write them to docOut (the .doc file)
if (docBufferUpto == BLOCK_SIZE) {
pforUtil.encode(docDeltaBuffer, docOut);
if (writeFreqs) {
pforUtil.encode(freqBuffer, docOut);
}
// NOTE: don't set docBufferUpto back to 0 here;
// finishDoc will do so (because it needs to see that
// the block was filled so it can save skip data)
}

// update lastDocID
lastDocID = docID;
lastPosition = 0;
lastStartOffset = 0;

long norm;
if (fieldHasNorms) {
boolean found = norms.advanceExact(docID);
if (found == false) {
// This can happen if indexing hits a problem after adding a doc to the
// postings but before buffering the norm. Such documents are written
// deleted and will go away on the first merge.
norm = 1L;
} else {
norm = norms.longValue();
assert norm != 0 : docID;
}
} else {
norm = 1L;
}

competitiveFreqNormAccumulator.add(writeFreqs ? termDocFreq : 1, norm);
}
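A standalone sketch (my own simplification, not Lucene code) of the buffering done in startDoc: docIDs are turned into deltas and flushed in fixed blocks of 128, while the tail stays buffered for finishTerm. Here encodeBlock stands in for pforUtil.encode and docOut is omitted:

static final int BLOCK_SIZE = 128; // same block size Lucene90PostingsWriter uses

static void bufferDocIDs(int[] sortedDocIDs) {
  long[] docDeltaBuffer = new long[BLOCK_SIZE];
  int docBufferUpto = 0;
  int lastDocID = 0;
  for (int docID : sortedDocIDs) {
    docDeltaBuffer[docBufferUpto++] = docID - lastDocID; // postings store deltas, not absolute IDs
    lastDocID = docID;
    if (docBufferUpto == BLOCK_SIZE) {
      encodeBlock(docDeltaBuffer); // in Lucene: pforUtil.encode(docDeltaBuffer, docOut)
      docBufferUpto = 0;           // in Lucene this reset happens in finishDoc, not here
    }
  }
  // whatever is left (fewer than BLOCK_SIZE deltas) stays buffered; finishTerm writes it as vInts
}

static void encodeBlock(long[] deltas) {
  // placeholder for the PForUtil block encoding
}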
  4. Lucene90PostingsWriter.finishDoc
public void finishDoc() throws IOException {
// Since we don't know df for current term, we had to buffer
// those skip data for each block, and when a new doc comes,
// write them to skip file.
if (docBufferUpto == BLOCK_SIZE) {
// Skip data is recorded per block; the docID associated with a block is the largest docID in that block
lastBlockDocID = lastDocID;
if (posOut != null) {
if (payOut != null) {
lastBlockPayFP = payOut.getFilePointer();
}
lastBlockPosFP = posOut.getFilePointer();
lastBlockPosBufferUpto = posBufferUpto;
lastBlockPayloadByteUpto = payloadByteUpto;
}
docBufferUpto = 0;
}
}
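Conceptually, the skip data buffered here is one entry per finished 128-doc block, keyed by that block's largest docID, so that advance(target) can jump over whole blocks without decoding them. A minimal single-level sketch of my own (a simplification of the multi-level skip list Lucene actually writes):

record SkipEntry(int lastDocIDInBlock, long docFilePointer) {}

// Returns the last skip entry whose block lies entirely below target, i.e. the point
// from which sequential decoding has to resume; null means start from the beginning.
static SkipEntry findBlockToResumeAfter(java.util.List<SkipEntry> skips, int targetDocID) {
  SkipEntry candidate = null;
  for (SkipEntry entry : skips) {
    if (entry.lastDocIDInBlock() < targetDocID) {
      candidate = entry; // every doc in this block is below target, safe to skip past it
    } else {
      break; // skip entries are sorted by docID, no need to look further
    }
  }
  return candidate;
}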

  5. Lucene90PostingsWriter.finishTerm
 public void finishTerm(BlockTermState _state) throws IOException {
IntBlockTermState state = (IntBlockTermState) _state;
assert state.docFreq > 0;

// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert state.docFreq == docCount : state.docFreq + " vs " + docCount;

// docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to
// it.
final int singletonDocID;
if (state.docFreq == 1) {
// pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
singletonDocID = (int) docDeltaBuffer[0];
} else {
singletonDocID = -1;
// vInt encode the remaining doc deltas and freqs:
for (int i = 0; i < docBufferUpto; i++) {
final int docDelta = (int) docDeltaBuffer[i];
final int freq = (int) freqBuffer[i];
if (!writeFreqs) {
// Write the docDeltas of the final block (fewer than BLOCK_SIZE entries) to the .doc file
docOut.writeVInt(docDelta);
} else if (freq == 1) {
docOut.writeVInt((docDelta << 1) | 1);
} else {
docOut.writeVInt(docDelta << 1);
docOut.writeVInt(freq);
}
}
}

final long lastPosBlockOffset;

if (writePositions) {
// totalTermFreq is just total number of positions(or payloads, or offsets)
// associated with current term.
assert state.totalTermFreq != -1;
if (state.totalTermFreq > BLOCK_SIZE) {
// record file offset for last pos in last block
lastPosBlockOffset = posOut.getFilePointer() - posStartFP;
} else {
lastPosBlockOffset = -1;
}
if (posBufferUpto > 0) {
// TODO: should we send offsets/payloads to
// .pay...? seems wasteful (have to store extra
// vLong for low (< BLOCK_SIZE) DF terms = vast vast
// majority)

// vInt encode the remaining positions/payloads/offsets:
int lastPayloadLength = -1; // force first payload length to be written
int lastOffsetLength = -1; // force first offset length to be written
int payloadBytesReadUpto = 0;
for (int i = 0; i < posBufferUpto; i++) {
final int posDelta = (int) posDeltaBuffer[i];
if (writePayloads) {
final int payloadLength = (int) payloadLengthBuffer[i];
if (payloadLength != lastPayloadLength) {
lastPayloadLength = payloadLength;
posOut.writeVInt((posDelta << 1) | 1);
posOut.writeVInt(payloadLength);
} else {
posOut.writeVInt(posDelta << 1);
}

if (payloadLength != 0) {
posOut.writeBytes(payloadBytes, payloadBytesReadUpto, payloadLength);
payloadBytesReadUpto += payloadLength;
}
} else {
posOut.writeVInt(posDelta);
}

if (writeOffsets) {
int delta = (int) offsetStartDeltaBuffer[i];
int length = (int) offsetLengthBuffer[i];
if (length == lastOffsetLength) {
posOut.writeVInt(delta << 1);
} else {
posOut.writeVInt(delta << 1 | 1);
posOut.writeVInt(length);
lastOffsetLength = length;
}
}
}

if (writePayloads) {
assert payloadBytesReadUpto == payloadByteUpto;
payloadByteUpto = 0;
}
}
} else {
lastPosBlockOffset = -1;
}

long skipOffset;
// Write the skip list data; note that it is written only after the postings of this term
if (docCount > BLOCK_SIZE) {
skipOffset = skipWriter.writeSkip(docOut) - docStartFP;
} else {
skipOffset = -1;
}

state.docStartFP = docStartFP;
state.posStartFP = posStartFP;
state.payStartFP = payStartFP;
state.singletonDocID = singletonDocID;
state.skipOffset = skipOffset;
state.lastPosBlockOffset = lastPosBlockOffset;
docBufferUpto = 0;
posBufferUpto = 0;
lastDocID = 0;
docCount = 0;
}
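The tail encoding at the top of finishTerm packs a flag into the low bit of the doc delta: a set bit means freq == 1 and no separate freq vInt follows. A matching read-side sketch of my own (not Lucene90PostingsReader):

import java.io.IOException;
import org.apache.lucene.store.DataInput;

// Returns {docDelta, freq} for one tail entry written by finishTerm.
static int[] readTailEntry(DataInput docIn, boolean fieldHasFreqs) throws IOException {
  if (fieldHasFreqs == false) {
    return new int[] {docIn.readVInt(), 1};
  }
  int code = docIn.readVInt();
  int docDelta = code >>> 1;                         // upper bits carry the delta
  int freq = (code & 1) != 0 ? 1 : docIn.readVInt(); // low bit set: freq is implicitly 1
  return new int[] {docDelta, freq};
}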
  6. Lucene90BlockTreeTermsWriter.TermsWriter.pushTerm
 private void pushTerm(BytesRef text) throws IOException {
// Find common prefix between last term and current term:
int prefixLength =
Arrays.mismatch(
lastTerm.bytes(),
0,
lastTerm.length(),
text.bytes,
text.offset,
text.offset + text.length);
if (prefixLength == -1) { // Only happens for the first term, if it is empty
assert lastTerm.length() == 0;
prefixLength = 0;
}

// if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length);

// Close the "abandoned" suffix now:
for (int i = lastTerm.length() - 1; i >= prefixLength; i--) {

// How many items on top of the stack share the current suffix
// we are closing:
int prefixTopSize = pending.size() - prefixStarts[i];
if (prefixTopSize >= minItemsInBlock) {
// if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + "
// minItemsInBlock=" + minItemsInBlock);
// Key step: build the FST-based index for the blocks under this prefix
writeBlocks(i + 1, prefixTopSize);
prefixStarts[i] -= prefixTopSize - 1;
}
}

if (prefixStarts.length < text.length) {
prefixStarts = ArrayUtil.grow(prefixStarts, text.length);
}

// Init new tail:
for (int i = prefixLength; i < text.length; i++) {
prefixStarts[i] = pending.size();
}

lastTerm.copyBytes(text);
}
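The prefix computation relies on Arrays.mismatch over the raw term bytes; because terms arrive in sorted order, the shared prefix with the previous term decides which pending suffixes can be closed. A small sketch of my own isolating just that computation:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

static int sharedPrefixLength(byte[] lastTerm, byte[] currentTerm) {
  int mismatch = Arrays.mismatch(lastTerm, 0, lastTerm.length, currentTerm, 0, currentTerm.length);
  // -1 means the two ranges are identical; otherwise mismatch is the shared prefix length
  return mismatch == -1 ? lastTerm.length : mismatch;
}

// sharedPrefixLength("foobar", "foofight") == 3, so pushTerm would consider closing the
// pending entries hanging below the prefixes "foobar", "fooba" and "foob".
static int demo() {
  return sharedPrefixLength(
      "foobar".getBytes(StandardCharsets.UTF_8),
      "foofight".getBytes(StandardCharsets.UTF_8));
}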
  7. Lucene90BlockTreeTermsWriter.TermsWriter.writeBlocks
void writeBlocks(int prefixLength, int count) throws IOException {

assert count > 0;

// if (DEBUG2) {
// BytesRef br = new BytesRef(lastTerm.bytes());
// br.length = prefixLength;
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count="
// + count);
// }

// Root block better write all remaining pending entries:
assert prefixLength > 0 || count == pending.size();

int lastSuffixLeadLabel = -1;

// True if we saw at least one term in this block (we record if a block
// only points to sub-blocks in the terms index so we can avoid seeking
// to it when we are looking for a term):
boolean hasTerms = false;
boolean hasSubBlocks = false;

int start = pending.size() - count;
int end = pending.size();
int nextBlockStart = start;
int nextFloorLeadLabel = -1;

for (int i = start; i < end; i++) {

PendingEntry ent = pending.get(i);

int suffixLeadLabel;

if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;
if (term.termBytes.length == prefixLength) {
// Suffix is 0, i.e. prefix 'foo' and term is
// 'foo' so the term has empty string suffix
// in this block
assert lastSuffixLeadLabel == -1
: "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel;
suffixLeadLabel = -1;
} else {
suffixLeadLabel = term.termBytes[prefixLength] & 0xff;
}
} else {
PendingBlock block = (PendingBlock) ent;
assert block.prefix.length > prefixLength;
suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff;
}
// if (DEBUG) System.out.println(" i=" + i + " ent=" + ent + " suffixLeadLabel=" +
// suffixLeadLabel);

if (suffixLeadLabel != lastSuffixLeadLabel) {
int itemsInBlock = i - nextBlockStart;
if (itemsInBlock >= minItemsInBlock && end - nextBlockStart > maxItemsInBlock) {
// The count is too large for one block, so we must break it into "floor" blocks, where
// we record
// the leading label of the suffix of the first term in each floor block, so at search
// time we can
// jump to the right floor block. We just use a naive greedy segmenter here: make a new
// floor
// block as soon as we have at least minItemsInBlock. This is not always best: it often
// produces
// a too-small block as the final block:
boolean isFloor = itemsInBlock < count;
newBlocks.add(
writeBlock(
prefixLength,
isFloor,
nextFloorLeadLabel,
nextBlockStart,
i,
hasTerms,
hasSubBlocks));

hasTerms = false;
hasSubBlocks = false;
nextFloorLeadLabel = suffixLeadLabel;
nextBlockStart = i;
}

lastSuffixLeadLabel = suffixLeadLabel;
}

if (ent.isTerm) {
hasTerms = true;
} else {
hasSubBlocks = true;
}
}

// Write last block, if any:
if (nextBlockStart < end) {
int itemsInBlock = end - nextBlockStart;
boolean isFloor = itemsInBlock < count;
newBlocks.add(
writeBlock(
prefixLength,
isFloor,
nextFloorLeadLabel,
nextBlockStart,
end,
hasTerms,
hasSubBlocks));
}

assert newBlocks.isEmpty() == false;

PendingBlock firstBlock = newBlocks.get(0);

assert firstBlock.isFloor || newBlocks.size() == 1;

// NOTE: compile the FST index covering the blocks just written
firstBlock.compileIndex(newBlocks, scratchBytes, scratchIntsRef);

// Remove slice from the top of the pending stack, that we just wrote:
pending.subList(pending.size() - count, pending.size()).clear();

// Append new block
pending.add(firstBlock);

newBlocks.clear();
}
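The floor-block logic can be isolated as a greedy segmentation over the suffix lead bytes. A standalone sketch of my own (a simplification), using the default limits minItemsInBlock=25 and maxItemsInBlock=48: a new floor block is cut only on a lead-label boundary, once at least minItemsInBlock entries have accumulated and more than maxItemsInBlock entries remain from the current block start:

// Returns the start index of each floor block for one shared prefix, given the suffix
// lead label (first byte after the prefix, or -1 for the empty suffix) of every entry.
static java.util.List<Integer> floorBlockStarts(int[] suffixLeadLabels, int minItems, int maxItems) {
  java.util.List<Integer> starts = new java.util.ArrayList<>();
  starts.add(0);
  int blockStart = 0;
  int lastLabel = -2; // sentinel distinct from the -1 used for the empty suffix
  for (int i = 0; i < suffixLeadLabels.length; i++) {
    if (suffixLeadLabels[i] != lastLabel) {
      // only cut on a label boundary, mirroring the condition in writeBlocks
      if (i - blockStart >= minItems && suffixLeadLabels.length - blockStart > maxItems) {
        starts.add(i);
        blockStart = i;
      }
      lastLabel = suffixLeadLabels[i];
    }
  }
  return starts; // the final block runs from the last start index to the end
}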
  8. Lucene90BlockTreeTermsWriter.TermsWriter.writeBlock
    PendingBlock records the block's start position (startFP) in the .tim file; this pointer later becomes the value the FST index maps to.
private PendingBlock writeBlock(
int prefixLength,
boolean isFloor,
int floorLeadLabel,
int start,
int end,
boolean hasTerms,
boolean hasSubBlocks)
throws IOException {
// ....
//
if (isLeafBlock) {
subIndices = null;
StatsWriter statsWriter =
new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS);
for (int i = start; i < end; i++) {
PendingEntry ent = pending.get(i);
assert ent.isTerm : "i=" + i;

PendingTerm term = (PendingTerm) ent;

assert StringHelper.startsWith(term.termBytes, prefix) : term + " prefix=" + prefix;
BlockTermState state = term.state;
final int suffix = term.termBytes.length - prefixLength;
// if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + brToString(suffixBytes));
// }

// For leaf block we write suffix straight
suffixLengthsWriter.writeVInt(suffix);
suffixWriter.append(term.termBytes, prefixLength, suffix);
assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;

// Write term stats, to separate byte[] blob:
statsWriter.add(state.docFreq, state.totalTermFreq);

// Write term meta data
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
}

} else {
// Block has at least one prefix term or a sub block:
subIndices = new ArrayList<>();
StatsWriter statsWriter =
new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS);
for (int i = start; i < end; i++) {
PendingEntry ent = pending.get(i);
if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;

assert StringHelper.startsWith(term.termBytes, prefix) : term + " prefix=" + prefix;
BlockTermState state = term.state;
final int suffix = term.termBytes.length - prefixLength;

// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block, and 1 bit to record if
// it's a prefix term. Terms cannot be larger than ~32 KB
// so we won't run out of bits:

suffixLengthsWriter.writeVInt(suffix << 1);
suffixWriter.append(term.termBytes, prefixLength, suffix);

// Write term stats, to separate byte[] blob:
statsWriter.add(state.docFreq, state.totalTermFreq);

// TODO: now that terms dict "sees" these longs,
// we can explore better column-stride encodings
// to encode all long[0]s for this block at
// once, all long[1]s, etc., e.g. using
// Simple64. Alternatively, we could interleave
// stats + meta ... no reason to have them
// separate anymore:

// Write term meta data
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
} else {
PendingBlock block = (PendingBlock) ent;
assert StringHelper.startsWith(block.prefix, prefix);
final int suffix = block.prefix.length - prefixLength;
assert StringHelper.startsWith(block.prefix, prefix);

assert suffix > 0;

// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block:f
suffixLengthsWriter.writeVInt((suffix << 1) | 1);
suffixWriter.append(block.prefix.bytes, prefixLength, suffix);

// if (DEBUG2) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + "
// subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }

assert floorLeadLabel == -1
|| (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel
: "floorLeadLabel="
+ floorLeadLabel
+ " suffixLead="
+ (block.prefix.bytes[prefixLength] & 0xff);
assert block.fp < startFP;

suffixLengthsWriter.writeVLong(startFP - block.fp);
subIndices.add(block.index);
}
}
statsWriter.finish();

assert subIndices.size() != 0;
}
// ...
// Write term meta data byte[] blob
termsOut.writeVInt((int) metaWriter.size());
// written into the .tim file
metaWriter.copyTo(termsOut);
metaWriter.reset();
}
  9. Lucene90PostingsWriter.encodeTerm
// Here the DataOutput is a ByteBuffersDataOutput, an in-memory structure; it records, for each Term, the file positions of its postings, and is eventually written into the .tim file
public void encodeTerm(
DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute)
throws IOException {
IntBlockTermState state = (IntBlockTermState) _state;
if (absolute) {
lastState = emptyState;
assert lastState.docStartFP == 0;
}

if (lastState.singletonDocID != -1
&& state.singletonDocID != -1
&& state.docStartFP == lastState.docStartFP) {
// With runs of rare values such as ID fields, the increment of pointers in the docs file is
// often 0.
// Furthermore some ID schemes like auto-increment IDs or Flake IDs are monotonic, so we
// encode the delta
// between consecutive doc IDs to save space.
final long delta = (long) state.singletonDocID - lastState.singletonDocID;
out.writeVLong((BitUtil.zigZagEncode(delta) << 1) | 0x01);
} else {
out.writeVLong((state.docStartFP - lastState.docStartFP) << 1);
if (state.singletonDocID != -1) {
out.writeVInt(state.singletonDocID);
}
}

if (writePositions) {
out.writeVLong(state.posStartFP - lastState.posStartFP);
if (writePayloads || writeOffsets) {
out.writeVLong(state.payStartFP - lastState.payStartFP);
}
}
if (writePositions) {
if (state.lastPosBlockOffset != -1) {
out.writeVLong(state.lastPosBlockOffset);
}
}
if (state.skipOffset != -1) {
out.writeVLong(state.skipOffset);
}
lastState = state;
}
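The singleton-docID branch above zigzag-encodes the delta so that negative deltas also become small unsigned numbers before vLong encoding. A sketch of the transform (Lucene's own version lives in org.apache.lucene.util.BitUtil):

static long zigZagEncode(long l) {
  return (l >> 63) ^ (l << 1); // 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
}

static long zigZagDecode(long l) {
  return (l >>> 1) ^ -(l & 1);
}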
  10. Lucene90BlockTreeTermsWriter.TermsWriter.finish
public void finish() throws IOException {
if (numTerms > 0) {
// if (DEBUG) System.out.println("BTTW: finish prefixStarts=" +
// Arrays.toString(prefixStarts));

// Add empty term to force closing of all final blocks:
pushTerm(new BytesRef());

// TODO: if pending.size() is already 1 with a non-zero prefix length
// we can save writing a "degenerate" root block, but we have to
// fix all the places that assume the root block's prefix is the empty string:
pushTerm(new BytesRef());
// Build the FST index over all remaining Terms; pending.get(0) is the root block, whose index covers all pending entries
writeBlocks(0, pending.size());

// We better have one final "root" block:
assert pending.size() == 1 && !pending.get(0).isTerm
: "pending.size()=" + pending.size() + " pending=" + pending;
final PendingBlock root = (PendingBlock) pending.get(0);
assert root.prefix.length == 0;
final BytesRef rootCode = root.index.getEmptyOutput();
assert rootCode != null;

ByteBuffersDataOutput metaOut = new ByteBuffersDataOutput();
fields.add(metaOut);

metaOut.writeVInt(fieldInfo.number);
metaOut.writeVLong(numTerms);
metaOut.writeVInt(rootCode.length);
metaOut.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length);
assert fieldInfo.getIndexOptions() != IndexOptions.NONE;
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
metaOut.writeVLong(sumTotalTermFreq);
}
metaOut.writeVLong(sumDocFreq);
metaOut.writeVInt(docsSeen.cardinality());
writeBytesRef(metaOut, new BytesRef(firstPendingTerm.termBytes));
writeBytesRef(metaOut, new BytesRef(lastPendingTerm.termBytes));
metaOut.writeVLong(indexOut.getFilePointer());
// Write FST to index
// indexOut here is the .tip file
root.index.save(metaOut, indexOut);
// System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name);

/*
if (DEBUG) {
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
Util.toDot(root.index, w, false, false);
System.out.println("SAVED to " + dotFileName);
w.close();
}
*/

} else {
assert sumTotalTermFreq == 0
|| fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1;
assert sumDocFreq == 0;
assert docsSeen.cardinality() == 0;
}
}
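To make the role of the FST concrete, here is a tiny self-contained sketch of my own, assuming Lucene 9.x's FSTCompiler API, with made-up prefixes and file pointers. The .tip index is essentially such a map from block prefixes to outputs that locate the block in .tim (the real block tree stores byte-sequence outputs encoding the file pointer plus flags, while this sketch uses plain long outputs):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class TinyTermIndex {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    FSTCompiler<Long> compiler =
        new FSTCompiler.Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs).build();
    IntsRefBuilder scratch = new IntsRefBuilder();
    // Inputs must be added in sorted order, just like terms arrive at the writer.
    String[] blockPrefixes = {"app", "apple", "apply", "banana"};
    long[] fakeTimFilePointers = {16L, 100L, 230L, 512L}; // made-up stand-ins for startFP
    for (int i = 0; i < blockPrefixes.length; i++) {
      compiler.add(Util.toIntsRef(new BytesRef(blockPrefixes[i]), scratch), fakeTimFilePointers[i]);
    }
    FST<Long> fst = compiler.compile();
    System.out.println(Util.get(fst, new BytesRef("apply"))); // prints 230
  }
}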

Summary

Lucene's overall inverted-index design is quite complex; just writing the in-memory data to disk already takes this many steps. At the same time Lucene keeps clean abstractions: a different index format only needs to implement the corresponding interfaces and can be plugged into the engine as a codec.

My remaining questions: how do we evaluate the performance impact of these design choices? Does the FST design really need to be this complex just to locate a term's posting list on disk, and are there other data structures that could serve the same purpose? I hope to dig deeper and find better answers as I continue studying.