org.apache.lucene.util.fst.ByteSequenceOutputs Java Examples
The following examples show how to use
org.apache.lucene.util.fst.ByteSequenceOutputs.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NRTSuggester.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Loads a {@link NRTSuggester} from {@link org.apache.lucene.store.IndexInput} on or off-heap * depending on the provided <code>fstLoadMode</code> */ public static NRTSuggester load(IndexInput input, FSTLoadMode fstLoadMode) throws IOException { final FST<Pair<Long, BytesRef>> fst; if (shouldLoadFSTOffHeap(input, fstLoadMode)) { OffHeapFSTStore store = new OffHeapFSTStore(); IndexInput clone = input.clone(); clone.seek(input.getFilePointer()); fst = new FST<>(clone, clone, new PairOutputs<>( PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()), store); input.seek(clone.getFilePointer() + store.size()); } else { fst = new FST<>(input, input, new PairOutputs<>( PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); } /* read some meta info */ int maxAnalyzedPathsPerOutput = input.readVInt(); /* * Label used to denote the end of an input in the FST and * the beginning of dedup bytes */ int endByte = input.readVInt(); int payloadSep = input.readVInt(); return new NRTSuggester(fst, maxAnalyzedPathsPerOutput, payloadSep); }
Example #2
Source File: StemmerOverrideFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Returns an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter} * @return an {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter} * @throws IOException if an {@link IOException} occurs; */ public StemmerOverrideMap build() throws IOException { ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); FSTCompiler<BytesRef> fstCompiler = new FSTCompiler<>( FST.INPUT_TYPE.BYTE4, outputs); final int[] sort = hash.sort(); IntsRefBuilder intsSpare = new IntsRefBuilder(); final int size = hash.size(); BytesRef spare = new BytesRef(); for (int i = 0; i < size; i++) { int id = sort[i]; BytesRef bytesRef = hash.get(id, spare); intsSpare.copyUTF8Bytes(bytesRef); fstCompiler.add(intsSpare.get(), new BytesRef(outputValues.get(id))); } return new StemmerOverrideMap(fstCompiler.compile(), ignoreCase); }
Example #3
Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
@Override public boolean load(InputStream input) throws IOException { DataInput dataIn = new InputStreamDataInput(input); try { this.fst = new FST<>(dataIn, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); maxAnalyzedPathsForOneInput = dataIn.readVInt(); hasPayloads = dataIn.readByte() == 1; } finally { IOUtils.close(input); } return true; }
Example #4
Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
@Override public boolean load(DataInput input) throws IOException { count = input.readVLong(); this.fst = new FST<>(input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); maxAnalyzedPathsForOneInput = input.readVInt(); hasPayloads = input.readByte() == 1; return true; }
Example #5
Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0 | 5 votes |
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) { this.payloadSep = payloadSep; this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()); this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; this.hasPayloads = hasPayloads; surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm]; }
Example #6
Source File: AnalyzingSuggester.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public boolean load(DataInput input) throws IOException { count = input.readVLong(); this.fst = new FST<>(input, input, new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton())); maxAnalyzedPathsForOneInput = input.readVInt(); hasPayloads = input.readByte() == 1; return true; }
Example #7
Source File: NRTSuggesterBuilder.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Create a builder for {@link NRTSuggester} */ public NRTSuggesterBuilder() { this.payloadSep = PAYLOAD_SEP; this.endByte = END_BYTE; this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()); this.entries = new PriorityQueue<>(); this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs); }
Example #8
Source File: SynonymMap.java From lucene-solr with Apache License 2.0 | 4 votes |
/** * Builds an {@link SynonymMap} and returns it. */ public SynonymMap build() throws IOException { ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); // TODO: are we using the best sharing options? FSTCompiler<BytesRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, outputs); BytesRefBuilder scratch = new BytesRefBuilder(); ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput(); final Set<Integer> dedupSet; if (dedup) { dedupSet = new HashSet<>(); } else { dedupSet = null; } final byte[] spare = new byte[5]; Set<CharsRef> keys = workingSet.keySet(); CharsRef sortedKeys[] = keys.toArray(new CharsRef[keys.size()]); Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator()); final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); //System.out.println("fmap.build"); for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) { CharsRef input = sortedKeys[keyIdx]; MapEntry output = workingSet.get(input); int numEntries = output.ords.size(); // output size, assume the worst case int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry scratch.grow(estimatedSize); scratchOutput.reset(scratch.bytes()); // now write our output data: int count = 0; for (int i = 0; i < numEntries; i++) { if (dedupSet != null) { // box once final Integer ent = output.ords.get(i); if (dedupSet.contains(ent)) { continue; } dedupSet.add(ent); } scratchOutput.writeVInt(output.ords.get(i)); count++; } final int pos = scratchOutput.getPosition(); scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1)); final int pos2 = scratchOutput.getPosition(); final int vIntLen = pos2-pos; // Move the count + includeOrig to the front of the byte[]: System.arraycopy(scratch.bytes(), pos, spare, 0, vIntLen); System.arraycopy(scratch.bytes(), 0, scratch.bytes(), vIntLen, pos); System.arraycopy(spare, 0, scratch.bytes(), 0, vIntLen); if (dedupSet != null) { dedupSet.clear(); } scratch.setLength(scratchOutput.getPosition()); //System.out.println(" add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count); fstCompiler.add(Util.toUTF32(input, scratchIntsRef), scratch.toBytesRef()); } FST<BytesRef> fst = fstCompiler.compile(); return new SynonymMap(fst, words, maxHorizontalContext); }