本文共 2372 字,大约阅读时间需要 7 分钟。
/*
 * FSTDic -- a small word dictionary stored in a Lucene FST with byte-sequence outputs.
 *
 * What the visible code does:
 *   - FSTDic():    if a file named "fst" exists (relative path), the FST is obtained via
 *                  load(file); otherwise it is built from four hard-coded Chinese words.
 *                  Afterwards a bytes reader is taken from the FST and kept in fstReader.
 *   - save():      writes the FST to the file "fst".
 *   - load(file):  constructs an FST from a FileInputStream wrapped in InputStreamDataInput,
 *                  using the ByteSequenceOutputs singleton.
 *                  NOTE(review): the 'file' parameter is never read; the path "fst" is
 *                  hard-coded again here. The FileInputStream is not closed in this method.
 *   - build(words): creates a Builder with FST.INPUT_TYPE.BYTE4 and, for each word, adds the
 *                  word converted with Util.toUTF32 as the input and, as the output, a deep
 *                  copy of a BytesRef filled by
 *                  NumericUtils.intToPrefixCodedBytes(word.length(), 0, output).
 *                  NOTE(review): presumably the words must be supplied in sorted order for
 *                  the Builder -- confirm against the Lucene version in use.
 *   - contains(word): sets up a scratch arc, buffer positions and the FST's "no output"
 *                  value, then calls fst.getFirstArc(scratchArc) and starts a while loop.
 *
 * NOTE(review): this copy of the listing is damaged.
 *   - The text stops at "while(bufUpto"; the rest of contains(...) and the closing braces
 *     of the class are not present here.
 *   - Generic type arguments appear to have been stripped during extraction (see
 *     "FSTfst ;", "new ArrayList ()", "new Builder (", "new FST.Arc ()"); presumably they
 *     were BytesRef / String type parameters -- TODO confirm against the original post.
 *   As a result the listing does not compile as shown.
 */
package fst;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.StringReader;import java.util.ArrayList;import java.util.HashMap;import java.util.List;import java.util.Map;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.core.WhitespaceTokenizer;import org.apache.lucene.analysis.synonym.SynonymFilterFactory;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.analysis.util.FilesystemResourceLoader;import org.apache.lucene.store.DataInput;import org.apache.lucene.store.InputStreamDataInput;import org.apache.lucene.util.BytesRef;import org.apache.lucene.util.CharsRef;import org.apache.lucene.util.IntsRef;import org.apache.lucene.util.NumericUtils;import org.apache.lucene.util.Version;import org.apache.lucene.util.fst.Builder;import org.apache.lucene.util.fst.ByteSequenceOutputs;import org.apache.lucene.util.fst.FST;import org.apache.lucene.util.fst.Util;class FSTDic{ FSTfst ; FST.BytesReader fstReader; public FSTDic() throws IOException{ File file=new File("fst"); if(file.exists()){ fst=load(file); }else{ List words=new ArrayList (); words.add("中国"); words.add("中国人"); words.add("中国人民"); words.add("中国人民解放军"); fst=build(words); } fstReader = fst.getBytesReader(); } public void save() throws IOException{ fst.save(new File("fst")); } public FST load(File file) throws IOException{ return new FST(new InputStreamDataInput(new FileInputStream("fst")),ByteSequenceOutputs.getSingleton() ); } private FST build(List words) throws IOException{ ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); Builder builder = new Builder (FST.INPUT_TYPE.BYTE4, outputs); final IntsRef scratchIntsRef = new IntsRef(); BytesRef output = new BytesRef(4); for(String word: words){ NumericUtils.intToPrefixCodedBytes(word.length(), 0, output); builder.add(Util.toUTF32(word, scratchIntsRef), BytesRef.deepCopyOf(output)); } return builder.finish(); } public boolean contains(String 
word) throws IOException{ FST.Arc scratchArc = new FST.Arc (); int bufUpto=0,buflen=word.length(); BytesRef pendingOutput=fst.outputs.getNoOutput(); BytesRef matchOutput = null; fst.getFirstArc(scratchArc); while(bufUpto
(随记,稍后补全……)
转载地址:http://lmdkx.baihongyu.com/