package org.eaglei.suggest.provider.lucene;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

/**
 * Index-time Lucene {@link Analyzer} for auto-suggest.
 * <p>
 * Input is split with a {@link StandardTokenizer}, passed through a {@link StandardFilter},
 * lower-cased, and finally expanded by an {@link EdgeNGramTokenFilter} anchored at the front
 * of each token, so that the leading prefixes of every token are emitted as terms.
 *
 * @author rfrost
 *
 * TODO If the StandardTokenizer is used, need to leverage the term vector position information to impact match score.
 *      It is unclear if this is possible without using SpanQueries or a CustomScoreQuery...
 */
public class AutoSuggestIndexAnalyzer extends Analyzer {

    /** Minimum gram size used by the no-argument constructor. */
    private static final int DEFAULT_MIN_GRAM = 1;

    /** Maximum gram size used by the no-argument constructor. */
    private static final int DEFAULT_MAX_GRAM = 20;

    /** Smallest n-gram size handed to the {@link EdgeNGramTokenFilter}. */
    private final int minGram;

    /** Largest n-gram size handed to the {@link EdgeNGramTokenFilter}. */
    private final int maxGram;

    /**
     * Creates a new AutoSuggestIndexAnalyzer with the default gram sizes (1 to 20).
     */
    public AutoSuggestIndexAnalyzer() {
        this(DEFAULT_MIN_GRAM, DEFAULT_MAX_GRAM);
    }

    /**
     * Creates a new AutoSuggestIndexAnalyzer with explicit gram sizes.
     *
     * @param minGram smallest n-gram size to generate; must be at least 1
     * @param maxGram largest n-gram size to generate; must not be smaller than {@code minGram}
     * @throws IllegalArgumentException if the sizes violate the constraints above
     */
    public AutoSuggestIndexAnalyzer(int minGram, int maxGram) {
        // Validate eagerly so a bad configuration fails at construction time rather than
        // on the first call to tokenStream().
        if (minGram < 1) {
            throw new IllegalArgumentException("minGram must be greater than zero: " + minGram);
        }
        if (minGram > maxGram) {
            throw new IllegalArgumentException(
                    "minGram must not be greater than maxGram: " + minGram + " > " + maxGram);
        }
        this.minGram = minGram;
        this.maxGram = maxGram;
    }

    /**
     * Builds the analysis chain for the given input.
     *
     * @param fieldName name of the field being analyzed; not used, every field gets the same chain
     * @param reader source of the text to analyze
     * @return a stream of lower-cased, front-anchored edge n-grams of the tokenized input
     */
    @Override
    public final TokenStream tokenStream(String fieldName, Reader reader) {
        // A KeywordTokenizer may be preferable here for more standard
        // "single token" auto-suggest; see also the class-level TODO.
        Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_30, reader);
        TokenStream result = new StandardFilter(tokenizer);
        result = new LowerCaseFilter(result);
        result = new EdgeNGramTokenFilter(result, EdgeNGramTokenFilter.Side.FRONT, minGram, maxGram);
        return result;
    }
}
