package org.eaglei.lexical.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.eaglei.lexical.EntityExtractionProvider;
import org.eaglei.lexical.EntityMatch;
import org.eaglei.lexical.EntityMatchRequest;
import org.eaglei.lexical.lucene.LuceneUtils;
import org.eaglei.model.EIURI;

/**
 * Implementation of EntityExtractionProvider that supports entity extraction for
 * subtrees of a specified OntModel.
 * 
 * Clients of this class must create and populate the Lucene Directory according to the
 * schema in LuceneEntityExtractionIndexer before creating this provider.
 * 
 * @author rfrost
 */
public class LuceneEntityExtractionProvider implements EntityExtractionProvider {

    private static final Log logger = LogFactory.getLog(LuceneEntityExtractionProvider.class);

    /**
     * Default score threshold. (The spelling of the constant name is kept as-is
     * because it is part of the public API.)
     */
    public static final float DEFAULT_SCORE_THRESHHOLD = .2f;

    /**
     * Max number of fragments for highlighting. Not currently used by this class:
     * {@link #match(EntityMatchRequest)} asks the highlighter for the single best fragment.
     */
    public static final int MAX_FRAGMENTS = 4;

    // NOTE(review): Lucene documents QueryParser as not thread-safe; confirm how
    // LuceneUtils.escapeIfInvalid uses this instance before sharing a provider across threads.
    private final QueryParser labelParser;
    private final IndexSearcher searcher;
    // Retained for reference; the label parser is built from this analyzer in the constructor.
    private final Analyzer queryAnalyzer;
    // Analyzer handed to the highlighter when re-tokenizing stored label values.
    private final Analyzer indexAnalyzer;

    // Matches scoring below this value are dropped; <= 0 disables the filter.
    private float scoreThreshold = DEFAULT_SCORE_THRESHHOLD;
    // Set to true to enable a fuzzy search rewrite if the initial query does not return results.
    private boolean fuzzyRewrite = false;

    /**
     * Creates a new provider that uses the same analyzer for indexing and querying.
     *
     * @param analyzer Analyzer used for both querying and indexing.
     * @param directory Lucene directory. This directory should already contain a valid index
     *        populated according to the schema in LuceneEntityExtractionIndexer.
     *
     * @throws IOException Thrown if the index in the directory cannot be opened.
     */
    public LuceneEntityExtractionProvider(final Analyzer analyzer, final Directory directory) throws IOException {
        this(analyzer, analyzer, directory);
    }

    /**
     * Creates a new provider.
     *
     * @param queryAnalyzer Analyzer to use for query execution.
     * @param indexAnalyzer Analyzer that was used for index creation; it is used here to
     *        compute highlights over the stored label values.
     * @param directory Lucene directory. This directory should already contain a valid index
     *        populated according to the schema in LuceneEntityExtractionIndexer.
     *
     * @throws IOException Thrown if the index in the directory cannot be opened.
     */
    public LuceneEntityExtractionProvider(final Analyzer queryAnalyzer, final Analyzer indexAnalyzer,
            final Directory directory) throws IOException {
        this.searcher = createSearcher(directory);
        this.labelParser = new QueryParser(Version.LUCENE_30, LuceneEntityExtractionIndexer.LABEL, queryAnalyzer);
        this.queryAnalyzer = queryAnalyzer;
        this.indexAnalyzer = indexAnalyzer;
    }

    /**
     * Sets the flag that controls whether the original query is rewritten to a "fuzzy" form
     * (Levenshtein edit distance) if the original execution fails to retrieve results.
     *
     * @param fuzzyRewrite True for fuzzy rewrite.
     */
    public void setFuzzyRewrite(final boolean fuzzyRewrite) {
        this.fuzzyRewrite = fuzzyRewrite;
    }

    /**
     * Sets the Lucene score threshold. Matches with a score of less than this
     * will not be returned. Set to <= 0 to return all matches.
     *
     * @param threshold The threshold.
     */
    public void setScoreThreshold(final float threshold) {
        this.scoreThreshold = threshold;
    }

    /**
     * Releases the index searcher opened by this provider. The provider must not be used
     * for matching after this method has been called.
     *
     * @throws IOException Thrown if the underlying searcher fails to close.
     */
    public void close() throws IOException {
        this.searcher.close();
    }

    /**
     * Finds index entries whose label matches the request text, optionally restricted to
     * the type URI carried by the request.
     *
     * If the first search yields no hits and fuzzy rewriting is enabled, the query is
     * rewritten via LuceneUtils.rewriteToFuzzy and executed a second time. Hits that score
     * below the configured threshold are skipped.
     *
     * @param request The match request; a null or empty text yields an empty result.
     * @return The matches, in the order returned by the relevance-sorted search; never null.
     * @throws IOException Thrown if the search fails or the query cannot be parsed (the
     *         ParseException is attached as the cause).
     */
    @Override
    public List<EntityMatch> match(final EntityMatchRequest request) throws IOException {
        final List<EntityMatch> matches = new ArrayList<EntityMatch>();
        final String text = request.getText();
        if (text == null || text.length() == 0) {
            return matches;
        }
        final int maxMatches = request.getMaxMatches();
        try {
            Query query = parse(request);
            TopFieldDocs docs = search(query, maxMatches);

            if (docs.totalHits == 0 && this.fuzzyRewrite) {
                query = LuceneUtils.rewriteToFuzzy(query);
                docs = search(query, maxMatches);
            }
            // Built from the query that was actually executed so that highlights
            // reflect the fuzzy rewrite when one was applied.
            final Highlighter highlighter = new Highlighter(new QueryScorer(query));

            for (int i = 0; i < docs.scoreDocs.length && i < maxMatches; i++) {
                final ScoreDoc scoreDoc = docs.scoreDocs[i];
                if (!passesThreshold(scoreDoc.score)) {
                    continue;
                }
                // Load the stored document only for hits that will be returned.
                final Document document = searcher.doc(scoreDoc.doc);
                matches.add(createMatch(document, scoreDoc.score, highlighter));
            }
        } catch (ParseException pe) {
            throw new IOException("Failed to parse entity match query", pe);
        }
        return matches;
    }

    /**
     * Returns true if a hit with the given score should be returned, i.e. the threshold
     * is disabled (<= 0) or the score reaches it.
     */
    private boolean passesThreshold(final float score) {
        return this.scoreThreshold <= 0 || score >= this.scoreThreshold;
    }

    /**
     * Builds the EntityMatch for a single hit: picks the first stored label value for which
     * the highlighter produces a fragment, falling back to the preferred label when none does.
     */
    private EntityMatch createMatch(final Document document, final float score, final Highlighter highlighter) {
        final String prefLabel = document.get(LuceneEntityExtractionIndexer.PREF_LABEL);
        String label = null;
        String highlight = null;
        for (Field field : document.getFields(LuceneEntityExtractionIndexer.LABEL)) {
            label = field.stringValue();
            try {
                highlight = highlighter.getBestFragment(this.indexAnalyzer, LuceneEntityExtractionIndexer.LABEL, label);
                if (highlight != null) {
                    break;
                }
            } catch (InvalidTokenOffsetsException itoe) {
                // Best effort: a failure on one label value must not abort the match,
                // so log it (with stack trace) and try the next value.
                logger.error("Failed to compute highlight for label '" + label + "'", itoe);
            } catch (IOException ioe) {
                logger.error("Failed to compute highlight for label '" + label + "'", ioe);
            }
        }
        // If none of the label values was identified as a highlight, display the pref label.
        if (highlight == null) {
            logger.warn("Could not find highlight for " + label);
            label = prefLabel;
            highlight = prefLabel;
        }
        return new EntityMatchImpl(prefLabel, label, highlight,
                document.get(LuceneEntityExtractionIndexer.URI), score);
    }

    /**
     * Opens a read-only searcher over the directory with score tracking enabled for
     * sorted searches, so that the ScoreDoc entries returned by {@link #search} carry scores.
     */
    private IndexSearcher createSearcher(final Directory directory) throws IOException {
        final IndexSearcher indexSearcher = new IndexSearcher(directory, true);
        indexSearcher.setDefaultFieldSortScoring(true, true);
        return indexSearcher;
    }

    /**
     * Builds the query for a request: a required label clause parsed from the request text
     * and, when the request carries a URI, a required clause on the type field.
     */
    private Query parse(final EntityMatchRequest request) throws ParseException {
        final BooleanQuery query = new BooleanQuery();
        final String text = request.getText();
        final EIURI uri = request.getURI();

        final Query labelQuery = LuceneUtils.escapeIfInvalid(labelParser, text);
        query.add(labelQuery, BooleanClause.Occur.MUST);

        if (uri != null) {
            final PhraseQuery typeQuery = new PhraseQuery();
            typeQuery.add(new Term(LuceneEntityExtractionIndexer.TYPE, uri.toString()));
            query.add(typeQuery, BooleanClause.Occur.MUST);
        }

        return query;
    }

    /**
     * Executes the query without a filter, sorted by relevance, returning at most
     * maxMatches hits.
     */
    private TopFieldDocs search(final Query query, final int maxMatches) throws IOException {
        return searcher.search(query, null, maxMatches, Sort.RELEVANCE);
    }

}
