package org.eaglei.solr.suggest;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.eaglei.lexical.EntityMatch;
import org.eaglei.lexical.EntityMatchRequest;
import org.eaglei.lexical.SuggestionProvider;
import org.eaglei.lexical.lucene.EntityMatchImpl;
import org.eaglei.lexical.lucene.LuceneEntityExtractionIndexer;
import org.eaglei.lexical.lucene.LuceneUtils;
import org.eaglei.model.EIURI;

/**
 * {@link SuggestionProvider} that answers auto-suggest and URI-lookup requests from
 * a pre-built Lucene index.
 * <p>
 * Clients of this class must create and populate the Lucene Directory according to the
 * schema in {@link LuceneDataSuggestIndexSchema} before using this provider.
 * <p>
 * A new {@link IndexSearcher} is opened for every request and closed again before the
 * request returns.
 *
 * @author rfrost, tbashor
 */
public class LuceneDataSuggestProvider implements LuceneDataSuggestIndexSchema, SuggestionProvider {

    private static final Log logger = LogFactory.getLog(LuceneDataSuggestProvider.class);

    /**
     * Default score threshold
     */
    public static final float DEFAULT_SCORE_THRESHHOLD = 1.0f;

    /**
     * Max number of fragments for highlighting
     */
    public static final int MAX_FRAGMENTS = 4;

    private final Directory directory;
    private final Analyzer indexAnalyzer;
    final Analyzer queryAnalyzer;

    // Hits scoring below this value are dropped by suggest(); <= 0 disables the filter.
    private float scoreThreshold = DEFAULT_SCORE_THRESHHOLD;

    /**
     * Creates a new provider that uses the same analyzer for indexing
     * and querying.
     *
     * @param analyzer Analyzer used for both querying and indexing
     * @param directory Lucene directory. This directory should already contain a valid index
     *        populated according to the schema this provider reads.
     *
     * @throws IOException Declared for backward compatibility with existing callers; the
     *         index is not opened until a request is made.
     */
    public LuceneDataSuggestProvider(final Analyzer analyzer, final Directory directory) throws IOException {
        this(analyzer, analyzer, directory);
    }

    /**
     * Creates a new provider.
     *
     * @param queryAnalyzer Analyzer to use for query execution.
     * @param indexAnalyzer Analyzer to use for index creation.
     * @param directory Lucene directory. This directory should already contain a valid index
     *        populated according to the schema this provider reads.
     *
     * @throws IOException Declared for backward compatibility with existing callers; the
     *         index is not opened until a request is made.
     */
    public LuceneDataSuggestProvider(final Analyzer queryAnalyzer, final Analyzer indexAnalyzer,
            final Directory directory) throws IOException {
        this.directory = directory;
        this.queryAnalyzer = queryAnalyzer;
        this.indexAnalyzer = indexAnalyzer;
    }

    /**
     * Sets the Lucene score threshold. Matches with a score of less than this
     * will not be returned. Set to <= 0 to return all matches.
     *
     * @param threshold The threshold.
     */
    public void setScoreThreshold(final float threshold) {
        this.scoreThreshold = threshold;
    }

    /**
     * Returns suggestions whose label matches the request text.
     * <p>
     * Hits are taken in search order; those whose label starts with the lower-cased request
     * text are returned ahead of the others. The very first match is additionally followed by
     * one copy per known category of its document (see
     * {@link #addCategorySuggest(List, EntityMatch, Document)}), so the returned list may hold
     * more than {@code request.getMaxMatches()} entries.
     *
     * @param request the suggest request; its text and maximum match count are read
     * @return the matches, never {@code null}; empty when the text is null or empty, the
     *         maximum match count is not positive, the text cannot be parsed, or the index
     *         cannot be read
     * @throws IOException declared for interface compatibility; I/O failures are logged and
     *         result in the matches collected so far being returned
     */
    public List<EntityMatch> suggest(final EntityMatchRequest request) throws IOException {
        final List<EntityMatch> matches = new ArrayList<EntityMatch>();
        final String text = request.getText();
        final int maxMatches = request.getMaxMatches();
        // A non-positive hit count cannot yield results and is not a valid argument for the
        // search call below, so bail out before touching the index.
        if (text == null || text.length() == 0 || maxMatches <= 0) {
            return matches;
        }
        // Lower-cased copy used only for the "label starts with" ordering below.
        final String queryStr = text.toLowerCase();

        IndexSearcher searcher = null;
        try {
            searcher = new IndexSearcher(directory);
            searcher.setDefaultFieldSortScoring(true, false);

            final QueryParser labelParser =
                    new QueryParser(Version.LUCENE_30, FIELD_SUGGEST_LABEL_SEARCH, queryAnalyzer);
            final Query labelQuery;
            try {
                labelQuery = LuceneUtils.escapeIfInvalid(labelParser, text);
            } catch (ParseException e) {
                logger.error("Error parsing suggest query: " + request.toString(), e);
                return matches;
            }
            final BooleanQuery query = new BooleanQuery();
            query.add(labelQuery, BooleanClause.Occur.MUST);

            final TopDocs hits = searcher.search(query, maxMatches);
            final Highlighter highlighter =
                    new Highlighter(new QueryScorer(query, FIELD_SUGGEST_LABEL_SEARCH));

            final int hitCount = hits.scoreDocs.length;
            final List<EntityMatchImpl> startWith = new ArrayList<EntityMatchImpl>(hitCount);
            final List<EntityMatchImpl> notStartWith = new ArrayList<EntityMatchImpl>(hitCount);
            final Map<String, Document> mapLabelToDoc = new HashMap<String, Document>();

            for (final ScoreDoc scoreDoc : hits.scoreDocs) {
                final float score = scoreDoc.score;
                if (!(this.scoreThreshold <= 0 || score >= this.scoreThreshold)) {
                    continue;
                }

                final Document doc = searcher.doc(scoreDoc.doc);
                final String label = doc.get(FIELD_SUGGEST_LABEL_KEY);
                if (label == null) {
                    // Without a label there is nothing to highlight or return.
                    continue;
                }
                mapLabelToDoc.put(label, doc);

                final TokenStream stream =
                        TokenSources.getAnyTokenStream(searcher.getIndexReader(), scoreDoc.doc,
                                FIELD_SUGGEST_LABEL_SEARCH, doc, queryAnalyzer);
                final String fragment;
                try {
                    fragment = highlighter.getBestFragment(stream, label);
                } catch (InvalidTokenOffsetsException e) {
                    logger.error("Error generating highlight: " + request.toString(), e);
                    continue;
                }

                final EntityMatchImpl match = new EntityMatchImpl(label, label, fragment, null, score);
                if (label.startsWith(queryStr)) {
                    startWith.add(match);
                } else {
                    notStartWith.add(match);
                }
            }

            // Prefix matches first, then everything else.
            appendMatches(matches, startWith, maxMatches, mapLabelToDoc);
            appendMatches(matches, notStartWith, maxMatches, mapLabelToDoc);
        } catch (IOException e) {
            logger.error("Error generating suggestions: " + request.toString(), e);
        } finally {
            closeQuietly(searcher);
        }
        return matches;
    }

    /**
     * Appends candidates to matches until the candidates run out or matches holds at least
     * maxMatches entries. When a candidate becomes the first entry of matches, its
     * per-category copies are appended right after it.
     */
    private void appendMatches(final List<EntityMatch> matches, final List<EntityMatchImpl> candidates,
            final int maxMatches, final Map<String, Document> labelToDoc) {
        for (int i = 0; matches.size() < maxMatches && i < candidates.size(); i++) {
            final EntityMatch match = candidates.get(i);
            matches.add(match);
            if (matches.size() == 1) {
                addCategorySuggest(matches, match, labelToDoc.get(match.getMatchLabel()));
            }
        }
    }

    /**
     * Appends one copy of matchRoot per category value stored on doc, skipping
     * UNKNOWN_CATEGORY, with the category set as the copy's binding type.
     * Does nothing when doc is null.
     */
    private void addCategorySuggest(List<EntityMatch> matches, EntityMatch matchRoot, Document doc) {
        if (doc == null) {
            return;
        }
        for (String category : doc.getValues(FIELD_SUGGEST_INSTANCE_CATEGORY)) {
            if (category.equals(UNKNOWN_CATEGORY)) {
                continue;
            }
            EntityMatchImpl match =
                new EntityMatchImpl(matchRoot.getMatchLabel(), matchRoot.getMatchLabel(),
                        matchRoot.getHighlight(), null, matchRoot.getScore());
            match.setBindingType(category);
            matches.add(match);
        }
    }

    /**
     * Resolves a label to entity URIs by exact term lookup.
     * <p>
     * The trimmed, lower-cased text is looked up in FIELD_ENTITY_LABEL first; if that yields
     * fewer than max results, FIELD_ENTITY_SYNONYM is searched as well.
     *
     * @param queryStr the label text to resolve
     * @param max maximum number of URIs to return
     * @return the FIELD_ENTITY_URI values of the matching documents, at most max of them;
     *         empty when queryStr is null or blank, max is not positive, or the index
     *         cannot be read
     */
    public List<String> extractURIs(String queryStr, int max) {
        if (queryStr == null || max <= 0) {
            return Collections.emptyList();
        }
        // Suggest keys are all lower
        queryStr = queryStr.trim().toLowerCase();
        if (queryStr.length() == 0) {
            return Collections.emptyList();
        }
        IndexSearcher searcher = null;
        try {
            final List<String> results = new ArrayList<String>();
            searcher = new IndexSearcher(directory, true);
            collectUris(searcher, FIELD_ENTITY_LABEL, queryStr, max, results);
            if (results.size() < max) {
                // Try to resolve as a synonym
                collectUris(searcher, FIELD_ENTITY_SYNONYM, queryStr, max, results);
            }
            return results;
        } catch (IOException e) {
            logger.error("Error extracting URIs: " + queryStr, e);
            return Collections.emptyList();
        } finally {
            closeQuietly(searcher);
        }
    }

    /**
     * Runs an exact term query on field and appends the FIELD_ENTITY_URI of each hit to
     * results until results holds max entries.
     */
    private void collectUris(final IndexSearcher searcher, final String field, final String termText,
            final int max, final List<String> results) throws IOException {
        final TopDocs docs = searcher.search(new TermQuery(new Term(field, termText)), max);
        for (int i = 0; i < docs.scoreDocs.length && results.size() < max; i++) {
            results.add(searcher.doc(docs.scoreDocs[i].doc).get(FIELD_ENTITY_URI));
        }
    }

    /**
     * Closes searcher if it was opened; a failure to close is logged rather than propagated
     * so it cannot mask the result or exception of the request itself.
     */
    private static void closeQuietly(final IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            logger.warn("Error closing index searcher", e);
        }
    }

}
