package org.eaglei.search.provider.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import org.eaglei.lexical.lucene.LuceneUtils;
import org.eaglei.model.EIClass;
import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntModel;
import org.eaglei.model.EIURI;
import org.eaglei.model.jena.JenaEIOntModel;
import org.eaglei.search.provider.SearchCountRequest;
import org.eaglei.search.provider.SearchCounts;
import org.eaglei.search.provider.SearchRequest;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.search.provider.SearchProvider;
import org.eaglei.search.provider.SearchProviderUtil;

import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.vocabulary.RDF;

/**
 * Contains logic for building a Lucene Query from a SearchRequest for the index schema defined by LuceneSearchIndexSchema.
 * <p>
 * Creation of the query can potentially incorporate the following (neither is implemented yet):
 * 
 * <ul>
 * <li>query-expansion: using an ontology-based entity extraction technique similar to what is done for PubMed
 * <li>pseudo-relevance feedback: the top-X results of an initial search are used to create a modified query
 * </ul>
 *  
 * @author frost
 */
// TODO: implement query expansion (can look at the EFO Lucene-based impl, IndexBasedExpansionLookup.java) 
// TODO: implement pseudo-relevance feedback 

public final class LuceneQueryBuilder extends LuceneSearchIndexSchema { 

    private static final Log logger = LogFactory.getLog(LuceneQueryBuilder.class);
    private static final boolean DEBUG = logger.isDebugEnabled();

    /*
     * Handle to the in-memory eagle-i ontology
     */
    private final EIOntModel eagleiOntModel;
    /*
     * The Lucene analyzer used for both indexing and query execution.
     */
    private Analyzer analyzer;
    /**
     * Lucene QueryParser that is used for the pref text field 
     */
    private QueryParser prefTextParser;
    /**
     * Lucene QueryParser that is used for the text field 
     */    
    private QueryParser textParser;
    
    /**
     * Creates a LuceneQueryBuilder.
     * @param eagleiOntModel Reference to the eagle-i ontology model.
     * @param analyzer Analyzer to use for query execution.
     */
    public LuceneQueryBuilder(final EIOntModel eagleiOntModel, final Analyzer analyzer) throws IOException {
        assert eagleiOntModel != null;
        assert analyzer != null;        
        this.eagleiOntModel = eagleiOntModel;
        this.analyzer = analyzer;
        this.textParser = new QueryParser(Version.LUCENE_30, LuceneSearchProviderIndexer.TEXT, this.analyzer);                
        this.prefTextParser = new QueryParser(Version.LUCENE_30, LuceneSearchProviderIndexer.PREF_TEXT, this.analyzer);                        
    }
    
    /**
     * Creates a multi-field query from the SearchRequest. 
     * 
     * @param request The SearchRequest for which the Lucene Query is being created
     * 
     * @return The Lucene query.
     */
    public Query createQuery(final SearchRequest request) throws ParseException {

        final SearchRequest.Term term = request.getTerm();
        final EIURI institution = request.getInstitution();
        final BooleanQuery query = new BooleanQuery();

        // add the resource flag constraint
        final PhraseQuery resourceFlagQuery = new PhraseQuery();
        resourceFlagQuery.add(new Term(LuceneSearchProviderIndexer.RESOURCE_FLAG, Boolean.TRUE.toString()));
        query.add(resourceFlagQuery, BooleanClause.Occur.MUST);
        
        // add the institution constraint
        if (institution != null) {
            final PhraseQuery institutionQuery = new PhraseQuery();
            institutionQuery.add(new Term(LuceneSearchProviderIndexer.INSTITUTION_URI, institution.toString()));
            query.add(institutionQuery, BooleanClause.Occur.MUST);
        }
        
        // was a term specified?
        if (term != null) {
            final BooleanQuery termQuery = new BooleanQuery();
            query.add(termQuery, BooleanClause.Occur.MUST);
            
            // add the entity query components
            if (term.getURI() != null) {
                final EIURI uri = term.getURI();

                // if an ontology class, create a type constraint and constraints for
                // labels
                final EIClass eiClass = this.eagleiOntModel.getClass(uri);
                if (eiClass != null) {
                    // add a type query
                    final PhraseQuery typeQuery = new PhraseQuery();
                    typeQuery.add(new Term(LuceneSearchProviderIndexer.INFERRED_TYPE, uri.toString()));
                    termQuery.add(typeQuery, BooleanClause.Occur.SHOULD);
                    // synonym expansion
                    PhraseQuery synonymQuery;
                    for (String label: this.eagleiOntModel.getLabels(uri)) {
                        synonymQuery = new PhraseQuery();
                        synonymQuery.add(new Term(LuceneSearchProviderIndexer.TEXT, label));
                        termQuery.add(synonymQuery, BooleanClause.Occur.SHOULD);
                    }
                    /*
                    final StringBuilder sb = new StringBuilder();
                    boolean first = true;
                    for (String label: this.eagleiOntModel.getLabels(uri)) {
                        if (!first) {
                            sb.append(" ");
                        }
                        first = false;
                        sb.append(label);
                    }
                    final Query strTextQuery = LuceneUtils.escapeIfInvalid(textParser, sb.toString());
                    termQuery.add(strTextQuery, BooleanClause.Occur.SHOULD);   
                    */
                    /*
                    final Query strPrefTextQuery = LuceneUtils.escapeIfInvalid(prefTextParser, sb.toString());
                    termQuery.add(strPrefTextQuery, BooleanClause.Occur.SHOULD);  
                    */                         
                    /*
                    for (String label: this.eagleiOntModel.getLabels(uri)) {
                        Query labelQuery = new TermQuery(new Term(LuceneSearchProviderIndexer.PREF_TEXT, label)); //QueryParser.escape(label)));
                        termQuery.add(labelQuery, BooleanClause.Occur.SHOULD);       
                    }
                    */
                } 
                // add prop query
                final PhraseQuery propQuery = new PhraseQuery();
                propQuery.add(new Term(LuceneSearchProviderIndexer.RELATED, uri.toString()));
                termQuery.add(propQuery, BooleanClause.Occur.SHOULD);
            }
            
            // add the term (i.e. free-text query) constraint
            if (term.getQuery() != null) {
                // search on "text"
                final Query strTextQuery = LuceneUtils.escapeIfInvalid(textParser, term.getQuery());
                termQuery.add(strTextQuery, BooleanClause.Occur.SHOULD);
                // search on "pref_text"
                if (term.getURI() == null) {
                    final Query strPrefTextQuery = LuceneUtils.escapeIfInvalid(prefTextParser, term.getQuery());
                    termQuery.add(strPrefTextQuery, BooleanClause.Occur.SHOULD);
                }
            }
            
            // add pseudo-relevance result feedback
//            if (results != null) {
//                for (SearchResult result: results.getResults()) {
//                    // add a PhraseQuery for this URI
//                    logger.debug("Adding " + result.getEntity() + " via pseudo-relevance ranking");
//                    final PhraseQuery propQuery = new PhraseQuery();
//                    propQuery.add(new Term(LuceneSearchProviderIndexer.RELATED, result.getEntity().getURI().toString()));
//                    // give it a low boost
//                    termQuery.setBoost(LuceneSearchProviderIndexer.LOW_BOOST);
//                    termQuery.add(propQuery, BooleanClause.Occur.SHOULD);
//                    final String label = result.getEntity().getLabel();
//                    /*
//                // search on "text"
//                final Query strTextQuery = LuceneUtils.escapeIfInvalid(textParser, label);
//                strTextQuery.setBoost(LuceneSearchProviderIndexer.LOW_BOOST);
//                query.add(strTextQuery, BooleanClause.Occur.SHOULD);
//                // search on "pref_text"
//                final Query strPrefTextQuery = LuceneUtils.escapeIfInvalid(prefTextParser, label);
//                strPrefTextQuery.setBoost(LuceneSearchProviderIndexer.LOW_BOOST);
//                query.add(strPrefTextQuery, BooleanClause.Occur.SHOULD);
//                     */
//                }        
//            }
        }
        
        // add the type constraint
        final EIURI typeURI = SearchProviderUtil.getType(request);
        if (typeURI != null) {
            final PhraseQuery typeQuery = new PhraseQuery();
            typeQuery.add(new Term(LuceneSearchProviderIndexer.INFERRED_TYPE, typeURI.toString()));
            query.add(typeQuery, BooleanClause.Occur.MUST);
        }

        // add any property bindings
        SearchRequest.TypeBinding binding = request.getBinding();
        if (binding != null) {
            for (EIURI prop: binding.getDataTypeProperties()) {
                final String constraint =  binding.getDataTypeProperty(prop);
                final QueryParser propParser = new QueryParser(Version.LUCENE_30, prop.toString(), this.analyzer);
                final Query propQuery = propParser.parse(constraint);
                query.add(propQuery, BooleanClause.Occur.MUST);
            }
            for (EIURI prop: binding.getObjectProperties()) {
                final EIURI constraint =  binding.getObjectProperty(prop);
                final PhraseQuery propQuery = new PhraseQuery();
                propQuery.add(new Term(prop.toString() + LuceneSearchProviderIndexer.OBJECT_URI_POSTFIX, constraint.toString()));
                query.add(propQuery, BooleanClause.Occur.MUST);
            }            
        }
                
        // No bindings
        if (query.clauses().isEmpty()) {
            final Query allDocsQuery = new MatchAllDocsQuery();
            query.add(allDocsQuery, BooleanClause.Occur.MUST);
            return query;
        }
        
        if (DEBUG) {
            logger.debug("Query: " + query.toString());
        }
        
        return query;

    }

}
