package org.eaglei.search.provider.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import org.eaglei.lexical.lucene.LuceneUtils;
import org.eaglei.model.EIClass;
import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntModel;
import org.eaglei.model.EIURI;
import org.eaglei.model.jena.JenaEIOntModel;
import org.eaglei.search.provider.SearchCountRequest;
import org.eaglei.search.provider.SearchCounts;
import org.eaglei.search.provider.SearchRequest;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.search.provider.SearchProvider;
import org.eaglei.search.provider.SearchProviderUtil;
import org.eaglei.search.provider.lucene.LuceneSearchProviderIndexer;

import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.vocabulary.RDF;

/**
 * Contains logic for building a Lucene Query from a SearchRequest for the index schema defined by LuceneSearchIndexSchema.
 * <p>
 * Creation of the query could potentially incorporate the following (neither is implemented yet):
 * 
 * <ul>
 * <li>query-expansion: using an ontology-based entity extraction technique similar to what is done for PubMed
 * <li>pseudo-relevance feedback: the top-X results of an initial search are used to create a modified query
 * </ul>
 *  
 * @author frost
 */
// TODO: implement query expansion (can look at the EFO Lucene-based impl, IndexBasedExpansionLookup.java) 
// TODO: implement pseudo-relevance feedback 

public final class LuceneSearchQueryBuilder implements LuceneSearchIndexSchema {

    private static final Log logger = LogFactory.getLog(LuceneSearchQueryBuilder.class);

    /**
     * Handle to the in-memory eagle-i ontology.
     */
    private final EIOntModel eagleiOntModel;
    /**
     * The Lucene analyzer used for both indexing and query execution.
     */
    private final Analyzer analyzer;
    /**
     * Lucene QueryParser whose default field is the resource label field.
     */
    private final QueryParser labelParser;
    /**
     * Lucene QueryParser whose default field is the property text field.
     */
    private final QueryParser propTextParser;
    /**
     * Lucene QueryParser whose default field is the inferred type label field.
     */
    private final QueryParser inferredTypeLabelParser;
    /**
     * Lucene QueryParser whose default field is the asserted type label field.
     */
    private final QueryParser assertedTypeLabelParser;

    /**
     * Creates a LuceneSearchQueryBuilder.
     * <p>
     * The four QueryParser instances created here are reused by every call to
     * {@link #createQuery(SearchRequest)}. Lucene documents QueryParser as not
     * thread-safe. NOTE(review): confirm that a single builder is never used
     * from several threads at once.
     *
     * @param eagleiOntModel Reference to the eagle-i ontology model.
     * @param analyzer Analyzer to use for query execution.
     * @throws IllegalArgumentException if either argument is null
     */
    public LuceneSearchQueryBuilder(final EIOntModel eagleiOntModel, final Analyzer analyzer) {
        // Explicit checks instead of assert: assertions are disabled by default at runtime.
        if (eagleiOntModel == null) {
            throw new IllegalArgumentException("eagleiOntModel must not be null");
        }
        if (analyzer == null) {
            throw new IllegalArgumentException("analyzer must not be null");
        }
        this.eagleiOntModel = eagleiOntModel;
        this.analyzer = analyzer;
        this.labelParser = new QueryParser(Version.LUCENE_30, FIELD_LABEL, this.analyzer);
        this.propTextParser = new QueryParser(Version.LUCENE_30, FIELD_PROP_TEXT, this.analyzer);
        this.inferredTypeLabelParser = new QueryParser(Version.LUCENE_30, FIELD_INFERRED_TYPE_LABEL, this.analyzer);
        this.assertedTypeLabelParser = new QueryParser(Version.LUCENE_30, FIELD_ASSERTED_TYPE_LABEL, this.analyzer);
    }

    /**
     * Creates a multi-field query from the SearchRequest.
     * <p>
     * The resulting BooleanQuery requires (MUST) each of the following that is
     * present on the request, in this order: the location constraints, the
     * search term, and the type constraint. If none of them contributes a
     * clause, the query matches all documents.
     *
     * @param request The SearchRequest for which the Lucene Query is being created
     *
     * @return The Lucene query.
     * @throws ParseException propagated from LuceneUtils.escapeIfInvalid when a
     *         free-text term (no URI) is converted into a Lucene query
     */
    public Query createQuery(final SearchRequest request) throws ParseException {
        final BooleanQuery query = new BooleanQuery();

        addLocationConstraints(query, request);
        addTermConstraints(query, request.getTerm());
        addTypeConstraint(query, SearchProviderUtil.getType(request));

        // Property bindings are currently disabled; kept for reference.
        /*
        SearchRequest.TypeBinding binding = request.getBinding();
        if (binding != null) {
            for (EIURI prop: binding.getDataTypeProperties()) {
                final String constraint =  binding.getDataTypeProperty(prop);
                final QueryParser propParser = new QueryParser(Version.LUCENE_30, prop.toString(), this.analyzer);
                final Query propQuery = propParser.parse(constraint);
                query.add(propQuery, BooleanClause.Occur.MUST);
            }
            for (EIURI prop: binding.getObjectProperties()) {
                final EIURI constraint =  binding.getObjectProperty(prop);
                final PhraseQuery propQuery = new PhraseQuery();
                propQuery.add(new Term(prop.toString() + LuceneSearchProviderIndexer.OBJECT_URI_POSTFIX, constraint.toString()));
                query.add(propQuery, BooleanClause.Occur.MUST);
            }
        }
        */

        // Nothing constrained the search: match every document.
        if (query.clauses().isEmpty()) {
            query.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Query: " + query.toString());
        }

        return query;
    }

    /*
     * Adds the resource provider constraints (institution and/or provider
     * instance) of the request to the top-level query as required clauses.
     */
    private void addLocationConstraints(final BooleanQuery query, final SearchRequest request) {
        if (request.getLocation() == null) {
            return;
        }

        // institution constraint
        if (request.getLocation().getInstitution() != null) {
            query.add(exactMatch(FIELD_INSTITUTION_URI, request.getLocation().getInstitution().toString()),
                    BooleanClause.Occur.MUST);
        }

        // location instance constraint
        if (request.getLocation().getURI() == null) {
            return;
        }
        final String providerValue = request.getLocation().getURI().toString();

        // The provider URI is matched against object property fields, i.e.
        // index fields whose name is the URI of a resource provider property.
        if (request.getBinding() != null) {
            // With a type binding, only the provider property of that type is used.
            final EIURI providerPropertyURI =
                    eagleiOntModel.getResourceProviderProperty(request.getBinding().getType());
            if (providerPropertyURI == null) {
                // The constraint is dropped (not failed) when no property can be resolved.
                logger.error("Unable to compute a resource provider property for request type binding: " + request.getBinding().getType());
            } else {
                query.add(exactMatch(providerPropertyURI.toString(), providerValue), BooleanClause.Occur.MUST);
            }
        } else {
            // No type binding: any of the resource provider properties may match.
            final BooleanQuery providerQuery = new BooleanQuery();
            for (EIURI providerPropertyURI : eagleiOntModel.getResourceProviderProperties()) {
                providerQuery.add(exactMatch(providerPropertyURI.toString(), providerValue),
                        BooleanClause.Occur.SHOULD);
            }
            query.add(providerQuery, BooleanClause.Occur.MUST);
        }
    }

    /*
     * Adds the search term (URI-based or free text) to the top-level query as
     * one required group of alternative (SHOULD) clauses.
     */
    private void addTermConstraints(final BooleanQuery query, final SearchRequest.Term term)
            throws ParseException {
        if (term == null) {
            return;
        }

        final BooleanQuery termQuery = new BooleanQuery();
        if (term.getURI() != null) {
            addUriTermClauses(termQuery, term);
        } else if (term.getQuery() != null) {
            addTextTermClauses(termQuery, term.getQuery());
        }

        // A term carrying neither a URI nor query text yields no clauses.
        // Requiring an empty BooleanQuery would make the whole query match
        // nothing, so such a term is treated like an absent term.
        if (!termQuery.clauses().isEmpty()) {
            query.add(termQuery, BooleanClause.Occur.MUST);
        }
    }

    /*
     * Adds the alternatives for a term that carries an entity URI. The URI is
     * either an ontology class URI or a resource instance URI.
     */
    private void addUriTermClauses(final BooleanQuery termQuery, final SearchRequest.Term term) {
        final EIURI uri = term.getURI();
        final String uriValue = uri.toString();

        if (eagleiOntModel.isModelClassURI(uriValue)) {
            // ontology class URI: match resources typed with it (asserted or inferred)
            termQuery.add(exactMatch(FIELD_ASSERTED_TYPE_URI, uriValue), BooleanClause.Occur.SHOULD);
            termQuery.add(exactMatch(FIELD_INFERRED_TYPE_URI, uriValue), BooleanClause.Occur.SHOULD);
            // synonym expansion: every label other than the one the user typed
            // is treated as a synonym
            for (String label : this.eagleiOntModel.getLabels(uri)) {
                final boolean isQueryLabel = label.equals(term.getQuery());
                addLabelQuery(termQuery, label, !isQueryLabel);
            }
        } else {
            // resource URI: match the resource itself plus its label text
            termQuery.add(exactMatch(FIELD_URI, uriValue), BooleanClause.Occur.SHOULD);
            addLabelQuery(termQuery, term.getQuery(), false);
            // TODO Alternate name expansion
        }

        // in both cases, also match resources that reference the URI as a property value
        termQuery.add(exactMatch(FIELD_PROP_URI, uriValue), BooleanClause.Occur.SHOULD);
    }

    /*
     * Adds the alternatives for a free-text term (no URI): resource label,
     * asserted type label, inferred type label and property text.
     */
    private void addTextTermClauses(final BooleanQuery termQuery, final String queryText)
            throws ParseException {
        final QueryParser[] parsers = {
            labelParser, assertedTypeLabelParser, inferredTypeLabelParser, propTextParser
        };
        for (QueryParser parser : parsers) {
            termQuery.add(LuceneUtils.escapeIfInvalid(parser, queryText), BooleanClause.Occur.SHOULD);
        }
    }

    /*
     * Adds the type constraint to the top-level query: the resource must have
     * the given type either as asserted or as inferred type.
     */
    private void addTypeConstraint(final BooleanQuery query, final EIURI typeURI) {
        if (typeURI == null) {
            return;
        }
        final String typeValue = typeURI.toString();
        final BooleanQuery typeQuery = new BooleanQuery();
        typeQuery.add(exactMatch(FIELD_ASSERTED_TYPE_URI, typeValue), BooleanClause.Occur.SHOULD);
        typeQuery.add(exactMatch(FIELD_INFERRED_TYPE_URI, typeValue), BooleanClause.Occur.SHOULD);
        query.add(typeQuery, BooleanClause.Occur.MUST);
    }

    /*
     * Builds a single-term PhraseQuery on the given field. The value is used
     * as-is; it does not pass through the analyzer.
     */
    private static PhraseQuery exactMatch(final String field, final String value) {
        final PhraseQuery phrase = new PhraseQuery();
        phrase.add(new Term(field, value));
        return phrase;
    }

    /*
     * Search using an entity label or entity synonym.
     *
     * The label is quoted so that it is parsed as a phrase, and is added as an
     * alternative on both the resource label field and the property text
     * field. Synonyms receive the BOOST_SYNONYM boost.
     *
     * A null label adds nothing. A ParseException is logged and the remaining
     * clauses are skipped, so label expansion stays best-effort and never
     * aborts construction of the overall query.
     */
    private void addLabelQuery(BooleanQuery termQuery, String label, boolean isSynonym) {
        if (label == null) {
            // String concatenation below would otherwise search for the literal phrase "null".
            return;
        }
        final String phraseQueryStr = "\"" + label + "\"";
        try {
            // resource label
            Query labelQuery = LuceneUtils.escapeIfInvalid(labelParser, phraseQueryStr);
            if (isSynonym) {
                labelQuery.setBoost(BOOST_SYNONYM);
            }
            termQuery.add(labelQuery, BooleanClause.Occur.SHOULD);
            // property labels
            labelQuery = LuceneUtils.escapeIfInvalid(propTextParser, phraseQueryStr);
            if (isSynonym) {
                labelQuery.setBoost(BOOST_SYNONYM);
            }
            termQuery.add(labelQuery, BooleanClause.Occur.SHOULD);
        } catch (ParseException e) {
            logger.warn("Unable to parse label query " + phraseQueryStr + "; label is ignored", e);
        }
    }
}
