package org.eaglei.search.provider.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import org.eaglei.lexical.lucene.LuceneUtils;
import org.eaglei.model.EIClass;
import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntModel;
import org.eaglei.model.EIURI;
import org.eaglei.model.EagleIOntConstants;
import org.eaglei.model.jena.JenaEIOntModel;
import org.eaglei.search.provider.SearchCountRequest;
import org.eaglei.search.provider.SearchCounts;
import org.eaglei.search.provider.SearchRequest;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.search.provider.SearchProvider;
import org.eaglei.search.provider.SearchProviderUtil;

import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.vocabulary.RDF;

/**
 * SearchProvider that queries a Lucene index populated according to the 
 * schema defined in LuceneSearchProviderIndexer.
 *
 * @author frost
 * @see LuceneSearchProviderIndexer
 */
public final class LuceneSearchProvider implements SearchProvider {

    private static final Log logger = LogFactory.getLog(LuceneSearchProvider.class);

    private final EIOntModel eagleiOntModel;
    private final Directory dir;
    private final Analyzer analyzer;

    // Preferred name properties, in the order returned by the ontology model.
    // Populated once in the constructor and only read afterwards.
    private final List<EIURI> prefLabelProperties = new ArrayList<EIURI>();

    /**
     * Creates a LuceneSearchProvider that executes SearchRequests over the specified Directory using the
     * specified Analyzer. The Directory must be populated using LuceneSearchProviderIndexer.
     *
     * @param eagleiOntModel Ontology model used to resolve classes and labels. It is cast to
     *                       JenaEIOntModel in order to read the preferred label properties.
     * @param dir Directory holding the Lucene index.
     * @param analyzer Analyzer to use for query execution.
     * @throws IOException declared for callers; nothing in this constructor currently opens the index.
     */
    public LuceneSearchProvider(final EIOntModel eagleiOntModel, final Directory dir, final Analyzer analyzer) throws IOException {
        assert eagleiOntModel != null;
        assert dir != null;
        assert analyzer != null;
        this.eagleiOntModel = eagleiOntModel;
        this.dir = dir;
        this.analyzer = analyzer;
        retrieveOntologyMetadata();
    }

    /**
     * No-op: all state is set up in the constructor.
     */
    @Override
    public void init() throws IOException {
        // nothing to initialize
    }

    /*
     * Retrieves various metadata from the eagle-i ontology that is cached as
     * instance vars in this provider and reused on queries.
     */
    private void retrieveOntologyMetadata() {
        // properties used to compute preferred labels
        final List<Property> props = ((JenaEIOntModel) eagleiOntModel).getPrefLabelProperties();
        for (Property prop: props) {
            this.prefLabelProperties.add(EIURI.create(prop.getURI()));
        }
    }

    /*
     * Creates a parser for the given default field. A new parser is created per use
     * because Lucene documents QueryParser as not thread-safe, so instances must not
     * be shared between concurrently executing requests.
     */
    private QueryParser newParser(final String field) {
        return new QueryParser(Version.LUCENE_30, field, this.analyzer);
    }

    /* (non-Javadoc)
     * @see org.eaglei.search.provider.SearchProvider#query(org.eaglei.search.request.SearchRequest)
     */
    public SearchResultSet query(final SearchRequest request) throws IOException {
        return query(request, true);
    }

    /*
     * Version of query that allows the optional creation of search results (for count use case).
     * When createResults is false only the total count and start index of the returned set are populated.
     *
     * TODO get pseudo-relevance ranking working. The intended approach: when the request has a term,
     * first run the request with the type binding stripped (keeping institution, term, max results and
     * start index), then pass that first page of results to createQuery(request, results) so that the
     * result URIs can be added as low-boost RELATED clauses.
     */
    private SearchResultSet query(final SearchRequest request, final boolean createResults) throws IOException {
        assert request != null;
        final Query query;
        try {
            query = createQuery(request);
        } catch (ParseException pe) {
            // keep the ParseException as the cause so the original stack trace is not lost
            throw new IOException(pe.getLocalizedMessage(), pe);
        }
        if (query == null) {
            return new SearchResultSet(request);
        }
        return executeSearch(request, query, createResults);
    }

    /**
     * Computes the number of matches of the wrapped request for each requested type.
     * For every count type a copy of the request is made and its binding is replaced by a
     * binding on that type (or cleared when the type is null).
     *
     * @param request count request holding the base SearchRequest and the types to count.
     * @return counts keyed by type.
     * @throws IOException if the index cannot be searched or the query cannot be parsed.
     */
    @Override
    public SearchCounts count(SearchCountRequest request) throws IOException {
        assert request != null;
        final SearchRequest searchRequest = request.getRequest();
        final SearchCounts counts = new SearchCounts(searchRequest);
        for (EIURI type: request.getCountTypes()) {
            final SearchRequest countRequest = new SearchRequest(searchRequest.toURLParams());
            if (type == null) {
                countRequest.setBinding(null);
            } else {
                countRequest.setBinding(new SearchRequest.TypeBinding(type));
            }
            // false: skip creation of individual results, only the total count is needed
            final SearchResultSet results = query(countRequest, false);
            counts.setClassCount(type, results.getTotalCount());
        }
        return counts;
    }

    /*
     * Runs the query sorted by relevance. When only the total hit count is needed
     * (collectPage == false) a single hit is collected; totalHits is unaffected by
     * the number of hits collected.
     */
    private TopFieldDocs retrieveDocs(final IndexSearcher searcher, final SearchRequest request, final Query query,
            final boolean collectPage)
    throws IOException {
        searcher.setDefaultFieldSortScoring(true, true);

        // always ask for at least one hit so that a zero-sized page request cannot
        // produce an invalid collector size
        final int numHits = collectPage
                ? Math.max(1, request.getStartIndex() + request.getMaxResults())
                : 1;

        // execute the search
        final TopFieldDocs docs = searcher.search(query, null, numHits, Sort.RELEVANCE);
        if (logger.isDebugEnabled()) {
            logger.debug("Found " + docs.totalHits + " matches");
        }
        return docs;
    }

    /*
     * Executes the Lucene query and creates SearchResults from the output docs.
     * The searcher opened here is always closed before returning.
     */
    private SearchResultSet executeSearch(final SearchRequest request, final Query query, final boolean createResults)
    throws IOException {

        final SearchResultSet results = new SearchResultSet(request);

        // TODO would like to create and reuse a single IndexSearcher but it is not seeing index changes on
        // reopen...
        final IndexSearcher searcher = new IndexSearcher(this.dir, true);
        try {
            final TopFieldDocs docs = retrieveDocs(searcher, request, query, createResults);

            results.setTotalCount(docs.totalHits);
            results.setStartIndex(request.getStartIndex());

            if (!createResults) {
                // if not populating individual SearchResults, return
                return results;
            }

            final Highlighter highlighter = new Highlighter(new QueryScorer(query));

            // get page subset
            final int cap = request.getStartIndex() + request.getMaxResults();
            for (int i = request.getStartIndex(); (i < cap) && (i < docs.scoreDocs.length); i++) {
                final ScoreDoc scoreDoc = docs.scoreDocs[i];
                final Document document = searcher.doc(scoreDoc.doc);

                final SearchResult result = createResult(searcher, document);
                if (result == null) {
                    // the reason was already logged by createResult
                    continue;
                }

                // compute the highlight
                final String highlight = computeHighlight(highlighter, request, query, document);
                if (highlight != null) {
                    result.setHighlight(highlight);
                }

                result.setURL(document.get(LuceneSearchProviderIndexer.URI));
                result.setRank(scoreDoc.score);

                // duplicates are reported but still added
                if (results.getResults().contains(result)) {
                    logger.error("Found duplicate result");
                }
                results.getResults().add(result);
            }

            return results;
        } finally {
            searcher.close();
        }
    }

    /*
     * Returns the first stored value of the first preferred label property that has
     * a value in the document, or null if none of them has one.
     */
    private String getPreferredLabel(final Document document) {
        for (EIURI prop: this.prefLabelProperties) {
            final String[] values = document.getValues(prop.toString());
            if (values.length > 0) {
                return values[0];
            }
        }
        return null;
    }

    /*
     * Builds a SearchResult (entity, type, institution and all properties) from an index
     * document. Returns null, after logging an error, when the document has no rdf:type
     * or when its type is not known to the ontology model.
     */
    private SearchResult createResult(final IndexSearcher searcher, final Document document) throws IOException {
        final String resource = document.get(LuceneSearchProviderIndexer.URI);
        final EIEntity resourceEntity = EIEntity.create(EIURI.create(resource), getPreferredLabel(document));

        final String institutionURI = document.get(LuceneSearchProviderIndexer.INSTITUTION_URI);
        final String institutionLabel = document.get(LuceneSearchProviderIndexer.INSTITUTION_LABEL);
        final EIEntity institutionEntity = EIEntity.create(EIURI.create(institutionURI), institutionLabel);

        // TODO this is just getting the first type...
        final String type = document.get(RDF.type.getURI() + LuceneSearchProviderIndexer.OBJECT_URI_POSTFIX);
        if (type == null) {
            logger.error("Null rdf:type for " + resource);
            return null;
        }
        final EIClass eiClass = eagleiOntModel.getClass(EIURI.create(type));
        if (eiClass == null) {
            logger.error("Unable to locate resource class " + type + " for " + resource);
            return null;
        }

        final SearchResult result = new SearchResult(resourceEntity, eiClass.getEntity(), null, institutionEntity);
        addProperties(searcher, document, result);
        return result;
    }

    /*
     * Copies all data type and object properties of the document onto the result.
     * Tokenized fields are treated as data type properties; all other property fields
     * are treated as object properties whose value is a URI. Lab properties additionally
     * set the lab of the result.
     */
    private void addProperties(final IndexSearcher searcher, final Document document, final SearchResult result)
    throws IOException {
        for (Fieldable f: document.getFields()) {
            final String name = f.name();
            final String strValue = f.stringValue();
            // check name against the known fields
            if (strValue == null || !isPropertyField(name)) {
                continue;
            }
            if (f.isTokenized()) {
                result.addDataTypeProperty(EIURI.create(name), strValue);
                if (LuceneSearchProviderIndexer.INDEX_OBJECT_PROP_LABELS && isLabProperty(name)) {
                    // the tokenized value is the lab label; its URI is in the sibling field
                    final String labURI = document.get(name + LuceneSearchProviderIndexer.OBJECT_URI_POSTFIX);
                    if (labURI != null) {
                        result.setLab(EIEntity.create(EIURI.create(labURI), strValue));
                    }
                }
            } else {
                final EIURI propURI = EIURI.create(LuceneSearchProviderIndexer.stripObjectURIPostfix(name));
                result.addObjectProperty(propURI, EIURI.create(strValue));
                if (!LuceneSearchProviderIndexer.INDEX_OBJECT_PROP_LABELS && isLabProperty(propURI.toString())) {
                    // labels are not in this document, look the lab up in the index
                    addLab(searcher, result, strValue);
                }
            }
        }
    }

    /*
     * True if the property URI is one of the properties that link a resource to its lab.
     */
    private static boolean isLabProperty(final String uri) {
        return uri.equals(EagleIOntConstants.LOCATED_IN_URI)
            || uri.equals(EagleIOntConstants.USED_BY_URI)
            || uri.equals(EagleIOntConstants.SERVICE_PROVIDED_BY_URI);
    }

    /*
     * Adds a lab EIEntity to the SearchResult for the specified URI. Retrieves the label via a search
     * against the index, using the searcher of the enclosing search so that no additional
     * index reader is opened. Does nothing if the lab URI is not found in the index.
     */
    private void addLab(final IndexSearcher searcher, final SearchResult result, final String labURI)
    throws IOException {
        final PhraseQuery resourceQuery = new PhraseQuery();
        resourceQuery.add(new Term(LuceneSearchProviderIndexer.URI, labURI));
        final BooleanQuery query = new BooleanQuery();
        query.add(resourceQuery, BooleanClause.Occur.MUST);

        final TopFieldDocs docs = searcher.search(query, null, 1, Sort.RELEVANCE);
        if (docs.scoreDocs.length > 0) {
            final Document document = searcher.doc(docs.scoreDocs[0].doc);
            final String resource = document.get(LuceneSearchProviderIndexer.URI);
            result.setLab(EIEntity.create(EIURI.create(resource), getPreferredLabel(document)));
        }
    }

    /*
     * Checks if the Lucene document field represents a data type or object property,
     * i.e. is not one of the fixed schema fields of the index.
     */
    protected static boolean isPropertyField(final String fieldName) {
        // NOTE(review): PREF_TEXT and INDIRECT_TEXT are excluded here because
        // computeHighlight reads them as stored fields; they are search fields like
        // TEXT, not ontology properties. Confirm against LuceneSearchProviderIndexer.
        return !(fieldName.equals(LuceneSearchProviderIndexer.URI)
              || fieldName.equals(LuceneSearchProviderIndexer.TEXT)
              || fieldName.equals(LuceneSearchProviderIndexer.PREF_TEXT)
              || fieldName.equals(LuceneSearchProviderIndexer.INDIRECT_TEXT)
              || fieldName.equals(LuceneSearchProviderIndexer.INFERRED_TYPE)
              || fieldName.equals(LuceneSearchProviderIndexer.INSTITUTION_LABEL)
              || fieldName.equals(LuceneSearchProviderIndexer.INSTITUTION_URI)
              || fieldName.equals(LuceneSearchProviderIndexer.RELATED)
              || fieldName.equals(LuceneSearchProviderIndexer.RESOURCE_FLAG));
    }

    /*
     * Computes a highlight fragment for a free-text term request. The preferred text
     * field is tried first, then the text field, then the indirect text field.
     * Returns null if the request has no free-text query or no fragment was found.
     */
    private String computeHighlight(final Highlighter highlighter, final SearchRequest request, final Query query, final Document document) {
        final SearchRequest.Term term = request.getTerm();
        if (term == null || term.getQuery() == null) {
            // TODO how to handle highlights for type or institution matches?
            return null;
        }
        final String[] fieldsInPriorityOrder = {
            LuceneSearchProviderIndexer.PREF_TEXT,
            LuceneSearchProviderIndexer.TEXT,
            LuceneSearchProviderIndexer.INDIRECT_TEXT
        };
        for (String fieldName: fieldsInPriorityOrder) {
            final String highlight = getHighlightForField(highlighter, fieldName, document);
            if (highlight != null) {
                return highlight;
            }
        }
        return null;
    }

    /*
     * Returns the best highlight fragment over the stored values of the given field,
     * or null if there is none. Highlighting failures are logged and treated as
     * "no highlight" so that they never fail the search itself.
     */
    private String getHighlightForField(final Highlighter highlighter, final String fieldName, final Document document) {
        final Field[] texts = document.getFields(fieldName);
        for (Field field: texts) {
            final String text = field.stringValue();
            if (text == null) {
                // field without a string value, nothing to highlight
                continue;
            }
            try {
                final String highlight = highlighter.getBestFragment(this.analyzer, fieldName, text);
                if (highlight != null) {
                    return highlight;
                }
            } catch (InvalidTokenOffsetsException itoe) {
                logger.error(itoe);
            } catch (IOException ioe) {
                logger.error(ioe);
            }
        }
        return null;
    }

    /*
     * Creates a multi-field query from the SearchRequest
     */
    private Query createQuery(final SearchRequest request) throws ParseException {
        return createQuery(request, null);
    }

    /*
     * Creates a multi-field query from the SearchRequest.
     *
     * The top-level query requires (MUST) the resource flag, the institution (if any),
     * the term sub-query (if a term is given), the request type (if any) and every
     * property binding. Within the term sub-query all clauses are optional (SHOULD).
     *
     * The results parameter is reserved for pseudo-relevance feedback and is currently unused.
     * TODO pseudo-relevance feedback: for each result add a low-boost (LOW_BOOST) PhraseQuery on
     * RELATED with the result entity URI as a SHOULD clause of the term query, and possibly
     * low-boost text / pref_text queries on the result label.
     */
    private Query createQuery(final SearchRequest request, final SearchResultSet results) throws ParseException {

        final SearchRequest.Term term = request.getTerm();
        final EIURI institution = request.getInstitution();
        final BooleanQuery query = new BooleanQuery();

        // add the resource flag constraint
        final PhraseQuery resourceFlagQuery = new PhraseQuery();
        resourceFlagQuery.add(new Term(LuceneSearchProviderIndexer.RESOURCE_FLAG, Boolean.TRUE.toString()));
        query.add(resourceFlagQuery, BooleanClause.Occur.MUST);

        // add the institution constraint
        if (institution != null) {
            final PhraseQuery institutionQuery = new PhraseQuery();
            institutionQuery.add(new Term(LuceneSearchProviderIndexer.INSTITUTION_URI, institution.toString()));
            query.add(institutionQuery, BooleanClause.Occur.MUST);
        }

        // was a term specified?
        if (term != null) {
            final BooleanQuery termQuery = new BooleanQuery();
            query.add(termQuery, BooleanClause.Occur.MUST);

            // parsers are local to this call, see newParser
            final QueryParser textParser = newParser(LuceneSearchProviderIndexer.TEXT);
            final QueryParser prefTextParser = newParser(LuceneSearchProviderIndexer.PREF_TEXT);

            // add the term (i.e. free-text query) constraint
            if (term.getQuery() != null) {
                final QueryParser indirectTextParser = newParser(LuceneSearchProviderIndexer.INDIRECT_TEXT);
                // search on "text"
                termQuery.add(LuceneUtils.escapeIfInvalid(textParser, term.getQuery()), BooleanClause.Occur.SHOULD);
                // search on "pref_text"
                termQuery.add(LuceneUtils.escapeIfInvalid(prefTextParser, term.getQuery()), BooleanClause.Occur.SHOULD);
                // search on "indirect_text"
                termQuery.add(LuceneUtils.escapeIfInvalid(indirectTextParser, term.getQuery()), BooleanClause.Occur.SHOULD);
            }

            // add the entity query components
            if (term.getURI() != null) {
                final EIURI uri = term.getURI();

                // if an ontology class, create a type constraint and constraints for
                // labels
                final EIClass eiClass = this.eagleiOntModel.getClass(uri);
                if (eiClass != null) {
                    // add a type query
                    final PhraseQuery typeQuery = new PhraseQuery();
                    typeQuery.add(new Term(LuceneSearchProviderIndexer.INFERRED_TYPE, uri.toString()));
                    termQuery.add(typeQuery, BooleanClause.Occur.SHOULD);
                    // search for all labels of the class, joined by single spaces
                    final StringBuilder sb = new StringBuilder();
                    for (String label: this.eagleiOntModel.getLabels(uri)) {
                        if (sb.length() > 0) {
                            sb.append(' ');
                        }
                        sb.append(label);
                    }
                    final String labels = sb.toString();
                    termQuery.add(LuceneUtils.escapeIfInvalid(textParser, labels), BooleanClause.Occur.SHOULD);
                    termQuery.add(LuceneUtils.escapeIfInvalid(prefTextParser, labels), BooleanClause.Occur.SHOULD);
                }
                // add prop query
                final PhraseQuery propQuery = new PhraseQuery();
                propQuery.add(new Term(LuceneSearchProviderIndexer.RELATED, uri.toString()));
                termQuery.add(propQuery, BooleanClause.Occur.SHOULD);
            }
        }

        // add the type constraint
        final EIURI typeURI = SearchProviderUtil.getType(request);
        if (typeURI != null) {
            final PhraseQuery typeQuery = new PhraseQuery();
            typeQuery.add(new Term(LuceneSearchProviderIndexer.INFERRED_TYPE, typeURI.toString()));
            query.add(typeQuery, BooleanClause.Occur.MUST);
        }

        // add any property bindings
        final SearchRequest.TypeBinding binding = request.getBinding();
        if (binding != null) {
            for (EIURI prop: binding.getDataTypeProperties()) {
                final String constraint = binding.getDataTypeProperty(prop);
                final Query propQuery = newParser(prop.toString()).parse(constraint);
                query.add(propQuery, BooleanClause.Occur.MUST);
            }
            for (EIURI prop: binding.getObjectProperties()) {
                final EIURI constraint = binding.getObjectProperty(prop);
                final PhraseQuery propQuery = new PhraseQuery();
                propQuery.add(new Term(prop.toString() + LuceneSearchProviderIndexer.OBJECT_URI_POSTFIX, constraint.toString()));
                query.add(propQuery, BooleanClause.Occur.MUST);
            }
        }

        // The query can never be empty here: the resource flag clause is always present,
        // so no match-all fallback is needed.

        if (logger.isDebugEnabled()) {
            logger.debug("Query: " + query.toString());
        }

        return query;
    }

}
