package org.eaglei.search.provider.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

import org.eaglei.model.EIClass;
import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntModel;
import org.eaglei.model.EIURI;
import org.eaglei.model.EagleIOntConstants;
import org.eaglei.model.jena.JenaEIOntModel;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.datagen.AbstractGenerator;

import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.vocabulary.RDF;

/**
 * Creates a Lucene index for eagle-i RDF resource data according to the schema defined in LuceneSearchIndexSchema.
 * @author frost
 */
public final class LuceneSearchProviderIndexer extends LuceneSearchIndexSchema {

    /** Commons-logging logger for this class. */
    private static final Log logger = LogFactory.getLog(LuceneSearchProviderIndexer.class);
    /** Whether debug logging was enabled when this class was initialized; not read anywhere in this class. */
    private static final boolean DEBUG = logger.isDebugEnabled();

    /**
     * Cache of the URIs of the preferred label properties. Filled once by
     * retrieveOntologyMetadata() and consulted by indexSearchResult() to decide
     * which datatype property values are also added to the pref_text field.
     */
    private List<EIURI> prefLabelProperties = new ArrayList<EIURI>();
    /**
     * Reference to the in-memory model of the eagle-i ontology. Used to look up
     * classes and preferred labels while indexing.
     */
    private final EIOntModel eagleiOntModel;
    /**
     * Handle to the index writer. Created in the constructor and used for every
     * add, update, delete, optimize and commit issued by this class.
     */
    private final IndexWriter iwriter;

    /**
     * Creates the LuceneSearchProviderIndexer.
     * 
     * @param eagleiOntModel Reference to the eagle-i ontology. It is cast to JenaEIOntModel while
     *                       the preferred label properties are being cached.
     * @param analyzer The Lucene analyzer that is used for indexing and searching.
     * @param directory The directory that holds the index.
     * 
     * @throws IOException Thrown if the index writer cannot be opened.
     */
    public LuceneSearchProviderIndexer(final EIOntModel eagleiOntModel, final Analyzer analyzer, final Directory directory) throws IOException {
        this.eagleiOntModel = eagleiOntModel;
        // Read the ontology metadata before opening the writer: if this step fails (for example
        // because the model is not a JenaEIOntModel) no IndexWriter has been created yet, so no
        // writer, and no write lock on the directory, is left behind.
        retrieveOntologyMetadata();
        this.iwriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    }

    /*
     * Caches ontology-derived metadata in instance variables so it can be reused
     * while indexing. Currently this is the list of preferred label property URIs.
     */
    private void retrieveOntologyMetadata() {
        // the preferred label properties are only exposed by the Jena-backed model
        final JenaEIOntModel jenaModel = (JenaEIOntModel) eagleiOntModel;
        for (final Property labelProperty : jenaModel.getPrefLabelProperties()) {
            final EIURI labelPropertyURI = EIURI.create(labelProperty.getURI());
            prefLabelProperties.add(labelPropertyURI);
        }
    }
    
    /**
     * Exposes the IndexWriter used by this indexer.
     * @return The IndexWriter created in the constructor.
     */
    public IndexWriter getIndexWriter() {
        return iwriter;
    }

    /**
     * Optimizes the index and then commits any pending changes.
     * @throws IOException Thrown if optimizing or committing fails.
     */
    public void commit() throws IOException {
        final IndexWriter writer = this.iwriter;
        writer.optimize();
        writer.commit();
    }
    
    /**
     * Gets the EIURIs of all documents that reference the specified resource via an 
     * object property, i.e. all documents whose RELATED field holds the given URI.
     * <p>
     * The search runs against a searcher opened on the writer's directory, so it sees
     * the index as of the last commit.
     * 
     * @param uri URI of the resource whose referencing documents are being retrieved.
     * @return List of URIs of referencing documents; empty if there are none.
     * @throws IOException Thrown if an error is encountered executing the query
     */
    public List<EIURI> getRelatedDocuments(final EIURI uri) throws IOException {

        // create the Lucene query 
        final BooleanQuery query = new BooleanQuery();
        final PhraseQuery propQuery = new PhraseQuery();
        propQuery.add(new Term(RELATED, uri.toString()));
        query.add(propQuery, BooleanClause.Occur.MUST);

        // collector that grabs the URIs of all retrieved Documents
        final List<EIURI> uris = new ArrayList<EIURI>();
        final Collector collector = new Collector() {
            private IndexReader reader;

            @Override
            public void setNextReader(IndexReader reader, int docbase) throws IOException {
                // docbase is deliberately not kept: it is only needed to translate ids for
                // the top-level searcher, and documents are loaded from the segment reader
                this.reader = reader;
            }

            @Override
            public void collect(final int doc) throws IOException {
                // doc is relative to the reader passed to setNextReader, so it must be used
                // as-is; adding docbase would address the wrong document (or run past the
                // end of the segment) as soon as the index has more than one segment
                final Document document = this.reader.document(doc);
                uris.add(EIURI.create(document.get(URI)));
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }

            @Override
            public void setScorer(Scorer scorer) throws IOException {
                // no-op, scores are not needed
            }
        };

        // execute the search; the searcher owns the reader it opened on the directory,
        // so it has to be closed here to release it
        final IndexSearcher searcher = new IndexSearcher(this.iwriter.getDirectory(), true);
        try {
            searcher.search(query, null, collector);
        } finally {
            searcher.close();
        }
        return uris;
    }
    
    /**
     * Strips the trailing object URI postfix (OBJECT_URI_POSTFIX) from a document field name.
     * @param fieldWithPostfix Field name, normally ending with the postfix
     * @return The field name without the postfix; the name is returned unchanged if it
     *         does not end with the postfix
     */
    protected static String stripObjectURIPostfix(final String fieldWithPostfix) {
        assert fieldWithPostfix != null;
        if (fieldWithPostfix.endsWith(OBJECT_URI_POSTFIX)) {
            final int baseLength = fieldWithPostfix.length() - OBJECT_URI_POSTFIX.length();
            return fieldWithPostfix.substring(0, baseLength);
        }
        return fieldWithPostfix;
    }
    
    /**
     * Updates the document with the specified URI to add object property labels.
     * <p>
     * The stored document is loaded, its text field is rebuilt from scratch, and for every
     * object property value a label is resolved, either from the pref_text fields of the
     * referenced document in the index or, failing that, from the ontology class with that
     * URI. Each label is added to the text field and to a field named after the property
     * URI. Finally the institution label and the type labels are added back to the text
     * field and the document is written back with updateDocument.
     * <p>
     * All values of a multi-valued property are processed, not just one per property.
     * 
     * @param uri URI of document to update with fields for the labels of resources connected via object properties.
     * @throws IOException Thrown if an error is encountered.
     */
    public void addIndirectProperties(final EIURI uri) throws IOException {
        final Document document = getDocumentByURI(uri);
        if (document == null) {
            logger.debug("Failed to find " + uri + " in index");
            return;
        }

        // NOTE(review): Lucene does not restore field boosts on documents loaded from the
        // index, so fields that are carried over unchanged (e.g. pref_text) are presumably
        // re-indexed with the default boost - confirm whether that matters for ranking.

        // remove the text field, this stores the indirect properties; it also stores all the direct
        // datatype properties, the institution name and the names of the types so need to add those back
        document.removeFields(TEXT);

        // Collect the property fields. Untokenized fields hold object property value URIs
        // (keyed here by the property URI without the postfix), tokenized fields hold text.
        // A property may occur several times in a document, so every value is kept.
        final Map<String, List<String>> objectPropURIToValues = new HashMap<String, List<String>>();
        final Map<String, List<String>> datatypePropURIToValues = new HashMap<String, List<String>>();
        for (Fieldable f : document.getFields()) {
            final String name = f.name();
            final String strValue = f.stringValue();
            // check name against the known fields
            if (strValue == null || !LuceneSearchProvider.isPropertyField(name)) {
                continue;
            }
            final boolean isObjectProp = !f.isTokenized();
            final Map<String, List<String>> target = isObjectProp ? objectPropURIToValues : datatypePropURIToValues;
            final String key = isObjectProp ? stripObjectURIPostfix(name) : name;
            List<String> values = target.get(key);
            if (values == null) {
                values = new ArrayList<String>();
                target.put(key, values);
            }
            values.add(strValue);
        }

        // add all of the datatype props back to the text field
        for (Map.Entry<String, List<String>> datatypeProp : datatypePropURIToValues.entrySet()) {
            if (objectPropURIToValues.containsKey(datatypeProp.getKey())) {
                // Tokenized fields named after an object property are labels written by an
                // earlier indexing pass (this method or indexTypes), not datatype values.
                // They are removed and regenerated below, so copying them into the text
                // field here would only preserve stale or duplicate labels.
                continue;
            }
            for (String value : datatypeProp.getValue()) {
                addToText(document, value);
            }
        }

        // replace the label fields of every object property
        for (Map.Entry<String, List<String>> objectProp : objectPropURIToValues.entrySet()) {
            final String propURI = objectProp.getKey();
            // remove the labels left over from a previous pass
            document.removeFields(propURI);

            for (String propValue : objectProp.getValue()) {
                final EIURI valueURI = EIURI.create(propValue);
                // find the document for the object prop resource in the index
                final Document objectDoc = getDocumentByURI(valueURI);
                if (objectDoc != null) {
                    for (Fieldable prefTextField : objectDoc.getFieldables(PREF_TEXT)) {
                        addIndirectLabel(document, propURI, prefTextField.stringValue());
                    }
                } else {
                    // not an indexed resource, look it up as an ontology class
                    final EIClass eiClass = eagleiOntModel.getClass(valueURI);
                    if (eiClass != null) {
                        addIndirectLabel(document, propURI, eiClass.getEntity().getLabel());
                    }
                    // otherwise there is neither a document nor a class for the value: skip it
                }
            }
        }

        // add the institution label back to text (Field does not accept a null value)
        final String institutionLabel = document.get(INSTITUTION_LABEL);
        if (institutionLabel != null) {
            addToText(document, institutionLabel);
        }

        // add the types back to the text
        // NOTE(review): indexTypes also adds the direct type label to pref_text on every
        // call, so that label accumulates in pref_text each time this method runs - confirm
        // whether that is intended.
        final String typeURI = document.get(RDF.type.getURI() + OBJECT_URI_POSTFIX);
        if (typeURI != null) {
            indexTypes(document, EIURI.create(typeURI), true, false);
        }

        // update the document
        iwriter.updateDocument(new Term(URI, uri.toString()), document);
    }

    /*
     * Adds one resolved object property label to the document: once to the text field and
     * once, with LOW_BOOST, to the field named after the property URI. A null label is ignored.
     */
    private static void addIndirectLabel(final Document document, final String propURI, final String label) {
        if (label == null) {
            return;
        }
        // add the label to the text field
        addToText(document, label);

        // add the label to a prop-specific field
        final Field objectPropLabel = new Field(propURI, label, Field.Store.YES, Field.Index.ANALYZED);
        objectPropLabel.setBoost(LOW_BOOST);
        document.add(objectPropLabel);
    }
    
    /*
     * Retrieves the relevant document by URI. The lookup uses a searcher opened on the
     * writer's directory, so it sees the index as of the last commit.
     * @param uri URI of resource to retrieve.
     * @return Associated Lucene Document, or null if no document has that URI.
     */
    private Document getDocumentByURI(final EIURI uri) throws IOException {
        // create a query 
        final PhraseQuery propQuery = new PhraseQuery();
        propQuery.add(new Term(URI, uri.toString()));

        final IndexSearcher searcher = new IndexSearcher(this.iwriter.getDirectory(), true);
        try {
            final TopDocs docs = searcher.search(propQuery, 1);
            if (docs.totalHits == 0) {
                return null;
            }
            // the stored fields are loaded into the Document here, so it stays usable
            // after the searcher has been closed
            return searcher.doc(docs.scoreDocs[0].doc);
        } finally {
            // the searcher owns the reader it opened on the directory; release it
            searcher.close();
        }
    }
    
    /*
     * Deletes every document whose URI field matches the specified URI. Nothing is
     * deleted if there is no such document.
     * @param uri URI of resource to remove from the index.
     */
    private void deleteDocumentByURI(final EIURI uri) throws IOException {
        final Term uriTerm = new Term(URI, uri.toString());
        final PhraseQuery matchByURI = new PhraseQuery();
        matchByURI.add(uriTerm);
        iwriter.deleteDocuments(matchByURI);
    }
    
    /**
     * Checks if this SearchResult represents a deleted resource. The /harvest API returns a special
     * resource representation for resources that have been deleted since the specified timestamp:
     * its type URI equals EagleIOntConstants.IS_DELETED.
     * @param result SearchResult to inspect.
     * @return True if it represents a deleted resource.
     */
    protected static boolean isDeletedSearchResult(final SearchResult result) {
        final String typeURI = result.getType().getURI().toString();
        return typeURI.equals(EagleIOntConstants.IS_DELETED);
    }
    
    /**
     * Indexes the specified SearchResult.
     * <p>
     * Any document already indexed under the result's URI is deleted first. Nothing is added
     * back if the result represents a deleted resource, or if its type is not a class known
     * to the eagle-i ontology (the latter is logged as an error).
     * 
     * @param result SearchResult
     * @param materializeTypes True if the types should be materialized.
     * @throws IOException Thrown if an error is encountered indexing the result
     */
    public void indexSearchResult(final SearchResult result, final boolean materializeTypes) throws IOException {

        final EIURI uri = result.getEntity().getURI();

        // drop any previously indexed version of this resource
        deleteDocumentByURI(uri);

        // if the type of the result is "isDeleted", don't add again
        if (isDeletedSearchResult(result)) {
            return;
        }

        // is this an eagle-i resource? check before building the document so that
        // no work is done for results that are going to be skipped
        final EIEntity typeEntity = result.getType();
        final EIClass typeClass = eagleiOntModel.getClass(typeEntity.getURI());
        if (typeClass == null) {
            // report the offending type; typeClass itself is always null on this branch
            logger.error("Resource " + result.getEntity() + " with type " + typeEntity + " is not a valid eagle-i class");
            return;
        }

        // create a Lucene document for the resource
        final Document doc = new Document();

        // index the URI
        doc.add(new Field(URI, uri.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        // add the institution URI and label
        final EIEntity institutionEntity = result.getInstitution();
        doc.add(new Field(INSTITUTION_URI, institutionEntity.getURI().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(INSTITUTION_LABEL, institutionEntity.getLabel(), Field.Store.YES, Field.Index.ANALYZED));

        // index the institution label
        addToText(doc, institutionEntity.getLabel());

        doc.add(new Field(RESOURCE_FLAG, String.valueOf(typeClass.isEagleIResource()), Field.Store.YES, Field.Index.NOT_ANALYZED));

        // index the asserted type
        // TODO handle multiple asserted types 
        doc.add(new Field(RDF.type.getURI() + OBJECT_URI_POSTFIX, typeEntity.getURI().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        // add the types (potentially with materialization)
        // TODO retrieve the inferred types from the repository so we don't need to materialize here
        indexTypes(doc, typeEntity.getURI(), materializeTypes, true);

        // index each of the data type properties
        for (EIURI propURI : result.getDataTypeProperties()) {
            // values of preferred label properties also go into the pref_text field
            final boolean addToPrefText = prefLabelProperties.contains(propURI);

            final Set<String> values = result.getDataTypeProperty(propURI);
            for (String value : values) {
                // index the property value using the URI
                doc.add(new Field(propURI.toString(), value, Field.Store.YES, Field.Index.ANALYZED));

                if (addToPrefText) {
                    addToPrefText(doc, value);
                }
                // add literal props to the text field
                addToText(doc, value);
            }
        }

        // index each of the object properties
        for (EIURI propURI : result.getObjectProperties()) {
            final Set<EIURI> values = result.getObjectProperty(propURI);
            for (EIURI value : values) {
                // index the property value using the URI
                doc.add(new Field(propURI.toString() + OBJECT_URI_POSTFIX, value.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                // add it to the related field
                // TODO boost high-value properties
                doc.add(new Field(RELATED, value.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
        }

        // add the document to the index
        this.iwriter.addDocument(doc);
    }
    
    /*
     * Adds type information for the given type URI to the document (potentially with
     * materialization). For every class returned by AbstractGenerator.getTypes:
     *  - the class URI goes into the INFERRED_TYPE field when indexURIs is set;
     *  - if the class has a preferred label, the label goes into pref_text for the direct
     *    type, into the rdf:type field for any other type, and into the text field in
     *    both cases.
     */
    private void indexTypes(final Document doc, final EIURI typeURI, final boolean materializeTypes, final boolean indexURIs) {
        for (final EIClass candidate : AbstractGenerator.getTypes(eagleiOntModel, typeURI, materializeTypes)) {
            final EIURI classURI = candidate.getEntity().getURI();
            final String uri = classURI.toString();

            // index the inferred URIs
            if (indexURIs) {
                doc.add(new Field(INFERRED_TYPE, uri, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }

            final String label = eagleiOntModel.getPreferredLabel(classURI);
            if (label == null) {
                // nothing textual to index for this class
                continue;
            }

            if (!uri.equals(typeURI.toString())) {
                // not the direct type: store label using rdf:type (direct is added using
                // standard object prop logic)
                doc.add(new Field(RDF.type.getURI(), label, Field.Store.YES, Field.Index.ANALYZED));
            } else {
                // direct type: the label belongs in pref_text
                addToPrefText(doc, label);
            }

            // add both direct and inferred to text
            addToText(doc, label);
        }
    }
    
    /*
     * Appends a stored, analyzed TEXT field holding the given value to the document,
     * using STANDARD_BOOST.
     */
    private static void addToText(final Document doc, final String value) {
        final Field textField = new Field(TEXT, value, Field.Store.YES, Field.Index.ANALYZED);
        textField.setBoost(STANDARD_BOOST);
        doc.add(textField);
    }

    /*
     * Appends a stored, analyzed PREF_TEXT field holding the given value to the document,
     * using HIGH_BOOST.
     */
    private static void addToPrefText(final Document doc, final String value) {
        final Field prefTextField = new Field(PREF_TEXT, value, Field.Store.YES, Field.Index.ANALYZED);
        prefTextField.setBoost(HIGH_BOOST);
        doc.add(prefTextField);
    }
}
