package org.eaglei.search.provider.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.Directory;

import org.eaglei.model.EIClass;
import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntModel;
import org.eaglei.model.EIURI;
import org.eaglei.model.EagleIOntConstants;
import org.eaglei.model.jena.JenaEIOntModel;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.datagen.AbstractGenerator;

import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.vocabulary.RDF;

/**
 * Creates a Lucene index for eagle-i resource data.
 * A Lucene document is created for each resource with the following fields:
 * <ul>
 * <li>uri: The unique URI of the resource. Stored and indexed but not analyzed.
 * <li>&lt;property_uri&gt;: The value for each object and datatype property is stored in a multi-valued
 *                     field using the property URI. Stored and indexed. Datatype properties are analyzed.
 *                     Object properties are not analyzed.
 * <li>pref_text: Multi-valued field that holds the lexical form of all preferred datatype properties
 *          and the preferred label of the asserted type. A boost is given to this field.
 *          Stored, indexed and analyzed.
 * <li>text: Multi-valued field that holds the lexical form of non-preferred datatype properties,
 *          the institution label and the preferred labels of the remaining types.
 *          Stored, indexed and analyzed.
 * <li>resourceFlag: Field that holds a boolean value indicating whether the resource is an eagle-i resource
 *           (i.e. subclass of a top-level resource) or an instance of a non-resource class (e.g. technique).
 *           Stored and indexed but not analyzed.
 * <li>inferredType: Multi-valued field that holds the URIs of all inferred classes.
 *           Stored and indexed but not analyzed.
 * <li>related: Multi-valued field that holds the URIs of all object property values.
 *              Stored and indexed but not analyzed.
 * <li>institution_uri: Holds the URI of the institution. Stored and indexed but not analyzed.
 * <li>institution_label: Holds the name of the institution. Stored, indexed and analyzed.
 * </ul>
 *
 * @author frost
 */
public class LuceneSearchProviderIndexer {

    private static final Log logger = LogFactory.getLog(LuceneSearchProviderIndexer.class);

    public final static String URI = "uri";
    public final static String PREF_TEXT = "pref_text";
    public final static String TEXT = "text";
    public final static String RESOURCE_FLAG = "resourceFlag";
    public final static String INFERRED_TYPE = "inferredType";
    public final static String RELATED = "related";
    public final static String INSTITUTION_URI = "institution_uri";
    public final static String INSTITUTION_LABEL = "institution_label";

    public final static float LOW_BOOST = 0.5f;
    public final static float STANDARD_BOOST = 1.0f;
    public final static float MEDIUM_BOOST = 2.0f;
    public final static float HIGH_BOOST = 5.0f;

    // properties that should not be indexed
    // TODO get this from the ont model via annotations
    private final static String[] propsToIgnore =
    {"http://purl.obolibrary.org/obo/IAO_0000232",
     "http://purl.obolibrary.org/obo/ERO_0000029"};

    // Preferred name properties
    private final List<EIURI> prefLabelProperties = new ArrayList<EIURI>();

    private final EIOntModel eagleiOntModel;
    private final IndexWriter iwriter;

    /**
     * Creates an indexer that writes to the given directory.
     *
     * @param eagleiOntModel eagle-i ontology model; it is cast to {@link JenaEIOntModel} internally
     * @param analyzer analyzer handed to the underlying IndexWriter
     * @param directory directory the index is written to
     * @throws IOException Thrown if the IndexWriter cannot be created
     */
    public LuceneSearchProviderIndexer(final EIOntModel eagleiOntModel, final Analyzer analyzer, final Directory directory) throws IOException {
        this.eagleiOntModel = eagleiOntModel;
        // Read the ontology metadata before opening the writer: if this step fails
        // (e.g. on the cast to JenaEIOntModel) no IndexWriter has been opened yet,
        // so none is left behind without a reference through which to close it.
        retrieveOntologyMetadata();
        this.iwriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
    }

    /*
     * Retrieves various metadata from the eagle-i ontology that is cached as
     * instance vars in this indexer and reused for every indexed result.
     */
    private void retrieveOntologyMetadata() {
        // properties used to compute preferred labels
        final List<Property> props = ((JenaEIOntModel) eagleiOntModel).getPrefLabelProperties();
        for (Property prop : props) {
            this.prefLabelProperties.add(EIURI.create(prop.getURI()));
        }
    }

    /**
     * Retrieves the IndexWriter used by this indexer.
     * @return the underlying IndexWriter
     */
    public IndexWriter getIndexWriter() {
        return this.iwriter;
    }

    /**
     * Optimizes the index and then commits any pending changes.
     * @throws IOException Thrown if the optimize or commit fails
     */
    public void commit() throws IOException {
        iwriter.optimize();
        iwriter.commit();
    }

    /**
     * Indexes the specified SearchResult. Any document previously indexed under the
     * same URI is deleted first; if the type of the result is the "isDeleted" type,
     * nothing is added back.
     *
     * @param result SearchResult
     * @param materializeTypes True if the types should be materialized.
     * @throws IOException Thrown if an error is encountered indexing the result
     */
    // TODO change to index EIInstances
    public void indexSearchResult(final SearchResult result, final boolean materializeTypes) throws IOException {

        final EIURI uri = result.getEntity().getURI();

        // if there is a document, remove from index; the URI field is indexed
        // NOT_ANALYZED, so an exact term delete matches it
        this.iwriter.deleteDocuments(new Term(URI, uri.toString()));

        // if the type of the result is "isDeleted", don't add again
        final EIEntity typeEntity = result.getType();
        if (typeEntity.getURI().toString().equals(EagleIOntConstants.IS_DELETED)) {
            return;
        }

        // create a Lucene document for the resource
        final Document doc = new Document();

        // index the URI
        doc.add(new Field(URI, uri.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        addInstitutionFields(doc, result.getInstitution());

        // TODO explicitly index the lab?

        addDataTypeProperties(doc, result);
        addObjectProperties(doc, result);
        addTypeFields(doc, typeEntity, materializeTypes);

        // add the document to the index
        this.iwriter.addDocument(doc);
    }

    /*
     * Adds the institution URI and label fields, and also adds the label to the text field.
     */
    private void addInstitutionFields(final Document doc, final EIEntity institutionEntity) {
        final String institutionLabel = institutionEntity.getLabel();
        doc.add(new Field(INSTITUTION_URI, institutionEntity.getURI().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(INSTITUTION_LABEL, institutionLabel, Field.Store.YES, Field.Index.ANALYZED));

        // index the institution label
        doc.add(new Field(TEXT, institutionLabel, Field.Store.YES, Field.Index.ANALYZED));
    }

    /*
     * Adds every datatype property value twice: once under the property URI and once
     * under pref_text (preferred label properties, high boost) or text (everything else).
     */
    private void addDataTypeProperties(final Document doc, final SearchResult result) {
        for (EIURI propURI : result.getDataTypeProperties()) {
            if (shouldIgnore(propURI)) {
                continue;
            }

            // preferred label properties go to the pref_text field with a high boost
            final boolean preferred = prefLabelProperties.contains(propURI);
            final String fieldName = preferred ? PREF_TEXT : TEXT;
            final float boost = preferred ? HIGH_BOOST : STANDARD_BOOST;

            final Set<String> values = result.getDataTypeProperty(propURI);
            for (String value : values) {
                // index the property value using the URI
                doc.add(new Field(propURI.toString(), value, Field.Store.YES, Field.Index.ANALYZED));

                // add literal props to the text field
                final Field field = new Field(fieldName, value, Field.Store.YES, Field.Index.ANALYZED);
                field.setBoost(boost);
                doc.add(field);
            }
        }
    }

    /*
     * Adds every object property value twice: once under the property URI and once
     * under the related field. Neither is analyzed.
     */
    private void addObjectProperties(final Document doc, final SearchResult result) {
        for (EIURI propURI : result.getObjectProperties()) {
            if (shouldIgnore(propURI)) {
                continue;
            }
            final Set<EIURI> values = result.getObjectProperty(propURI);
            for (EIURI value : values) {
                final String valueURI = value.toString();
                // index the property value using the URI
                doc.add(new Field(propURI.toString(), valueURI, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // add it to the related field
                // TODO boost high-value properties
                doc.add(new Field(RELATED, valueURI, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
        }
    }

    /*
     * Adds the resource flag, the asserted type and the inferred types (with their labels).
     */
    private void addTypeFields(final Document doc, final EIEntity typeEntity, final boolean materializeTypes) {
        final String assertedTypeURI = typeEntity.getURI().toString();

        // is this an eagle-i resource?
        final EIClass typeClass = eagleiOntModel.getClass(typeEntity.getURI());
        final boolean isResource = typeClass.isEagleIResource();
        doc.add(new Field(RESOURCE_FLAG, String.valueOf(isResource), Field.Store.YES, Field.Index.NOT_ANALYZED));

        // index the type
        // TODO handle multiple asserted types
        doc.add(new Field(RDF.type.getURI(), assertedTypeURI, Field.Store.YES, Field.Index.NOT_ANALYZED));

        // TODO There actually is a spring bean for the OntModel that is available.
        //      Could utilize that.
        final OntModel model = ((JenaEIOntModel) eagleiOntModel).getOntModel();
        final OntClass resourceClass = model.getOntClass(assertedTypeURI);
        if (resourceClass == null) {
            logger.error("Null OntClass for " + typeEntity);
            // Without an OntClass there is nothing to derive inferred types from, so the
            // document is indexed without them instead of handing null to getTypes.
            // NOTE(review): getTypes is not visible here - confirm it has no null handling
            // that callers relied on.
            return;
        }

        // add the types (potentially with materialization)
        for (EIClass type : AbstractGenerator.getTypes(eagleiOntModel, resourceClass, materializeTypes)) {
            final EIURI inferredURI = type.getEntity().getURI();
            final String typeURI = inferredURI.toString();
            doc.add(new Field(INFERRED_TYPE, typeURI, Field.Store.YES, Field.Index.NOT_ANALYZED));

            // add the label to the text field
            final String label = eagleiOntModel.getPreferredLabel(inferredURI);
            if (label == null) {
                continue;
            }
            // boost the direct type by putting its label in the pref_text field
            final boolean directType = typeURI.equals(assertedTypeURI);
            final Field field = new Field(directType ? PREF_TEXT : TEXT, label, Field.Store.YES, Field.Index.ANALYZED);
            field.setBoost(directType ? HIGH_BOOST : STANDARD_BOOST);
            doc.add(field);
        }
    }

    /*
     * Returns true if the property URI is one of the properties that should not be indexed.
     */
    private static boolean shouldIgnore(EIURI uri) {
        final String candidate = uri.toString();
        for (String toIgnore : propsToIgnore) {
            if (toIgnore.equals(candidate)) {
                return true;
            }
        }
        return false;
    }
}
