package org.eaglei.search.provider.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.sf.cglib.transform.impl.AddPropertyTransformer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

import org.eaglei.model.EIClass;
import org.eaglei.model.EIDatatypeProperty;
import org.eaglei.model.EIEntity;
import org.eaglei.model.EIObjectProperty;
import org.eaglei.model.EIOntModel;
import org.eaglei.model.EIURI;
import org.eaglei.model.EIOntConstants;
import org.eaglei.model.jena.JenaEIOntModel;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.search.provider.lucene.AbstractLuceneIndexer;
import org.eaglei.search.datagen.AbstractGenerator;
import org.eaglei.search.events.ChangeEventPayloadImpl;
import org.eaglei.search.events.ChangeEventPayload;
import org.eaglei.search.events.ChangeEventPayloadItem;
import org.eaglei.search.events.IndexChangeProcessor;
import org.eaglei.search.events.IndexChangeEvent;
import org.eaglei.search.harvest.ResourceChangeEvent;
import org.eaglei.search.harvest.ResourceChangeListener;

import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.vocabulary.RDF;

/**
 * Creates a Lucene index for eagle-i RDF resource data according to the schema defined in LuceneSearchIndexSchema.
 * @author frost
 */
public final class LuceneSearchIndexer extends AbstractLuceneIndexer 
					implements LuceneSearchIndexSchema,ResourceChangeListener {

    /** Logger shared by all instances of this indexer. */
    private static final Log logger = LogFactory.getLog(LuceneSearchIndexer.class);
    /** Whether debug logging was enabled at the time this class was initialized. */
    private static final boolean DEBUG = logger.isDebugEnabled();
    
    /**
     * Cache of type URIs for instances that should be flattened
     * into their referencing instance.
     */
    private Set<EIURI> flattenTypeURIs = new HashSet<EIURI>();
    
    /** URIs of datatype properties whose values are skipped when property text fields are added. */
    private Set<EIURI> skipPropTextIndexing = new HashSet<EIURI>();
    
    /** Document-level boost values keyed by type URI; filled in by initBoostMap(). */
    private Map<EIURI, Float> mapTypeToBoost = new HashMap<EIURI, Float>();
    
    // Keep track of unexpected bad data
    /** URIs that were indexed only as stubs and have not yet been replaced by a real document. */
    private Set<EIURI> unresolvedInstanceRefs = new HashSet<EIURI>();
    /** URIs of flatten-class instances for which no referencing document could be found. */
    private Set<EIURI> flattenInstancesWithNoRefs = new HashSet<EIURI>();
    
    /** Cache that is notified (addUsage/removeUsage) whenever a resource is indexed or deleted. */
    private ClassUsageCache classUsageCache;
        
    /**
     * Creates the indexer and prepares its lookup tables: the set of
     * properties excluded from text indexing, the flatten-type cache and the
     * per-type boost map.
     * 
     * @param eiOntModel Reference to the eagle-i ontology model.
     * @param analyzer The Lucene analyzer that is used for indexing and searching.
     * @param directory The directory that holds the index.
     * @param classUsageCache Cache that is informed of every class usage this
     *                        indexer adds or removes.
     */
    public LuceneSearchIndexer(final EIOntModel eiOntModel, final Analyzer analyzer,
            final Directory directory, ClassUsageCache classUsageCache) {
        super(eiOntModel, analyzer, directory);
        this.classUsageCache = classUsageCache;
        // The three initialisation steps below are independent of each other.
        skipPropTextIndexing.add(EIURI.create(EIOntConstants.URL_PROPERTY_URI));
        initFlattenCache();
        initBoostMap();
    }
    
    /**
     * Fills {@code mapTypeToBoost} with the fixed document boost
     * assigned to a handful of well-known resource types.
     */
    private void initBoostMap() {
        // Types that all share the lowest boost value.
        final String[] lowestBoostTypes = {
            "http://purl.obolibrary.org/obo/OBI_0000245",       // Organization
            "http://vivoweb.org/ontology/core#PrivateCompany",  // Private Company
            "http://purl.obolibrary.org/obo/IAO_0000310",       // Document
            "http://purl.obolibrary.org/obo/IAO_0000311"        // Publication
        };
        for (String typeURI : lowestBoostTypes) {
            mapTypeToBoost.put(EIURI.create(typeURI), 0.1f);
        }
        // Person
        mapTypeToBoost.put(EIURI.create("http://xmlns.com/foaf/0.1/Person"), 0.5f);
        // Protocol
        mapTypeToBoost.put(EIURI.create("http://purl.obolibrary.org/obo/OBI_0000272"), 0.8f);
    }
    
    /**
     * Registers every class in the "embedded class" and "search flatten"
     * class groups (together with their subclasses) as a flatten type.
     */
    private void initFlattenCache() {
        // Embedded classes
        for (EIClass embeddedClass : eiOntModel.getClassesInGroup(EIOntConstants.CG_EMBEDDED_CLASS)) {
            addFlattenClass(embeddedClass);
        }
        // Classes explicitly marked to be flattened for search
        for (EIClass flattenClass : eiOntModel.getClassesInGroup(EIOntConstants.CG_SEARCH_FLATTEN)) {
            addFlattenClass(flattenClass);
        }
    }
    
    /**
     * Adds the URI of the given class, and the URIs of the subclasses the
     * ontology model reports for it, to the flatten-type cache.
     *
     * @param c class whose instances should be flattened
     */
    private void addFlattenClass(EIClass c) {
        final EIURI classURI = c.getEntity().getURI();
        flattenTypeURIs.add(classURI);
        for (EIClass subClass : eiOntModel.getSubClasses(classURI)) {
            final EIURI subClassURI = subClass.getEntity().getURI();
            flattenTypeURIs.add(subClassURI);
        }
    }
    
    /**
     * Tells whether instances of the given type are flattened into their
     * referencing instance.
     *
     * @param uri type URI to test
     * @return true if the URI is present in the flatten-type cache
     */
    private boolean isFlattenClass(EIURI uri) {
        final boolean flattened = flattenTypeURIs.contains(uri);
        return flattened;
    }

	/**
	 * Dispatches a resource change: deletes are removed from the index,
	 * flatten-type instances are merged into their referencing documents,
	 * and everything else is indexed as a regular resource.
	 *
	 * @param event the change to apply to the index
	 */
	@Override
	public void onChangeEvent(ResourceChangeEvent event) {
	    super.onChangeEvent(event);
	    if (event.isDelete()) {
	        deleteResourceInstance(event);
	        return;
	    }
	    if (isFlattenClass(event.getType().getURI())) {
	        indexFlattenClass(event);
	        return;
	    }
	    indexResourceInstance(event);
	}
    /**
     * Logs a warning with the number of unresolved instance references and
     * of flatten instances without a referring instance (if any), then
     * delegates to the superclass.
     *
     * @param institution passed through to the superclass
     * @param lastModifiedDate passed through to the superclass
     */
    @Override
    public void onChangeStreamEnd(EIEntity institution, Date lastModifiedDate) {
        // Only the counts are reported; the individual URIs are not logged.
        final int unresolvedCount = unresolvedInstanceRefs.size();
        if (unresolvedCount > 0) {
            logger.warn("Unresolved instance references in search index:  num: " + unresolvedCount);
        }
        final int orphanFlattenCount = flattenInstancesWithNoRefs.size();
        if (orphanFlattenCount > 0) {
            logger.warn("Flatten instance URIs with no referring instance:  num: " + orphanFlattenCount);
        }

        super.onChangeStreamEnd(institution, lastModifiedDate);
    }

    /**
     * Removes a resource from the index.  Documents that reference the
     * resource are updated first (a null value label signals the delete),
     * then the resource's own documents are deleted and, when the previous
     * version carried type fields, the class usage cache is told the usage
     * is gone.
     *
     * @param event the delete event
     */
    private void deleteResourceInstance(final ResourceChangeEvent event) {
        final EIURI uri = event.getEntity().getURI();
        final List<Document> previousVersions = getDocuments(uri);
        final boolean hasPrevious = previousVersions != null && previousVersions.size() > 0;

        EIEntity typeEntity = null;
        if (hasPrevious) {
            final Document previous = previousVersions.get(0);
            final String previousLabel = previous.get(FIELD_ENTITY_LABEL);
            final Field[] referencedByFields = previous.getFields(FIELD_REFERENCED_BY);

            // Recover the asserted type of the version being deleted.
            final String typeLabel = previous.get(FIELD_ASSERTED_TYPE_LABEL);
            final String typeURIStr = previous.get(FIELD_ASSERTED_TYPE_URI);
            if (typeURIStr == null || typeLabel == null) {
                logger.warn("Unexpected: LuceneSearchIndexer.deleteResourceInstance(): deleting previous version of resource, but it doesn't have type fields: " + event.getEntity()); 
            } else {
                typeEntity = EIEntity.create(typeURIStr, typeLabel);
            }

            // Update all documents that reference this one.
            if (referencedByFields != null) {
                final String deletedURIStr = uri.toString();
                for (Field referencedBy : referencedByFields) {
                    // Null valueLabel indicates a delete
                    updateReferencingDocument(EIURI.create(referencedBy.stringValue()), deletedURIStr,
                            null, previousLabel);
                }
            }
        }

        deleteDocuments(uri);
        if (typeEntity != null) {
            classUsageCache.removeUsage(typeEntity, null, event);
        }
    }
    
    /**
     * Handles a change event for an instance of a flatten type: instead of
     * getting its own document, its label and properties are folded into
     * every document that references it, and its stub document is deleted.
     * If nothing references the instance, the URI is recorded in
     * {@code flattenInstancesWithNoRefs} and nothing is indexed.
     *
     * @param event change event for the flatten-type instance
     */
    private void indexFlattenClass(final ResourceChangeEvent event) {
        final EIURI flattenURI = event.getEntity().getURI();
        final List<Document> referrers = getReferencingDocuments(flattenURI);
        if (referrers == null || referrers.isEmpty()) {
            logger.error("No referencing documents found for flatten instance event: " + event.getEntity());
            flattenInstancesWithNoRefs.add(flattenURI);
            return;
        }

        // Push the flatten instance's label into the referencing documents.
        updateReferencingDocument(referrers, flattenURI.toString(),
                event.getEntity().getLabel(), null);

        // TODO: Should we add the flatten class type as a property?
        // Copy the flatten instance's properties onto each referencing document.
        for (Document referrer : referrers) {
            addProperties(referrer, event, true);
        }

        // Delete the stub for this flatten instance.  The referenced-by
        // fields are stripped first so that the delete doesn't try to
        // update the referencing list.
        final List<Document> stubDocs = getDocuments(flattenURI);
        if (stubDocs == null) {
            // huh?  Should have been caught upstream
            logger.error("No document indexed for flatten doc: " + event.getEntity());
            return;
        }
        for (Document stub : stubDocs) {
            stub.removeFields(FIELD_REFERENCED_BY);
        }
        deleteDocuments(flattenURI);
        unresolvedInstanceRefs.remove(flattenURI);
    }
    
    /**
     * Builds and stores the search document for a resource.
     * <p>
     * Note that this could be a new resource or an update.
     * If this is an update, the previous version of the Document
     * will be deleted on commit.  Referenced-by fields of a previous
     * version (possibly a stub) are carried over into the new document, and
     * when the label changed, every referencing document is refreshed.
     *
     * @param event change event describing the resource
     * @return the list holding the single newly created document
     */
    private List<Document> indexResourceInstance(final ResourceChangeEvent event) {  	
        //logger.debug("Index resource " + event.getEntity() + "  Type: " + event.getType());
        final EIURI uri = event.getEntity().getURI();
        final EIURI typeURI = event.getType().getURI();
        Field field;

        // create a new Lucene document for the resource
        final Document doc = new Document();

        // Check if this document is already in the index, possibly as a stub.
        // Copy the referencing uri list into the new Document.
        String previousLabel = null;
        Field[] referencedByFields = null;
        List<Document> previousVersions = getDocuments(uri);
        if (previousVersions != null && previousVersions.size() > 0) {
            previousLabel = previousVersions.get(0).get(FIELD_ENTITY_LABEL);
            referencedByFields = previousVersions.get(0).getFields(FIELD_REFERENCED_BY);
            if (referencedByFields != null) {
                for (Field f : referencedByFields) {
                    doc.add(f);
                }
            }
        }

        // Entity URI
        field = new Field(FIELD_URI, uri.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        field.setBoost(BOOST_URI);
        doc.add(field);
        // Entity label: stored verbatim once, and once analyzed for searching
        String label = event.getEntity().getLabel();
        field = new Field(FIELD_ENTITY_LABEL, label, Field.Store.YES, Field.Index.NO);
        doc.add(field);
        field = new Field(FIELD_LABEL, label, Field.Store.YES, Field.Index.ANALYZED);
        field.setBoost(BOOST_ENTITY_LABEL);
        doc.add(field);
        // TODO support Alternate Names
        // Institution URI
        if (event.getInstitution() != null) {
            // field for the providing institution (stored, indexed without analysis)
            String institutionURI = event.getInstitution().getURI().toString();
            doc.add(new Field(FIELD_INSTITUTION_URI, institutionURI, Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
        // Asserted type
        field = new Field(FIELD_ASSERTED_TYPE_URI, typeURI.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        field.setBoost(BOOST_ASSERTED_TYPE_URI);
        doc.add(field);
        field = new Field(FIELD_ASSERTED_TYPE_LABEL, event.getType().getLabel(), Field.Store.YES, Field.Index.ANALYZED);
        field.setBoost(BOOST_ASSERTED_TYPE_LABEL);
        doc.add(field);
        // Inferred types
        for (EIClass superclass : eiOntModel.getSuperClasses(typeURI)) {
            field = new Field(FIELD_INFERRED_TYPE_URI, superclass.getEntity().getURI().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
            field.setBoost(BOOST_INFERRED_TYPE_URI);
            doc.add(field);
            field = new Field(FIELD_INFERRED_TYPE_LABEL, superclass.getEntity().getLabel(), Field.Store.YES, Field.Index.ANALYZED);
            field.setBoost(BOOST_INFERRED_TYPE_LABEL);
            doc.add(field);
        }
        // Properties
        addProperties(doc, event, false);

        // Set document-level boost based on resource type.
        // The boost map is keyed by type URI, so the lookup must use the
        // asserted type of the resource, not the URI of the instance itself.
        Float documentBoost = mapTypeToBoost.get(typeURI);
        if (documentBoost != null) {
            doc.setBoost(documentBoost);
        }

        // add the document to the index
        List<Document> docs = new ArrayList<Document>(1);
        docs.add(doc);
        setDocuments(uri, docs);
        unresolvedInstanceRefs.remove(uri);

        classUsageCache.addUsage(event.getType(), null, event);

        // Update all documents that reference this one, but only when the
        // label they embed has actually changed.
        if (referencedByFields != null && !label.equals(previousLabel)) {
            for (Field f : referencedByFields) {
                updateReferencingDocument(EIURI.create(f.stringValue()), uri.toString(), 
                        label, previousLabel);
            }
        }

        return docs;
    }
    
    /**
     * Adds the provider (regular resources only), datatype property and
     * object property fields carried by the event to the given document.
     * For object property values that are resources rather than model
     * classes, the value is also registered as being referenced by the
     * event's entity.
     *
     * @param doc document that receives the fields
     * @param event change event supplying the property values
     * @param isFlattenClassEvent true when the event describes a flatten
     *        instance whose properties are being merged into a referencing
     *        document; the provider is not set in that case
     */
    private void addProperties(Document doc, ResourceChangeEvent event, boolean isFlattenClassEvent) {
        // Provider property
        if (!isFlattenClassEvent) {
            final EIURI providerURI = event.getProvider();
            if (providerURI != null) {
                setResourceProvider(doc, providerURI);
            }
        }

        // Datatype properties
        for (EIDatatypeProperty datatypeProp : event.getDataTypeProperties()) {
            if (!skipPropTextIndexing.contains(datatypeProp.getEntity().getURI())) {
                for (String literalValue : event.getDataTypeProperty(datatypeProp)) {
                    addDatatypePropertyTextField(doc, datatypeProp, literalValue);
                }
            }
        }

        // Object properties
        for (EIObjectProperty objectProp : event.getObjectProperties()) {
            for (EIURI valueURI : event.getObjectProperty(objectProp)) {
                final Field uriField = new Field(FIELD_PROP_URI, valueURI.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
                uriField.setBoost(BOOST_PROP_URI);
                doc.add(uriField);

                if (eiOntModel.isModelClassURI(valueURI.toString())) {
                    // Value is a model class: its label comes from the ontology.
                    final EIClass valueEIClass = eiOntModel.getClass(valueURI);
                    addObjectPropertyTextField(doc, objectProp, valueURI, valueEIClass.getEntity().getLabel());
                    continue;
                }

                // Value is a resource: its label, if known, comes from the index.
                String valueLabel = null;
                final List<Document> valueDocuments = getDocuments(valueURI);
                if (valueDocuments != null && valueDocuments.size() > 0) {
                    // The label field is null for stubs
                    final Field entityLabelField = valueDocuments.get(0).getField(FIELD_ENTITY_LABEL);
                    if (entityLabelField != null) {
                        valueLabel = entityLabelField.stringValue();
                    }
                }
                if (valueLabel != null) {
                    addObjectPropertyTextField(doc, objectProp, valueURI, valueLabel);
                }
                setReferencingDocument(event.getEntity().getURI(), valueURI);
            }
        }
    }
    
    /**
     * Adds the text fields for one datatype property value, using the field
     * name derived from the property alone (no value URI).
     */
    private void addDatatypePropertyTextField(Document doc, EIDatatypeProperty prop, String valueLabel) {
        addPropertyTextField(doc,
                LuceneSearchUtil.getPropertyFieldName(prop.getEntity(), null),
                valueLabel);
    }
    
    /**
     * Adds the text fields for one object property value, using the field
     * name derived from the property and the value's URI.
     */
    private void addObjectPropertyTextField(Document doc, EIObjectProperty prop, EIURI valueURI, String valueLabel) {
        addPropertyTextField(doc,
                LuceneSearchUtil.getPropertyFieldName(prop.getEntity(), valueURI.toString()),
                valueLabel);
    }
    
    /**
     * Stores a property value both in its own property-specific field and in
     * the common property text field.
     *
     * @param doc document that receives the fields
     * @param propFieldName name of the property-specific field
     * @param valueLabel text of the property value
     */
    private void addPropertyTextField(Document doc, String propFieldName, String valueLabel) {
        // Property-specific field: used to compute highlighting prepended
        // with the property label.
        doc.add(new Field(propFieldName, valueLabel, Field.Store.YES, Field.Index.ANALYZED));
        // Common field holding all property value text.
        addPropertyTextField(doc, valueLabel);
    }
    
    /**
     * Appends a property value to the common field that holds all property
     * value text.  This is the field that will be searched.
     *
     * @param doc document that receives the field
     * @param valueLabel text of the property value
     */
    private void addPropertyTextField(Document doc, String valueLabel) {
        final Field propTextField = new Field(FIELD_PROP_TEXT, valueLabel, Field.Store.YES, Field.Index.ANALYZED);
        propTextField.setBoost(BOOST_PROP_TEXT);
        doc.add(propTextField);
    }
    
    /**
     * Looks up the documents of a referencing resource and refreshes the
     * text they hold for the referenced value.  Logs an error and does
     * nothing when the referencing resource has no documents.
     *
     * @param referencingURI URI of the resource that references the value
     * @param valueURIStr URI of the referenced value
     * @param valueLabel new label of the referenced value; null indicates a delete
     * @param previousValueLabel label the referenced value had before
     */
    private void updateReferencingDocument(EIURI referencingURI, String valueURIStr, String valueLabel, String previousValueLabel) {
        final List<Document> referencingDocs = getDocuments(referencingURI);
        final boolean found = referencingDocs != null && referencingDocs.size() != 0;
        if (found) {
            updateReferencingDocument(referencingDocs, valueURIStr, valueLabel, previousValueLabel);
        } else {
            logger.error("No Document found for " + referencingURI +" which is expected to reference: " + valueLabel + " : " + valueURIStr);
        }
    }
    
	/**
	 * Refreshes the given referencing documents after the label of a
	 * referenced value changed (or the value was deleted).
	 * <p>
	 * If the referenced value is a document's provider, all provider fields
	 * are rebuilt.  The common property text field is then discarded and
	 * rebuilt from the document's property-specific fields, substituting the
	 * new label for the fields that belong to the referenced value.
	 *
	 * @param docs documents that reference the value
	 * @param valueURIStr URI of the referenced value
	 * @param valueLabel new label of the referenced value; null if the
	 *        referenced resource was deleted
	 * @param previousValueLabel label the referenced value had before (currently unused)
	 */
	private void updateReferencingDocument(List<Document> docs, String valueURIStr, String valueLabel, String previousValueLabel) {
	    for (Document doc : docs) {
	        // If the referenced resource is our provider, then update
	        // the provider fields.  All provider fields are removed, not just
	        // the URI, because setResourceProvider() re-adds label and type
	        // fields too; leaving the old ones behind would accumulate stale
	        // duplicates on every provider change.
	        final String providerURIStr = doc.get(FIELD_PROVIDER_URI);
	        if (providerURIStr != null && providerURIStr.equals(valueURIStr)) {
	            doc.removeFields(FIELD_PROVIDER_URI);
	            doc.removeFields(FIELD_PROVIDER_LABEL);
	            doc.removeFields(FIELD_PROVIDER_TYPE_URI);
	            setResourceProvider(doc, EIURI.create(providerURIStr));
	        }
	        // The label of a referenced resource has changed.
	        // Blow away all existing property text fields.  Iterate through
	        // all the instance properties and add their text values
	        // back, substituting in the new resource label.
	        // TODO as an optimization, if previousValueLabel was null,
	        //      don't actually have to blow away the FIELD_PROP_TEXT.
	        //      But would need to change the addPropertyTextField method.
	        doc.removeFields(FIELD_PROP_TEXT);

	        // A field name can occur several times in one document (e.g. a
	        // multi-valued datatype property), so every value is collected;
	        // a single-valued map would silently drop all but one of them.
	        final Map<String, List<String>> valuesByFieldName = new HashMap<String, List<String>>();
	        for (Fieldable f : doc.getFields()) {
	            if (!f.isTokenized()) {
	                continue;
	            }
	            final String fieldName = f.name();
	            final String someValueURIStr = LuceneSearchUtil.getPropertyValueURIString(fieldName);
	            if (valueURIStr.equals(someValueURIStr)) {
	                // valueLabel will be null if referenced resource was deleted
	                if (valueLabel != null) {
	                    // The field belongs to the changed value: replace its text with the new label.
	                    final List<String> replacement = new ArrayList<String>(1);
	                    replacement.add(valueLabel);
	                    valuesByFieldName.put(fieldName, replacement);
	                }
	            } else if (fieldName.startsWith(LuceneSearchUtil.DATATYPE_PROP_FIELD_PREFIX)
	                    || fieldName.startsWith(LuceneSearchUtil.OBJECT_PROP_FIELD_PREFIX)) {
	                List<String> values = valuesByFieldName.get(fieldName);
	                if (values == null) {
	                    values = new ArrayList<String>();
	                    valuesByFieldName.put(fieldName, values);
	                }
	                values.add(f.stringValue());
	            }
	        }
	        // Re-add every collected property value; this also repopulates FIELD_PROP_TEXT.
	        for (Map.Entry<String, List<String>> entry : valuesByFieldName.entrySet()) {
	            doc.removeFields(entry.getKey());
	            for (String value : entry.getValue()) {
	                addPropertyTextField(doc, entry.getKey(), value);
	            }
	        }
	    }
	}
    
    /**
     * Records on every document of the referenced resource that it is
     * referenced by the given resource.  If the referenced resource has no
     * document yet, a stub document is created to hold the reference.
     *
     * @param referencedBy URI of the referencing resource
     * @param referenced URI of the referenced resource
     */
    private void setReferencingDocument(EIURI referencedBy, EIURI referenced) {
        final List<Document> targets = getDocuments(referenced);
        final boolean alreadyIndexed = targets != null && targets.size() != 0;
        if (!alreadyIndexed) {
            setReferencingDocument(referencedBy, createStubInstance(referenced));
            return;
        }
        for (Document target : targets) {
            setReferencingDocument(referencedBy, target);
        }
    }
    
    /**
     * Adds a stored, non-indexed referenced-by field naming the referencing
     * resource to the given document.
     *
     * @param referencedBy URI of the referencing resource
     * @param referenced document of the referenced resource
     */
    private void setReferencingDocument(EIURI referencedBy, Document referenced) {
        referenced.add(new Field(FIELD_REFERENCED_BY, referencedBy.toString(), Field.Store.YES, Field.Index.NO));
    }
    
    /**
     * Collects the documents of every resource listed in the referenced-by
     * fields of the first document stored for the given URI.
     *
     * @param uri URI of the referenced resource
     * @return the referencing documents; empty (never null) if there are none
     */
    private List<Document> getReferencingDocuments(EIURI uri) {
        final List<Document> referencing = new ArrayList<Document>();
        final List<Document> ownDocs = getDocuments(uri);
        if (ownDocs == null || ownDocs.size() == 0) {
            return referencing;
        }
        final String[] referrerURIs = ownDocs.get(0).getValues(FIELD_REFERENCED_BY);
        for (String referrerURI : referrerURIs) {
            final List<Document> referrerDocs = getDocuments(EIURI.create(referrerURI));
            if (referrerDocs != null) {
                referencing.addAll(referrerDocs);
            }
        }
        return referencing;
    }
    
    /**
     * Creates and stores a placeholder document that carries only the URI of
     * a resource that is referenced but has not been indexed itself, and
     * records the URI as an unresolved instance reference.
     *
     * @param uri URI of the referenced resource
     * @return the newly created stub document
     */
    private Document createStubInstance(final EIURI uri) {      
        // The stub holds nothing but the entity URI.
        final Document stub = new Document();
        stub.add(new Field(FIELD_URI, uri.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        // Don't want it findable.
        //stub.setBoost(0.0f);

        // Store the stub as the only document for this URI.
        final List<Document> stubDocs = new ArrayList<Document>(1);
        stubDocs.add(stub);
        setDocuments(uri, stubDocs);

        unresolvedInstanceRefs.add(uri);
        return stub;
    }

    /**
     * Adds provider fields to a document, based on the first document
     * stored for the provider.  Nothing is added when the provider has no
     * document; only the provider URI is added when the provider document
     * has no label (as is the case for stubs).  Otherwise the provider
     * label, asserted type URI and inferred type URIs are added as well.
     *
     * @param doc document that receives the provider fields
     * @param providerURI URI of the provider resource
     */
    private void setResourceProvider(Document doc, EIURI providerURI) {
        final List<Document> providerDocuments = getDocuments(providerURI);
        if (providerDocuments == null || providerDocuments.size() == 0) {
            return;
        }
        doc.add(new Field(FIELD_PROVIDER_URI, providerURI.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

        final Document providerDoc = providerDocuments.get(0);
        final String providerLabel = providerDoc.get(FIELD_ENTITY_LABEL);
        if (providerLabel == null) {
            // Null for stubs, skip
            return;
        }
        doc.add(new Field(FIELD_PROVIDER_LABEL, providerLabel, Field.Store.YES, Field.Index.NOT_ANALYZED));

        // The asserted type and every inferred type go into the same provider type field.
        final String providerAssertedTypeURI = providerDoc.get(FIELD_ASSERTED_TYPE_URI);
        doc.add(new Field(FIELD_PROVIDER_TYPE_URI, providerAssertedTypeURI, Field.Store.YES, Field.Index.NOT_ANALYZED));
        for (Field inferredType : providerDoc.getFields(FIELD_INFERRED_TYPE_URI)) {
            doc.add(new Field(FIELD_PROVIDER_TYPE_URI, inferredType.stringValue(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
    }
}
