package org.eaglei.search.provider.lucene;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eaglei.model.EIURI;
import org.eaglei.search.provider.SearchRequest;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.search.provider.SearchProvider;

/**
 * Runnable that periodically updates the Lucene index. To support the indexing of indirect properties (e.g. A->B->"some literal", 
 * where A is the RDF resource associated with the Lucene Document), a 2-pass indexing scheme is performed:
 * 
 * <ul>
 * <li>During the first pass all Documents for the resources associated with all input SearchResults are created. If a Document already 
 * exists, it is deleted. For SearchResults that represent deleted resources, a Document is not added back. 
 * This phase corresponds to multiple calls to LuceneSearchProviderIndexer.indexSearchResult().
 * <li>During the second pass, the Documents are updated with indirect properties. The set of Documents to be updated corresponds to all new
 * Documents plus the Documents for all referencing Resources. The update call is performed by LuceneSearchProviderIndexer.addIndirectProperties()
 * </ul>
 *
 * <p>The loop in {@link #run()} ends when the executing thread is interrupted.
 *
 * @author frost
 */
public final class LuceneSearchProviderIndexUpdater implements Runnable {

    private static final Log logger = LogFactory.getLog(LuceneSearchProviderIndexUpdater.class);
 
    /*
     * Maximum number of results to retrieve from the underlying institutional node(s).
     * TODO support retrieval of ALL results
     */
    private static final int MAX_RESULTS_TO_INDEX_FROM_EACH_NODE = 100000;

    /*
     * Lower bound, in msec, enforced on the update frequency.
     */
    private static final long MIN_UPDATE_FREQUENCY = 1000L;
        
    private final SearchProvider nestedProvider;
    private final LuceneSearchProviderIndexer indexer;
    // volatile so that a value written through setUpdateFrequency() by one thread
    // is visible to the thread that is executing run()
    private volatile long updateFrequency = LuceneSearchProviderProperties.DEFAULT_UPDATE_FREQ; 
    
    /**
     * Creates an index updater. If the system property named by
     * LuceneSearchProviderProperties.UPDATE_FREQUENCY is set to a parseable long, it is
     * applied through {@link #setUpdateFrequency(long)}; otherwise the default frequency is kept.
     * @param nestedProvider Provider that is queried to populate the index.
     * @param indexer The indexer.
     */
    public LuceneSearchProviderIndexUpdater(final SearchProvider nestedProvider, final LuceneSearchProviderIndexer indexer) {
        this.nestedProvider = nestedProvider;
        this.indexer = indexer;

        final String updateFrequencyProp = System.getProperty(LuceneSearchProviderProperties.UPDATE_FREQUENCY); 
        if (updateFrequencyProp != null) {
            try {
                setUpdateFrequency(Long.parseLong(updateFrequencyProp.trim()));
            } catch (NumberFormatException nfe) {
                // keep the current (default) value, but leave a trace of the misconfiguration
                logger.warn("Ignoring invalid value '" + updateFrequencyProp + "' for system property "
                        + LuceneSearchProviderProperties.UPDATE_FREQUENCY + "; using "
                        + this.updateFrequency + " msec");
            }
        }
    }

    /**
     * Sets the frequency for updating the lucene index from the embedded provider.
     * Values below 1000 msec are raised to 1000 msec.
     * @param updateFrequency Update frequency in msec. 
     */
    public void setUpdateFrequency(final long updateFrequency) {
        // must not be shorter than 1 second
        this.updateFrequency = Math.max(updateFrequency, MIN_UPDATE_FREQUENCY);
    }
    
    /**
     * @return The update frequency in msec.
     * @see #setUpdateFrequency(long)
     */
    public long getUpdateFrequency() {
        return this.updateFrequency;
    }
    
    /**
     * Dynamically builds the Lucene index from results retrieved from the nested SearchProvider.
     * Nothing is indexed or committed when the query reports a total count of zero.
     * @throws IOException declared for failures while querying or indexing.
     */
    public void updateIndex() throws IOException {
        // execute a query for all resources
        final SearchRequest request = new SearchRequest();
        request.setMaxResults(MAX_RESULTS_TO_INDEX_FROM_EACH_NODE); 
        final SearchResultSet resultSet = this.nestedProvider.query(request);

        if (resultSet == null) {
            logger.warn("Nested search provider returned no result set; index left unchanged");
            return;
        }
        if (resultSet.getTotalCount() <= 0) {
            return;
        }

        // Phase 1 of indexing: delete all Documents from the index corresponding to SearchResults and
        // recreate if the SearchResult is not for a deleted resource
        indexDirectProperties(resultSet);

        // Phase 2 of indexing: index the indirect properties of the new SearchResults and all referencing 
        // search results
        if (LuceneSearchProviderIndexer.INDEX_OBJECT_PROP_LABELS) {
            indexIndirectProperties(resultSet);
        }
    }

    /**
     * First indexing pass: hands every result to the indexer and commits.
     */
    private void indexDirectProperties(final SearchResultSet resultSet) throws IOException {
        for (SearchResult result: resultSet.getResults()) {
            indexer.indexSearchResult(result, true);
        }

        // commit the index changes
        indexer.commit();
    }

    /**
     * Second indexing pass: adds indirect properties to every non-deleted result and to
     * every document the indexer reports as related to a result, then commits.
     */
    private void indexIndirectProperties(final SearchResultSet resultSet) throws IOException {
        // build up the set of documents that require an update due
        // to referenced data (labels of object properties)
        final Set<EIURI> docsToUpdate = new HashSet<EIURI>();
        for (SearchResult result: resultSet.getResults()) {
            final EIURI uri = result.getEntity().getURI();

            // by default, update all docs that were indexed in the first pass
            // (unless they are deleted docs)
            if (!LuceneSearchProviderIndexer.isDeletedSearchResult(result)) {
                docsToUpdate.add(uri);
            }

            // every doc that references this doc needs an update as well;
            // this is done for deleted results too
            final List<EIURI> relatedDocs = indexer.getRelatedDocuments(uri);
            docsToUpdate.addAll(relatedDocs);
        }

        // reindex all of the collected docs with object prop labels
        for (EIURI uri: docsToUpdate) {
            indexer.addIndirectProperties(uri);
        }

        // commit the index changes
        indexer.commit();
    }
    
    /**
     * Repeatedly rebuilds the index and then sleeps for the configured update frequency.
     * A failed rebuild is logged and the loop continues; an interrupt ends the loop.
     */
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            try {
                // (re)build the Lucene index based on a query against the underlying provider
                this.updateIndex();
            } catch (Throwable t) {
                // deliberately broad: one failed update must not end the periodic updates
                logger.error("Failed to rebuild lucene index", t);
            }
            
            try {
                // sleep for the specified time period
                Thread.sleep(this.getUpdateFrequency());
            } catch (InterruptedException ie) {
                // restore the interrupt status for whoever owns this thread and stop updating
                Thread.currentThread().interrupt();
                logger.info("Lucene index updater interrupted; stopping");
                return;
            }
        }
    }    
}
