package org.eaglei.search.provider.lucene.harvest;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.SimpleTimeZone;
import java.util.TimeZone;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.SimpleHttpConnectionManager;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntConstants;
import org.eaglei.model.EIOntModel;
import org.eaglei.model.EIURI;
import org.eaglei.model.jena.JenaEIOntModel;
import org.eaglei.search.provider.SearchCountRequest;
import org.eaglei.search.provider.SearchCounts;
import org.eaglei.search.provider.SearchRequest;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.services.InstitutionRegistry;
import org.eaglei.services.repository.ProviderUtils;
import org.eaglei.services.repository.RepositoryHttpConfig;

import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFactory;
import com.hp.hpl.jena.query.ResultSetFormatter;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.sparql.resultset.SPARQLResult;
import com.hp.hpl.jena.sparql.resultset.XMLInput;
import com.hp.hpl.jena.vocabulary.RDF;

/**
 * Extension of AbstractStreamHarvester that reads harvest data from a local file instead of calling the
 * Repository /harvest API directly (see /harvest REST API spec for details).
 * <ul>
 * <li>The results from the harvest call (returned in SPARQL tabular format) are parsed into a set of SPARQL 
 *     QuerySolutions.
 * <li>The QuerySolutions are used to create a set of SearchResults (one search result for each unique subject URI). 
 * <li>The SearchResults are gathered together into a SearchResultSet.
 * </ul>
 * @author tbashor
 */
// TODO implement streaming processing of harvest results rather than batch processing of entire set of query
//solutions
// TODO retrieve inferred triples as well (currently inferred triples are being computed locally by indexer)
// TODO this currently relies on clock sync between the client and repo server (which will
// be a non-issue when the client and repo run on the same machine); need to replace the 
// logic that sets this with code that gets the timestamp from the server
public final class FileStreamHarvester extends AbstractStreamHarvester {

    // Log under this class's own category so its messages can be filtered and attributed correctly.
    private static final Log logger = LogFactory.getLog(FileStreamHarvester.class);
    // Evaluated once at class-load time; later changes to the log level are not picked up.
    private static final boolean DEBUG = logger.isDebugEnabled();

    /**
     * Timestamp taken by {@link #harvest()} right after the data file was opened;
     * {@code null} until the file has been opened successfully.
     */
    // NOTE(review): despite the name this is the wall-clock time of the read, not the
    // file's modification time -- confirm which one the listeners expect.
    private Date lastModifiedDate = null;

    // NOTE(review): neither formatter below is referenced anywhere in this class.
    // SimpleDateFormat is not thread-safe, so synchronize before sharing them across threads.
    private static final DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
    private static final SimpleDateFormat preciseHTTPDate =
        new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss.SSS zzz");
    static {
        format.setTimeZone(TimeZone.getTimeZone("GMT"));
        preciseHTTPDate.setCalendar(new GregorianCalendar(new SimpleTimeZone(0, "GMT"), Locale.getDefault()));
    }

    /** Relative name of the data file, built as {@code <tier>_<localNodeId>.txt}. */
    private final String fileName;
    /** Set on the first call to {@link #harvest()}; every later call returns immediately. */
    private boolean readOnce = false;

    /**
     * Creates a new FileStreamHarvester. The name of the file to read is derived from the
     * registry as {@code <tier>_<localNodeId>.txt}; the file itself is not opened until
     * {@link #harvest()} is called.
     *
     * @param eiOntModel          passed through to the superclass constructor
     * @param institution         passed through to the superclass constructor; its label is
     *                            included in the debug log message
     * @param institutionRegistry supplies the tier and local node id used to build the file name
     */
    public FileStreamHarvester(final EIOntModel eiOntModel, final EIEntity institution,
            final InstitutionRegistry institutionRegistry) {
        super(eiOntModel, institution);

        String tier = institutionRegistry.getTier();
        String subdomain = institutionRegistry.getLocalNodeId();
        this.fileName = tier + "_" + subdomain + ".txt";
        if (DEBUG) {
            logger.debug("Created file-based harvester for " + institution.getLabel());
        }
    }

    /**
     * Returns a short description of the harvest source.
     *
     * @return the file name alone if the file has not been opened yet, otherwise the formatted
     *         read timestamp followed by the file name in square brackets
     */
    public String getHarvestInfo() {
        if (lastModifiedDate != null) {
            // formatWithTZ is not declared in this class (presumably inherited from the superclass).
            return "Dataset:   " + formatWithTZ.format(lastModifiedDate) + "       [" + fileName + "]";
        } else {
            return fileName;
        }
    }

    /**
     * Reads the harvest data file and notifies listeners of change information.
     * <p>
     * Only the first call does any work; subsequent calls return immediately, even if the
     * first attempt failed. An {@link IOException} raised while opening or reading the file
     * is logged and not propagated.
     */
    public void harvest() {
        // The latch is set before the read is attempted, so a failed read is never retried.
        if (readOnce) {
            return;
        }
        readOnce = true;

        if (DEBUG) {
            logger.debug("Reading harvest data from " + fileName);
        }

        InputStream is = null;
        try {
            // NOTE(review): the start notification is sent before the file is opened, so an
            // I/O failure leaves listeners with a start event and no matching end event.
            notifyChangeStreamStart();
            is = new FileInputStream(fileName);
            lastModifiedDate = new Date();
            generateResourceChangeEvents(is);
            notifyChangeStreamEnd(lastModifiedDate);
        } catch (IOException e) {
            // A missing or unreadable data file means nothing was harvested; report it at
            // error level so it is visible under a default logging configuration.
            logger.error("Unexpected error in FileStreamHarvester " + fileName, e);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    // Nothing more can be done with the stream; record the failure and move on.
                    logger.warn("Failed to close harvest file " + fileName, e);
                }
            }
        }
    }

}