package org.eaglei.search.provider.rdf;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Locale;
import java.util.SimpleTimeZone;
import java.util.TimeZone;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.SimpleHttpConnectionManager;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntConstants;
import org.eaglei.model.EIOntModel;
import org.eaglei.search.provider.SearchCountRequest;
import org.eaglei.search.provider.SearchCounts;
import org.eaglei.search.provider.SearchRequest;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.services.repository.ProviderUtils;
import org.eaglei.services.repository.RepositoryHttpConfig;

import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFactory;
import com.hp.hpl.jena.query.ResultSetFormatter;

/**
 * Extension of AbstractRDFProvider that uses the Repository /harvest API (see /harvest REST API spec for details).
 * <ul>
 * <li>The results from the harvest call (returned in SPARQL tabular format) are parsed into a set of SPARQL 
 *     QuerySolutions.
 * <li>The QuerySolutions are used to create a set of SearchResults (one search result for each unique subject URI). 
 * <li>The SearchResults are gathered together into a SearchResultSet.
 * </ul>
 * @author frost
 */
// TODO implement streaming processing of harvest results rather than batch processing of entire set of query
//solutions
// TODO retrieve inferred triples as well (currently inferred triples are being computed locally by indexer)
// TODO this currently relies on clock sync between the client and repo server (which will
// be a non-issue when the client and repo run on the same machine); need to replace the 
// logic that sets this with code that gets the timestamp from the server
public final class RepositoryHarvester extends AbstractRDFProvider {

    private static final Log logger = LogFactory.getLog(RepositoryHarvester.class);

    /** Response header from which the next "after" lower bound is read. */
    private static final String PRECISE_LAST_MODIFIED_HEADER = "X-Precise-Last-Modified";

    /** Maximum number of individual results written to the debug log per harvest. */
    private static final int MAX_LOGGED_RESULTS = 5;

    /*
     * "from" time; lower bound for harvest changes. This is initially null to ensure we get a
     * full dump from the repository. Whenever a non-empty harvest response carries a parseable
     * X-Precise-Last-Modified header, both values are updated together from that header.
     */
    private String fromTimeStr = null;
    private Date fromTime = null;

    /*
     * Date formats are per-instance rather than static because SimpleDateFormat is not
     * thread-safe and this class only assumes single-threaded use of one instance.
     * Both are pinned to Locale.US and GMT so that the English day/month names of the
     * HTTP-style header parse, and the "after" parameter is formatted, identically
     * regardless of the JVM default locale.
     */
    /** Formats the "after" request parameter (a trailing "Z" is appended by the caller). */
    private final DateFormat format = newGmtFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
    /** Parses the value of the X-Precise-Last-Modified response header. */
    private final DateFormat preciseHTTPDate = newGmtFormat("EEE, dd MMM yyyy HH:mm:ss.SSS zzz");

    private final RepositoryHttpConfig repoConfig;

    /*
     * HttpClient that is reused. It will reuse the underlying connection or recreate if it gets dropped.
     */
    private final HttpClient httpclient;

    /**
     * Creates a new RepositoryHarvester.
     * @param eagleiOntModel Handle to the in-memory representation of the eagle-i ontology.
     * @param institution Handle to the institution.
     * @param repoConfig Configuration of the repository (harvest URL and search credentials).
     */
    public RepositoryHarvester(final EIOntModel eagleiOntModel, final EIEntity institution,
            final RepositoryHttpConfig repoConfig) {
        super(eagleiOntModel, institution);
        this.repoConfig = repoConfig;
        httpclient = RepositoryHttpConfig.createHttpClient(repoConfig.getSearchUsername(), repoConfig.getSearchPassword());
        // Assume single threaded use of this client
        // Configure to try to keep connection open between uses.
        httpclient.setHttpConnectionManager(new SimpleHttpConnectionManager(false));
    }

    /**
     * Creates a locale-independent date format that works in GMT.
     * @param pattern SimpleDateFormat pattern.
     * @return A new format instance using Locale.US and the GMT time zone.
     */
    private static DateFormat newGmtFormat(final String pattern) {
        final SimpleDateFormat dateFormat = new SimpleDateFormat(pattern, Locale.US);
        dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
        return dateFormat;
    }

    /**
     * No initialization is required for this provider.
     */
    @Override
    public void init() throws IOException {
    }

    /**
     * No-op: returns a new SearchCounts created from the wrapped request without computing anything.
     */
    @Override
    public SearchCounts count(SearchCountRequest request) throws IOException {
        // no-op
        return new SearchCounts(request.getRequest());
    }

    /**
     * No-op: this provider does not execute SPARQL queries itself.
     * @return Always null.
     */
    @Override
    protected QueryExecution getQueryExecution(Query query) {
        // no-op
        return null;
    }

    /**
     * Calls the /harvest API, converts the returned QuerySolutions into SearchResults and wraps
     * them in a SearchResultSet. As a side effect the request's max-results value is set to the
     * number of harvested results before the result set is created.
     * @param request Search request; its max-results value is overwritten.
     * @return SearchResultSet built from the harvested results.
     * @throws IOException Thrown if the harvest call fails with an I/O error.
     */
    @Override
    public SearchResultSet query(final SearchRequest request) throws IOException {

        // call the /harvest API and parse results into SPARQL QuerySolutions
        final List<QuerySolution> resultList = harvest();

        // turn the QuerySolutions into SearchResults
        final List<SearchResult> results = getSearchResultsFromSPARQLResults(resultList, request);
        if (logger.isDebugEnabled()) {
            logHarvestedResults(results);
        }
        request.setMaxResults(results.size());

        // create a SearchResultSet with the correct paginated range
        return createSearchResultSet(results, request);
    }

    /**
     * Writes a summary line plus at most MAX_LOGGED_RESULTS individual results to the debug log.
     * A result whose type URI equals EIOntConstants.IS_DELETED is prefixed with "DELETE :".
     * @param results Harvested search results.
     */
    private void logHarvestedResults(final List<SearchResult> results) {
        final int total = results.size();
        if (total > 0) {
            logger.debug("Harvest " + total + " resource change events");
        }
        int position = 1;
        for (SearchResult r : results) {
            if (position > MAX_LOGGED_RESULTS) {
                break; // Just print the first few
            }
            String delete = EIOntConstants.IS_DELETED.equals(r.getType().getURI().toString()) ? "DELETE :" : "";
            logger.debug( "  " + position + " of " + total + " : " + delete + r.getEntity());
            position++;
        }
    }

    /**
     * Executes the repository /harvest API and returns the results as a list of SPARQL QuerySolutions.
     * A non-OK HTTP status or a missing response body is logged and yields an empty list.
     * When the response contains results, the "from" time used for the next call is updated from
     * the X-Precise-Last-Modified response header.
     * @return List of QuerySolutions parsed from the response; empty on failure.
     * @throws IOException Thrown if there is an error.
     */
    private List<QuerySolution> harvest() throws IOException {

        // create PostMethod and set parameters
        final PostMethod method = new PostMethod(repoConfig.getHarvestUrl());
        method.setParameter("view", RepositoryHttpConfig.PUBLISHED_VIEW);
        method.setParameter("detail", "full");

        // if we have a fromTime value, set that
        if (fromTime != null) {
            method.setParameter("after", format.format(fromTime) + "Z"); // GMT time
        }

        InputStream is = null;
        try {
            // execute the /harvest call
            final long startTime = System.currentTimeMillis();
            final int status = httpclient.executeMethod(method);
            if (status != HttpStatus.SC_OK) {
                // the body stream is null when the server sent no response body
                final InputStream errorBody = method.getResponseBodyAsStream();
                final String response =
                    (errorBody == null) ? "" : ProviderUtils.getStringFromInputStream(errorBody);
                logger.error( "harvest failed with status: " + status + "\r\n" + response);
                return Collections.emptyList();
            }

            // parse the results as QuerySolutions
            is = method.getResponseBodyAsStream();
            if (is == null) {
                logger.error("harvest returned status " + status + " without a response body");
                return Collections.emptyList();
            }
            final ResultSet results = ResultSetFactory.fromXML(is);
            final List<QuerySolution> resultList = ResultSetFormatter.toList(results);

            // if there were results, update the fromTime
            if (!resultList.isEmpty()) {
                final String lastModified = updateFromTime(method);
                if (logger.isDebugEnabled()) {
                    logger.debug("Harvest " + resultList.size() +
                            " SELECT results in " + (System.currentTimeMillis() - startTime) +
                            " msec;  last-modified: " + lastModified);
                }
            }

            return resultList;
        } finally {
            // close the stream first, but release the connection even if closing fails
            try {
                if (is != null) {
                    is.close();
                }
            } finally {
                method.releaseConnection();
            }
        }
    }

    /**
     * Reads the X-Precise-Last-Modified header from an executed harvest call and, if it is present
     * and parseable, stores it as the lower bound for the next harvest. If the header is missing or
     * cannot be parsed, the previous lower bound is kept and an error is logged.
     * @param method The executed harvest method.
     * @return The raw header value, or null if the header is missing.
     */
    private String updateFromTime(final PostMethod method) {
        final org.apache.commons.httpclient.Header header =
            method.getResponseHeader(PRECISE_LAST_MODIFIED_HEADER);
        final String value = (header == null) ? null : header.getValue();
        if (value == null) {
            logger.error("Harvest response is missing the " + PRECISE_LAST_MODIFIED_HEADER
                    + " header; keeping previous from-time " + fromTimeStr);
            return null;
        }
        try {
            final Date parsed = preciseHTTPDate.parse(value);
            // update both fields together so they never disagree
            fromTime = parsed;
            fromTimeStr = value;
        } catch (ParseException e) {
            logger.error("Error parsing X-Precise-Last-Modified " + value, e);
        }
        return value;
    }

}