package org.eaglei.search.provider.rdf;

import java.io.IOException;
import java.io.InputStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.eaglei.model.EIEntity;
import org.eaglei.model.EIOntModel;
import org.eaglei.search.provider.SearchCountRequest;
import org.eaglei.search.provider.SearchCounts;
import org.eaglei.search.provider.SearchRequest;
import org.eaglei.search.provider.SearchResult;
import org.eaglei.search.provider.SearchResultSet;
import org.eaglei.services.repository.RepositoryHttpClient;

import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFactory;
import com.hp.hpl.jena.query.ResultSetFormatter;

/**
 * Extension of AbstractRDFProvider that uses the Repository /harvest API (see /harvest REST API spec for details).
 * <ul>
 * <li>The results from the harvest call (returned in SPARQL tabular format) are parsed into a set of SPARQL 
 *     QuerySolutions.
 * <li>The QuerySolutions are used to create a set of SearchResults (one search result for each unique subject URI). 
 * <li>The SearchResults are gathered together into a SearchResultSet.
 * </ul>
 * @author frost
 */
// TODO implement streaming processing of harvest results rather than batch processing of entire set of query
//solutions
// TODO retrieve inferred triples as well (currently inferred triples are being computed locally by indexer)
// TODO the fromTime bookkeeping currently relies on clock sync between the client and repo server
// (which will be a non-issue when the client and repo run on the same machine); need to replace
// the logic that sets fromTime with code that gets the timestamp from the server
public final class RepositoryHarvester extends AbstractRDFProvider {

    private static final Log logger = LogFactory.getLog(RepositoryHarvester.class);

    /*
     * Pattern used to render the "from" request parameter. The value is always formatted in GMT
     * and a literal "Z" suffix is appended by formatFromTime().
     */
    private static final String FROM_TIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSS";

    /*
     * "from" time; lower bound for harvest changes. This is initially null so that no "from"
     * parameter is sent on the first request. After every harvest that returns at least one
     * result it is set to the (client-side) time captured right before that request was sent;
     * a harvest that returns no results leaves it unchanged.
     */
    private Date fromTime = null;

    /*
     * HttpClient that is reused across harvest calls. Created in init().
     */
    private HttpClient httpclient;

    /**
     * Creates a new RepositoryHarvester.
     * @param eagleiOntModel Handle to the in-memory representation of the eagle-i ontology.
     * @param institution Handle to the institution.
     * @param repoClient Configuration of the repository.
     */
    public RepositoryHarvester(final EIOntModel eagleiOntModel, final EIEntity institution,
            final RepositoryHttpClient repoClient) {
        super(eagleiOntModel, institution, repoClient);
    }

    /**
     * Creates the HttpClient used for all subsequent harvest requests. Must be called before
     * {@link #query(SearchRequest)}.
     * @throws IOException Declared by the overridden method; not thrown by this implementation.
     */
    @Override
    public void init() throws IOException {
        this.httpclient = createHttpClient(this.repoClient);
    }

    /**
     * No-op for the harvester: returns a new SearchCounts built from the wrapped request
     * without contacting the repository.
     * @param request Count request.
     * @return SearchCounts constructed from the request's SearchRequest.
     * @throws IOException Declared by the overridden method; not thrown by this implementation.
     */
    @Override
    public SearchCounts count(SearchCountRequest request) throws IOException {
        // no-op
        return new SearchCounts(request.getRequest());
    }

    /**
     * No-op for the harvester; query() talks to the /harvest API directly instead of executing
     * a SPARQL query.
     * @param query Ignored.
     * @return Always null.
     */
    @Override
    protected QueryExecution getQueryExecution(Query query) {
        // no-op
        return null;
    }

    /**
     * Calls the repository /harvest API and converts the response into a SearchResultSet.
     * Note that the request's max results value is overwritten with the number of SearchResults
     * produced by the harvest before the result set is created.
     * @param request Search request; its max results value is modified by this call.
     * @return SearchResultSet built from the harvested results.
     * @throws IOException Thrown if the harvest call fails.
     */
    @Override
    public SearchResultSet query(final SearchRequest request) throws IOException {

        // call the /harvest API and parse results into SPARQL QuerySolutions
        final List<QuerySolution> resultList = harvest();

        // turn the QuerySolutions into SearchResults
        final List<SearchResult> results = getSearchResultsFromSPARQLResults(resultList, request);
        request.setMaxResults(results.size());

        // create a SearchResultSet with the correct paginated range
        return createSearchResultSet(results, request);
    }

    /**
     * Executes the repository /harvest API and returns the results as a list of SPARQL
     * QuerySolutions. If the list is non-empty, fromTime is advanced to the time captured just
     * before the request was sent.
     * @return QuerySolutions parsed from the response (possibly empty).
     * @throws IOException Thrown if the request fails, the server answers with a non-2xx status,
     *         or the response has no body.
     */
    private List<QuerySolution> harvest() throws IOException {

        // create PostMethod and set parameters
        final String harvestUrl = this.repoClient.getHarvestUrl();
        final PostMethod method = new PostMethod(harvestUrl);
        method.setParameter("view", this.repoClient.getView());
        method.setParameter("detail", "full");

        // if we have a fromTime value, set that
        if (fromTime != null) {
            method.setParameter("from", formatFromTime(fromTime));
        }

        // record the time before we make the call
        final Date timeBeforeCall = new Date();

        InputStream is = null;
        try {
            // execute the /harvest call; fail fast on an error status rather than
            // handing an error page to the SPARQL XML parser
            final int status = httpclient.executeMethod(method);
            if (status < 200 || status >= 300) {
                throw new IOException("Harvest request to " + harvestUrl
                        + " failed with HTTP status " + status);
            }

            // parse the results as QuerySolutions
            is = method.getResponseBodyAsStream();
            if (is == null) {
                throw new IOException("Harvest request to " + harvestUrl
                        + " returned no response body (HTTP status " + status + ")");
            }
            final ResultSet results = ResultSetFactory.fromXML(is);
            final List<QuerySolution> resultList = ResultSetFormatter.toList(results);

            // if there were results, update the fromTime
            if (!resultList.isEmpty()) {
                this.fromTime = timeBeforeCall;
                if (logger.isDebugEnabled()) {
                    logger.debug("Harvest query got " + resultList.size() + " results from " + harvestUrl);
                }
            }

            return resultList;
        } finally {
            // close the stream first, and always release the connection even if close() throws
            try {
                if (is != null) {
                    is.close();
                }
            } finally {
                method.releaseConnection();
            }
        }
    }

    /*
     * Formats a timestamp for the "from" parameter: GMT, millisecond precision, "Z" suffix.
     * A fixed locale is used so the output does not depend on the JVM default locale
     * (default calendar system, digit characters). A new formatter is created per call because
     * SimpleDateFormat is not thread-safe.
     */
    private static String formatFromTime(final Date time) {
        final DateFormat format = new SimpleDateFormat(FROM_TIME_PATTERN, Locale.US);
        format.setTimeZone(TimeZone.getTimeZone("GMT"));
        return format.format(time) + "Z"; // GMT time
    }

    /*
     * Creates and configures the HttpClient: registers the repository username/password for
     * any auth scope and enables preemptive authentication.
     */
    private static HttpClient createHttpClient(final RepositoryHttpClient repoClient) {
        final HttpClient httpclient = new HttpClient();
        // set up the authentication credentials
        final UsernamePasswordCredentials credentials =
            new UsernamePasswordCredentials(repoClient.getUsername(), repoClient.getPassword());
        httpclient.getState().setCredentials(AuthScope.ANY, credentials);
        httpclient.getParams().setAuthenticationPreemptive(true);
        return httpclient;
    }

}