package org.eaglei.repository.model;

import java.util.GregorianCalendar;
import java.util.Collections;
import java.util.Date;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
import java.util.HashSet;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.servlet.http.HttpServletRequest;

import org.apache.log4j.Logger;
import org.apache.log4j.LogManager;
import org.eaglei.repository.Lifecycle;

import org.openrdf.OpenRDFException;
import org.openrdf.model.URI;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.datatypes.XMLDatatypeUtil;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.impl.CalendarLiteralImpl;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResultHandlerBase;
import org.openrdf.query.TupleQueryResultHandlerException;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.Dataset;
import org.openrdf.query.impl.DatasetImpl;

import org.eaglei.repository.auth.Authentication;
import org.eaglei.repository.servlet.WithRepositoryConnection;
import org.eaglei.repository.vocabulary.REPO;
import org.eaglei.repository.vocabulary.DCTERMS;
import org.eaglei.repository.util.SPARQL;
import org.eaglei.repository.util.Utils;
import org.eaglei.repository.status.InternalServerErrorException;

/**
 * Utility class to manage provenance metadata, which is a constellation
 * of statements about a resource instance or Named Graph URI.  A
 * Provenance object models this metadata and provides convenient
 * methods to read and set it.  It supports the repo's underlying RDF
 * ontology for provenance, mostly borrowed from Dublin Core.  Other
 * repo code accesses the provenance statements directly through SPARQL
 * queries, so DO NOT CHANGE it.
 *
 * Note that one difficulty is how some objects of provenance
 * statements are blank nodes (e.g. dcterms:source) and so the
 * RDF has to be transformed into a flat model here with "virtual"
 * predicates.
 *
 * This class also enforces the informal rule that the value of
 * dcterms:modified MUST be a datatyped literal of the "dateTime" (or other
 * comparable timestamp) type, so date comparisons can be done efficiently.
 * This also applies to the date value of dcterms:created, although
 * nothing yet depends on it as harvest does on modified.
 *
 * Another limitation is that only the latest value is recorded for each
 * predicate, there is no history -- that decision was made to save space
 * and processing time since we haven't got a complete use case for
 * provenance yet anyway, really just the *latest* date for dcterms:modified.
 *
 * Provenance methods that change the "last modified" date of anything
 * also update the global last-modified timestamp, maintained by the
 * Lifecycle singleton.
 *
 * @author Larry Stone
 */
public class Provenance
{
    private static final Logger log = LogManager.getLogger(Provenance.class);

    /** Graph where ALL provenance metadata is stored. */
    public static final URI PROVENANCE_GRAPH = REPO.NG_METADATA;

    /**
     * IMPORTANT:
     * List ALL provenance predicate URIs here.  This is what the /update
     * service uses to check for Provenance statements.
     * Although this *does* bake the ontology URIs into the code, instead of
     * getting them out of the RDF data with a query, that's OK since the
     * use of them is hardcoded too so any ontology changes will require
     * code changes anyway.
     * The set is wrapped as unmodifiable so it cannot be altered after
     * class initialization.
     */
    private static final Set<URI> predicates;
    static {
        Set<URI> all = new HashSet<URI>();
        Collections.addAll(all,
                           DCTERMS.CREATED,
                           DCTERMS.CREATOR,
                           DCTERMS.CONTRIBUTOR,
                           DCTERMS.MODIFIED,
                           DCTERMS.MEDIATOR,
                           DCTERMS.SOURCE,
                           DCTERMS.IDENTIFIER);
        predicates = Collections.unmodifiableSet(all);
    }

    // cached dataset for provenance queries - built once, then shared.
    // All access goes through getProvDataset(), which guards it with
    // the class lock.
    private static DatasetImpl provDataset = null;

    // the resource for which we have provenance metadata
    private final URI uri;

    // cached map of read-only provenance metadata values for this instance;
    // null means "not loaded yet" (or invalidated by decache()).
    private Map<URI,String> stmCache = null;

    /**
     * SPARQL query to collect provenance about ?subject
     * Need to bind ?subject
     * Note that it collects direct provenance statements from the provenance
     * graph, but labels of values and subvalues come from any graph.
     * (e.g. these would be the dc:creator user, or dc:source values..)
     */
    private static final String provQuery =
      "SELECT * WHERE { GRAPH <"+PROVENANCE_GRAPH+"> { \n"+
      "?subject ?dcterm ?dcvalue \n"+
      "OPTIONAL { GRAPH ?g {?dcvalue <"+RDFS.LABEL+"> ?dcvalueLabel}} \n"+
      "OPTIONAL { ?dcvalue ?dcsubterm ?dcsubvalue \n"+
      " OPTIONAL { GRAPH ?gg {?dcsubvalue <"+RDFS.LABEL+"> ?dcsubvalueLabel}}}}}";

    /**
     * Constructor
     * @param uri the subject uri
     */
    public Provenance(URI uri)
    {
        this.uri = uri;
    }

    /**
     * Predicate method, true if URI is a provenance predicate URI.
     *
     * @param pp a {@link org.openrdf.model.URI} object.
     * @return true if pp names a provenance predicate.
     */
    public static boolean isProvenancePredicate(URI pp)
    {
        return predicates.contains(pp);
    }

    /**
     * Convenience method to set Created provenance values, upon creation.
     * Also sets Modified provenance metadata automatically, since the
     * last-modified date is used to judge when a resource needs to be
     * re-indexed.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param when time at which it was created
     */
    public void setCreated(HttpServletRequest request, Date when)
    {
        Literal lwhen = makeDateTime(when);
        setProvenanceInternal(request, DCTERMS.CREATOR, DCTERMS.CREATED, lwhen);
        setProvenanceInternal(request, DCTERMS.CONTRIBUTOR, DCTERMS.MODIFIED, lwhen);
        Lifecycle.getInstance().updateLastModified(when);
    }

    /**
     * Convenience method to set provenance values for "mediated" creation,
     * i.e. when instance already contains a dcterms:creator that has to
     * be preserved.  Record the creating user as dcterms:mediator instead.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param when time at which it was created
     */
    public void setMediated(HttpServletRequest request, Date when)
    {
        Literal lwhen = makeDateTime(when);
        setProvenanceInternal(request, DCTERMS.MEDIATOR, DCTERMS.CREATED, lwhen);
        setProvenanceInternal(request, DCTERMS.CONTRIBUTOR, DCTERMS.MODIFIED, lwhen);
        Lifecycle.getInstance().updateLastModified(when);
    }

    /**
     * Convenience method to set provenance values upon last modification.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param when time at which it was last modified
     */
    public void setModified(HttpServletRequest request, Date when)
    {
        setProvenanceInternal(request, DCTERMS.CONTRIBUTOR, DCTERMS.MODIFIED, makeDateTime(when));
        Lifecycle.getInstance().updateLastModified(when);
    }

    /**
     * Internal driver to set a "who" / "when" pair of provenance values.
     * The "who" value is the principal URI of the request's authenticated
     * user; both statements are written on the request's connection.
     */
    private void setProvenanceInternal(HttpServletRequest request, URI whoTerm, URI whenTerm, Literal when)
    {
        RepositoryConnection rc = WithRepositoryConnection.get(request);
        URI who = Authentication.getPrincipalURI(request);
        setProvenanceStatement(rc, whoTerm, who);
        setProvenanceStatement(rc, whenTerm, when);
    }

    /**
     * Set provenance values directly, bypass e.g. the automatic use of
     * current authenticated user.  Needed on rare occasions such as
     * bootstrap graph setup where the actor is the system itself.
     * THIS IS DANGEROUS, since it does NOT type-check that the value
     * of a time property (e.g. dcterms:modified) is a calendar literal.
     *
     * Any existing value of the term is removed first, so only the
     * latest value is kept.
     *
     * @param rc a {@link org.openrdf.repository.RepositoryConnection} object.
     * @param term a provenance predicate, a {@link org.openrdf.model.URI} object.
     * @param value a {@link org.openrdf.model.Value} object.
     * @throws InternalServerErrorException if the repository rejects the update.
     */
    public void setProvenanceStatement(RepositoryConnection rc, URI term, Value value)
    {
        try {
            rc.remove(uri, term, null, PROVENANCE_GRAPH);
            rc.add(uri, term, value, PROVENANCE_GRAPH);
            if (log.isDebugEnabled())
                log.debug("Setting provenance: ("+uri+", "+term+", "+value+")");
        } catch (RepositoryException e) {
            log.error(e);
            throw new InternalServerErrorException(e);
        } finally {
            // Invalidate even on failure: the remove may have gone through
            // before the add failed, so cached values can no longer be trusted.
            decache();
        }
    }

    /**
     * Add/replace the Source provenance fields:
     *  dcterms:source -> bnode
     *                     dcterms:identifier -> filename or URI it came from
     *                     dcterms:modified -> last-mod date of source
     *
     * Does nothing when source is null.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param source value of identifier, a {@link java.lang.String} object.
     * @param sourceModified mod time for source or null if unknown.
     */
    public void setSource(HttpServletRequest request, String source, Date sourceModified)
    {
        // sanity check
        if (source == null)
            return;

        RepositoryConnection rc = WithRepositoryConnection.get(request);
        ValueFactory vf = rc.getValueFactory();
        setSourceStatements(rc, vf.createLiteral(source, XMLSchema.STRING),
                (sourceModified == null ? null : makeDateTime(sourceModified)));
    }

    /**
     * Convenience method to set source provenance identifier and
     * last-mod time of resource ingested into a graph.  This is
     * worth the trouble since it is useful to test for graphs that
     * need updating.
     *
     * The sourceModified value, when given, must be a literal with a
     * calendar datatype.  It is checked BEFORE anything is written, so an
     * invalid value leaves the existing source provenance untouched.
     * The source value itself is not type-checked.
     *
     * Does nothing when source is null.
     *
     * @param rc a {@link org.openrdf.repository.RepositoryConnection} object.
     * @param source value of identifier, a {@link org.openrdf.model.Value} object.
     * @param sourceModified mod time for source or null if unknown.
     * @throws IllegalArgumentException if sourceModified is not a calendar-typed literal.
     * @throws InternalServerErrorException if the repository rejects the update.
     */
    public void setSourceStatements(RepositoryConnection rc, Value source, Value sourceModified)
    {
        // sanity check
        if (source == null)
            return;

        // Validate up front that the value is a date-time (needed for
        // comparisons), so a bad argument cannot leave the old source
        // deleted and a half-built replacement behind.
        if (sourceModified != null) {
            URI dt = sourceModified instanceof Literal ? ((Literal)sourceModified).getDatatype() : null;
            if (dt == null || !XMLDatatypeUtil.isCalendarDatatype(dt))
                throw new IllegalArgumentException("The dcterms:modified provenance property requires a literal date/time value, this was not: "+sourceModified);
        }

        try {
            // Collect the objects of any existing dcterms:source statements
            // first, and close the result before modifying the repository,
            // rather than removing statements while still iterating.
            Set<Resource> oldSources = new HashSet<Resource>();
            RepositoryResult<Statement> rr = rc.getStatements(uri, DCTERMS.SOURCE, null, true, PROVENANCE_GRAPH);
            try {
                while (rr.hasNext()) {
                    Value obj = rr.next().getObject();
                    if (obj instanceof Resource)
                        oldSources.add((Resource)obj);
                }
            } finally {
                rr.close();
            }

            // get rid of existing source AND anything attached to its bnode:
            for (Resource old : oldSources)
                rc.remove(old, null, null, PROVENANCE_GRAPH);
            rc.remove(uri, DCTERMS.SOURCE, null, PROVENANCE_GRAPH);

            // build the replacement: bnode details first, then the link to it
            Resource bn = rc.getValueFactory().createBNode();
            rc.add(bn, DCTERMS.IDENTIFIER, source, PROVENANCE_GRAPH);
            if (sourceModified != null)
                rc.add(bn, DCTERMS.MODIFIED, sourceModified, PROVENANCE_GRAPH);
            rc.add(uri, DCTERMS.SOURCE, bn, PROVENANCE_GRAPH);

            if (log.isDebugEnabled()) {
                log.debug("Setting provenance source: ("+uri+", dcterms:source, "+bn+")");
                log.debug("Setting provenance source: ("+bn+", dcterms:identifier, "+source+")");
                if (sourceModified != null)
                    log.debug("Setting provenance source: ("+bn+", dcterms:modified, "+sourceModified+")");
            }
        } catch (RepositoryException e) {
            log.error(e);
            throw new InternalServerErrorException(e);
        } finally {
            // Invalidate even on failure, since some of the changes above
            // may already have been applied.
            decache();
        }
    }

    /**
     * Utility translation method, returns typed literal of specified moment
     *
     * @param when a {@link java.util.Date} object.
     * @return specified 'when' as a {@link org.openrdf.model.Literal} object.
     * @throws InternalServerErrorException if no DatatypeFactory can be created.
     */
    public static Literal makeDateTime(Date when)
    {
        try {
            GregorianCalendar gc = new GregorianCalendar();
            gc.setTime(when);
            return new CalendarLiteralImpl(DatatypeFactory.newInstance().newXMLGregorianCalendar(gc));
        } catch (DatatypeConfigurationException  e) {
            log.error(e);
            throw new InternalServerErrorException(e);
        }
    }

    /** invalidate the provenance statement cache */
    private void decache()
    {
        stmCache = null;
    }

    /**
     * Get cached map of provenance predicates to values, running the
     * provenance query to fill it if it is not loaded.
     *
     * @throws InternalServerErrorException if the query cannot be prepared or evaluated.
     */
    private Map<URI,String> getStatementCache(HttpServletRequest request)
    {
        if (stmCache == null) {
            RepositoryConnection rc = WithRepositoryConnection.get(request);
            // Fill a local map and publish it only after the query succeeds;
            // otherwise a failed query would leave an empty map cached and
            // later lookups would silently report every field as unset.
            Map<URI,String> fresh = new HashMap<URI,String>();
            try {
                if (log.isDebugEnabled())
                    log.debug("Provenance SPARQL query = "+provQuery);
                TupleQuery q = rc.prepareTupleQuery(QueryLanguage.SPARQL, provQuery);
                q.setBinding("subject", uri);
                q.setDataset(getProvDataset(request));
                q.setIncludeInferred(false);
                q.evaluate(new ProvenanceHandler(uri, fresh));
            } catch (MalformedQueryException e) {
                log.error("Rejecting malformed query:"+e);
                throw new InternalServerErrorException(e);
            } catch (OpenRDFException e) {
                log.error(e);
                throw new InternalServerErrorException(e);
            }
            stmCache = fresh;
        }
        return stmCache;
    }

    /**
     * <p>getField - get the value of a provenance predicate.</p>
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param field the URI of the metadata field
     * @return value of the field or null if not set
     * @throws InternalServerErrorException if the provenance query fails.
     */
    public String getField(HttpServletRequest request, URI field)
    {
        return getStatementCache(request).get(field);
    }

    /**
     * Get the (cached) Dataset to use on provenance queries.
     * It has to include NG_Users to get the user label values..
     *
     * The dataset is fully built in a local variable and only then stored
     * in the shared static field, under the class lock, so no caller can
     * ever observe it without the NG_Users graph added.
     */
    private Dataset getProvDataset(HttpServletRequest request)
    {
        synchronized (Provenance.class) {
            if (provDataset == null) {
                DatasetImpl ds = SPARQL.copyDataset(SPARQL.getInternalAndMetadataGraphs(request));
                SPARQL.addGraph(ds, REPO.NG_USERS);
                provDataset = ds;
            }
            return provDataset;
        }
    }

    /**
     * SPARQL result handler that collects provenance values into map, look for:
     * ?dcterm ?dcvalue , ?dcsubterm ?dcsubvalue
     */
    private static class ProvenanceHandler extends TupleQueryResultHandlerBase
    {
        // destination map: provenance predicate (or labeled value URI) -> string value
        private final Map<URI,String> fields;

        // subject of the provenance query, only used in log messages
        private final URI uri;

        /** constructor with target instance and field map */
        ProvenanceHandler(URI uri, Map<URI,String> fields)
        {
            super();
            this.uri = uri;
            this.fields = fields;
        }

        /**
         * {@inheritDoc}
         *
         * Flattens one query solution into the field map.  The sub-terms of
         * dcterms:source are stored under the "virtual" predicates
         * SOURCE_IDENTIFIER and SOURCE_MODIFIED; every other term is stored
         * under its own URI.  Labels of URI values are stored keyed by the
         * value's URI.
         */
        @Override
        public void handleSolution(BindingSet bs)
            throws TupleQueryResultHandlerException
        {
            Value dcterm = bs.getValue("dcterm");
            Value dcvalue = bs.getValue("dcvalue");
            Value dcvalueLabel = bs.getValue("dcvalueLabel");
            Value dcsubterm = bs.getValue("dcsubterm");
            Value dcsubvalue = bs.getValue("dcsubvalue");
            Value dcsubvalueLabel = bs.getValue("dcsubvalueLabel");

            if (DCTERMS.SOURCE.equals(dcterm)) {
                if (DCTERMS.IDENTIFIER.equals(dcsubterm))
                    fields.put(DCTERMS.SOURCE_IDENTIFIER, Utils.valueAsString(dcsubvalue));
                else if (DCTERMS.MODIFIED.equals(dcsubterm))
                    fields.put(DCTERMS.SOURCE_MODIFIED, Utils.valueAsString(dcsubvalue));
                else
                    log.warn("Found unrecognized provenance value for subject="+uri+", dcterms:source sub-term="+dcsubterm);
            } else if (dcterm instanceof URI) {
                fields.put((URI)dcterm, Utils.valueAsString(dcvalue));
            }

            // store labels as extra fields..
            if (dcvalueLabel != null && dcvalue instanceof URI)
                fields.put((URI)dcvalue, Utils.valueAsString(dcvalueLabel));
            if (dcsubvalueLabel != null && dcsubvalue instanceof URI)
                fields.put((URI)dcsubvalue, Utils.valueAsString(dcsubvalueLabel));
        }
    }
}
