package org.eaglei.repository;

import java.util.GregorianCalendar;
import java.util.Date;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
import java.util.HashSet;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.servlet.http.HttpServletRequest;

import org.apache.log4j.Logger;
import org.apache.log4j.LogManager;

import org.openrdf.OpenRDFException;
import org.openrdf.model.URI;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.impl.CalendarLiteralImpl;
import org.openrdf.repository.RepositoryResult;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResultHandlerBase;
import org.openrdf.query.TupleQueryResultHandlerException;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.Dataset;
import org.openrdf.query.impl.DatasetImpl;

import org.eaglei.repository.servlet.WithRepositoryConnection;
import org.eaglei.repository.vocabulary.REPO;
import org.eaglei.repository.vocabulary.DCTERMS;
import org.eaglei.repository.util.SPARQL;
import org.eaglei.repository.status.InternalServerErrorException;

/**
 * Utility class to manage provenance metadata on all objects.
 * An instance is attached to a resource URI, typically either a
 * named graph or eagle-i resource instance, which it describes.
 * Methods that change the "last modified" date of anything also update
 * the global last-modified timestamp, maintained by the DataRepository
 * singleton.
 *
 * @author Larry Stone
 * @version $Id: $
 */
public class Provenance
{
    private static final Logger log = LogManager.getLogger(Provenance.class);

    /**
     * Graph where provenance metadata is stored.
     * NOTE: the value is concatenated into the text of the provenance
     * SPARQL query ({@code provQuery}) when this class is initialized, so
     * reassigning this field afterwards does not change that query.
     */
    public static URI PROVENANCE_GRAPH = REPO.NG_METADATA;

    // all provenance predicates MUST be listed here so ingested
    // statements (and possibly delete requests) can be checked.
    private static final URI[] predicatesArray = {
        DCTERMS.CREATED,
        DCTERMS.CREATOR,
        DCTERMS.CONTRIBUTOR,
        DCTERMS.MODIFIED,
        DCTERMS.MEDIATOR,
        DCTERMS.SOURCE,
        DCTERMS.IDENTIFIER
    };
    private static final Set<URI> predicates = new HashSet<URI>();
    static {
        for (URI p : predicatesArray) {
            predicates.add(p);
        }
    }

    // dataset for provenance queries, built lazily by getProvDataset() and
    // shared by all instances.  Volatile so that a fully built dataset
    // assigned by one thread is what other threads observe.
    private static volatile DatasetImpl provDataset = null;

    // map of read-only provenance metadata values - null means not loaded yet.
    private Map<URI,String> provenance = null;

    // the resource for which we have provenance metadata
    private URI uri = null;

    // SPARQL query to collect provenance about ?subject
    // Need to bind ?subject
    private static final String provQuery =
      "SELECT * WHERE { GRAPH <"+PROVENANCE_GRAPH+"> { \n"+
      "?subject ?dcterm ?dcvalue \n"+
      "OPTIONAL { GRAPH ?g {?dcvalue <"+RDFS.LABEL+"> ?dcvalueLabel}} \n"+
      "OPTIONAL { ?dcvalue ?dcsubterm ?dcsubvalue \n"+
      " OPTIONAL { GRAPH ?gg {?dcsubvalue <"+RDFS.LABEL+"> ?dcsubvalueLabel}}}}}";

    /**
     * Create a provenance accessor attached to the given resource.
     * Nothing is read from the repository here; metadata is loaded on
     * the first call to {@link #getField}.
     *
     * @param uri the resource whose provenance metadata this instance
     *            reads and writes.
     */
    public Provenance(URI uri)
    {
        super();
        this.uri = uri;
    }

    /**
     * True if URI is a provenance predicate, this function is
     * really a predicate predicate.  only in RDF.
     *
     * @param pp a {@link org.openrdf.model.URI} object.
     * @return true if pp names a provenance predicate.
     */
    public static boolean isProvenancePredicate(URI pp)
    {
        return predicates.contains(pp);
    }

    /**
     * Convenience method to set Created provenance values, upon creation.
     * Also sets Modified provenance metadata automatically, since the
     * last-modified date is used to judge when a resource needs to be
     * re-indexed.  Writes dcterms:creator/created and
     * dcterms:contributor/modified, then updates the global
     * last-modified timestamp.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param when time at which it was created
     */
    public void setCreated(HttpServletRequest request, Date when)
    {
        Literal lwhen = getDateTime(when);
        setProvenanceInternal(request, DCTERMS.CREATOR, DCTERMS.CREATED, lwhen);
        setProvenanceInternal(request, DCTERMS.CONTRIBUTOR, DCTERMS.MODIFIED, lwhen);
        DataRepository.getInstance().setLastModified(when);
    }

    /**
     * Convenience method to set provenance values for "mediated" creation,
     * i.e. when instance already contains a dcterms:creator that has to
     * be preserved.  Record the creating user as dcterms:mediator instead.
     * Also writes dcterms:created, dcterms:contributor and
     * dcterms:modified, then updates the global last-modified timestamp.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param when time at which it was created
     */
    public void setMediated(HttpServletRequest request, Date when)
    {
        Literal lwhen = getDateTime(when);
        setProvenanceInternal(request, DCTERMS.MEDIATOR, DCTERMS.CREATED, lwhen);
        setProvenanceInternal(request, DCTERMS.CONTRIBUTOR, DCTERMS.MODIFIED, lwhen);
        DataRepository.getInstance().setLastModified(when);
    }


    /**
     * Convenience method to set provenance values upon last modification.
     * Writes dcterms:contributor and dcterms:modified, then updates the
     * global last-modified timestamp.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param when time at which it was last modified
     */
    public void setModified(HttpServletRequest request, Date when)
    {
        setProvenanceInternal(request, DCTERMS.CONTRIBUTOR, DCTERMS.MODIFIED, getDateTime(when));
        DataRepository.getInstance().setLastModified(when);
    }

    // Record a (who, when) pair: the principal of the request under
    // whoTerm and the given timestamp under whenTerm.
    private void setProvenanceInternal(HttpServletRequest request, URI whoTerm, URI whenTerm, Literal when)
    {
        RepositoryConnection rc = WithRepositoryConnection.get(request);
        URI who = Access.getPrincipalURI(request);
        setProvenance(rc, whoTerm, who);
        setProvenance(rc, whenTerm, when);
    }

    /**
     * Set provenance values directly, bypass e.g. the automatic use of
     * current authenticated user.  Needed on rare occasions such as
     * bootstrap graph setup where the actor is the system itself.
     * Any existing value of the term on this resource is replaced.
     * THIS IS DANGEROUS, since it does NOT type-check that the value
     * of a time property (e.g. dcterms:modified) is a calendar literal.
     *
     * @param rc a {@link org.openrdf.repository.RepositoryConnection} object.
     * @param term provenance predicate, a {@link org.openrdf.model.URI} object.
     * @param value a {@link org.openrdf.model.Value} object.
     * @throws InternalServerErrorException if the repository update fails.
     */
    public void setProvenance(RepositoryConnection rc, URI term, Value value)
    {
        try {
            rc.remove(uri, term, null, PROVENANCE_GRAPH);
            rc.add(uri, term, value, PROVENANCE_GRAPH);
            log.debug("Setting provenance: ("+uri+", "+term+", "+value+")");
        } catch (RepositoryException e) {
            log.error(e);
            throw new InternalServerErrorException(e);
        }
        // keep the in-memory cache (if already loaded) in step with the repository
        if (provenance != null) {
            provenance.put(term, value.stringValue());
        }
    }

    /**
     * Add/replace the Source provenance fields:
     *  dcterms:source -> bnode
     *                     dcterms:identifier -> filename or URI it came from
     *                     dcterms:modified -> last-mod date of source
     *
     * Does nothing when source is null.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param source value of identifier, a {@link java.lang.String} object.
     * @param sourceModified mod time for source or null if unknown.
     */
    public void setSource(HttpServletRequest request, String source, Date sourceModified)
    {
        // sanity check
        if (source == null) {
            return;
        }

        RepositoryConnection rc = WithRepositoryConnection.get(request);
        ValueFactory vf = rc.getValueFactory();
        Literal modified = (sourceModified == null) ? null : getDateTime(sourceModified);
        setSource(rc, vf.createLiteral(source, XMLSchema.STRING), modified);
    }

    /**
     * Convenience method to set source provenance: identifier and
     * last-mod time of resource ingested into a graph.  This is
     * worth the trouble since it is useful to test for graphs that
     * need updating.  Any existing dcterms:source of this resource, and
     * every statement in the provenance graph whose subject is the old
     * source node, is removed first.  Does nothing when source is null.
     *
     * THIS IS DANGEROUS, since it does NOT type-check that the value
     * of a time property (e.g. dcterms:modified) is a calendar literal.
     *
     * @param rc a {@link org.openrdf.repository.RepositoryConnection} object.
     * @param source value of identifier, a {@link org.openrdf.model.Value} object.
     * @param sourceModified mod time for source or null if unknown.
     * @throws InternalServerErrorException if the repository update fails.
     */
    public void setSource(RepositoryConnection rc, Value source, Value sourceModified)
    {
        // sanity check
        if (source == null) {
            return;
        }

        try {
            // Find the existing source node(s) first, and only start
            // deleting once the result iterator has been closed, so the
            // repository is never modified while it is being iterated.
            Set<Resource> oldSources = new HashSet<Resource>();
            RepositoryResult<Statement> rr = rc.getStatements(uri, DCTERMS.SOURCE, null, true, PROVENANCE_GRAPH);
            try {
                while (rr.hasNext()) {
                    Value old = rr.next().getObject();
                    if (old instanceof Resource) {
                        oldSources.add((Resource)old);
                    }
                }
            } finally {
                rr.close();
            }

            // get rid of existing source AND anything attached to its bnode:
            for (Resource old : oldSources) {
                rc.remove(old, null, null, PROVENANCE_GRAPH);
            }
            rc.remove(uri, DCTERMS.SOURCE, null, PROVENANCE_GRAPH);

            Resource bn = rc.getValueFactory().createBNode();
            rc.add(bn, DCTERMS.IDENTIFIER, source, PROVENANCE_GRAPH);
            log.debug("Setting provenance source: ("+uri+", dcterms:source, "+bn+")");
            log.debug("Setting provenance source: ("+bn+", dcterms:identifier, "+source+")");
            // XXX FIXME this is bogus, should parse and regenerate proper DATE
            if (sourceModified != null) {
                rc.add(bn, DCTERMS.MODIFIED, sourceModified, PROVENANCE_GRAPH);
                log.debug("Setting provenance source: ("+bn+", dcterms:modified, "+sourceModified+")");
            }
            rc.add(uri, DCTERMS.SOURCE, bn, PROVENANCE_GRAPH);

            // update the cache
            if (provenance != null) {
                provenance.put(DCTERMS.SOURCE_IDENTIFIER, source.stringValue());
                if (sourceModified != null) {
                    provenance.put(DCTERMS.SOURCE_MODIFIED, sourceModified.stringValue());
                } else {
                    provenance.remove(DCTERMS.SOURCE_MODIFIED);
                }
            }
        } catch (RepositoryException e) {
            log.error(e);
            throw new InternalServerErrorException(e);
        }
    }

    /**
     * Utility translation method, returns typed literal of specified moment
     *
     * @param when a {@link java.util.Date} object.
     * @return specified 'when' as a {@link org.openrdf.model.Literal} object.
     * @throws InternalServerErrorException if no
     *         {@link javax.xml.datatype.DatatypeFactory} can be obtained.
     */
    public static Literal getDateTime(Date when)
    {
        try {
            GregorianCalendar gc = new GregorianCalendar();
            gc.setTime(when);
            return new CalendarLiteralImpl(DatatypeFactory.newInstance().newXMLGregorianCalendar(gc));
        } catch (DatatypeConfigurationException  e) {
            log.error(e);
            throw new InternalServerErrorException(e);
        }
    }

    /**
     * Get the value of one provenance metadata field of this resource.
     * The first successful call runs the provenance query and caches all
     * of its results in this instance; later calls are answered from
     * that cache.  If the query fails nothing is cached, so a later call
     * will try the query again.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param field the URI of the metadata field
     * @return value of the field or null if not set
     * @throws InternalServerErrorException if the query cannot be
     *         prepared or evaluated.
     */
    public String getField(HttpServletRequest request, URI field)
    {
        if (provenance == null) {
            RepositoryConnection rc = WithRepositoryConnection.get(request);
            // Fill a local map and publish it only after the query has
            // completed, so a failure cannot leave behind an empty or
            // half-filled cache that would be mistaken for "loaded".
            Map<URI,String> loaded = new HashMap<URI,String>();
            try {
                log.debug("Provenance SPARQL query = "+provQuery);
                TupleQuery q = rc.prepareTupleQuery(QueryLanguage.SPARQL, provQuery);
                q.setBinding("subject", uri);
                q.setDataset(getProvDataset(request));
                q.setIncludeInferred(false);
                q.evaluate(new ProvenanceHandler(uri, loaded));
            } catch (MalformedQueryException e) {
                log.error("Rejecting malformed query:"+e);
                throw new InternalServerErrorException(e);
            } catch (OpenRDFException e) {
                log.error(e);
                throw new InternalServerErrorException(e);
            }
            provenance = loaded;
        }
        return provenance.get(field);
    }

    // cons up a special dataset, since it has to include NG_Users to
    // get the user label values..
    // The dataset is completed in a local variable before it is stored in
    // the shared static field, so no caller can pick up a dataset that
    // does not yet contain NG_Users.
    private Dataset getProvDataset(HttpServletRequest request)
    {
        DatasetImpl result = provDataset;
        if (result == null) {
            result = SPARQL.copyDataset(SPARQL.getInternalAndMetadataGraphs(request));
            SPARQL.addGraph(result, REPO.NG_USERS);
            provDataset = result;
        }
        return result;
    }

    /**
     * Labels for provenance value URIs are stored in the same map as fields,
     * so this simply delegates to {@link #getField}.
     *
     * @param request a {@link javax.servlet.http.HttpServletRequest} object.
     * @param field the URI whose label is wanted
     * @return the label, or null if none was found
     */
    public String getLabel(HttpServletRequest request, URI field)
    {
        return getField(request, field);
    }

    // collect provenance values into map, look for:
    // ?dcterm ?dcvalue , ?dcsubterm ?dcsubvalue
    private static class ProvenanceHandler extends TupleQueryResultHandlerBase
    {
        // destination map: key is the field predicate (or a value URI,
        // for its label), value is the string form of what was found
        private final Map<URI,String> fields;

        // subject of the query, only used in log messages
        private final URI uri;

        public ProvenanceHandler(URI uri, Map<URI,String> fields)
        {
            super();
            this.uri = uri;
            this.fields = fields;
        }

        // bindings read: dcterm, dcvalue, dcvalueLabel,
        //                dcsubterm, dcsubvalue, dcsubvalueLabel
        public void handleSolution(BindingSet bs)
            throws TupleQueryResultHandlerException
        {
            Value dcterm = bs.getValue("dcterm");
            Value dcvalue = bs.getValue("dcvalue");
            Value dcvalueLabel = bs.getValue("dcvalueLabel");
            Value dcsubterm = bs.getValue("dcsubterm");
            Value dcsubvalue = bs.getValue("dcsubvalue");
            Value dcsubvalueLabel = bs.getValue("dcsubvalueLabel");

            if (DCTERMS.SOURCE.equals(dcterm)) {
                // dcterms:source points at a node; its interesting data
                // are the sub-properties, stored under synthetic keys.
                if (DCTERMS.IDENTIFIER.equals(dcsubterm)) {
                    fields.put(DCTERMS.SOURCE_IDENTIFIER, dcsubvalue.stringValue());
                } else if (DCTERMS.MODIFIED.equals(dcsubterm)) {
                    fields.put(DCTERMS.SOURCE_MODIFIED, dcsubvalue.stringValue());
                } else {
                    log.warn("Found unrecognized provenance value for subject="+uri+", dcterms:source sub-term="+dcsubterm);
                }
            } else if (dcterm instanceof URI) {
                fields.put((URI)dcterm, dcvalue.stringValue());
            }

            // store labels as extra fields..
            if (dcvalueLabel != null && dcvalue instanceof URI) {
                fields.put((URI)dcvalue, dcvalueLabel.stringValue());
            }
            if (dcsubvalueLabel != null && dcsubvalue instanceof URI) {
                fields.put((URI)dcsubvalue, dcsubvalueLabel.stringValue());
            }
        }
    }
}
