package org.eaglei.repository.inferencer;

import java.util.concurrent.ArrayBlockingQueue;
import java.util.Arrays;
import java.util.Set;
import java.util.HashSet;
import java.io.IOException;

import org.apache.log4j.Logger;
import org.apache.log4j.LogManager;

import info.aduna.iteration.CloseableIteration;

import org.openrdf.query.Dataset;
import org.openrdf.query.impl.DatasetImpl;
import org.openrdf.query.impl.EmptyBindingSet;
import org.openrdf.query.BindingSet;
import org.openrdf.query.algebra.StatementPattern;
import org.openrdf.query.algebra.Distinct;
import org.openrdf.query.algebra.TupleExpr;
import org.openrdf.query.algebra.Var;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.sail.SailConnectionListener;
import org.openrdf.sail.SailException;
import org.openrdf.sail.inferencer.InferencerConnection;
import org.openrdf.sail.inferencer.InferencerConnectionWrapper;
import org.openrdf.OpenRDFException;
import org.openrdf.query.QueryEvaluationException;

import org.eaglei.repository.vocabulary.REPO;
import org.eaglei.repository.util.Utils;

/**
 * Sesame RDF Database extension to add:
 * Custom minimalist inferencing SAIL layer for
 * eagle-i Data Repository.  This wraps a Sesame InferencerConnection
 * and updates inferred statements after every change.
 *
 * Note that this is an extremely stripped-down MINIMALIST form of
 * "inference", intended to support FAST RESPONSE TO INCREMENTAL CHANGES.
 * Typically, sophisticated inference engines do not respond quickly to
 * even a small change in their data model (assertions), since it is hard
 * to predict what any added, or, especially, removed statements will do
 * without reasoning everything over again from the start.
 *
 * Since the eagle-i repository is designed to contain a large set
 * (thousands) of instances that get updated fairly frequently, e.g.
 * edits by data collectors and curators, handling small changes efficiently
 * is of the utmost importance.
 *
 * See http://www.w3.org/TR/rdf-mt/ (the RDF Semantics spec)
 * for the entailment rules we infer.
 *
 * Our approach is layered:
 *
 * I. Different goals and strategies for TBox ("terminology", i.e.
 *    ontology graphs) and ABox ("assertion", i.e. instances).
 *
 *    A. For the TBox:
 *      - Changes are likely to be very infrequent, but complexity is high.
 *      - Compute direct subclass and subproperty statements for all
 *        inferred subclasses and subproperties, used to drive further
 *        inference and answer queries.
 *    B. For the ABox:
 *      - Changes are frequent, so avoid re-computing any more inferred
 *        statements than necessary.
 *      - Assume (for now) all instances are independent with respect to
 *        inference, so each subject can be considered independently.
 *      - Only infer rdf:type (RDFS rule "rdfs9") on instances; store
 *        new types as inferred statements.
 *    C. Other requirements
 *     - Store all inferred TBox statements in the same graphs as the TBox.
 *     - Store all inferred ABox statements in the NG_Inferred named graph
 *       which must be of type 'published' and publicly visible, since
 *       it is joined to other published graphs in Views.
 *
 * II. Implementation
 *    A. TBox:
 *      - This requires each ontology to exist on a separate named graph,
 *        and it is entirely self-contained (no imports, all materialized).
 *      - After any change, crank the entire new ontology through a
 *        reasoner again to regenerate all inferred TBox statements.
 *      - Change to any TBox requires recomputing all ABox inferences too.
 *      - Create statements (marked as inferred) for all inferred:
 *        - direct rdfs:subClassOf relationships (entailment rule rdfs11)
 *        - direct rdfs:subPropertyOf relationships (entailment rule rdfs5)
 *        - rdf:type statements (entailment rule rdfs9)
 *
 *    B. For the ABox:
 *      - After a change to the asserted rdf:type statements for an
 *        instance, recompute ALL inferred types for that instance.
 *
 *    C. Determining which graphs are which
 *      - there are only two TBox graphs, they are hardcoded:
 *        - the repo's own ontology graph, http://eagle-i.org/ont/repo/1.0/
 *        - eagle-i data model: http://purl.obolibrary.org/obo/ero.owl
 *
 * @see TBoxInferencer
 *
 * @author Larry Stone
 * Started May 30 2010
 */
class MinimalInferencerConnection
    extends InferencerConnectionWrapper
    implements SailConnectionListener
{
    // Capacity of the buffer of pending inferred (subject, type) pairs;
    // when the buffer is full it is flushed to the wrapped connection.
    private static final int INFERRED_TYPE_BUFFER_CAPACITY = 100000;

    private final Logger log = LogManager.getLogger(this.getClass());

    // name of each TBox graph modified in this transaction, if any.
    private Set<URI> modifiedTBoxGraphs = null;

    // ABox instances (subject URI) to be re-inferred
    private Set<URI> modifiedABoxSubjects = null;

    // counters for summary logging
    private int aboxCount = 0;
    private int tboxCount = 0;

    // flag to prevent feedback through notification
    private boolean doingInferencing = false;

    // flag set when TBox is changed, need to re-infer all ABox instances.
    private boolean reInferAllABox = false;

    // buffer of new ABox type statements;  accumulate them to avoid
    // interleaving too many writes (which flush updates to the lower Sail)
    // and reads (required to get next inferred types).
    // Each element is a two-element array: { subject, inferred type }.
    private final ArrayBlockingQueue<URI[]> inferredTypeBuffer =
        new ArrayBlockingQueue<URI[]>(INFERRED_TYPE_BUFFER_CAPACITY);

    // cache of TBoxInferencer's configured graphs
    private static Set<URI> tboxGraphs = null;
    private static URI tboxGraphsAsArray[] = null;

    /**
     * <p>Constructor for MinimalInferencerConnection.</p>
     * Registers this object as a listener on the wrapped connection and,
     * on first use, caches the TBox graph names obtained from
     * {@link TBoxInferencer}.
     *
     * @param con a {@link org.openrdf.sail.inferencer.InferencerConnection} object.
     */
    public MinimalInferencerConnection(InferencerConnection con)
    {
        super(con);
        con.addConnectionListener(this);

        // Initialize statics.  Lock on the class literal (not getClass())
        // so that every instance, including any subclass instance, uses
        // the same monitor to guard the shared static fields.
        synchronized (MinimalInferencerConnection.class) {
            if (tboxGraphs == null) {
                // build both values before publishing either, so the two
                // statics are never left half-initialized.
                Set<URI> graphs = TBoxInferencer.getInstance().getTBoxGraphs();
                URI graphArray[] = graphs.toArray(new URI[graphs.size()]);
                tboxGraphsAsArray = graphArray;
                tboxGraphs = graphs;
                log.info("Initialized TBox graph array = "+Arrays.deepToString(tboxGraphsAsArray));
            }
        }
    }


    /**
     * {@inheritDoc}
     *
     * This is a callback invoked by the lower Sail when a statement
     * is added.  Beware of recursive calls -- adding inferred statements will
     * also call this, so do NOT let that cause more inferencing.
     */
    public void statementAdded(Statement st)
    {
        markChanged(st);
    }

    /**
     * {@inheritDoc}
     *
     * This is a callback invoked by the lower Sail when a statement is
     * removed.  Beware of recursive calls -- removing inferred
     * statements will also call this, so do NOT let that cause more inferencing.
     */
    public void statementRemoved(Statement st)
    {
        markChanged(st);
    }

    /**
     * Shared handling for added and removed statements: unless the change
     * was produced by our own inferencing, record it as a TBox change or,
     * failing that, as a possible ABox rdf:type change.
     */
    private void markChanged(Statement st)
    {
        if (doingInferencing)
            return;

        // check for TBox change first, if not..
        if (!markTBox(st)) {
            // check if it's a statement about rdf:type, mark subject
            markABox(st);
        }
    }

    /**
     * If this statement modifies a TBox graph, add that graph to
     * the modified set.  Returns true if this statement modifies TBox.
     * Also sets the flag to re-compute all ABox inference, which makes
     * any individually marked ABox subjects redundant, so they are dropped.
     *
     * @param st the added or removed statement
     * @return true when the statement's context is one of the TBox graphs
     */
    private boolean markTBox(Statement st)
    {
        Resource ctx = st.getContext();
        // instanceof is false for null, so no separate null test is needed
        if (!(ctx instanceof URI) || !tboxGraphs.contains(ctx))
            return false;

        if (modifiedTBoxGraphs == null)
            modifiedTBoxGraphs = new HashSet<URI>();
        if (modifiedTBoxGraphs.add((URI)ctx) && log.isDebugEnabled())
            log.debug("Marked dirty TBox graph: "+ctx);
        reInferAllABox = true;
        if (modifiedABoxSubjects != null)
            modifiedABoxSubjects.clear();
        return true;
    }

    /**
     * If this statement changes an ABox instance in such a way as to
     * affect inference (i.e. add or remove a statement with predicate
     * rdf:type), mark that instance to be re-inferred.
     * Statements on the inferred graph and statements without a context
     * are skipped, as are all statements once a full ABox re-inference
     * has already been scheduled.
     *
     * @param st the added or removed statement
     * @return true when the statement's subject was marked for re-inference
     */
    private boolean markABox(Statement st)
    {
        if (reInferAllABox || !RDF.TYPE.equals(st.getPredicate()))
            return false;

        Resource s = st.getSubject();
        Resource ctx = st.getContext();
        // Compare the context without casting it: a context may be a
        // blank node, and casting that to URI would throw
        // ClassCastException from inside the listener callback.
        if (!(s instanceof URI) || ctx == null || REPO.NG_INFERRED.equals(ctx))
            return false;

        if (modifiedABoxSubjects == null)
            modifiedABoxSubjects = new HashSet<URI>();
        modifiedABoxSubjects.add((URI)s);
        return true;
    }

    /**
     * {@inheritDoc}
     *
     * Execute whatever inferencing is triggered by changes in this transaction
     */
    @Override
    public void flushUpdates()
            throws SailException
    {
        super.flushUpdates();
        doInferencing();
    }

    /**
     * Run the inferencing required by the changes recorded since the last
     * flush: redo TBox inference for each modified ontology graph, then
     * either re-infer the whole ABox (after a TBox change) or only the
     * subjects whose rdf:type statements changed.  All recorded marks are
     * released when this returns, whether or not it succeeded.
     *
     * @throws SailException on failure in the wrapped connection or in
     *   TBox inference (IOException and RDFHandlerException are wrapped).
     */
    private void doInferencing()
            throws SailException
    {
        try {
            InferencerConnection rc = getWrappedConnection();
            long startMs = System.currentTimeMillis();
            boolean touched = false;
            aboxCount = 0;
            tboxCount = 0;
            // suppress listener callbacks caused by our own writes
            doingInferencing = true;

            // If an ontology graph was changed, redo all its inferencing.
            // This assumes each ontology graph is self-contained and independent.
            // Ontology changes also require all ABox inferencing to be redone.
            if (modifiedTBoxGraphs != null && !modifiedTBoxGraphs.isEmpty()) {
                for (URI g : modifiedTBoxGraphs) {
                    try {
                        tboxCount += TBoxInferencer.getInstance().doTBoxInference(rc, g);
                    } catch (IOException e) {
                        throw new SailException(e);
                    } catch (RDFHandlerException e) {
                        throw new SailException(e);
                    }
                    touched = true;
                }
                modifiedTBoxGraphs.clear();
            }

            // Did a TBox change require all ABox data to be re-inferenced?
            if (reInferAllABox) {
                rc.clearInferred(REPO.NG_INFERRED);
                DatasetImpl ds = getABoxDataset(rc);
                doABoxTypeInference(null, ds, rc);
                flushInferredTypes(rc);
                touched = true;

            // Process any changed ABox instances:
            //  1. clear inferred types on any instances with rdf:type changes.
            //  2. update inferred types on any instances with rdf:type changes.
            } else if (modifiedABoxSubjects != null && !modifiedABoxSubjects.isEmpty()) {
                if (log.isDebugEnabled())
                    log.debug("Got ABox changes, count="+modifiedABoxSubjects.size());
                DatasetImpl ds = getABoxDataset(rc);
                for (URI sub : modifiedABoxSubjects) {
                    rc.removeInferredStatement(sub, RDF.TYPE, null, REPO.NG_INFERRED);
                }
                for (URI sub : modifiedABoxSubjects) {
                    doABoxTypeInference(sub, ds, rc);
                }
                flushInferredTypes(rc);
                modifiedABoxSubjects.clear();
                touched = true;
            }

            if (log.isDebugEnabled()) {
                if (touched)
                    log.debug("flushUpdates: Inferencing summary: added "+aboxCount+" statements to ABox, "+tboxCount+" to TBox (ontology)"+
                        ", elapsed time = "+String.valueOf(System.currentTimeMillis()-startMs)+" mSec");
                else
                    log.debug("No inferencing changes upon this flushUpdates()");
            }
        } finally {
            doingInferencing = false;
            clearMarks();
        }
    }

    /** {@inheritDoc} */
    @Override
    public void rollback()
            throws SailException
    {
        log.debug("called rollback()");
        super.rollback();
        clearMarks();
    }

    /**
     * Release any cached changes - after abort or successful inference.
     * Also discards inferred type pairs still waiting in the buffer, so a
     * failed or rolled-back pass cannot leak them into a later flush.
     * After a successful pass the buffer is already empty.
     */
    private void clearMarks()
    {
        if (modifiedTBoxGraphs != null)
            modifiedTBoxGraphs.clear();
        reInferAllABox = false;

        if (modifiedABoxSubjects != null)
            modifiedABoxSubjects.clear();

        inferredTypeBuffer.clear();
    }

    /**
     * Compute ABox inferred rdf:type statements on either a single
     * instance (when subject is a URI), or all instances in the ABox
     * part of the repository (subject == null).
     * Dataset is provided to contain the search for all subjects.
     * Execute query that is like "SELECT DISTINCT ?s ?type WHERE {?s a ?type}"
     * to collect the subjects and asserted types from all relevant graphs.
     * Since the inferred statements are all going into the same NG_INFERRED
     * graph, we only need to process each tuple (subject, type) once no matter
     * how many graphs it appears in - though it should only be in one anyway.
     *
     * @param subject instance to re-infer, or null for every instance
     * @param ds dataset restricting the query to the ABox graphs
     * @param rc wrapped connection to query and to receive inferred statements
     * @throws SailException on any failure evaluating the query
     */
    private void doABoxTypeInference(URI subject, Dataset ds, InferencerConnection rc)
        throws SailException
    {
        Var sv = new Var("subject");
        if (subject != null)
            sv.setValue(subject);
        TupleExpr q = new Distinct(
                new StatementPattern(sv, new Var("p", RDF.TYPE), new Var("object")));
        try {
            CloseableIteration<? extends BindingSet,QueryEvaluationException> bi =
              rc.evaluate(q, ds, new EmptyBindingSet(), false);
            try {
                while (bi.hasNext()) {
                    BindingSet bs = bi.next();
                    Value s = bs.getValue("subject");
                    Value t = bs.getValue("object");
                    // Only URI subjects with URI types take part in the
                    // inference; a blank-node or literal type is skipped
                    // rather than cast, which would throw
                    // ClassCastException.
                    if (s instanceof URI && t instanceof URI)
                        addInferredTypes(rc, (URI)s, (URI)t);
                }
            } finally {
                bi.close();
            }
        } catch (OpenRDFException e) {
            throw new SailException(e);
        }
    }

    /**
     * Build a dataset whose default graphs are all URI-named contexts of
     * the wrapped connection EXCEPT the TBox graphs and the inferred graph.
     *
     * @param rc wrapped connection whose context IDs are enumerated
     * @return the dataset to use for ABox rdf:type queries
     * @throws SailException on failure listing the contexts
     */
    private DatasetImpl getABoxDataset(InferencerConnection rc)
        throws SailException
    {
        DatasetImpl ds = new DatasetImpl();
        CloseableIteration<? extends Resource,SailException> ci =
          rc.getContextIDs();
        try {
            while (ci.hasNext()) {
                Resource c = ci.next();
                if (c instanceof URI &&
                      !tboxGraphs.contains(c) &&
                      !REPO.NG_INFERRED.equals(c)) {
                    ds.addDefaultGraph((URI)c);
                }
            }
            if (log.isDebugEnabled())
                log.debug("Dataset for ABox query = "+Utils.prettyPrint(ds));
        } finally {
            ci.close();
        }
        return ds;
    }

    /**
     * Collect supertype statements (rdfs:subClassOf, including inferred
     * ones) for the asserted type from the TBox graphs, and queue an
     * inferred rdf:type pair for each URI supertype.  The pairs are written
     * out by {@link #flushInferredTypes}, which is also invoked here when
     * the buffer fills up.
     *
     * @param rc wrapped connection to read supertypes from
     * @param subject instance receiving the inferred types
     * @param assertedType type asserted on the subject
     * @throws SailException on failure reading or flushing statements
     */
    private void addInferredTypes(InferencerConnection rc, URI subject, URI assertedType)
        throws SailException
    {
        if (log.isDebugEnabled())
            log.debug("addInferredTypes subject="+subject+", asserted type="+assertedType);
        CloseableIteration<? extends Statement,SailException> sti =
          rc.getStatements(assertedType, RDFS.SUBCLASSOF, null, true, tboxGraphsAsArray);
        try {
            while (sti.hasNext()) {
                Value superType = sti.next().getObject();
                if (!(superType instanceof URI))
                    continue;

                URI tuple[] = new URI[] { subject, (URI)superType };
                if (!inferredTypeBuffer.offer(tuple)) {
                    // buffer full: write it out and retry once
                    flushInferredTypes(rc);
                    if (!inferredTypeBuffer.offer(tuple))
                        log.error("Failed to enqueue inferred statement after flushing! subj = "+subject);
                }
            }
        } finally {
            sti.close();
        }
    }

    /**
     * Write every buffered (subject, type) pair to the NG_INFERRED graph
     * as an inferred rdf:type statement, counting the ones actually added,
     * then empty the buffer.
     *
     * @param rc wrapped connection receiving the inferred statements
     * @throws SailException on failure adding a statement
     */
    private void flushInferredTypes(InferencerConnection rc)
        throws SailException
    {
        boolean debug = log.isDebugEnabled();
        if (debug)
            log.debug("Flushing inferred rdf:type statements, count="+inferredTypeBuffer.size());
        for (URI pair[] : inferredTypeBuffer) {
            if (rc.addInferredStatement(pair[0], RDF.TYPE, pair[1], REPO.NG_INFERRED)) {
                ++aboxCount;
                if (debug)
                    log.debug("rc.addInferredStatement("+pair[0]+", "+RDF.TYPE+", "+ pair[1]+", "+REPO.NG_INFERRED+")");
            }
        }
        inferredTypeBuffer.clear();
    }

    /**
     * <p>commit</p>
     * Delegates to the wrapper's commit, then logs an error if any
     * inferred type pairs were left unflushed in the buffer.
     *
     * @throws org.openrdf.sail.SailException if any.
     */
    @Override
    public void commit()
        throws SailException
    {
        super.commit();
        log.debug("Called commit()");
        // sanity check..
        if (inferredTypeBuffer.size() > 0)
            log.error("There are uncommitted ABox changes!");
    }
}
