package org.eaglei.repository.format;

import java.io.IOException;
import java.io.Writer;
import java.io.BufferedWriter;
import java.nio.charset.Charset;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.helpers.RDFHandlerBase;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.model.BNode;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.URI;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;

/**
 * Writer for "Context NTriples" RDF serialization format
 *
 * About the Format:
 * This is NOT a standard or "official" format.  We made this up
 * for the eagle-i project, to have a simple line-based QUAD (not TRIPLE)
 * format that makes it easy to compare the contents of two groups of quads,
 * or dumps of a Sesame context-aware triplestore (really quad-store).
 *
 * Note that there IS a similar quasi-official semi-standard, "N-Quads"
 * format documented here:  http://sw.deri.org/2008/07/n-quads/
 * It is NOT useful for our needs since they put the context last and leave
 * it as an optional addition, so statements might be rendered as triples;
 * this makes simple text comparison difficult and columnar manipulation
 * (e.g. with "awk") impossible, since the 3rd element of a statement
 * might be a literal with embedded column-separator characters.  It's much
 * more sensible and efficient to put the context in the first column!
 *
 * The only purpose of this format is to render "quads" - i.e. the
 * statement *and* its context(s) - in a format that is guaranteed to
 * be the same for any equivalent graph, modulo the order of the lines.
 * This allows sorted output to be compared for equality with a simple
 * text comparison.
 *
 * It CANNOT be parsed into reasonable RDF again because the identifiers
 * in blank nodes are lost.  So there is no parser.
 *
 * Format is similar to NTriples; see:
 *   http://www.w3.org/TR/rdf-testcases/#ntriples
 * ...only each line STARTS with a fourth value, the context (or named
 * graph); a null context is represented by "<>".
 *
 * Destructive changes when filtering output:
 * 1. Optionally normalize all blank nodes so they compare as equal, all named _:bnode
 * 2. Transform newline characters (both \r and \n) into escaped versions
 *    so they do not induce "false" line breaks.
 *
 * Remember, it is *ONLY* useful to  debug and compare this output!
 *
 * @author Larry Stone
 * Started May 10, 2010
 * @version $Id: $
 */
public class RDFContextNTriplesWriter extends RDFHandlerBase implements RDFWriter
{
    /** Constant <code>CONTEXT_NTRIPLES_MIME="text/x-rdf-context-ntriples"</code> */
    public static final String CONTEXT_NTRIPLES_MIME = "text/x-rdf-context-ntriples";

    /**
     * Matches a single line-feed or carriage-return character.  Compiled
     * once for the class rather than once per statement; a Pattern is
     * immutable, so sharing it between writer instances is safe.
     */
    private static final Pattern LINE_BREAK = Pattern.compile("[\n\r]");

    /** Destination for the serialized lines; always a buffered writer. */
    private final BufferedWriter out;

    /**
     * When true, every blank node is written as the fixed token
     * <code>_:bnode</code>; when false, each blank node is written with
     * its own ID.
     */
    public static final boolean normalizeBNodes = false;

    /**
     * Constant <code>CONTEXT_NTRIPLES</code>: the format descriptor returned
     * by {@link #getRDFFormat()}, registered with file extension "cnt".
     */
    // NOTE(review): the declared charset is the platform default, so the
    // descriptor differs between hosts -- confirm whether a fixed charset
    // (e.g. UTF-8) was intended before changing it; callers may rely on it.
    public static final RDFFormat CONTEXT_NTRIPLES =
        new RDFFormat("ContextNTriples", CONTEXT_NTRIPLES_MIME, Charset.defaultCharset(),
                      "cnt", false, true);

    /**
     * Reports the serialization format produced by this writer.
     *
     * @return the {@link #CONTEXT_NTRIPLES} format descriptor.
     */
    public RDFFormat getRDFFormat()
    {
        return CONTEXT_NTRIPLES;
    }

    /**
     * Creates a writer that serializes statements to the given output.
     * The output is used directly if it is already a
     * {@link java.io.BufferedWriter}, otherwise it is wrapped in one.
     *
     * @param out a {@link java.io.Writer} object to receive the output.
     */
    public RDFContextNTriplesWriter(Writer out)
    {
        super();
        if (out instanceof BufferedWriter) {
            this.out = (BufferedWriter)out;
        } else {
            this.out = new BufferedWriter(out);
        }
    }

    /**
     * Writes one statement as a single line of the form
     * <code>context subject &lt;predicate&gt; object .</code>
     * A null context is written as <code>&lt;&gt;</code>.  Line-feed and
     * carriage-return characters inside a literal object are replaced by
     * the two-character sequences <code>\n</code> and <code>\r</code> so
     * that one statement always occupies exactly one line.
     *
     * @param s the statement to serialize.
     * @throws org.openrdf.rio.RDFHandlerException wrapping any IOException
     *         raised while writing.
     */
    public void handleStatement(Statement s)
        throws RDFHandlerException
    {
        try {
            // context goes in the FIRST column; "<>" stands for no context
            Resource ctx = s.getContext();
            if (ctx == null) {
                out.write("<>");
            } else {
                handleResource(ctx);
            }
            out.write(" ");

            // subject
            handleResource(s.getSubject());

            // predicate - also the spaces before and after
            out.write(" <");
            out.write(s.getPredicate().toString());
            out.write("> ");

            // object: literals need their line breaks escaped, anything
            // else is treated as a resource (URI or blank node)
            Value so = s.getObject();
            if (so instanceof Literal) {
                writeEscapingLineBreaks(so.toString());
            } else {
                handleResource((Resource)so);
            }
            out.write(" .");

            // NOTE(review): newLine() emits the platform line separator,
            // so dumps made on different platforms may differ in line endings.
            out.newLine();
        } catch (IOException e) {
            throw new RDFHandlerException(e);
        }
    }

    /**
     * Writes the text to the output, substituting the literal sequences
     * <code>\n</code> and <code>\r</code> for line-feed and carriage-return
     * characters to avoid false line breaks in the output.
     *
     * @param text the string to write.
     * @throws IOException if the underlying writer fails.
     */
    private void writeEscapingLineBreaks(String text)
        throws IOException
    {
        Matcher m = LINE_BREAK.matcher(text);
        int lastEnd = 0;
        while (m.find()) {
            // copy the run of ordinary characters preceding this line break
            int runLength = m.start() - lastEnd;
            if (runLength > 0) {
                out.write(text, lastEnd, runLength);
            }
            out.write(text.charAt(m.start()) == '\n' ? "\\n" : "\\r");
            lastEnd = m.end();
        }
        // whatever follows the last line break (the whole text if there was none)
        out.write(text, lastEnd, text.length() - lastEnd);
    }

    /**
     * Writes a resource: a URI as <code>&lt;uri&gt;</code>, anything else
     * as a blank node (<code>_:</code> followed by its ID, or the fixed
     * token <code>_:bnode</code> when {@link #normalizeBNodes} is set).
     *
     * @param ss the resource to write; must be a URI or a BNode.
     * @throws IOException if the underlying writer fails.
     */
    private void handleResource(Resource ss)
        throws IOException
    {
        if (ss instanceof URI) {
            out.write("<");
            out.write(ss.toString());
            out.write(">");
        } else if (normalizeBNodes) {
            // XXX kludge: all blank nodes look the same; may cause false
            // positives in comparison, but at least no false negatives..
            out.write("_:bnode");
        } else {
            out.write("_:"+((BNode)ss).getID());
        }
    }

    /**
     * Signals the end of the RDF data by closing the output writer, so
     * the writer passed to the constructor is not usable afterwards.
     *
     * @throws org.openrdf.rio.RDFHandlerException wrapping any IOException
     *         raised while closing the output.
     */
    public void endRDF()
        throws RDFHandlerException
    {
        try {
            out.close();
        } catch (IOException e) {
            throw new RDFHandlerException(e);
        }
    }
}
