/**
 * The eagle-i consortium
 * Harvard University
 * Jul 14, 2010
 */
package org.eaglei.datatools.etl.utils;

import static org.eaglei.datatools.model.DataToolsOntConstants.nameSpaceMap;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eaglei.datatools.interim.cores.OldFileUtils;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;

/**
 * Utility to convert a CMAP Propositions file (export -> Propositions as text) to RDF/XML.
 * <p>
 * Each non-empty input line that does not start with "Template" is expected to contain exactly
 * three whitespace-separated tokens (subject, predicate, object) once any bracketed
 * <code>[...]</code> annotations have been removed. Namespace prefixes found in
 * <code>DataToolsOntConstants.nameSpaceMap</code> are expanded before the statement is added to
 * the model.
 *
 * @author Daniela Bourges-Waldegg
 */
public class CmapToRDF {
	private static final Log logger = LogFactory
			.getLog(CmapToRDF.class);

	/** Cached once at class initialization so per-token debug logging stays cheap. */
	private static final boolean isDebugEnabled = logger.isDebugEnabled();

	/** Matches one bracketed annotation: '[' up to and including the next ']'. Compiled once. */
	private static final Pattern BRACKETED_TEXT = Pattern.compile("\\[[^\\]]*\\]");

	/** Extension a file name must carry to be accepted as CMAP propositions input. */
	private static final String INPUT_EXTENSION = ".txt";

	/** Extension given to the generated files. */
	private static final String OUTPUT_EXTENSION = ".rdf";

	/**
	 * Command line entry point; logs a usage message and returns if either argument is missing or empty.
	 *
	 * @param args - input directory, output directory (absolute paths)
	 * @throws IOException
	 * @throws URISyntaxException
	 */
	public static void main(String[] args) throws IOException, URISyntaxException {
		if (args.length <= 1 || args[0].length() == 0 || args[1].length() == 0) {
			logger.error("CmapToRDF <input dir> <output dir>");
			return;
		}
		processAll(args[0], args[1]);
	}

	/**
	 * Converts every file returned by <code>OldFileUtils.getAllFiles</code> for the input directory.
	 * A failure on one file is logged (with its stack trace) and does not stop the remaining files.
	 *
	 * @param inputPath directory containing the cmap text files
	 * @param outputPath directory that receives the generated files
	 */
	private static void processAll(String inputPath, String outputPath) throws IOException, URISyntaxException {
		final File inputDir = new File(inputPath);
		logger.info("Reading cmap text files from directory: " + inputDir.getAbsolutePath());
		// check if directory exists
		OldFileUtils.validateDirectory(inputDir, false);
		final List<File> files = OldFileUtils.getAllFiles(inputDir);
		for (File file : files) {
			final String cmapFile = file.getAbsolutePath();
			try {
				generateToFile(cmapFile, outputPath);
			} catch (Exception e) {
				// Best effort: report through the logger (not stderr) and move on to the next file.
				logger.error("Unable to process file: " + cmapFile, e);
			}
		}
	}

	/**
	 * Builds the model for one cmap file and writes it, UTF-8 encoded, to the output directory.
	 * The output file keeps the input's base name, with its last extension replaced by ".rdf".
	 *
	 * @param cmapFile path of the cmap text file to convert
	 * @param outputPath directory that receives the generated file
	 */
	private static void generateToFile(String cmapFile, String outputPath) throws IOException, URISyntaxException {
		final Model model = generateToModel(cmapFile);
		final File outputDir = new File(outputPath);
		// check if directory exists; create if it doesn't
		OldFileUtils.validateDirectory(outputDir, true);

		logger.info("Writing RDF/XML file to directory: " + outputDir.getAbsolutePath());

		final String inputName = new File(cmapFile).getName();
		final int dot = inputName.lastIndexOf(".");
		// dot > 0 (not >= 0): a leading dot belongs to the name itself, it is not an extension.
		final String baseName = dot > 0 ? inputName.substring(0, dot) : inputName;
		final String rdfFile = baseName + OUTPUT_EXTENSION;

		// Close the underlying stream itself so it is released even if wrapping or writing fails.
		final OutputStream stream = new FileOutputStream(new File(outputDir, rdfFile));
		try {
			final OutputStreamWriter out = new OutputStreamWriter(stream, "UTF-8");
			model.write(out);
			out.flush();
		} finally {
			stream.close();
		}
		logger.info("Done!");
		logger.info("============");
	}

	/**
	 * Parses a cmap propositions text file into a Jena model.
	 *
	 * @param cmapFile path of the file to read; must be non-empty and end with ".txt"
	 * @return a model holding one statement per triple line, with the known namespace prefixes set
	 * @throws IllegalArgumentException if the file name is null, empty or does not end with ".txt"
	 * @throws RuntimeException if a line does not consist of exactly three tokens
	 * @throws IOException if the file cannot be opened or a read error occurs while scanning it
	 */
	private static Model generateToModel(String cmapFile) throws IOException, URISyntaxException {
		logger.info("Processing file: " + cmapFile);
		if (cmapFile == null || cmapFile.length() == 0) {
			throw new IllegalArgumentException("File name null or empty");
		}
		if (!cmapFile.endsWith(INPUT_EXTENSION)) {
			throw new IllegalArgumentException("Not a " + INPUT_EXTENSION + " cmap propositions file: " + cmapFile);
		}

		final Model model = ModelFactory.createDefaultModel();
		// Loop invariant: the prefix map only needs to be registered once per model.
		model.setNsPrefixes(nameSpaceMap);

		final Scanner scanner = new Scanner(new File(cmapFile), "UTF-8");
		try {
			while (scanner.hasNextLine()) {
				//Get line from cmap text
				final String completeLine = scanner.nextLine();
				if (isDebugEnabled) {
					logger.debug("Processing line: " + completeLine);
				}
				if (completeLine.startsWith("Template") || completeLine.length() == 0) {
					continue;
				}
				//Remove anything between []
				final String line = BRACKETED_TEXT.matcher(completeLine).replaceAll("");
				if (isDebugEnabled) {
					logger.debug("Pruned line: " + line);
				}
				//Get the triple
				final List<String> triple = tokenize(line);
				if (triple.isEmpty()) {
					// Nothing but whitespace/annotations on this line: treat it like an empty line.
					continue;
				}
				if (triple.size() != 3) {
					throw new RuntimeException("Input line does not represent a triple: " + line);
				}

				//replace namespaces
				//TODO use Jena's factory methods with namespaces
				final String subject = replacePrefix(triple.get(0).trim());
				final String predicate = replacePrefix(triple.get(1).trim());
				final String object = replacePrefix(triple.get(2).trim());

				final Resource subjectResource = model.createResource(subject);
				final Property predicateProperty = model.createProperty(predicate);
				model.add(model.createStatement(subjectResource, predicateProperty, toObjectNode(model, object)));
			}
			// Scanner swallows read errors and simply stops; surface them instead of
			// returning a silently truncated model.
			final IOException readError = scanner.ioException();
			if (readError != null) {
				throw readError;
			}
		} finally {
			scanner.close();
		}
		return model;
	}

	/** Splits a line on the default StringTokenizer delimiters (space, tab, newline, CR, form feed). */
	private static List<String> tokenize(String line) {
		final List<String> tokens = new ArrayList<String>();
		final StringTokenizer tokenizer = new StringTokenizer(line);
		while (tokenizer.hasMoreTokens()) {
			final String token = tokenizer.nextToken();
			if (isDebugEnabled) {
				logger.debug("Processing token: " + token);
			}
			tokens.add(token);
		}
		return tokens;
	}

	/** Creates a literal when the object contains "^^" or has no ':' at all, otherwise a resource. */
	private static RDFNode toObjectNode(Model model, String object) {
		if (object.contains("^^") || !object.contains(":")) {
			return model.createLiteral(object);
		}
		return model.createResource(object);
	}

	/**
	 * Expands a namespace prefix using <code>nameSpaceMap</code>.
	 * <p>
	 * The first map entry (in the map's iteration order) whose "key:" occurs anywhere in the
	 * string wins; every occurrence of that "key:" is replaced by the entry's value.
	 *
	 * @param string token to expand
	 * @return the expanded string, or the input unchanged when no prefix matches
	 */
	private static String replacePrefix(String string) {
		if (isDebugEnabled) {
			logger.debug("Replacing namespace prefix in string: " + string);
		}
		for (Map.Entry<String, String> entry : nameSpaceMap.entrySet()) {
			final String prefix = entry.getKey() + ":";
			if (string.contains(prefix)) {
				final String result = string.replace(prefix, entry.getValue());
				if (isDebugEnabled) {
					logger.debug("Returning reoplacement: " + result);
				}
				return result;
			}
		}
		return string;
	}

}
	

