/**
 * 
 */
package org.eaglei.datatools.etl.server.extractor.parsers.xmlparser;

import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Pattern;

import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.apache.commons.configuration.ConfigurationException;
import org.eaglei.datatools.etl.server.exceptions.ExtractorException;
import org.eaglei.datatools.etl.server.extractor.parsers.Data;
import org.eaglei.datatools.etl.server.extractor.parsers.EIFileParser;
import org.eaglei.datatools.etl.server.extractor.parsers.MapFileConfiguration;
import org.eaglei.datatools.etl.utils.BasicUtils;
import org.eaglei.datatools.etl.utils.ETLUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Extracts values from an XML file by evaluating the XPath expressions found in a set of map
 * files.
 * <p>
 * For every map file the parser compiles each expression, evaluates it against the XML file and
 * groups the selected nodes by their nearest ancestor whose name matches the configured context
 * node. Each group becomes one row of a column in the resulting {@link Data}; multiple values
 * belonging to the same context node are joined with {@code ;}.
 * 
 * @author Sravan Cheriyala
 * 
 */
public class XmlFileParser extends EIFileParser<Map<String, Data>> {

	/** Matches prefixed namespace declarations such as {@code xmlns:foo="http://example.org/"}. */
	private static final Pattern NAMESPACE_DECLARATION = Pattern.compile( "xmlns:([^=].*?)=\"([^\"]*?)\"", Pattern.CASE_INSENSITIVE );
	/** Capturing group of {@link #NAMESPACE_DECLARATION} holding the namespace prefix. */
	private static final int PREFIX_GROUP = 1;
	/** Capturing group of {@link #NAMESPACE_DECLARATION} holding the namespace URI. */
	private static final int URI_GROUP = 2;
	/** Extension appended to a map file name to obtain the file on disk. */
	private static final String MAP_FILE_EXTENSION = ".rdf";
	/** Separator placed between multiple values that belong to the same context node. */
	private static final String VALUE_SEPARATOR = ";";

	private File xmlFile;
	private Document xmlDoc;
	private String contextNode;
	private Map<String, String> namespaceMap;

	/**
	 * Creates a parser for the given XML file. The file is parsed once up front, so a malformed
	 * document is reported here rather than later during {@link #parse(File)}.
	 * 
	 * @param xmlFile the XML file to extract data from
	 * @throws ParserConfigurationException if no DOM parser can be created
	 * @throws SAXException if the file is not well-formed XML
	 * @throws IOException if the file cannot be read
	 */
	public XmlFileParser(File xmlFile) throws ParserConfigurationException, SAXException, IOException {
		this.xmlFile = xmlFile;
		this.xmlDoc = getXmlDocument( xmlFile );

	}

	/**
	 * Evaluates the expressions of every map file in {@code mapDirectory} against the XML file.
	 * 
	 * @param mapDirectory directory holding the map files and their configuration
	 * @return a map from upper-cased map file name to the data extracted for that map file
	 * @throws ExtractorException if an XPath expression cannot be compiled or evaluated
	 * 
	 * @see org.eaglei.datatools.etl.server.extractor.parsers.EIFileParser#parse(java.io.File)
	 */
	@Override
	public Map<String, Data> parse(File mapDirectory) throws ConfigurationException, IOException, ParseException, ExtractorException {
		Map<String, Data> dataMap = new HashMap<String, Data>();
		try {

			String[] mapFileOrder = MapFileConfiguration.getOrderofMapFiles( mapDirectory.getPath() );
			for (String mapFile : mapFileOrder) {
				contextNode = XmlFileConfiguration.getContextNode( mapDirectory.getPath() );
				Set<String> expressionSet = getExpressionSetofMapFile( mapDirectory, mapFile );
				// Freeze the set into a list so expressions and compiled XPaths share one index.
				List<String> expressionList = new ArrayList<String>( expressionSet );
				List<XPathExpression> xpathExpressionSet = getXPathExpressionsList( expressionList );
				Data data = makeDataByExpressionSet( xpathExpressionSet, expressionList );
				dataMap.put( mapFile.toUpperCase(), data );
			}
		} catch (XPathExpressionException e) {
			throw new ExtractorException( e );
		}
		return dataMap;
	}

	/**
	 * Scans the raw text of the XML file for prefixed namespace declarations
	 * ({@code xmlns:prefix="uri"}) and returns them as a prefix-to-URI map. The result is also
	 * stored in this parser, replacing any previously collected map.
	 * 
	 * @return the prefix-to-URI map; if a prefix is declared more than once the last one wins
	 * @throws IOException if the XML file cannot be opened or read
	 */
	public Map<String, String> getAllNamespacesFromXmlDocument() throws IOException {
		Map<String, String> namespaces = new HashMap<String, String>();
		FileInputStream fileInputStream = new FileInputStream( xmlFile );
		try {
			String xmlString = BasicUtils.convertStreamToString( fileInputStream );
			java.util.regex.Matcher match = NAMESPACE_DECLARATION.matcher( xmlString );
			while ( match.find() ) {
				namespaces.put( match.group( PREFIX_GROUP ), match.group( URI_GROUP ) );
			}
		} finally {
			closeQuietly( fileInputStream );
		}
		namespaceMap = namespaces;
		return namespaces;
	}

	/**
	 * Reads the expressions contained in {@code <mapDirectory>/<mapFile>.rdf}.
	 * 
	 * @param mapDirectory directory that contains the map file
	 * @param mapFile map file name without the {@code .rdf} extension
	 * @return the expressions found in the map file
	 * @throws FileNotFoundException if the map file does not exist
	 * @throws IOException if the map file cannot be read
	 */
	private Set<String> getExpressionSetofMapFile(File mapDirectory, String mapFile) throws FileNotFoundException, IOException {
		// Resolve through File so the platform's own separator is used instead of a hard-coded "\".
		File mapFilePath = new File( mapDirectory, mapFile + MAP_FILE_EXTENSION );
		FileInputStream mapStream = new FileInputStream( mapFilePath );
		try {
			return ETLUtils.getExpressionsInMapFile( mapStream );
		} finally {
			closeQuietly( mapStream );
		}
	}

	/**
	 * Parses the given file into a DOM document using the default parser configuration.
	 */
	private Document getXmlDocument(File xmlFile) throws ParserConfigurationException, SAXException, IOException {
		// NOTE(review): the factory is left at its defaults; if input files can come from untrusted
		// sources, consider disabling DTDs/external entities here.
		DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
		DocumentBuilder builder = fact.newDocumentBuilder();
		return builder.parse( xmlFile );
	}

	/**
	 * Compiles every expression, preserving the order of {@code expressionList}.
	 * 
	 * @param expressionList XPath expressions as text
	 * @return the compiled expressions, index-aligned with {@code expressionList}
	 * @throws XPathExpressionException if an expression cannot be compiled
	 * @throws IOException if the XML file cannot be read while collecting its namespaces
	 */
	private List<XPathExpression> getXPathExpressionsList(List<String> expressionList) throws XPathExpressionException, IOException {
		XPath xpath = getXpath();
		List<XPathExpression> xpathExpressionSet = new ArrayList<XPathExpression>( expressionList.size() );
		for (String mapExpression : expressionList) {
			xpathExpressionSet.add( xpath.compile( mapExpression ) );
		}
		return xpathExpressionSet;
	}

	/**
	 * Creates an XPath evaluator that resolves prefixes using the namespaces declared in the XML
	 * file.
	 */
	private XPath getXpath() throws IOException {
		XPathFactory factory = XPathFactory.newInstance();
		XPath xpath = factory.newXPath();
		xpath.setNamespaceContext( getNamespaceContextOfCurrentDocument() );
		return xpath;

	}

	/**
	 * Builds a namespace context backed by the prefixes declared in the XML file. Only
	 * prefix-to-URI lookup is supported; reverse lookups throw
	 * {@link UnsupportedOperationException}.
	 */
	private NamespaceContext getNamespaceContextOfCurrentDocument() throws IOException {
		final Map<String, String> nameSpaceMap = getAllNamespacesFromXmlDocument();
		NamespaceContext namespaceContext = new NamespaceContext() {

			@Override
			public String getNamespaceURI(String prefix) {
				return nameSpaceMap.get( prefix );
			}

			@Override
			public String getPrefix(String namespaceURI) {
				throw new UnsupportedOperationException();
			}

			@Override
			public Iterator getPrefixes(String namespaceURI) {
				throw new UnsupportedOperationException();
			}

		};
		return namespaceContext;
	}

	/**
	 * Evaluates all expressions and packs the results into a {@link Data}, one column per
	 * expression.
	 * 
	 * @param xPathExpressionList compiled expressions
	 * @param mapExpressionList textual form of the expressions, index-aligned with
	 *        {@code xPathExpressionList}
	 */
	private Data makeDataByExpressionSet(List<XPathExpression> xPathExpressionList, List<String> mapExpressionList) throws XPathExpressionException, FileNotFoundException {
		List<List<String[]>> allElementsList = extractElementsFromXML( xPathExpressionList, mapExpressionList );
		return getDataFromAllColumnsList( mapExpressionList, allElementsList );
	}

	/**
	 * Adds one column per entry of {@code allElementsList}, named after the expression at the
	 * same index of {@code mapExpressionList}.
	 */
	private Data getDataFromAllColumnsList(List<String> mapExpressionList, List<List<String[]>> allElementsList) {
		Data data = new Data();
		for (int i = 0; i < allElementsList.size(); i++) {
			data.addColumn( mapExpressionList.get( i ), new Vector<String[]>( allElementsList.get( i ) ) );
		}
		return data;
	}

	/**
	 * Evaluates each compiled expression against the XML file and converts the selected nodes
	 * into rows.
	 * 
	 * @return one list of {@code [expression, value]} rows per expression, in expression order
	 */
	private List<List<String[]>> extractElementsFromXML(List<XPathExpression> xPathExpressionList, List<String> mapExpressionList) throws FileNotFoundException, XPathExpressionException {
		List<List<String[]>> allElementsList = new ArrayList<List<String[]>>( xPathExpressionList.size() );
		for (int i = 0; i < xPathExpressionList.size(); i++) {
			NodeList nodeList = evaluateXpathExpression( xPathExpressionList.get( i ) );
			allElementsList.add( getListFromNodeList( nodeList, mapExpressionList.get( i ) ) );
		}
		return allElementsList;
	}

	/**
	 * Evaluates the expression against a fresh read of the XML file and returns the selected
	 * nodes.
	 * 
	 * @throws FileNotFoundException if the XML file no longer exists
	 * @throws XPathExpressionException if the expression cannot be evaluated
	 */
	private NodeList evaluateXpathExpression(XPathExpression xpathExpression) throws FileNotFoundException, XPathExpressionException {
		FileInputStream fileInputStream = new FileInputStream( xmlFile );
		try {
			return (NodeList)xpathExpression.evaluate( new InputSource( fileInputStream ), XPathConstants.NODESET );
		} finally {
			// The result nodes are already built by the time evaluate returns, so the stream can go.
			closeQuietly( fileInputStream );
		}
	}

	/**
	 * Groups the selected nodes by context node and returns one {@code [expression, value]} row
	 * per context node, in the order the context nodes are first encountered in {@code columns}.
	 */
	private List<String[]> getListFromNodeList(NodeList columns, String mapExpression) {
		// Insertion-ordered: DOM nodes hash by identity, so a plain HashMap would return the rows
		// in an arbitrary order that differs between columns and between runs.
		Map<Node, String[]> nodeMap = new LinkedHashMap<Node, String[]>();
		for (int i = 0; i < columns.getLength(); i++) {
			prepareMapWithParentNodeAsKey( columns.item( i ), mapExpression, nodeMap );
		}
		return new ArrayList<String[]>( nodeMap.values() );
	}

	/**
	 * Records the value of {@code node} under its context node. The first value for a context
	 * node creates the row {@code [mapExpression, value]}; later values are appended to the
	 * existing row's value, separated by {@code ;}.
	 */
	private void prepareMapWithParentNodeAsKey(Node node, String mapExpression, Map<Node, String[]> nodeMap) {
		Node currentContextNode = getContextNodeOfTheCurrentNode( node );
		String value = getValueOfNode( node );
		String[] aryOfExpressionAndValue = nodeMap.get( currentContextNode );
		if ( aryOfExpressionAndValue != null ) {
			aryOfExpressionAndValue[1] = aryOfExpressionAndValue[1] + VALUE_SEPARATOR + value;
		} else {
			nodeMap.put( currentContextNode, new String[] { mapExpression, value } );
		}
	}

	/**
	 * Returns the value of the node's first child. Nodes without children (empty elements, or
	 * text nodes selected directly) fall back to the node's own value, or to the empty string
	 * when the node has none, instead of failing with a {@link NullPointerException}.
	 */
	private static String getValueOfNode(Node node) {
		Node firstChild = node.getFirstChild();
		if ( firstChild != null ) {
			return firstChild.getNodeValue();
		}
		String ownValue = node.getNodeValue();
		return ownValue != null ? ownValue : "";
	}

	/**
	 * Walks up from {@code currentNode} to the nearest ancestor whose name equals the configured
	 * context node, ignoring case. Attribute nodes start from their owner element because
	 * attributes have no parent node in the DOM.
	 * 
	 * @return the matching ancestor, never {@code null}
	 * @throws IllegalStateException if no ancestor matches the context node
	 */
	private Node getContextNodeOfTheCurrentNode(Node currentNode) {
		Node ancestor = currentNode.getNodeType() == Node.ATTRIBUTE_NODE
				? ( (Attr)currentNode ).getOwnerElement()
				: currentNode.getParentNode();
		while ( ancestor != null && !ancestor.getNodeName().equalsIgnoreCase( contextNode ) ) {
			ancestor = ancestor.getParentNode();
		}
		if ( ancestor == null ) {
			throw new IllegalStateException( "Node '" + currentNode.getNodeName() + "' has no ancestor named '" + contextNode + "'" );
		}
		return ancestor;
	}

	/**
	 * Closes the resource, ignoring any failure. Used only for input streams that have already
	 * been fully consumed, where a close error carries no useful information and must not mask
	 * an exception raised while reading.
	 */
	private static void closeQuietly(Closeable resource) {
		if ( resource == null ) {
			return;
		}
		try {
			resource.close();
		} catch (IOException ignored) {
			// Intentionally ignored: the data has already been read from the stream.
		}
	}

}
