package org.eaglei.search.provider.ncbi;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Contains logic for using NCBI EUtils.
 *
 * @author rfrost
 */
public final class NCBIEUtils {

    private static final Log logger = LogFactory.getLog(NCBIEUtils.class);
    // Debug flag captured once at class-load time; it does not track later log-level changes.
    private static final boolean DEBUG = logger.isDebugEnabled();

    // Base endpoints for eSearch and eSummary. NCBI serves E-utilities over HTTPS, so the
    // scheme must be https. Each constant ends in '?' so that "&name=value" pairs can be
    // appended directly by the URL builder methods.
    private static final String EUTILS_SEARCH_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?";
    private static final String EUTILS_SUMMARY_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?";

    // Query parameter prefixes (each already includes the trailing '=')
    private static final String DB = "db=";
    private static final String TERM = "term=";
    private static final String RETMAX = "retmax=";
    private static final String RETSTART = "retstart=";
    private static final String TOOL = "tool=";
    private static final String ID_PARAM = "id=";
    private static final String RETMODE = "retmode=";

    // eSearch response element names
    public static final String IDLIST = "IdList";
    public static final String ID = "Id";

    /**
     * Executes an HTTP GET against the specified eUtils URL and returns the XML response body
     * parsed as a DOM Document.
     * <p>
     * A response with a non-2xx status, or with no body at all, is reported as an IOException
     * rather than being handed to the XML parser, so that server-side failures are not
     * disguised as XML parse errors.
     *
     * @param url eUtils URL with all params.
     * @return Response as DOM Document.
     * @throws IOException Thrown if the request fails, the server answers with a non-2xx status,
     *         the response has no body, or the body cannot be parsed as XML.
     */
    public static Document executeNCBIRequest(final String url) throws IOException {
        final HttpClient httpclient = new HttpClient();
        final GetMethod httpget = new GetMethod(url);
        InputStream is = null;
        try {
            final int status = httpclient.executeMethod(httpget);
            if (status < 200 || status >= 300) {
                throw new IOException("NCBI request failed with HTTP status " + status + " for URL " + url);
            }
            is = httpget.getResponseBodyAsStream();
            if (is == null) {
                throw new IOException("NCBI request returned no response body for URL " + url);
            }
            return parse(is);
        } finally {
            // Close the stream first, and release the connection even if closing fails.
            try {
                if (is != null) {
                    is.close();
                }
            } finally {
                httpget.releaseConnection();
            }
        }
    }
    
    /**
     * Serializes a DOM Document to a String using the DOM Load/Save (LS) serializer.
     * <p>
     * This is a best-effort helper: any failure is logged at warn level and reported to the
     * caller as a null return value rather than an exception.
     *
     * @param doc Document to serialize.
     * @return The serialized XML, or null if no LS implementation is available or serialization fails.
     */
    public static String serializeDocument(final Document doc) {
        try {
            final DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
            final DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
            if (impl == null) {
                logger.warn("No DOM implementation supporting the LS feature is available; cannot serialize document");
                return null;
            }
            final LSSerializer writer = impl.createLSSerializer();
            return writer.writeToString(doc);
        } catch (Exception e) {
            // Deliberately broad: registry lookup and serialization can fail with several
            // unrelated checked and unchecked exception types, and callers only get null.
            logger.warn("Failed to serialize DOM document", e);
            return null;
        }
    }

    /**
     * Builds an eSearch URL for the given query.
     * <p>
     * The query is URL-encoded as UTF-8. The db and tool values are appended verbatim, so
     * callers must pass values that are already safe to place in a URL.
     *
     * @param query Search term; URL-encoded by this method.
     * @param db Value of the db parameter.
     * @param tool Value of the tool parameter.
     * @param start Value of the retstart parameter.
     * @param max Value of the retmax parameter.
     * @return The complete eSearch URL, requesting XML output.
     */
    public static String buildSearchURL(final String query, final String db, final String tool, final int start, final int max) {
        final String encodedQuery;
        try {
            // Explicit charset: the one-argument encode() is deprecated because it depends on
            // the platform default encoding.
            encodedQuery = URLEncoder.encode(query, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Cannot happen on a compliant JVM: UTF-8 support is mandatory.
            throw new IllegalStateException("UTF-8 encoding is not supported", e);
        }
        final StringBuilder sb = new StringBuilder(EUTILS_SEARCH_URL);
        sb.append('&').append(DB).append(db);
        sb.append('&').append(TOOL).append(tool);
        sb.append('&').append(TERM).append(encodedQuery);
        sb.append('&').append(RETMODE).append("xml");
        sb.append('&').append(RETSTART).append(start);
        sb.append('&').append(RETMAX).append(max);
        return sb.toString();
    }
    
    /**
     * Builds an eSummary URL requesting the summaries of every ID held by an eSearch result.
     * <p>
     * The IDs are joined with commas into a single id parameter; retmax is taken from
     * {@code result.max}. All values are appended verbatim, without URL-encoding.
     *
     * @param result eSearch result supplying the IDs and the retmax value.
     * @param db Value of the db parameter.
     * @param tool Value of the tool parameter.
     * @return The complete eSummary URL, requesting XML output.
     */
    public static String buildSummaryURL(final ESearchResult result, final String db, final String tool) {
        final StringBuilder url = new StringBuilder(EUTILS_SUMMARY_URL);
        url.append('&').append(DB).append(db);
        url.append('&').append(TOOL).append(tool);
        url.append('&').append(ID_PARAM);
        // Empty before the first ID, a comma before every subsequent one.
        String separator = "";
        for (final String id : result.ids) {
            url.append(separator).append(id);
            separator = ",";
        }
        url.append('&').append(RETMODE).append("xml");
        url.append('&').append(RETMAX).append(result.max);
        return url.toString();
    }

    /**
     * Simple mutable representation of an eSearch result, exposed as public fields.
     * Instances are created and filled in by {@code parseESearchResult}.
     */
    public static class ESearchResult {
        // Text of each Id element found under the response's IdList element, in document order
        public List<String> ids = new ArrayList<String>();
        // Document summaries for this result; not filled in by parseESearchResult
        // (presumably set by callers from an eSummary response; verify against callers)
        public List<DocSummary> docs = new ArrayList<DocSummary>();
        // Value of the response's Count element
        public int count;
        // Value of the response's RetStart element
        public int start;
        // Value of the response's RetMax element; also sent as retmax by buildSummaryURL
        public int max;
    }
    
    /**
     * Parses an eSearch XML result into an {@link ESearchResult}.
     * <p>
     * Only direct children of the root element are examined: the Id elements under IdList
     * supply the IDs, and Count, RetMax and RetStart supply the numeric fields. Elements that
     * are absent leave the corresponding field at its default value.
     *
     * @param doc XML Document.
     * @return The parsed IDs together with the count, start and max values.
     * @throws IOException Thrown if the document has no root element or a numeric element
     *         does not contain a valid integer.
     */
    public static ESearchResult parseESearchResult(final Document doc) throws IOException {
        final Element eSearchResult = doc.getDocumentElement();
        if (eSearchResult == null) {
            throw new IOException("eSearch response has no root element");
        }
        final NodeList children = eSearchResult.getChildNodes();
        final ESearchResult result = new ESearchResult();
        for (int i = 0; i < children.getLength(); i++) {
            final Node node = children.item(i);
            final String nodeName = node.getNodeName();
            if (nodeName.equals(IDLIST)) {
                final NodeList idNodes = node.getChildNodes();
                for (int j = 0; j < idNodes.getLength(); j++) {
                    final Node idNode = idNodes.item(j);
                    if (idNode.getNodeName().equals(ID)) {
                        // Trim so stray whitespace never ends up in the id= URL parameter.
                        result.ids.add(idNode.getTextContent().trim());
                    }
                }
            } else if (nodeName.equals("Count")) {
                result.count = parseIntContent(node);
            } else if (nodeName.equals("RetMax")) {
                result.max = parseIntContent(node);
            } else if (nodeName.equals("RetStart")) {
                result.start = parseIntContent(node);
            }
        }
        return result;
    }

    /**
     * Reads the text content of a node as an int, tolerating surrounding whitespace and
     * converting a malformed number into the checked IOException callers already handle.
     */
    private static int parseIntContent(final Node node) throws IOException {
        final String text = node.getTextContent();
        try {
            return Integer.parseInt(text.trim());
        } catch (NumberFormatException nfe) {
            throw new IOException("Invalid integer value '" + text + "' in <" + node.getNodeName() + "> element", nfe);
        }
    }
    
    /**
     * Simple mutable representation of a DocSum element from an eSummary response,
     * exposed as public fields.
     */
    public static class DocSummary {
        // Text content of the DocSum's Id child element
        public String id;
        // The DocSum's direct Item children, grouped by item name; items sharing a name
        // are kept in document order within the same list
        public Map<String, List<Item>> items = new HashMap<String, List<Item>>();           
    }
    
    /**
     * Simple mutable representation of an Item element from an eSummary response,
     * exposed as public fields.
     */
    public static class Item {
        // Value of the element's Name attribute
        public String name;
        // Item type; presumably the value of the element's Type attribute
        // (e.g. "List" or "Structure") - confirm against the code that populates it
        public String type;
        // Text content of the element; left null on the ListItem instances built by parseItem
        public String value;
    }
    
    /**
     * An Item that contains nested Item elements; parseItem creates one for every element
     * whose Type attribute is "List" or "Structure".
     */
    public static class ListItem extends Item {
        // Nested Item children grouped by item name, in document order within each list
        public Map<String, List<Item>> items = new HashMap<String, List<Item>>();   
    }
    
    /**
     * Parses an eSummary XML result into one {@link DocSummary} per DocSum element.
     * <p>
     * Only DocSum elements that are direct children of the root element are considered.
     * For each one, the Id child supplies the summary's id and every Item child is parsed
     * and stored under its name; items sharing a name accumulate in the same list.
     *
     * @param doc XML Document.
     * @return The document summaries, in document order.
     * @throws IOException Thrown if an Item element cannot be parsed.
     */
    public static List<DocSummary> parseESummaryResult(final Document doc) throws IOException {
        final List<DocSummary> summaries = new ArrayList<DocSummary>();
        final NodeList topLevel = doc.getDocumentElement().getChildNodes();
        for (int i = 0; i < topLevel.getLength(); i++) {
            final Node candidate = topLevel.item(i);
            if (!"DocSum".equals(candidate.getNodeName())) {
                continue;
            }
            final DocSummary summary = new DocSummary();
            summaries.add(summary);
            final NodeList parts = candidate.getChildNodes();
            for (int j = 0; j < parts.getLength(); j++) {
                final Node part = parts.item(j);
                final String partName = part.getNodeName();
                if ("Id".equals(partName)) {
                    summary.id = part.getTextContent();
                } else if ("Item".equals(partName)) {
                    final Item parsed = parseItem(part);
                    // Get-or-create the list that collects all items with this name.
                    List<Item> sameName = summary.items.get(parsed.name);
                    if (sameName == null) {
                        sameName = new ArrayList<Item>();
                        summary.items.put(parsed.name, sameName);
                    }
                    sameName.add(parsed);
                }
            }
        }
        return summaries;
    }
    
    /**
     * Parses a single Item element, recursing into nested Item children.
     * <p>
     * An element whose Type attribute is "List" or "Structure" becomes a {@link ListItem}
     * holding its nested items grouped by name; any other element becomes a plain
     * {@link Item} whose value is the element's text content. The name and type fields are
     * set from the Name and Type attributes in both cases.
     *
     * @param itemNode The Item element to parse.
     * @return The parsed item.
     * @throws IOException Thrown if this element, or a nested one, lacks a Type or Name attribute.
     */
    private static Item parseItem(Node itemNode) throws IOException {
        final String type = requiredAttribute(itemNode, "Type");
        final String name = requiredAttribute(itemNode, "Name");
        final Item item;
        if (type.equals("List") || type.equals("Structure")) {
            final ListItem listItem = new ListItem();
            final NodeList children = itemNode.getChildNodes();
            for (int j = 0; j < children.getLength(); j++) {
                final Node child = children.item(j);
                if (child.getNodeName().equals("Item")) {
                    final Item childItem = parseItem(child);
                    List<Item> sameName = listItem.items.get(childItem.name);
                    if (sameName == null) {
                        sameName = new ArrayList<Item>();
                        listItem.items.put(childItem.name, sameName);
                    }
                    sameName.add(childItem);
                }
            }
            item = listItem;
        } else {
            item = new Item();
            item.value = itemNode.getTextContent();
        }
        item.name = name;
        // Previously the Type attribute was read but never stored on the item.
        item.type = type;
        return item;
    }

    /**
     * Returns the value of a mandatory attribute, reporting its absence as an IOException
     * instead of failing with a NullPointerException.
     */
    private static String requiredAttribute(final Node node, final String attrName) throws IOException {
        // Only elements carry attributes; any other node kind is treated as lacking it.
        final Node attr = (node instanceof Element) ? ((Element) node).getAttributeNode(attrName) : null;
        if (attr == null) {
            throw new IOException("<" + node.getNodeName() + "> element is missing required attribute '" + attrName + "'");
        }
        return attr.getNodeValue();
    }
    
    /**
     * Parses an XML stream into a DOM Document.
     * <p>
     * The parser is configured not to fetch external DTDs and not to resolve external general
     * or parameter entities, because the input comes from the network: this prevents XXE-style
     * file disclosure and outbound requests, and avoids downloading a DTD for every response.
     * A DOCTYPE declaration in the input is still accepted; its external subset is simply not
     * loaded.
     *
     * @param stream Stream containing the XML to parse; not closed by this method.
     * @return The parsed DOM Document.
     * @throws IOException Thrown if the stream cannot be read, the content is not well-formed
     *         XML, or the XML parser cannot be configured as required.
     */
    public static Document parse(final InputStream stream) throws IOException {
        try {
            final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // Fail closed: if the parser does not support one of these features, setFeature
            // throws ParserConfigurationException, which is reported below as an IOException.
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            final DocumentBuilder builder = factory.newDocumentBuilder();
            return builder.parse(new InputSource(stream));
        } catch (SAXException saxe) {
            throw new IOException(saxe);
        } catch (ParserConfigurationException pce) {
            throw new IOException(pce);
        }
    }
}
