/**
 * eagle-i Project
 * Harvard University
 * Jan 19, 2010
 */
package org.eaglei.datatools.excel;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;


/**
 * Static helpers for opening Excel workbooks and turning the contents of a
 * sheet (tab) into a list of property/value maps, using Apache POI.
 *
 * @author dbw
 *
 */
public class ExcelUtils {
	private static final Log logger = LogFactory.getLog(ExcelUtils.class);

	/**
	 * Opens an <code>.xlsx</code> workbook that is available as a system
	 * class path resource.
	 *
	 * @param fileName resource name of the workbook; must end in
	 *        <code>.xlsx</code> and must not start with <code>~$</code>
	 * @return the parsed workbook
	 * @throws IllegalArgumentException if the name is null, empty, or does not
	 *         look like an Excel file
	 * @throws ExcelParserException if the resource cannot be found, is not a
	 *         valid Excel file, or cannot be read
	 */
	public static Workbook openWorkBook(String fileName){
		if (fileName == null || fileName.length()==0) {
			logger.error("File name null or empty");
			throw new IllegalArgumentException("File name null or empty");
		}
		// NOTE(review): the "~$" prefix presumably filters out Excel's temporary lock files - confirm
		if (!fileName.endsWith(".xlsx") || fileName.startsWith("~$")) {
			logger.error("File doesn't look like an Excel file: " + fileName);
			throw new IllegalArgumentException("File doesn't look like an Excel file: " + fileName);
		}
		InputStream is = null;
		try {
			is = ClassLoader.getSystemResourceAsStream(fileName);
			if (is == null) {
				// A missing resource is reported as null, not as an exception; fail
				// explicitly instead of handing null to the workbook factory.
				logger.error("Excel file not found on class path: " + fileName);
				throw new FileNotFoundException("Excel file not found on class path: " + fileName);
			}
			return WorkbookFactory.create(is);
		} catch  (FileNotFoundException e) {
			throw new ExcelParserException(e);
		} catch (InvalidFormatException e) {
			throw new ExcelParserException("Expecting an Excel file", e);
		} catch (IOException e) {
			throw new ExcelParserException(e);
		} finally {
			try {
				if (is != null) {
					is.close();
				}
			} catch (IOException ignored) {
				/* can't do anything useful if closing fails */
			}
		}
	}

	/**
	 * Extracts the information in a tab into a List of
	 * Map of &lt;property, value&gt;, one map per data row.
	 * <p>
	 * Property names come from the first row of the sheet (see
	 * {@link #readHeaders(Row)} for how they are normalised). Each value is
	 * associated with the header found in the same column; cells in columns
	 * that have no header are ignored. Rows that yield no values are omitted.
	 *
	 * @param sheet the sheet to read; its first row must hold the headers
	 * @param startRow 1-based number of the first data row to process
	 * @return the extracted rows, possibly empty but never null
	 * @throws ExcelParserException if the sheet has no header row
	 */
	public static List<Map<String, String>> processTab(Sheet sheet, int startRow) {
		final FormulaEvaluator evaluator = sheet.getWorkbook().getCreationHelper().createFormulaEvaluator();
		final Row headerRow = sheet.getRow(0); // Header row
		if (headerRow == null) {
			throw new ExcelParserException("Tab is missing headers");
		}

		//TODO validate
		final Map<Integer, String> headers = readHeaders(headerRow);

		//TODO This used to be a check for specific eagle-i interim annotation formats
		//(presence of an ANNOTATION_FORM_VERSION header); the code was made more generic,
		//so that check needs to be performed at another moment.

		final List<Map<String,String>> resourceList = new ArrayList<Map<String,String>>();

		//Process rest of rows.
		//Note that in many cases rows/cells contain only hidden info and will need to be ignored.
		//The last row index is used as the bound (rather than the count of physical rows) so that
		//undefined rows in the middle of the sheet do not cause trailing rows to be missed.
		final int lastRow = sheet.getLastRowNum();
		for (int r = Math.max(startRow - 1, 0); r <= lastRow; r++) {
			final Row row = sheet.getRow(r);
			if (row == null) {
				continue;
			}
			//A row with at most one cell is taken to hold only a formula (there's no case of a row
			//with one cell of data only) and ends the processing.
			//NOTE(review): the original comment said "skip", but the code stops at this row - confirm intent
			if (row.getPhysicalNumberOfCells() <= 1) {
				break;
			}

			final Map<String, String> resourceInstance = new HashMap<String, String>();
			final Iterator<Cell> cIt = row.cellIterator(); //Row doesn't implement Iterable
			while (cIt.hasNext()) {
				final Cell cell = cIt.next();
				if (cell == null || cell.getCellType() == Cell.CELL_TYPE_BLANK) {
					continue;
				}
				final String value = readCellValue(cell, evaluator);
				final String header = headers.get(Integer.valueOf(cell.getColumnIndex()));
				if (header == null) {
					if (logger.isDebugEnabled()) {
						logger.debug("Cell without column header; Value: " + value);
					}
					continue;
				}
				if (value != null && value.length() > 0) {
					resourceInstance.put(header, value.replaceAll("\\n{1,}", " ").trim());
				}
			}
			//Empty when a row contained only formulas but no values
			if (!resourceInstance.isEmpty()) {
				resourceList.add(resourceInstance);
			}
		}
		//May return an empty list
		return resourceList;
	}

	/**
	 * Reads the header row into a map from column index to normalised header
	 * name. Blank cells are skipped. Normalisation drops everything from the
	 * first "(" onwards, trims, replaces runs of whitespace with "_" and then
	 * removes all remaining non-word characters.
	 */
	private static Map<Integer, String> readHeaders(Row headerRow) {
		final Map<Integer, String> headers = new HashMap<Integer, String>();
		final Iterator<Cell> cIt = headerRow.cellIterator(); //Row doesn't implement Iterable
		while (cIt.hasNext()) {
			final Cell cell = cIt.next();
			if (cell == null || cell.getCellType() == Cell.CELL_TYPE_BLANK) {
				continue;
			}
			String header = cell.getStringCellValue();
			final int paren = header.indexOf("(");
			if (paren >= 0) {
				header = header.substring(0, paren);
			}
			//Keyed by the real column index so that gaps in the header row
			//cannot shift later headers onto the wrong data columns.
			headers.put(Integer.valueOf(cell.getColumnIndex()),
					header.trim().replaceAll("\\s{1,}", "_").replaceAll("\\W{1,}", ""));
		}
		return headers;
	}

	/**
	 * Evaluates the cell in place and returns its content as a String, or
	 * null when the resulting cell type is not formula, numeric or string.
	 */
	private static String readCellValue(Cell cell, FormulaEvaluator evaluator) {
		switch (evaluator.evaluateInCell(cell).getCellType()) {
			case Cell.CELL_TYPE_FORMULA:
				return cell.getCellFormula();
			case Cell.CELL_TYPE_NUMERIC:
				//TODO change to normal integer; any fractional part is currently truncated
				return String.valueOf((int) cell.getNumericCellValue());
			case Cell.CELL_TYPE_STRING:
				return cell.getStringCellValue();
			default:
				return null;
		}
	}

	/**
	 * Tells whether a row holds no content.
	 *
	 * @param row the row to inspect; may be null
	 * @return true if the row is null or every cell it iterates over is of
	 *         blank type, false as soon as a non-blank cell is found
	 */
	public static boolean isBlankRow(Row row)
	{
		if (row == null) {
			return true;
		}
		final Iterator<Cell> cellIter = row.cellIterator();
		while (cellIter.hasNext()) {
			if (cellIter.next().getCellType() != Cell.CELL_TYPE_BLANK) {
				return false;
			}
		}
		return true;
	}

}
