/**
 * 
 */
package org.eaglei.datatools.etl.server;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import javax.swing.table.DefaultTableModel;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.eaglei.datatools.etl.server.ExcelAbstractions.ExcelTab;
import org.eaglei.datatools.etl.utils.Configure;
import org.eaglei.datatools.excel.ExcelParserException;
import org.eaglei.datatools.excel.ExcelUtils;

/**
 * FIXME: write comments for readMapDirectoryPathFromFile(), readFormatConfigPropertiesOfFile(), FormatPropertiesOfTab(), readFormatConfigPropertiesForTab().
 *
 * @author Sravan Kumar Cheriyala
 */

public class ExcelFileParser extends EIFileParser<Map<String, ExcelTab>> {

	/** Class-wide log4j logger. */
	private static final org.apache.log4j.Logger logger = Logger.getLogger( ExcelFileParser.class );

	/** Text searched for in header cells; a cell containing it marks both the header row and the form-version column. */
	private static final String FORM_VERSION_STRING = "FORM VERSION";

	/** Number of rows read from each tab, keyed by sheet name; filled in by toCSV once a tab has been processed. */
	private final Map<String, Integer> numberOfRowsReadFromTabs;

	/*
	 * Parent directory of the map directories. It has sub-directories such as annotation_v1, reagent_v1, organisms_v1 etc.; which sub-directory to use is only known after reading the Excel file: the "FORM VERSION" column supplies the name and
	 * version that are appended to this path as "/<name>_<version>".
	 */
	private final String mapsParentDirecotryPath;

	/*
	 * Excel tabs must be processed in a particular order. The order is configured under the "order" key (tab names separated by '#') of the properties read from the map directory; this array holds the tab names in that order.
	 */
	private String[] tabOrder;

	/*
	 * Some column values of the lab tab are reused across all other tabs. This array holds the names of those global columns in the order they appear in the Excel file, as configured under "globalcolumnsorderinexcel" ('#'-separated). The Excel
	 * sheet must have the columns in the configured order, otherwise values are attributed to the wrong column names.
	 */
	private String[] globalColumnsOrderInExcel;

	/*
	 * Names of the global columns in the order they must appear in the generated CSV, as configured under "globalcolumnsorderintoCSV" ('#'-separated).
	 */
	private String[] globalColumnsOrderInCSV;

	/* true once toCSV has found more than one configured global column name, i.e. global columns are in use for this file */
	private boolean globalColumns;

	/* The sheet named "lab"; assigned by toCSV and read by readMapDirectoryPathFromFile() */
	private Sheet LAB_SHEET;

	/* Metadata (column name to column number, per sheet) collected for the Excel file being parsed */
	private final ExcelFileMetaData fileMetaData;

	/**
	 * Creates a parser whose map directories are resolved beneath the given parent directory.
	 *
	 * @param mapsParentDirecotryPath parent directory that contains the individual map directories
	 */
	public ExcelFileParser(String mapsParentDirecotryPath) {
		this.fileMetaData = new ExcelFileMetaData();
		this.numberOfRowsReadFromTabs = new HashMap<String, Integer>();
		this.mapsParentDirecotryPath = mapsParentDirecotryPath;
	}

	/**
	 * Reads the configured tabs of an Excel workbook into {@link ExcelTab} objects.
	 * <p>
	 * The map directory is derived from the FORM VERSION column of the "lab" sheet, the file-level configuration is loaded from it, and every tab named in the configured order is then copied cell by cell into an ExcelTabData. When global columns
	 * are configured, their values are collected from the lab sheet and written at the beginning of every row of the other tabs.
	 * 
	 * @param excelInputStream stream positioned at the start of the Excel file
	 * @return map from upper-cased sheet name to the tab data read from that sheet; tabs that are absent or skipped have no entry
	 * @throws ConfigurationException if the configuration in the map directory cannot be read
	 * @throws IOException declared by the configuration reader
	 * @throws ParseException if no header row containing the FORM VERSION marker is found
	 */
	@Override
	public Map<String, ExcelTab> toCSV(InputStream excelInputStream) throws ConfigurationException, IOException, ParseException {

		/* getWorkbookOfExcel opens the workbook and converts failures into ExcelParserException */
		Workbook wb = getWorkbookOfExcel( excelInputStream );
		Map<String, ExcelTab> excelTabNameToTabDataMap = new HashMap<String, ExcelTab>();

		/*
		 * The FORM VERSION column names the map and its version; it is present in all tabs but is always read from the lab tab. The resulting directory is stored through setMapDirecotryPath.
		 */
		LAB_SHEET = wb.getSheet( "lab" );
		String mapDirecotryPath = readMapDirectoryPathFromFile();
		setMapDirecotryPath( mapDirecotryPath );

		/* file-level properties: tab order, global columns in Excel order, global columns in CSV order */
		readConfigPropertiesOfFile();

		/* global column values only have to be inserted into the CSV content when more than one global column name is configured */
		if ( globalColumnsOrderInExcel != null && globalColumnsOrderInExcel.length > 1 ) {
			globalColumns = true;
		}

		/* global column name -> value read from the lab sheet, reused for every other tab */
		HashMap<String, String> globalColumnsHash = new HashMap<String, String>();

		/* tabOrder holds the tab names in the order configured in the properties file */
		for (String sheetName : tabOrder) {
			Sheet sheet = wb.getSheet( sheetName );

			/* the null check has to come before any use of the sheet, otherwise a missing tab ends in a NullPointerException instead of being skipped */
			if ( sheet == null ) {
				logger.info( sheetName + " is not present in excel tab ...continuing onto next tab" );
				continue;
			}

			fileMetaData.sheetNameToSheetMetaData.put( sheetName, readSheetMetaData( sheet ) );

			logger.info( "Reading " + sheet.getSheetName() );

			/* index of the last row; 0 means the sheet holds at most one row, which is treated as "no data" */
			int lastRowIndex = sheet.getLastRowNum();
			if ( lastRowIndex == 0 ) {
				logger.info( "no Rows in sheet...ignoring" );
				continue;
			}

			/* the column count is taken from the second row of the sheet; a sheet without that row is skipped */
			Row secondRow = sheet.getRow( 1 );
			if ( secondRow == null ) {
				continue;
			}
			int colcount = secondRow.getLastCellNum();

			int rowheadStart = 0;

			/* per-tab format properties (currently the 1-based start row) */
			TabConfigProperites properties = readConfigPropertiesForTab( sheet );
			int startRow = properties.getStartRow();

			/*
			 * The data of the sheet is stored in ExcelTabData, which later produces the CSV. NOTE(review): the reason for the 4 extra columns is not visible in this class.
			 */
			ExcelAbstractions.ExcelTabData tabData = ExcelAbstractions.getInstance().new ExcelTabData( 1, colcount + 4 );

			boolean isLabSheet = sheet.getSheetName().equalsIgnoreCase( "lab" );

			/* global columns are only added to sheets other than lab; their values come from the lab sheet */
			if ( globalColumns && !isLabSheet ) {
				tabData.setColumnCount( tabData.getColumnCount() + globalColumnsOrderInCSV.length + 1 );
			}

			/*
			 * Loop over the rows up to and including the last row index. getPhysicalNumberOfRows() must not be used as the bound: it only counts defined rows, so rows located after a gap of undefined rows would never be reached.
			 */
			int rowCount = 0;
			for (int i = startRow - 1; i <= lastRowIndex; i++) {
				Row row = sheet.getRow( i );

				/* undefined and blank rows produce no output row */
				if ( row == null || ExcelUtils.isBlankRow( row ) ) {
					continue;
				}
				tabData.setRowCount( rowCount + 1 );

				/* loop over the defined cells of the row, at most colcount of them */
				Iterator<Cell> cellIter = row.cellIterator();
				int tempLoop = 0;
				while ( cellIter.hasNext() && tempLoop < colcount ) {
					Cell cell = cellIter.next();

					/*
					 * On the lab sheet the leading cells are remembered as global column values: the n-th cell visited is stored under the n-th configured global column name. Later rows overwrite earlier ones.
					 */
					if ( isLabSheet && globalColumns && tempLoop < globalColumnsOrderInExcel.length ) {
						globalColumnsHash.put( globalColumnsOrderInExcel[tempLoop], cell.getStringCellValue() );
					}

					/*
					 * Global columns are always prepended, never inserted in the middle or appended, so on non-lab sheets every cell is shifted right by the number of global CSV columns.
					 */
					int colNum;
					if ( globalColumns && rowCount >= rowheadStart && globalColumnsOrderInExcel[0] != null && !isLabSheet ) {
						colNum = cell.getColumnIndex() + globalColumnsOrderInCSV.length;
					} else {
						colNum = cell.getColumnIndex();
					}

					addCell( tabData, cell, rowCount, colNum );
					tempLoop++;
				}

				/* write the global column names (header row) or values (data rows) at the beginning of the row just filled */
				addGlobalColumnsToRow( globalColumnsHash, sheet, rowheadStart, tabData, i, rowCount );
				rowCount++;
			}

			numberOfRowsReadFromTabs.put( sheet.getSheetName(), rowCount );

			/* wrap the tab data in an ExcelTab and publish it under the upper-cased tab name */
			ExcelAbstractions.ExcelTab excelTab = ExcelAbstractions.getInstance().new ExcelTab( sheet.getSheetName(), tabData );
			excelTabNameToTabDataMap.put( sheet.getSheetName().toUpperCase(), excelTab );
		}
		return excelTabNameToTabDataMap;
	}

	/**
	 * Writes the global columns at the beginning of one output row of a non-lab sheet.
	 * <p>
	 * Nothing is written when global columns are not in use, when the first configured global column name is null, or when the sheet is the lab sheet.
	 * 
	 * @param globalColumnsHash global column name to value, as collected from the lab sheet
	 * @param sheet sheet the row belongs to
	 * @param rowheadStart sheet row index of the column-header row
	 * @param tabData table receiving the cells
	 * @param i sheet row index of the row being processed
	 * @param rowCount output row index inside tabData
	 * @throws UnsupportedEncodingException declared by addCell
	 * @throws IllegalStateException if a global column configured for the CSV has no value in globalColumnsHash
	 */
	private void addGlobalColumnsToRow(HashMap<String, String> globalColumnsHash, Sheet sheet, int rowheadStart, ExcelAbstractions.ExcelTabData tabData, int i, int rowCount) throws UnsupportedEncodingException {

		if ( !globalColumns || globalColumnsOrderInExcel[0] == null || sheet.getSheetName().equalsIgnoreCase( "lab" ) ) {
			return;
		}

		if ( i == rowheadStart ) {
			/* column header row: write the global column names */
			int y = 0;
			for (String columnName : globalColumnsOrderInCSV) {
				addCell( tabData, columnName, rowCount, y );
				y++;
			}
		} else if ( i > rowheadStart && globalColumnsHash != null ) {
			/* data row: write the values that were read from the lab tab */
			int y = 0;
			for (String columnName : globalColumnsOrderInCSV) {
				String value = globalColumnsHash.get( columnName );
				if ( value == null ) {
					/* fail with a message that names the column instead of a bare NullPointerException */
					throw new IllegalStateException( "No value was read from the lab tab for global column '" + columnName + "' (needed by tab '" + sheet.getSheetName() + "')" );
				}
				addCell( tabData, value, rowCount, y );
				y++;
			}
		}
	}

	/**
	 * Opens the Excel workbook contained in the given stream.
	 * <p>
	 * Every failure is logged together with its stack trace and rethrown wrapped in an ExcelParserException.
	 * 
	 * @param excelInputStream stream holding the Excel file
	 * @return the opened workbook
	 */
	private Workbook getWorkbookOfExcel(InputStream excelInputStream) {
		try {
			return WorkbookFactory.create( excelInputStream );
		} catch (FileNotFoundException e) {
			logger.error( "Excel file could not be found", e );
			throw new ExcelParserException( e );
		} catch (InvalidFormatException e) {
			logger.error( "Expecting an Excel file, but the input has an invalid format", e );
			throw new ExcelParserException( e );
		} catch (IOException e) {
			logger.error( "I/O error while reading the Excel file", e );
			throw new ExcelParserException( e );
		}
	}

	/**
	 * Derives the map directory path from the sheet currently held in LAB_SHEET.
	 * 
	 * @return the map directory path computed from the FORM VERSION column of that sheet
	 * @throws ParseException if the sheet has no header row containing the FORM VERSION marker
	 */
	@Override
	protected String readMapDirectoryPathFromFile() throws ParseException {
		final Sheet labSheet = LAB_SHEET;
		return readMapDirectoryPathFromFile( labSheet );
	}

	/**
	 * Computes the map directory path from the FORM VERSION column of the given sheet.
	 * <p>
	 * The text in front of the FORM VERSION marker in the header cell (lower-cased, trimmed) is the map name; the lower-cased value in the row directly below the header is the version. If the cell below the marker column is empty or undefined, the
	 * columns to its right are inspected in turn and the first non-empty value found is used as the version. Name and version stay empty when nothing is found.
	 * 
	 * @param sheet sheet to inspect
	 * @return {@code mapsParentDirecotryPath + "/" + name + "_" + version}
	 * @throws ParseException if the sheet is null or has no header row containing the FORM VERSION marker
	 */
	private String readMapDirectoryPathFromFile(Sheet sheet) throws ParseException {
		if ( sheet == null ) {
			throw new ParseException( "Sheet holding the " + FORM_VERSION_STRING + " column was not found in this Excel File", 0 );
		}
		String mapDirectoryName = "";
		String mapDirectoryVersion = "";
		boolean formVersionColumnFound = false;
		/* parses and gets the header row */
		Row headerow = getHeaderRow( sheet );
		/* the version value sits in the row directly below the header; that row may be undefined */
		Row valueRow = sheet.getRow( headerow.getRowNum() + 1 );
		Iterator<Cell> cellIter = headerow.cellIterator();
		while ( cellIter.hasNext() ) {
			Cell cell = cellIter.next();
			String headerText = cell.getStringCellValue();
			int markerIndex = headerText.indexOf( FORM_VERSION_STRING );
			if ( markerIndex >= 0 ) {
				/* substring instead of split(..)[0]: split returns an empty array when the header consists of the marker only */
				mapDirectoryName = headerText.substring( 0, markerIndex ).toLowerCase().trim();
				formVersionColumnFound = true;
			}
			if ( formVersionColumnFound && valueRow != null ) {
				/* the iterator skips undefined cells, so the value has to be looked up by the real column index, not by an iteration counter */
				Cell valuecell = valueRow.getCell( cell.getColumnIndex() );
				if ( valuecell != null && !valuecell.getStringCellValue().equals( "" ) ) {
					mapDirectoryVersion = valuecell.getStringCellValue().toLowerCase();
					break;
				}
			}
		}
		return mapsParentDirecotryPath + "/" + mapDirectoryName + "_" + mapDirectoryVersion;
	}

	/**
	 * Finds the header row of a sheet, i.e. the first row that has a text cell containing the FORM VERSION marker.
	 * 
	 * @param sheet sheet to search
	 * @return the first row with a cell whose text contains the marker
	 * @throws ParseException if no such row exists
	 */
	private Row getHeaderRow(Sheet sheet) throws ParseException {
		/* iterate up to the last row index; the number of physical rows is smaller than that when the sheet has undefined rows */
		int lastRowIndex = sheet.getLastRowNum();
		for (int k = 0; k <= lastRowIndex; k++) {
			Row row = sheet.getRow( k );
			if ( row == null ) {
				/* undefined row, nothing to inspect */
				continue;
			}
			Iterator<Cell> cellIter = row.cellIterator();
			while ( cellIter.hasNext() ) {
				Cell cell = cellIter.next();
				/* only text cells (or formulas evaluating to text) can carry the marker; asking a numeric cell for its string value throws */
				int cellType = cell.getCellType();
				if ( cellType == Cell.CELL_TYPE_FORMULA ) {
					cellType = cell.getCachedFormulaResultType();
				}
				if ( cellType == Cell.CELL_TYPE_STRING && cell.getStringCellValue().contains( FORM_VERSION_STRING ) ) {
					return row;
				}
			}
		}
		throw new ParseException( "Header row not found in this Excel File", 0 );
	}

	/**
	 * Builds the column-name to column-number map of a sheet.
	 * <p>
	 * Numbering starts at 1. On sheets other than lab the global CSV columns, when in use, take the first numbers; the cells of the header row follow in iteration order.
	 * 
	 * @param sheet sheet whose header row is read
	 * @return metadata holding the column-name to column-number map
	 * @throws ParseException if the sheet has no header row
	 */
	private SheetMetaData readSheetMetaData(Sheet sheet) throws ParseException {
		final Map<String, Integer> numberByColumnName = new HashMap<String, Integer>();
		int nextColumnNumber = 1;

		/* global columns come first on every sheet except lab */
		final boolean prependGlobalColumns = globalColumns && !sheet.getSheetName().equalsIgnoreCase( "lab" );
		if ( prependGlobalColumns ) {
			for (int g = 0; g < globalColumnsOrderInCSV.length; g++) {
				numberByColumnName.put( globalColumnsOrderInCSV[g], nextColumnNumber );
				nextColumnNumber++;
			}
		}

		/* then one entry per header cell, keyed by the header text */
		for (Iterator<Cell> headerCells = getHeaderRow( sheet ).cellIterator(); headerCells.hasNext();) {
			final Cell headerCell = headerCells.next();
			numberByColumnName.put( headerCell.getStringCellValue(), nextColumnNumber );
			nextColumnNumber++;
		}

		final SheetMetaData result = new SheetMetaData();
		result.columnNameToRowNumberMap = numberByColumnName;
		return result;
	}

	/**
	 * Loads the file-level format properties from the current map directory.
	 * <p>
	 * "order" (mandatory) lists the tab names in processing order; "globalcolumnsorderinexcel" and "globalcolumnsorderintoCSV" (both optional) list the global column names. All three values are '#'-separated.
	 * 
	 * @throws IOException declared for the configuration reader
	 * @throws ConfigurationException if the configuration cannot be read or the "order" property is missing
	 */
	@Override
	protected void readConfigPropertiesOfFile() throws IOException, ConfigurationException {
		Map<String, String> rowConfigMap = RowConfiguration.getFormatingInfoOfFile( this.getMapDirecotryPath() );

		String order = rowConfigMap.get( "order" );
		if ( order == null ) {
			/* without the tab order nothing can be processed; report it instead of failing with a NullPointerException */
			throw new ConfigurationException( "Mandatory property \"order\" is missing from the configuration in " + this.getMapDirecotryPath() );
		}
		tabOrder = order.split( "#" );

		String excelOrder = rowConfigMap.get( "globalcolumnsorderinexcel" );
		if ( excelOrder != null ) {
			globalColumnsOrderInExcel = excelOrder.split( "#" );
		}

		/* NOTE(review): this key is mixed-case while the other keys are lower-case - confirm it matches what RowConfiguration returns */
		String csvOrder = rowConfigMap.get( "globalcolumnsorderintoCSV" );
		if ( csvOrder != null ) {
			globalColumnsOrderInCSV = csvOrder.split( "#" );
		}
	}

	/**
	 * Loads the format properties configured for one tab.
	 * 
	 * @param sheet tab whose properties are read; its sheet name selects the configuration entry
	 * @return the properties of the tab (start row and tab name)
	 * @throws ConfigurationException if the configuration cannot be read, or the "startrow" property is missing or not an integer
	 */
	protected TabConfigProperites readConfigPropertiesForTab(Sheet sheet) throws ConfigurationException {
		String tabName = sheet.getSheetName();
		Map<String, String> rowConfigMap = RowConfiguration.getFormatingInfoOfRow( this.getMapDirecotryPath(), tabName );

		String startRowText = rowConfigMap.get( "startrow" );
		if ( startRowText == null ) {
			throw new ConfigurationException( "Property \"startrow\" is missing for tab " + tabName );
		}
		try {
			/* tolerate surrounding whitespace in the configured value */
			return new TabConfigProperites( Integer.parseInt( startRowText.trim() ), tabName );
		} catch (NumberFormatException e) {
			throw new ConfigurationException( "Property \"startrow\" for tab " + tabName + " is not an integer: " + startRowText, e );
		}
	}

	private class TabConfigProperites {

		int startRow;
		String tabName;

		TabConfigProperites(int startRow, String tabName) {
			this.startRow = startRow;
			this.tabName = tabName;
		}

		public int getStartRow() {
			return startRow;
		}

		public String getTabName() {
			return tabName;
		}

	}

	/**
	 * Copies the value of an Excel cell into the table model.
	 * <p>
	 * Text has every sequence matching {@code _x[^_]*?_} removed. Cells that cannot be read as text are written as the integral part of their numeric value.
	 * 
	 * @param tableModel table receiving the value
	 * @param cell cell to copy
	 * @param rowNum target row in the table model
	 * @param colNum target column before Configure.baseCol is added
	 */
	private void addCell(DefaultTableModel tableModel, Cell cell, int rowNum, int colNum) {
		String text;
		try {
			/*
			 * Java strings are already Unicode. The former getBytes() / new String(.., "utf-8") round trip encoded with the platform default charset and decoded as UTF-8, which corrupts non-ASCII text wherever the default charset is not UTF-8.
			 */
			text = cell.getStringCellValue().replaceAll( "_x[^_]*?_", "" );
		} catch (IllegalStateException e) {
			/* POI refuses to return a string for a numeric cell; long instead of int so that large values are not clamped to Integer.MAX_VALUE */
			text = String.valueOf( (long)cell.getNumericCellValue() );
		}
		tableModel.setValueAt( text, rowNum, colNum + Configure.baseCol );
	}

	/**
	 * Writes a text value into the table model after removing every sequence matching {@code _x[^_]*?_}.
	 * 
	 * @param tableModel table receiving the value
	 * @param cellValue text to write
	 * @param rowNum target row in the table model
	 * @param colNum target column before Configure.baseCol is added
	 * @throws UnsupportedEncodingException never thrown; kept in the signature so existing callers compile unchanged
	 */
	private void addCell(DefaultTableModel tableModel, final String cellValue, final int rowNum, final int colNum) throws UnsupportedEncodingException {
		/*
		 * No byte round trip: encoding with the platform default charset and decoding as UTF-8 corrupts non-ASCII text wherever the default charset is not UTF-8, and is a no-op otherwise.
		 */
		tableModel.setValueAt( cellValue.replaceAll( "_x[^_]*?_", "" ), rowNum, colNum + Configure.baseCol );
	}

	/**
	 * @return the tab names in processing order, as set by readConfigPropertiesOfFile or setTabOrder; this is the internal array, not a copy
	 */
	public String[] getTabOrder() {
		return tabOrder;
	}

	/**
	 * Replaces the tab processing order.
	 * 
	 * @param tabOrder tab names in the order they should be processed; the array is stored as given, not copied
	 */
	public void setTabOrder(String[] tabOrder) {
		this.tabOrder = tabOrder;
	}

	/**
	 * @return the map from sheet name to the number of rows toCSV read from that sheet; this is the internal map, not a copy
	 */
	public Map<String, Integer> getNumberOfRowsReadFromTabs() {
		return numberOfRowsReadFromTabs;
	}

	/**
	 * @return the metadata object of this parser, filled per sheet by toCSV
	 */
	public FileMetaData getMetaData() {
		return fileMetaData;
	}

	/**
	 * Metadata of one Excel file: the SheetMetaData of every sheet, keyed by sheet name.
	 */
	public class ExcelFileMetaData implements FileMetaData {
		/* sheet name -> metadata of that sheet; entries are added by the enclosing parser */
		private Map<String, SheetMetaData> sheetNameToSheetMetaData = new HashMap<String, SheetMetaData>();

		/** Creates metadata without any sheet entries. */
		public ExcelFileMetaData() {
			super();
		}

		/**
		 * @param sheetName name of the sheet
		 * @return the metadata stored for that sheet, or null when there is none
		 */
		public SheetMetaData getSheetMetaData(String sheetName) {
			final SheetMetaData metaData = sheetNameToSheetMetaData.get( sheetName );
			return metaData;
		}

	}

	/**
	 * Metadata of one sheet: the number assigned to each column name.
	 */
	public class SheetMetaData {
		/* column name -> column number; despite the field name the values are column numbers. Assigned by the enclosing parser. */
		private Map<String, Integer> columnNameToRowNumberMap;

		/**
		 * Looks up the number of a column.
		 * 
		 * @param columnName name of the column
		 * @return the number stored for that column
		 * @throws IllegalArgumentException if no number is stored for the column (previously this surfaced as a NullPointerException from auto-unboxing)
		 */
		public int getColumnNumberByColumnName(String columnName) {
			Integer columnNumber = columnNameToRowNumberMap == null ? null : columnNameToRowNumberMap.get( columnName );
			if ( columnNumber == null ) {
				throw new IllegalArgumentException( "Unknown column name: " + columnName );
			}
			return columnNumber.intValue();
		}

		/**
		 * @return the column-name to column-number map; this is the internal map, not a copy
		 */
		public Map<String, Integer> getMapofColumnNameAndNumber() {
			return columnNameToRowNumberMap;
		}

	}

}
