/*
 * Decompiled with CFR 0.152.
 */
package edu.mayo.bmi.smoking.ae;

import edu.mayo.bmi.smoking.ae.ResolutionAnnotator;
import edu.mayo.bmi.smoking.i2b2.type.RecordSentence;
import edu.mayo.bmi.smoking.type.SmokingDocumentClassification;
import edu.mayo.bmi.smoking.util.ClassifiableEntry;
import edu.mayo.bmi.smoking.util.TruthValue;
import edu.mayo.bmi.uima.core.resource.FileLocator;
import edu.mayo.bmi.uima.core.type.DocumentID;
import edu.mayo.bmi.uima.core.type.Segment;
import edu.mayo.bmi.uima.core.type.Sentence;
import edu.mayo.bmi.uima.libsvm.type.NominalAttributeValue;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.log4j.Logger;
import org.apache.uima.UIMAFramework;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.XMLInputSource;

public class ClassifiableEntries
extends JCasAnnotator_ImplBase {
    /** Config parameter name whose value is located as a file and parsed as the step-1 engine descriptor in {@code initialize}. */
    public static final String PARAM_SMOKING_STATUS_DESC_STEP1 = "UimaDescriptorStep1";
    /** Config parameter name whose value is located as a file and parsed as the step-2 engine descriptor in {@code initialize}. */
    public static final String PARAM_SMOKING_STATUS_DESC_STEP2 = "UimaDescriptorStep2";
    /** Config parameter name for the truth file path; truth data is only loaded when the value is non-empty. */
    public static final String PARAM_TRUTH_FILE = "TruthFile";
    /** Config parameter name for a truth-file delimiter. NOTE(review): not read anywhere in this class; the loader is always given a tab. */
    public static final String PARAM_TRUTH_FILE_DELIMITER = "TruthFileDelimiter";
    /** Config parameter name for the String[] of classifications accepted by {@code buildProcEntryList}. */
    public static final String PARAM_ALLOWED_CLASSES = "AllowedClassifications";
    /** NOTE(review): declared but not referenced anywhere in this class. */
    public static final String PARAM_PARSE_SECTIONS = "ParseSections";
    /** Config parameter name for the String[] of segment ids whose sentences are skipped in {@code process}. */
    public static final String PARAM_IGNORE_SECTIONS = "SectionsToIgnore";
    // Index of the entry currently being classified inside process().
    private int iv_classifiableIdx;
    // Entries that passed the allowed-classification filter for the current document.
    private List<ClassifiableEntry> iv_procEntryList;
    // Segments of the current document, used to find the segment covering a sentence.
    private List<Segment> iv_segList;
    // Sentence entries of the current document keyed by record id (cleared on every process() call).
    private Map<String, List<ClassifiableEntry>> iv_entryIndexMap;
    // Truth data keyed by record id; filled by loadTruthData(). NOTE(review): never read in this class.
    private Map<Integer, TruthValue> iv_truthMap;
    // Accepted classifications; stays null (meaning "accept everything") when no truth file is configured.
    private Set<String> iv_allowedClassifications;
    // Analysis engines produced from the two descriptors; step 2 only runs when step 1 did not report UNKNOWN.
    private AnalysisEngine taeStep1;
    private AnalysisEngine taeStep2;
    // Parsed descriptors the engines above are produced from.
    private ResourceSpecifier taeSpecifierStep1;
    private ResourceSpecifier taeSpecifierStep2;
    protected Logger iv_logger = Logger.getLogger(this.getClass().getName());
    // Per-document vote counters: incremented by storeAssignedClasses(), cleared by resetCounts().
    private int iSmokerCtr;
    private int iPastSmokerCtr;
    private int iCurrentCtr;
    private int iNonSmokerCtr;
    private int iUnknownCtr;
    // NOTE(review): not referenced anywhere in this class.
    private String apiMacroHome = "\\$main_root";
    // Scratch JCas created from the step-1 engine metadata; reset and refilled for every sentence.
    private JCas jcas_local;
    // Annotator run on the scratch JCas after the step engines.
    private ResolutionAnnotator ra;
    // Resource manager handed to both produceAnalysisEngine() calls.
    private ResourceManager ResMgr = UIMAFramework.newDefaultResourceManager();
    // Segment ids taken from PARAM_IGNORE_SECTIONS.
    private Set<String> sectionsToIgnore;
    /**
     * Sets up per-instance state: loads optional truth data, parses the two
     * step descriptors, initializes the {@link ResolutionAnnotator}, produces
     * both analysis engines, creates the scratch JCas used per sentence and
     * reads the list of segment ids to ignore.
     *
     * @param aContext the UIMA context supplying the configuration parameters
     * @throws ResourceInitializationException wrapping any failure raised
     *         while loading truth data, descriptors or engines
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.iv_procEntryList = new ArrayList<ClassifiableEntry>();
        this.iv_entryIndexMap = new HashMap<String, List<ClassifiableEntry>>();
        this.iv_segList = new ArrayList<Segment>();
        try {
            this.initTruthData();

            String step1Desc = (String)aContext.getConfigParameterValue(PARAM_SMOKING_STATUS_DESC_STEP1);
            File desc1File = FileLocator.locateFile(step1Desc);
            // Report the resolved descriptor through the logger rather than stdout.
            if (this.iv_logger.isInfoEnabled()) {
                this.iv_logger.info("Step 1 descriptor: " + desc1File);
            }
            this.taeSpecifierStep1 = UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(desc1File));

            String step2Desc = (String)aContext.getConfigParameterValue(PARAM_SMOKING_STATUS_DESC_STEP2);
            File desc2File = FileLocator.locateFile(step2Desc);
            this.taeSpecifierStep2 = UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(desc2File));

            this.ra = new ResolutionAnnotator();
            this.ra.initialize(aContext);

            this.taeStep1 = UIMAFramework.produceAnalysisEngine(this.taeSpecifierStep1, this.ResMgr, null);
            this.taeStep2 = UIMAFramework.produceAnalysisEngine(this.taeSpecifierStep2, this.ResMgr, null);
            this.jcas_local = CasCreationUtils.createCas(this.taeStep1.getAnalysisEngineMetaData()).getJCas();

            // An unset SectionsToIgnore parameter means "ignore nothing",
            // not a NullPointerException during start-up.
            this.sectionsToIgnore = new HashSet<String>();
            String[] sections = (String[])this.getContext().getConfigParameterValue(PARAM_IGNORE_SECTIONS);
            if (sections != null) {
                for (String section : sections) {
                    this.sectionsToIgnore.add(section);
                }
            }
        }
        catch (Exception e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Loads truth data and the allowed-classification filter, but only when
     * the {@code TruthFile} parameter is set to a non-empty path. Without a
     * truth file nothing is loaded and the filter stays {@code null}.
     *
     * The truth file is always read as tab-delimited.
     *
     * @throws Exception if the truth file cannot be loaded, if
     *         {@code AllowedClassifications} is missing while a truth file is
     *         configured, or if it contains a value outside the five known
     *         classifications
     */
    private void initTruthData() throws Exception {
        String truthFilePath = (String)this.getContext().getConfigParameterValue(PARAM_TRUTH_FILE);
        if (truthFilePath == null || truthFilePath.length() == 0) {
            return;
        }
        this.loadTruthData(new File(truthFilePath), "\t");

        String[] allowedArr = (String[])this.getContext().getConfigParameterValue(PARAM_ALLOWED_CLASSES);
        if (allowedArr == null) {
            // Fail with a message naming the parameter instead of a bare NullPointerException.
            throw new Exception("Missing value for param " + PARAM_ALLOWED_CLASSES
                    + " (required when " + PARAM_TRUTH_FILE + " is set)");
        }

        Set<String> allowed = new HashSet<String>();
        for (String classification : allowedArr) {
            // Constant-first equals so a null array element is reported as invalid too.
            boolean valid = "CURRENT_SMOKER".equals(classification)
                    || "NON_SMOKER".equals(classification)
                    || "PAST_SMOKER".equals(classification)
                    || "SMOKER".equals(classification)
                    || "UNKNOWN".equals(classification);
            if (!valid) {
                throw new Exception("Invalid classification value for param AllowedClassifications:" + classification);
            }
            allowed.add(classification);
        }
        this.iv_allowedClassifications = allowed;
    }

    /**
     * Reads the truth file into {@code iv_truthMap}, keyed by record id.
     *
     * A line is accepted only when it splits into exactly four tokens; the
     * first three are used as record id, truth label and sentence (the fourth
     * is not read). Because {@link StringTokenizer} is used, consecutive
     * delimiters do not produce empty tokens. Other lines are logged as
     * malformed and skipped. The first label seen for a record becomes its
     * classification; later lines for the same record only add sentences.
     *
     * @param truthFile file to read
     * @param delimiter delimiter characters handed to the tokenizer
     * @throws Exception if the file cannot be read, a record id is not an
     *         integer, or a truth label is not one of the known values
     */
    private void loadTruthData(File truthFile, String delimiter) throws Exception {
        this.iv_truthMap = new HashMap<Integer, TruthValue>();
        BufferedReader br = new BufferedReader(new FileReader(truthFile));
        // try/finally so the reader is released even when a line is rejected.
        try {
            int lineNum = 1;
            for (String line = br.readLine(); line != null; line = br.readLine(), ++lineNum) {
                StringTokenizer st = new StringTokenizer(line, delimiter);
                if (st.countTokens() != 4) {
                    this.iv_logger.warn("Malformed line " + lineNum + ": " + line);
                    continue;
                }
                Integer recordID = Integer.valueOf(st.nextToken().trim());
                String truthVal = st.nextToken().trim();
                String sentence = st.nextToken().trim();

                String ssClass = this.mapTruthLabel(truthVal);
                if (ssClass == null) {
                    throw new Exception("Invalid truth value for line:" + line);
                }

                TruthValue tVal = this.iv_truthMap.get(recordID);
                if (tVal == null) {
                    tVal = new TruthValue();
                    // Raw type kept: the declared element type of iv_sentenceList is not visible here.
                    tVal.iv_sentenceList = new ArrayList();
                    tVal.iv_classification = ssClass;
                    this.iv_truthMap.put(recordID, tVal);
                }
                tVal.iv_sentenceList.add(sentence);
            }
        }
        finally {
            br.close();
        }
        if (this.iv_logger.isInfoEnabled()) {
            this.iv_logger.info("Truth data loaded for " + this.iv_truthMap.keySet().size() + " records");
        }
    }

    /** Maps a truth-file label to the internal classification constant, or returns null for an unknown label. */
    private String mapTruthLabel(String truthVal) {
        if (truthVal.equals("CURRENT SMOKER")) {
            return "CURRENT_SMOKER";
        }
        if (truthVal.equals("PAST SMOKER")) {
            return "PAST_SMOKER";
        }
        if (truthVal.equals("SMOKER")) {
            return "SMOKER";
        }
        if (truthVal.equals("NON-SMOKER")) {
            return "NON_SMOKER";
        }
        if (truthVal.equals("UNKNOWN")) {
            return "UNKNOWN";
        }
        return null;
    }

    /**
     * Classifies one document sentence by sentence and stores the resolved
     * document-level result in {@code jcas}.
     *
     * Each sentence that is not covered by an ignored segment is copied into
     * the scratch JCas as a {@link RecordSentence} (plus a copy of its
     * covering segment, when one exists), run through step 1, through step 2
     * unless step 1 reported UNKNOWN, and through the resolution annotator.
     * The resulting nominal values are tallied and finally resolved by
     * {@code collectionProcessComplete}.
     *
     * Failures are logged and not propagated, so a failed document ends up
     * without a document-level classification.
     *
     * @param jcas the document CAS to read sentences/segments from and to
     *        write the final classification into
     */
    @Override
    public void process(JCas jcas) {
        this.iv_entryIndexMap.clear();
        this.iv_procEntryList.clear();
        this.iv_segList.clear();
        // Counters are otherwise only cleared after a successful run; clear them
        // here so a previously failed document cannot leak votes into this one.
        this.resetCounts();

        // The record id must not depend on the log level: read it always,
        // and only make the log statement conditional.
        String recordID = null;
        FSIterator docItr = jcas.getJFSIndexRepository().getAnnotationIndex(DocumentID.type).iterator();
        if (docItr.hasNext()) {
            recordID = ((DocumentID)docItr.next()).getDocumentID();
            if (this.iv_logger.isInfoEnabled()) {
                this.iv_logger.info("Processing record [" + recordID + "]");
            }
        }

        // Collect the segments once; they are needed both for the ignore
        // filter below and for getSegment() during classification.
        FSIterator segItr = jcas.getJFSIndexRepository().getAnnotationIndex(Segment.type).iterator();
        while (segItr.hasNext()) {
            this.iv_segList.add((Segment)segItr.next());
        }

        List<ClassifiableEntry> entryList = new ArrayList<ClassifiableEntry>();
        FSIterator sentItr = jcas.getJFSIndexRepository().getAnnotationIndex(Sentence.type).iterator();
        while (sentItr.hasNext()) {
            Sentence sentAnn = (Sentence)sentItr.next();
            if (this.isInIgnoredSection(sentAnn)) {
                continue;
            }
            ClassifiableEntry entry = new ClassifiableEntry();
            entry.iv_recordID = recordID;
            entry.iv_begin = sentAnn.getBegin();
            entry.iv_end = sentAnn.getEnd();
            entry.iv_text = sentAnn.getCoveredText();
            entryList.add(entry);
        }
        this.iv_entryIndexMap.put(recordID, entryList);
        this.buildProcEntryList();

        try {
            this.iv_classifiableIdx = 0;
            while (this.iv_classifiableIdx < this.iv_procEntryList.size()) {
                this.jcas_local.reset();
                ClassifiableEntry entry = this.iv_procEntryList.get(this.iv_classifiableIdx);

                // The scratch CAS holds just this sentence, so offsets start at 0.
                RecordSentence rs = new RecordSentence(this.jcas_local);
                rs.setRecordID(entry.iv_recordID);
                rs.setBegin(0);
                rs.setRecordTextBegin(0);
                rs.setEnd(entry.iv_text.length());
                rs.setRecordTextEnd(entry.iv_text.length());
                this.jcas_local.setDocumentText(entry.iv_text);

                Segment sa = this.getSegment(entry);
                if (sa != null) {
                    Segment copy_sa = new Segment(this.jcas_local);
                    copy_sa.setBegin(rs.getBegin());
                    copy_sa.setEnd(rs.getEnd());
                    copy_sa.setId(sa.getId());
                    copy_sa.addToIndexes();
                } else if (this.iv_logger.isDebugEnabled()) {
                    // NOTE(review): emitted at ERROR level but only when DEBUG is enabled; kept as found.
                    this.iv_logger.error("Invalid Segment for sentence [" + rs.getCoveredText() + "]");
                }
                if (entry.iv_classification != null) {
                    rs.setClassification(entry.iv_classification);
                }
                rs.addToIndexes();

                this.taeStep1.process(this.jcas_local);
                if (this.isSmokingStatusKnown(this.jcas_local)) {
                    this.taeStep2.process(this.jcas_local);
                }
                this.ra.process(this.jcas_local);
                this.performRecordResolution(this.jcas_local);
                ++this.iv_classifiableIdx;
            }
            this.collectionProcessComplete(jcas);
        }
        catch (Exception e) {
            // Still best-effort (the signature declares no checked exception),
            // but reported through the logger with the record id and full cause.
            this.iv_logger.error("Failed to classify record [" + recordID + "]", e);
        }
    }

    /**
     * Tells whether some collected segment fully covers the sentence and has
     * an id listed in {@code sectionsToIgnore}.
     */
    private boolean isInIgnoredSection(Sentence sentence) {
        for (Segment segment : this.iv_segList) {
            boolean covers = segment.getBegin() <= sentence.getBegin() && segment.getEnd() >= sentence.getEnd();
            if (covers && this.sectionsToIgnore.contains(segment.getId())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Releases both step engines. Each engine is checked for null because
     * {@code initialize} may have failed before it was produced, and the
     * second engine is destroyed even if destroying the first one throws.
     */
    @Override
    public void destroy() {
        super.destroy();
        try {
            if (this.taeStep1 != null) {
                this.taeStep1.destroy();
            }
        }
        finally {
            if (this.taeStep2 != null) {
                this.taeStep2.destroy();
            }
        }
    }

    /**
     * Scans every {@link NominalAttributeValue} in the given JCas and reports
     * whether none of them is a "smoking_status" attribute with the value
     * "UNKNOWN" (both compared case-insensitively).
     *
     * @param jcas_local the JCas to inspect
     * @return {@code false} if an UNKNOWN smoking status was found, {@code true} otherwise
     */
    private boolean isSmokingStatusKnown(JCas jcas_local) {
        boolean unknownSeen = false;
        for (FSIterator values = jcas_local.getJFSIndexRepository().getAnnotationIndex(NominalAttributeValue.type).iterator(); values.hasNext(); ) {
            NominalAttributeValue value = (NominalAttributeValue)values.next();
            boolean isSmokingStatus = value.getAttributeName().equalsIgnoreCase("smoking_status");
            if (isSmokingStatus && value.getNominalValue().equalsIgnoreCase("UNKNOWN")) {
                unknownSeen = true;
            }
        }
        return !unknownSeen;
    }

    /**
     * Finds the first collected segment whose span fully contains the entry.
     *
     * @param rs the entry whose begin/end offsets are checked
     * @return the first containing segment in {@code iv_segList} order, or
     *         {@code null} when no segment contains the entry
     */
    private Segment getSegment(ClassifiableEntry rs) {
        for (Segment candidate : this.iv_segList) {
            boolean startsInside = rs.iv_begin >= candidate.getBegin();
            if (startsInside && rs.iv_end <= candidate.getEnd()) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Tallies the nominal value of every {@link NominalAttributeValue} in the
     * given JCas, provided the JCas holds at least one {@link RecordSentence}.
     *
     * @param jcas_local the JCas holding the per-sentence results
     * @throws AnnotatorProcessException wrapping any exception raised while
     *         reading the indexes or tallying
     */
    private void performRecordResolution(JCas jcas_local) throws AnnotatorProcessException {
        try {
            boolean hasRecordSentence = jcas_local.getJFSIndexRepository().getAnnotationIndex(RecordSentence.type).iterator().hasNext();
            if (!hasRecordSentence) {
                return;
            }
            for (FSIterator values = jcas_local.getJFSIndexRepository().getAnnotationIndex(NominalAttributeValue.type).iterator(); values.hasNext(); ) {
                NominalAttributeValue value = (NominalAttributeValue)values.next();
                this.storeAssignedClasses(value.getNominalValue());
            }
        }
        catch (Exception e) {
            throw new AnnotatorProcessException(e);
        }
    }

    /**
     * Resolves the tallied sentence-level votes into one document-level
     * classification, stores it in {@code jcas} as a
     * {@link SmokingDocumentClassification}, and clears the counters.
     *
     * The classification may be {@code null} when no vote was recorded.
     * The counters are cleared even when storing the result fails, so the
     * next document always starts from zero.
     *
     * @param jcas the document CAS that receives the classification
     * @throws ResourceProcessException wrapping any failure while resolving
     *         or storing the classification
     * @throws IOException declared for callers; not thrown by the visible code
     */
    public void collectionProcessComplete(JCas jcas) throws ResourceProcessException, IOException {
        try {
            String finalClassification = this.resolveClassification();
            SmokingDocumentClassification docClass = new SmokingDocumentClassification(jcas);
            // Populate the feature before indexing the annotation.
            docClass.setClassification(finalClassification);
            docClass.addToIndexes();
        }
        catch (Exception e) {
            throw new ResourceProcessException(e);
        }
        finally {
            this.resetCounts();
        }
    }

    /**
     * Turns the vote counters into a single classification.
     *
     * With no SMOKER / PAST_SMOKER / CURRENT_SMOKER votes the result is
     * UNKNOWN when only unknown votes exist, or NON_SMOKER when at least one
     * non-smoker vote exists. Otherwise precedence is CURRENT_SMOKER, then
     * PAST_SMOKER, then SMOKER.
     *
     * @return the resolved classification, or {@code null} when no rule matches
     */
    private String resolveClassification() {
        boolean noSmokingVotes = this.iSmokerCtr == 0 && this.iPastSmokerCtr == 0 && this.iCurrentCtr == 0;
        if (noSmokingVotes) {
            if (this.iUnknownCtr > 0 && this.iNonSmokerCtr == 0) {
                return "UNKNOWN";
            }
            if (this.iNonSmokerCtr >= 1 && this.iUnknownCtr >= 0) {
                return "NON_SMOKER";
            }
        }
        if (this.iCurrentCtr >= 1) {
            return "CURRENT_SMOKER";
        }
        // From here on a current-smoker vote is ruled out.
        if (this.iPastSmokerCtr >= 1) {
            return "PAST_SMOKER";
        }
        // ...and so is a past-smoker vote.
        if (this.iSmokerCtr >= 1) {
            return "SMOKER";
        }
        return null;
    }

    /**
     * Adds one vote to the counter matching the given classification.
     * Values other than the five known classifications are ignored.
     *
     * @param smokClass the classification to count; must not be {@code null}
     */
    private void storeAssignedClasses(String smokClass) {
        if (smokClass.equals("CURRENT_SMOKER")) {
            ++this.iCurrentCtr;
            return;
        }
        if (smokClass.equals("NON_SMOKER")) {
            ++this.iNonSmokerCtr;
            return;
        }
        if (smokClass.equals("PAST_SMOKER")) {
            ++this.iPastSmokerCtr;
            return;
        }
        if (smokClass.equals("SMOKER")) {
            ++this.iSmokerCtr;
            return;
        }
        if (smokClass.equals("UNKNOWN")) {
            ++this.iUnknownCtr;
        }
    }

    /** Sets all five vote counters back to zero. */
    private void resetCounts() {
        this.iSmokerCtr = this.iPastSmokerCtr = this.iCurrentCtr = 0;
        this.iNonSmokerCtr = this.iUnknownCtr = 0;
    }

    /**
     * Copies every entry in {@code iv_entryIndexMap} whose classification is
     * accepted into {@code iv_procEntryList}, and logs totals at INFO level.
     *
     * An entry is accepted when no allowed-classification filter is set, or
     * when the filter contains the entry's classification.
     * NOTE(review): nothing in this class assigns {@code iv_classification},
     * so with a filter configured every entry appears to be rejected unless
     * {@code ClassifiableEntry} sets it itself — confirm.
     */
    private void buildProcEntryList() {
        int accepted = 0;
        int rejected = 0;
        for (List<ClassifiableEntry> entries : this.iv_entryIndexMap.values()) {
            for (ClassifiableEntry candidate : entries) {
                boolean allowed = this.iv_allowedClassifications == null
                        || this.iv_allowedClassifications.contains(candidate.iv_classification);
                if (!allowed) {
                    if (this.iv_logger.isInfoEnabled()) {
                        this.iv_logger.info("disallowed value:" + candidate.iv_classification);
                    }
                    ++rejected;
                } else {
                    this.iv_procEntryList.add(candidate);
                    ++accepted;
                }
            }
        }
        if (this.iv_logger.isInfoEnabled()) {
            int total = accepted + rejected;
            this.iv_logger.info("# total sentences: " + total);
            this.iv_logger.info("# allowed sentences: " + accepted);
            this.iv_logger.info("# disallowed sentences: " + rejected);
        }
    }
}

