/*
 * Decompiled with CFR 0.152.
 */
package edu.harvard.catalyst.scheduler.subjectDataCleaner;

import com.opencsv.CSVWriter;
import edu.harvard.catalyst.scheduler.util.SubjectDataEncryptor;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
import java.security.Key;
import java.sql.Connection;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import javax.sql.DataSource;
import org.apache.commons.io.FileDeleteStrategy;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.ExampleMode;
import org.kohsuke.args4j.Option;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;

@Component
class SubjectDataCleaner {
    static final String DEFAULT_OUTPUT_DIRECTORY = "data-cleaner-output";
    static final String DEFAULT_DUPLICATE_SUBJECTS_TO_CURATE_FILENAME = "duplicateSubjectsToCurate.csv";
    static final String DEFAULT_NON_DUPLICATE_SUBJECTS_TO_CURATE_FILENAME = "nonDuplicateSubjectsToCurate.csv";
    static double MILLISECONDS_PER_MONTH = 2.592E9;
    static final String QUERY_ALL_SUBJECTS = "SELECT su.id, sm.mrn, su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number, ge.code as gender FROM subject su   JOIN country c ON c.id = su.country   JOIN state as sta ON sta.id = su.state   JOIN subject_mrn as sm ON sm.subject = su.id   JOIN gender ge ON ge.id = su.gender WHERE   su.archival_status IS NULL;";
    static final String QUERY_SUBJECTS_WITH_OLD_CREATION_DATE_AND_NOT_ON_ANY_STUDY = "SELECT su.id, sm.mrn ,su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number, ge.code as gender FROM subject su   JOIN subject_mrn as sm ON sm.subject = su.id   JOIN gender ge ON ge.id = su.gender   JOIN country c ON c.id = su.country   JOIN state as sta ON sta.id = su.state   LEFT JOIN study_subject ss ON ss.subject_mrn = sm.mrn WHERE su.created_date < DATE_SUB(?, INTERVAL ? MONTH)   AND ss.id IS NULL   AND su.archival_status IS NULL";
    static final String QUERY_SUBJECTS_NOT_ADDED_TO_ANY_STUDY_RECENTLY_AND_NEVER_SCHEDULED = "SELECT su.id, sm.mrn ,su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number, ge.code as gender,   (SELECT MAX(date) FROM activity_log       WHERE affected_subject = su.id       AND action_performed = 'ADD SUBJECT TO STUDY') AS last_assigned_to_a_study FROM subject su   JOIN subject_mrn as sm ON sm.subject = su.id   JOIN gender ge ON ge.id = su.gender   JOIN country c ON c.id = su.country   JOIN state as sta ON sta.id = su.state     WHERE   (SELECT COUNT(*) FROM booked_visit bv    JOIN subject_mrn sm2 ON bv.subject_mrn = sm2.id    WHERE sm2.subject = su.id) = 0   AND   (SELECT COUNT(*) FROM activity_log   WHERE date > DATE_SUB(?, INTERVAL ? MONTH)     AND affected_subject = su.id     AND action_performed = 'ADD SUBJECT TO STUDY') = 0   AND su.archival_status IS NULL;";
    static final String QUERY_STUDIES_ASSIGNED_TO_SUBJECT_LONG_AGO_AND_NEVER_SCHEDULED = "SELECT su.id as subject_id, sm.mrn ,su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number ,ge.code as gender, ss.id AS study_subject_id, ss.study AS study_id, st.name AS study_name, al.date AS date_added_to_study   FROM subject su     JOIN subject_mrn as sm ON sm.subject = su.id     JOIN gender ge ON ge.id = su.gender     JOIN country c ON c.id = su.country     JOIN state as sta ON sta.id = su.state     LEFT JOIN study_subject ss ON ss.subject_mrn = sm.id     LEFT JOIN study st ON ss.study = st.id     LEFT JOIN activity_log al ON (al.affected_subject = su.id AND al.affected_study = ss.study)   WHERE al.action_performed = 'ADD SUBJECT TO STUDY'   AND al.date < DATE_SUB(?, INTERVAL ? MONTH)   AND (SELECT COUNT(*) FROM booked_visit bv     WHERE bv.subject_mrn = sm.id and bv.study = st.id) = 0   AND su.archival_status IS NULL;";
    static final String QUERY_ALL_STUDIES_FOR_A_SUBJECT = "SELECT su.id as subject_id, ss.id AS study_subject_id, ss.study AS study_id, st.name AS study_name, st.local_id as local_id, al.date AS date_added_to_study, (SELECT COUNT(*) FROM booked_visit bv     JOIN subject_mrn sm ON bv.subject_mrn = sm.id    WHERE sm.subject = su.id) as number_of_booked_visits  FROM subject su     JOIN subject_mrn as sm ON sm.subject = su.id     JOIN gender ge ON ge.id = su.gender     LEFT JOIN study_subject ss ON ss.subject_mrn = sm.id     LEFT JOIN study st ON ss.study = st.id     LEFT JOIN activity_log al ON (al.affected_subject = su.id AND al.affected_study = ss.study)   WHERE su.id = ?     AND al.action_performed = 'ADD SUBJECT TO STUDY'     AND su.archival_status IS NULL;";
    DataSource dataSource;
    @Option(name="-help", required=false, usage="outputs usage info")
    boolean help = false;
    @Option(name="-noDecrypt", required=false, usage="do not decrypt subjects")
    boolean noDecrypt = false;
    @Option(name="-outputDir", required=false, usage="the working directory; it will be cleared by the program, and then the output CSV file paths will be relative to it")
    String outputDirectory = "data-cleaner-output";
    @Option(name="-dupesFileName", required=false, metaVar="[filename]", usage="output file path for duplicate subjects")
    String dupesFilename = "duplicateSubjectsToCurate.csv";
    @Option(name="-noDupesFileName", required=false, metaVar="[filename]", usage="output file path for non-duplicate subjects")
    String noDupesFilename = "nonDuplicateSubjectsToCurate.csv";
    @Option(name="-noMatchOnFullNamesOnly", required=false, usage="skip subject matches based on first and last name only")
    boolean skipFullnameMatches = false;
    @Option(name="-matchOnLastnameAndBirthdate", required=false, usage="match subjects with the same last name and birhtday.")
    boolean matchOnLastnameAndBirthdate = false;
    @Option(name="-msu", required=false, metaVar="[number of months]", usage="number of months before a subject is considered stale")
    int numberOfMonthsForASubjectToBeStale = 0;
    @Option(name="-mst", required=false, metaVar="[number of months]", usage="number of months before a study assignment is considered stale")
    int numberOfMonthsForAStudySubjectAssignmentToBeStale = 0;
    final java.util.Date today;
    final String todayString;
    final long todayMilliseconds;
    boolean decrypt = true;
    int numberOfSubjectsWithOldCreationDateAndNotOnAnyStudy;
    int numberOfSubjectsNotAddedToAnyStudyRecentlyAndNeverScheduled;
    int numberOfSubjectsStudiesAddedToStudyLongAgoAndNeverScheduled;
    String dupesFilePath;
    String noDupesFilePath;
    final Comparator<SubjectSummary> subjectSummaryComparator = new Comparator<SubjectSummary>(){

        @Override
        public int compare(SubjectSummary leftSubject, SubjectSummary rightSubject) {
            boolean rightMrnIsANumber;
            boolean leftMrnIsANumber;
            String leftMrn = leftSubject.normalizedMrn.toLowerCase();
            String rightMrn = rightSubject.normalizedMrn.toLowerCase();
            if (leftMrn == null) {
                if (rightMrn == null) {
                    return this.compareByLastName(leftSubject, rightSubject);
                }
                return 1;
            }
            if (rightMrn == null) {
                return -1;
            }
            int numericLeftMrn = 0;
            int numericRightMrn = 0;
            try {
                numericLeftMrn = Integer.parseInt(leftSubject.normalizedMrn);
                leftMrnIsANumber = true;
            }
            catch (NumberFormatException e) {
                leftMrnIsANumber = false;
            }
            try {
                numericRightMrn = Integer.parseInt(rightSubject.normalizedMrn);
                rightMrnIsANumber = true;
            }
            catch (NumberFormatException e) {
                rightMrnIsANumber = false;
            }
            if (!leftMrnIsANumber) {
                if (!rightMrnIsANumber) {
                    return this.compareByLastName(leftSubject, rightSubject);
                }
                return 1;
            }
            if (!rightMrnIsANumber) {
                return -1;
            }
            return numericLeftMrn == numericRightMrn ? this.compareByLastName(leftSubject, rightSubject) : (numericLeftMrn < numericRightMrn ? -1 : 1);
        }

        private int compareByLastName(SubjectSummary leftSubject, SubjectSummary rightSubject) {
            int comparison = leftSubject.lastName.toLowerCase().compareTo(rightSubject.lastName.toLowerCase());
            if (comparison == 0) {
                return this.compareByFirstName(leftSubject, rightSubject);
            }
            return comparison;
        }

        private int compareByFirstName(SubjectSummary leftSubject, SubjectSummary rightSubject) {
            return leftSubject.firstName.toLowerCase().compareTo(rightSubject.firstName.toLowerCase());
        }
    };
    static final Logger log = LogManager.getLogger(SubjectDataCleaner.class);

    public static void main(String[] args) {
        try {
            java.util.Date startDate = new java.util.Date();
            ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext("spring-subject-data-cleaner.xml");
            SubjectDataCleaner subjectDataCleaner = (SubjectDataCleaner)context.getBean("subjectDataCleaner");
            subjectDataCleaner.setUp(args);
            subjectDataCleaner.createSubjectCurationLists();
            java.util.Date endDate = new java.util.Date();
            long elapsed = endDate.getTime() - startDate.getTime();
            SubjectDataCleaner.outputToLogAndStdout("Executed in " + elapsed / 1000L + " seconds");
        }
        catch (Exception e) {
            SubjectDataCleaner.outputToLogAndError("The following exception/error was thrown\n", e);
        }
    }

    @Autowired
    SubjectDataCleaner(DataSource dataSource, @Qualifier(value="encryptionKeyForSubjectDataCleanUp") Key key) {
        this.dataSource = dataSource;
        SubjectDataEncryptor.setEncryptionKey((Key)key);
        this.today = new java.util.Date();
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd");
        this.todayString = dateFormat.format(this.today);
        this.todayMilliseconds = this.today.getTime();
    }

    private void setUp(String[] args) throws IOException {
        this.parseCommandLineAndExitIfNeeded(args);
        SubjectDataCleaner.outputToLogAndStdout("Runnind Subject Data Cleaner on " + this.todayString + " with the following input parameters:");
        SubjectDataCleaner.outputToLogAndStdout("decrypt: " + this.decrypt);
        SubjectDataCleaner.outputToLogAndStdout("numberOfMonthsForASubjectToBeStale: " + this.numberOfMonthsForASubjectToBeStale);
        SubjectDataCleaner.outputToLogAndStdout("numberOfMonthsForAStudySubjectAssignmentToBeStale: " + this.numberOfMonthsForAStudySubjectAssignmentToBeStale);
        SubjectDataCleaner.outputToLogAndStdout("base output directory (will be cleared): " + this.outputDirectory);
        SubjectDataCleaner.outputToLogAndStdout("output file for duplicates: " + this.dupesFilePath);
        SubjectDataCleaner.outputToLogAndStdout("output file for non-dupicates: " + this.noDupesFilePath);
        File outputDir = new File(this.outputDirectory);
        outputDir.mkdirs();
        for (File file : outputDir.listFiles()) {
            FileDeleteStrategy.FORCE.delete(file);
        }
    }

    static void outputToLogAndStdout(String message) {
        log.info(message);
        System.out.println(message);
    }

    static void outputToLogAndError(String message) {
        log.error(message);
        System.err.println(message);
    }

    static void outputToLogAndError(String message, Throwable t) {
        log.error(message, t);
        System.err.println(message);
        t.printStackTrace(System.err);
    }

    void parseCommandLineAndExitIfNeeded(String[] args) {
        CmdLineParser parser = new CmdLineParser((Object)this);
        try {
            parser.parseArgument(args);
            if (this.help) {
                this.printUsageAndExampleCommandLine(parser);
                System.exit(0);
            }
        }
        catch (CmdLineException e) {
            SubjectDataCleaner.outputToLogAndError(e.getMessage());
            this.printUsageAndExampleCommandLine(parser);
            System.exit(-1);
        }
        this.decrypt = !this.noDecrypt;
        this.dupesFilePath = this.outputDirectory + "/" + this.dupesFilename;
        this.noDupesFilePath = this.outputDirectory + "/" + this.noDupesFilename;
    }

    void printUsageAndExampleCommandLine(CmdLineParser parser) {
        System.err.println("Usage:");
        System.err.println("  java " + this.getClass().getName() + " [options...]");
        System.err.println("where the options are:");
        parser.printUsage((OutputStream)System.err);
        System.err.println("  Example: java " + this.getClass().getName() + " " + parser.printExample(ExampleMode.ALL));
    }

    void createSubjectCurationLists() throws SQLException, IOException {
        java.util.Date startDate = new java.util.Date();
        try (Connection dbConnection = this.dataSource.getConnection();){
            PreparedStatement queryAllSubjectsPreparedStatement = dbConnection.prepareStatement(QUERY_ALL_SUBJECTS);
            PreparedStatement querySubjectsWithOldCreationDateAndNotOnAnyStudyPreparedStatement = dbConnection.prepareStatement(QUERY_SUBJECTS_WITH_OLD_CREATION_DATE_AND_NOT_ON_ANY_STUDY);
            PreparedStatement querySubjectsNotAddedToAnyStudyRecentlyAndNeverScheduledPreparedStatement = dbConnection.prepareStatement(QUERY_SUBJECTS_NOT_ADDED_TO_ANY_STUDY_RECENTLY_AND_NEVER_SCHEDULED);
            PreparedStatement queryStudiesAssignedToSubjectLongAgoAndNeverScheduledPreparedStatement = dbConnection.prepareStatement(QUERY_STUDIES_ASSIGNED_TO_SUBJECT_LONG_AGO_AND_NEVER_SCHEDULED);
            PreparedStatement queryAllStudiesForASubjectPreparedStatement = dbConnection.prepareStatement(QUERY_ALL_STUDIES_FOR_A_SUBJECT);
            ArrayList<Cluster> matchClusters = new ArrayList<Cluster>();
            Map<Integer, SubjectSummary> allSubjectsMap = this.getAllSubjects(queryAllSubjectsPreparedStatement);
            ArrayList<SubjectSummary> subjectsList = new ArrayList<SubjectSummary>(allSubjectsMap.values());
            Map<Integer, SubjectSummary> subjectsWithOldCreationDateAndNotOnAnyStudy = this.getSubjectsWithOldCreationDateAndNotOnAnyStudy(querySubjectsWithOldCreationDateAndNotOnAnyStudyPreparedStatement, allSubjectsMap);
            Map<Integer, SubjectSummary> subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled = this.getSubjectsNotAddedToAnyStudyRecentlyAndNeverScheduled(querySubjectsNotAddedToAnyStudyRecentlyAndNeverScheduledPreparedStatement, allSubjectsMap);
            Map<Integer, SubjectSummary> subjectsStudiesAddedToStudyLongAgoAndNeverScheduled = this.getStudiesAssignedToSubjectLongAgoAndNeverScheduled(queryStudiesAssignedToSubjectLongAgoAndNeverScheduledPreparedStatement, allSubjectsMap);
            this.numberOfSubjectsWithOldCreationDateAndNotOnAnyStudy = subjectsWithOldCreationDateAndNotOnAnyStudy.size();
            this.numberOfSubjectsNotAddedToAnyStudyRecentlyAndNeverScheduled = subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled.size();
            this.numberOfSubjectsStudiesAddedToStudyLongAgoAndNeverScheduled = subjectsStudiesAddedToStudyLongAgoAndNeverScheduled.size();
            int clusterIndex = 0;
            while (!subjectsList.isEmpty()) {
                SubjectSummary subject = (SubjectSummary)subjectsList.remove(0);
                List<SubjectSummary> matchingSubjects = this.findMatches(subjectsList, subject);
                if (matchingSubjects.size() > 0) {
                    Cluster cluster = new Cluster(clusterIndex++);
                    cluster.subjects = matchingSubjects;
                    cluster.subjects.add(0, subject);
                    matchClusters.add(cluster);
                    cluster.subjects.stream().forEach(matchedSubject -> this.mergeStaleSubjects((SubjectSummary)matchedSubject, subjectsWithOldCreationDateAndNotOnAnyStudy, subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled, subjectsStudiesAddedToStudyLongAgoAndNeverScheduled));
                }
                SubjectDataCleaner.outputToLogAndStdout("Processed subject id " + subject.subjectId);
            }
            TreeSet<SubjectSummary> nonClusteredSubjectsNeedingCuration = new TreeSet<SubjectSummary>(this.subjectSummaryComparator);
            subjectsWithOldCreationDateAndNotOnAnyStudy.entrySet().stream().forEach(entry -> {
                SubjectSummary subject = (SubjectSummary)entry.getValue();
                this.mergeStaleSubjects(subject, null, subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled, subjectsStudiesAddedToStudyLongAgoAndNeverScheduled);
                nonClusteredSubjectsNeedingCuration.add(subject);
            });
            subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled.entrySet().stream().forEach(entry -> {
                SubjectSummary subject = (SubjectSummary)entry.getValue();
                this.mergeStaleSubjects(subject, null, null, subjectsStudiesAddedToStudyLongAgoAndNeverScheduled);
                nonClusteredSubjectsNeedingCuration.add(subject);
            });
            subjectsStudiesAddedToStudyLongAgoAndNeverScheduled.entrySet().stream().forEach(entry -> {
                SubjectSummary subject = (SubjectSummary)entry.getValue();
                nonClusteredSubjectsNeedingCuration.add(subject);
            });
            java.util.Date endDate = new java.util.Date();
            long elapsed = endDate.getTime() - startDate.getTime();
            SubjectDataCleaner.outputToLogAndStdout("Processed all subjects in " + elapsed / 1000L + " seconds");
            for (Cluster cluster : matchClusters) {
                for (SubjectSummary subject : cluster.subjects) {
                    subject.studies = this.getStudiesForSubject(queryAllStudiesForASubjectPreparedStatement, subject.subjectId);
                }
            }
            for (SubjectSummary subject : nonClusteredSubjectsNeedingCuration) {
                subject.studies = this.getStudiesForSubject(queryAllStudiesForASubjectPreparedStatement, subject.subjectId);
            }
            this.generateCsvForDupes(matchClusters);
            this.generateCsvForNonDupes(nonClusteredSubjectsNeedingCuration);
        }
    }

    boolean mergeStaleSubjects(SubjectSummary subject, Map<Integer, SubjectSummary> subjectsWithOldCreationDateAndNotOnAnyStudy, Map<Integer, SubjectSummary> subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled, Map<Integer, SubjectSummary> subjectsStudiesAddedToStudyLongAgoAndNeverScheduled) {
        SubjectSummary staleSubject;
        boolean mergeOccurred = false;
        if (subjectsWithOldCreationDateAndNotOnAnyStudy != null && (staleSubject = subjectsWithOldCreationDateAndNotOnAnyStudy.get(subject.subjectId)) != null) {
            subjectsWithOldCreationDateAndNotOnAnyStudy.remove(staleSubject.subjectId);
            subject.oldCreationDateAndNotOnAnyStudy = true;
            mergeOccurred = true;
        }
        if (subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled != null && (staleSubject = subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled.get(subject.subjectId)) != null) {
            subjectsNotAddedToAnyStudyRecentlyAndNeverScheduled.remove(staleSubject.subjectId);
            subject.notAddedToAnyStudyRecentlyAndNeverScheduled = true;
            mergeOccurred = true;
        }
        if (subjectsStudiesAddedToStudyLongAgoAndNeverScheduled != null && (staleSubject = subjectsStudiesAddedToStudyLongAgoAndNeverScheduled.get(subject.subjectId)) != null) {
            subjectsStudiesAddedToStudyLongAgoAndNeverScheduled.remove(staleSubject.subjectId);
            subject.hasStaleStudies = true;
            mergeOccurred = true;
        }
        return mergeOccurred;
    }

    Map<Integer, SubjectSummary> getAllSubjects(PreparedStatement preparedStatement) throws SQLException {
        log.info("Running the following query for getAllSubjects() :\nSELECT su.id, sm.mrn, su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number, ge.code as gender FROM subject su   JOIN country c ON c.id = su.country   JOIN state as sta ON sta.id = su.state   JOIN subject_mrn as sm ON sm.subject = su.id   JOIN gender ge ON ge.id = su.gender WHERE   su.archival_status IS NULL;");
        java.util.Date startDate = new java.util.Date();
        HashMap<Integer, SubjectSummary> subjects = new HashMap<Integer, SubjectSummary>(20000);
        ResultSet resultSet = preparedStatement.executeQuery();
        while (resultSet.next()) {
            SubjectSummary subject = this.makeSubjectSummaryFromResultSet(resultSet);
            if (subjects.containsKey(subject.subjectId)) {
                throw new RuntimeException("Subject ID " + subject.subjectId + " found multiple times (probably with multiple MRNs in database, including the following MRN values: " + subject.mrn + " and " + ((SubjectSummary)subjects.get((Object)Integer.valueOf((int)subject.subjectId))).mrn + ")");
            }
            subjects.put(subject.subjectId, subject);
        }
        SubjectDataCleaner.outputToLogAndStdout("Found " + subjects.size() + " subjects in the database");
        java.util.Date endDate = new java.util.Date();
        long elapsed = endDate.getTime() - startDate.getTime();
        SubjectDataCleaner.outputToLogAndStdout("Got all subjects from database in " + elapsed / 1000L + " seconds");
        return subjects;
    }

    Map<Integer, SubjectSummary> getSubjectsWithOldCreationDateAndNotOnAnyStudy(PreparedStatement preparedStatement, Map<Integer, SubjectSummary> allSubjectsMap) throws SQLException {
        log.info("Running the following query for getSubjectsWithOldCreationDateAndNotOnAnyStudy() :\nSELECT su.id, sm.mrn ,su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number, ge.code as gender FROM subject su   JOIN subject_mrn as sm ON sm.subject = su.id   JOIN gender ge ON ge.id = su.gender   JOIN country c ON c.id = su.country   JOIN state as sta ON sta.id = su.state   LEFT JOIN study_subject ss ON ss.subject_mrn = sm.mrn WHERE su.created_date < DATE_SUB(?, INTERVAL ? MONTH)   AND ss.id IS NULL   AND su.archival_status IS NULL");
        HashMap<Integer, SubjectSummary> subjectsMap = new HashMap<Integer, SubjectSummary>(20000);
        preparedStatement.setDate(1, new Date(this.todayMilliseconds));
        preparedStatement.setInt(2, this.numberOfMonthsForASubjectToBeStale);
        ResultSet resultSet = preparedStatement.executeQuery();
        while (resultSet.next()) {
            int subjectId = resultSet.getInt("id");
            SubjectSummary subject = allSubjectsMap.get(subjectId);
            subject.oldCreationDateAndNotOnAnyStudy = true;
            subjectsMap.put(subjectId, subject);
        }
        SubjectDataCleaner.outputToLogAndStdout("Found " + subjectsMap.size() + " subjects WithOldCreationDateAndNotOnAnyStudy");
        return subjectsMap;
    }

    Map<Integer, SubjectSummary> getSubjectsNotAddedToAnyStudyRecentlyAndNeverScheduled(PreparedStatement preparedStatement, Map<Integer, SubjectSummary> allSubjectsMap) throws SQLException {
        log.info("Running the following query for getSubjectsNotAddedToAnyStudyRecentlyAndNeverScheduled() :\nSELECT su.id, sm.mrn ,su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number, ge.code as gender,   (SELECT MAX(date) FROM activity_log       WHERE affected_subject = su.id       AND action_performed = 'ADD SUBJECT TO STUDY') AS last_assigned_to_a_study FROM subject su   JOIN subject_mrn as sm ON sm.subject = su.id   JOIN gender ge ON ge.id = su.gender   JOIN country c ON c.id = su.country   JOIN state as sta ON sta.id = su.state     WHERE   (SELECT COUNT(*) FROM booked_visit bv    JOIN subject_mrn sm2 ON bv.subject_mrn = sm2.id    WHERE sm2.subject = su.id) = 0   AND   (SELECT COUNT(*) FROM activity_log   WHERE date > DATE_SUB(?, INTERVAL ? MONTH)     AND affected_subject = su.id     AND action_performed = 'ADD SUBJECT TO STUDY') = 0   AND su.archival_status IS NULL;");
        HashMap<Integer, SubjectSummary> subjectsMap = new HashMap<Integer, SubjectSummary>(20000);
        preparedStatement.setDate(1, new Date(this.todayMilliseconds));
        preparedStatement.setInt(2, this.numberOfMonthsForAStudySubjectAssignmentToBeStale);
        ResultSet resultSet = preparedStatement.executeQuery();
        while (resultSet.next()) {
            int subjectId = resultSet.getInt("id");
            SubjectSummary subject = allSubjectsMap.get(subjectId);
            subject.notAddedToAnyStudyRecentlyAndNeverScheduled = true;
            subjectsMap.put(subjectId, subject);
        }
        SubjectDataCleaner.outputToLogAndStdout("Found " + subjectsMap.size() + " subjects NotAddedToAnyStudyRecentlyAndNeverScheduled");
        return subjectsMap;
    }

    Map<Integer, SubjectSummary> getStudiesAssignedToSubjectLongAgoAndNeverScheduled(PreparedStatement preparedStatement, Map<Integer, SubjectSummary> allSubjectsMap) throws SQLException {
        log.info("Running the following query for getStudiesAssignedToSubjectLongAgoAndNeverScheduled() :\nSELECT su.id as subject_id, sm.mrn ,su.created_date, su.first_name, su.last_name, su.birthdate, su.created_date, su.street_address1, su.city, sta.name as state, su.zip, c.name as country, su.primary_contact_number ,ge.code as gender, ss.id AS study_subject_id, ss.study AS study_id, st.name AS study_name, al.date AS date_added_to_study   FROM subject su     JOIN subject_mrn as sm ON sm.subject = su.id     JOIN gender ge ON ge.id = su.gender     JOIN country c ON c.id = su.country     JOIN state as sta ON sta.id = su.state     LEFT JOIN study_subject ss ON ss.subject_mrn = sm.id     LEFT JOIN study st ON ss.study = st.id     LEFT JOIN activity_log al ON (al.affected_subject = su.id AND al.affected_study = ss.study)   WHERE al.action_performed = 'ADD SUBJECT TO STUDY'   AND al.date < DATE_SUB(?, INTERVAL ? MONTH)   AND (SELECT COUNT(*) FROM booked_visit bv     WHERE bv.subject_mrn = sm.id and bv.study = st.id) = 0   AND su.archival_status IS NULL;");
        HashMap<Integer, SubjectSummary> subjectsToStudiesMap = new HashMap<Integer, SubjectSummary>(20000);
        preparedStatement.setDate(1, new Date(this.todayMilliseconds));
        preparedStatement.setInt(2, this.numberOfMonthsForAStudySubjectAssignmentToBeStale);
        ResultSet resultSet = preparedStatement.executeQuery();
        while (resultSet.next()) {
            int subjectId = resultSet.getInt("subject_id");
            SubjectSummary subject = allSubjectsMap.get(subjectId);
            subjectsToStudiesMap.put(subjectId, subject);
        }
        SubjectDataCleaner.outputToLogAndStdout("Found " + subjectsToStudiesMap.size() + " subjects assigned to studies long ago and never scheduled");
        return subjectsToStudiesMap;
    }

    List<StudyAssignment> getStudiesForSubject(PreparedStatement preparedStatement, int subjectId) throws SQLException {
        log.info("Running the following query for getStudiesForSubject() :\nSELECT su.id as subject_id, ss.id AS study_subject_id, ss.study AS study_id, st.name AS study_name, st.local_id as local_id, al.date AS date_added_to_study, (SELECT COUNT(*) FROM booked_visit bv     JOIN subject_mrn sm ON bv.subject_mrn = sm.id    WHERE sm.subject = su.id) as number_of_booked_visits  FROM subject su     JOIN subject_mrn as sm ON sm.subject = su.id     JOIN gender ge ON ge.id = su.gender     LEFT JOIN study_subject ss ON ss.subject_mrn = sm.id     LEFT JOIN study st ON ss.study = st.id     LEFT JOIN activity_log al ON (al.affected_subject = su.id AND al.affected_study = ss.study)   WHERE su.id = ?     AND al.action_performed = 'ADD SUBJECT TO STUDY'     AND su.archival_status IS NULL;");
        ArrayList<StudyAssignment> studies = new ArrayList<StudyAssignment>(20);
        preparedStatement.setInt(1, subjectId);
        ResultSet resultSet = preparedStatement.executeQuery();
        while (resultSet.next()) {
            StudyAssignment study = this.makeStudyAssignmentFromResultSet(resultSet);
            studies.add(study);
        }
        return studies;
    }

    long monthsSinceDate(java.util.Date date) {
        return (long)(Math.floor(this.todayMilliseconds - date.getTime()) / MILLISECONDS_PER_MONTH);
    }

    SubjectSummary makeSubjectSummaryFromResultSet(ResultSet resultSet) throws SQLException {
        boolean mrnIsNumeric;
        SubjectSummary subject = new SubjectSummary();
        subject.subjectId = resultSet.getInt("id");
        subject.gender = resultSet.getString("gender");
        subject.dateCreated = resultSet.getDate("created_date");
        subject.monthsSinceCreation = (int)this.monthsSinceDate(subject.dateCreated);
        subject.birthdate = resultSet.getDate("birthdate");
        subject.state = resultSet.getString("state");
        subject.gender = resultSet.getString("gender");
        subject.country = resultSet.getString("country");
        if (this.decrypt) {
            subject.mrn = SubjectDataEncryptor.decrypt((String)resultSet.getString("mrn"));
            subject.firstName = SubjectDataEncryptor.decrypt((String)resultSet.getString("first_name"));
            subject.lastName = SubjectDataEncryptor.decrypt((String)resultSet.getString("last_name"));
            subject.streetAddress = SubjectDataEncryptor.decrypt((String)resultSet.getString("street_address1"));
            subject.city = SubjectDataEncryptor.decrypt((String)resultSet.getString("city"));
            subject.zipcode = SubjectDataEncryptor.decrypt((String)resultSet.getString("zip"));
            subject.phone = SubjectDataEncryptor.decrypt((String)resultSet.getString("primary_contact_number"));
        } else {
            subject.mrn = resultSet.getString("mrn");
            subject.firstName = resultSet.getString("first_name");
            subject.lastName = resultSet.getString("last_name");
            subject.streetAddress = resultSet.getString("street_address1");
            subject.city = resultSet.getString("city");
            subject.zipcode = resultSet.getString("zip");
            subject.phone = resultSet.getString("primary_contact_number");
        }
        subject.mrn = subject.mrn == null ? "" : subject.mrn;
        subject.normalizedMrn = subject.mrn.replace("-", "").replace(" ", "");
        subject.mrnNormalizationNeeded = !subject.normalizedMrn.equals(subject.mrn);
        try {
            int numericMrn = Integer.parseInt(subject.normalizedMrn);
            mrnIsNumeric = true;
        }
        catch (NumberFormatException e) {
            mrnIsNumeric = false;
        }
        subject.mrnIsInvalid = !mrnIsNumeric;
        return subject;
    }

    StudyAssignment makeStudyAssignmentFromResultSet(ResultSet resultSet) throws SQLException {
        StudyAssignment study = new StudyAssignment();
        study.studySubjectId = resultSet.getInt("subject_id");
        study.localId = resultSet.getString("local_id");
        study.studyName = resultSet.getString("study_name");
        study.dateAddedToStudy = resultSet.getDate("date_added_to_study");
        study.monthsSinceAddedToStudy = study.dateAddedToStudy != null ? (int)this.monthsSinceDate(study.dateAddedToStudy) : -1;
        study.numberOfBookedVisists = resultSet.getInt("number_of_booked_visits");
        return study;
    }

    List<SubjectSummary> findMatches(List<SubjectSummary> subjects, SubjectSummary subject) {
        ArrayList<SubjectSummary> matches = new ArrayList<SubjectSummary>(100);
        ListIterator<SubjectSummary> subjectsIterator = subjects.listIterator();
        while (subjectsIterator.hasNext()) {
            SubjectSummary matchCandidate = subjectsIterator.next();
            MatchConfidence matchConfidence = this.match(subject, matchCandidate);
            if (matchConfidence == MatchConfidence.NoMatch) continue;
            matches.add(matchCandidate);
            subjectsIterator.remove();
            matchCandidate.match = new Match(subject, matchConfidence);
            List<SubjectSummary> subMatches = this.findMatches(subjects, matchCandidate);
            matches.addAll(subMatches);
            subjectsIterator = subjects.listIterator();
        }
        return matches;
    }

    MatchConfidence match(SubjectSummary subject, SubjectSummary matchCandidate) {
        if (matchCandidate.subjectId == subject.subjectId) {
            throw new RuntimeException("Came across a matchSubject candidate with the same subjecId as the subject being compared to!");
        }
        boolean mrnsMatch = subject.normalizedMrn.equals(matchCandidate.normalizedMrn);
        boolean firstNamesMatch = subject.firstName.replace(" ", "").equals(matchCandidate.firstName.replace(" ", ""));
        boolean lastNamesMatch = subject.lastName.replace(" ", "").equals(matchCandidate.lastName.replace(" ", ""));
        boolean birthdatesMatch = subject.birthdate.equals(matchCandidate.birthdate);
        if (mrnsMatch) {
            if (firstNamesMatch && lastNamesMatch && birthdatesMatch) {
                return MatchConfidence.High;
            }
            if (lastNamesMatch && birthdatesMatch) {
                return MatchConfidence.PartialHigh;
            }
            if (firstNamesMatch && birthdatesMatch) {
                return MatchConfidence.PartialHigh;
            }
            if (this.skipFullnameMatches) {
                return MatchConfidence.NoMatch;
            }
            if (firstNamesMatch && lastNamesMatch) {
                return MatchConfidence.PartialHigh;
            }
            return MatchConfidence.PartialLow;
        }
        if (this.matchOnLastnameAndBirthdate && lastNamesMatch && birthdatesMatch) {
            return MatchConfidence.PartialLow;
        }
        if (this.skipFullnameMatches) {
            return MatchConfidence.NoMatch;
        }
        if (firstNamesMatch && lastNamesMatch) {
            return MatchConfidence.PartialLow;
        }
        return MatchConfidence.NoMatch;
    }

    void generateCsvForDupes(List<Cluster> clusters) throws IOException {
        try (FileWriter csvFileWriter = new FileWriter(this.dupesFilePath);
             CSVWriter csvWriter = new CSVWriter((Writer)csvFileWriter, ',');){
            this.generateCsvHeader(csvWriter, "SUBJECT DATA CURATION LIST");
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvDoublet(csvWriter, "Date:", this.todayString);
            this.generateCsvHeader(csvWriter, "Program parameters");
            this.generateCsvDoublet(csvWriter, "decrypt:", String.valueOf(this.decrypt));
            this.generateCsvDoublet(csvWriter, "output file name:", this.dupesFilePath);
            this.generateCsvDoublet(csvWriter, "number of months for subjects to be considered stale:", String.valueOf(this.numberOfMonthsForASubjectToBeStale));
            this.generateCsvDoublet(csvWriter, "number of months for an assignment of a subject to a study to be considered stale:", String.valueOf(this.numberOfMonthsForAStudySubjectAssignmentToBeStale));
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvDoublet(csvWriter, "Number Of Subjects With Old Creation Date And Not On Any Study:", String.valueOf(this.numberOfSubjectsWithOldCreationDateAndNotOnAnyStudy));
            this.generateCsvDoublet(csvWriter, "Number Of Subjects Not Added To Any Study Recently And Never Scheduled:", String.valueOf(this.numberOfSubjectsNotAddedToAnyStudyRecentlyAndNeverScheduled));
            this.generateCsvDoublet(csvWriter, "Number Of Studies Assigned to Subject Long Ago And Never Scheduled:", String.valueOf(this.numberOfSubjectsStudiesAddedToStudyLongAgoAndNeverScheduled));
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvHeader(csvWriter, "SUBJECTS WITH MATCHES, NEEDING CURATION");
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvHeadings(csvWriter);
            this.generateCsvBlankLine(csvWriter);
            clusters.stream().forEach(cluster -> {
                this.generateCsvBlankLine(csvWriter);
                cluster.subjects.stream().forEach(subject -> this.generateSubjectAndItsStudies(csvWriter, (SubjectSummary)subject));
            });
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvBlankLine(csvWriter);
        }
    }

    void generateCsvForNonDupes(SortedSet<SubjectSummary> nonClusteredSubjectsNeedingCuration) throws IOException {
        try (FileWriter csvFileWriter = new FileWriter(this.noDupesFilePath);
             CSVWriter csvWriter = new CSVWriter((Writer)csvFileWriter, ',');){
            this.generateCsvHeader(csvWriter, "SUBJECT DATA CURATION LIST");
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvDoublet(csvWriter, "Date:", this.todayString);
            this.generateCsvHeader(csvWriter, "Program parameters");
            this.generateCsvDoublet(csvWriter, "decrypt:", String.valueOf(this.decrypt));
            this.generateCsvDoublet(csvWriter, "output file name:", this.dupesFilePath);
            this.generateCsvDoublet(csvWriter, "number of months for subjects to be considered stale:", String.valueOf(this.numberOfMonthsForASubjectToBeStale));
            this.generateCsvDoublet(csvWriter, "number of months for an assignment of a subject to a study to be considered stale:", String.valueOf(this.numberOfMonthsForAStudySubjectAssignmentToBeStale));
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvDoublet(csvWriter, "Number Of Subjects With Old Creation Date And Not On Any Study:", String.valueOf(this.numberOfSubjectsWithOldCreationDateAndNotOnAnyStudy));
            this.generateCsvDoublet(csvWriter, "Number Of Subjects Not Added To Any Study Recently And Never Scheduled:", String.valueOf(this.numberOfSubjectsNotAddedToAnyStudyRecentlyAndNeverScheduled));
            this.generateCsvDoublet(csvWriter, "Number Of Studies Assigned to Subject Long Ago And Never Scheduled:", String.valueOf(this.numberOfSubjectsStudiesAddedToStudyLongAgoAndNeverScheduled));
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvHeader(csvWriter, "SUBJECTS WITH NO MATCHES, BUT IN NEED OF CURATION");
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvBlankLine(csvWriter);
            this.generateCsvHeadings(csvWriter);
            this.generateCsvBlankLine(csvWriter);
            nonClusteredSubjectsNeedingCuration.stream().forEach(subject -> this.generateSubjectAndItsStudies(csvWriter, (SubjectSummary)subject));
            csvWriter.flush();
            csvFileWriter.flush();
        }
    }

    void generateSubjectAndItsStudies(CSVWriter csvWriter, SubjectSummary subject) {
        Object[] studies = subject.studies.toArray();
        StudyAssignment firstStudy = studies.length > 0 ? (StudyAssignment)studies[0] : null;
        this.generateCsvSubject(csvWriter, subject, true, firstStudy);
        for (int i = 1; i < studies.length; ++i) {
            this.generateCsvSubject(csvWriter, subject, false, (StudyAssignment)studies[i]);
        }
    }

    void generateCsvBlankLine(CSVWriter csvWriter) {
        String[] blankLine = new String[]{""};
        csvWriter.writeNext(blankLine);
    }

    void generateCsvHeader(CSVWriter csvWriter, String header) {
        String[] entry = new String[]{header};
        csvWriter.writeNext(entry);
    }

    void generateCsvDoublet(CSVWriter csvWriter, String name, String value) {
        String[] entry = new String[]{name, value};
        csvWriter.writeNext(entry);
    }

    void generateCsvHeadings(CSVWriter csvWriter) {
        String[] entries = new String[21];
        int index = 0;
        entries[index++] = "Scheduler ID";
        entries[index++] = "MRN";
        entries[index++] = "Suggested MRN";
        entries[index++] = "Time Since Creation";
        entries[index++] = "Study Name";
        entries[index++] = "Local ID";
        entries[index++] = "Time Since Added to Study";
        entries[index++] = "Scheduled? (Y/N)";
        entries[index++] = "Last Name";
        entries[index++] = "First Name";
        entries[index++] = "DOB";
        entries[index++] = "Gender";
        entries[index++] = "Street Addr 1";
        entries[index++] = "City";
        entries[index++] = "State";
        entries[index++] = "Zip Code";
        entries[index++] = "Country";
        entries[index++] = "Primary Phone";
        entries[index++] = "Expected MRN";
        entries[index++] = "Delete Subject?";
        entries[index++] = "Remove Subject from Study?";
        csvWriter.writeNext(entries);
    }

    void generateCsvSubject(CSVWriter csvWriter, SubjectSummary subject, boolean isFirst, StudyAssignment study) {
        String[] entries = new String[18];
        int index = 0;
        entries[index++] = String.valueOf(subject.subjectId);
        entries[index++] = isFirst ? (subject.mrn == null ? "" : "=\"" + subject.mrn + "\"") : "";
        entries[index++] = isFirst ? (subject.mrn == null || subject.mrn.trim().isEmpty() ? "MRN missing. Please update" : (subject.mrnIsInvalid ? "MRN not valid. Please update" : "=\"" + subject.normalizedMrn + "\"")) : "";
        entries[index++] = isFirst ? String.valueOf(subject.monthsSinceCreation) + this.pluralizeIfNeeded(subject.monthsSinceCreation, " month") : "";
        entries[index++] = study != null ? study.studyName : "";
        String string = entries[index++] = study != null ? String.valueOf(study.localId) : "";
        entries[index++] = study != null ? (study.monthsSinceAddedToStudy >= 0 ? String.valueOf(study.monthsSinceAddedToStudy) + this.pluralizeIfNeeded(study.monthsSinceAddedToStudy, " month") : "") : "";
        entries[index++] = study != null ? (study.numberOfBookedVisists > 0 ? "Y" : "N") : "N";
        entries[index++] = subject.lastName;
        entries[index++] = subject.firstName;
        entries[index++] = subject.birthdate.toString();
        entries[index++] = subject.gender;
        entries[index++] = subject.streetAddress;
        entries[index++] = subject.city;
        entries[index++] = subject.state;
        entries[index++] = subject.zipcode;
        entries[index++] = subject.country;
        entries[index++] = subject.phone;
        csvWriter.writeNext(entries);
    }

    String pluralizeIfNeeded(int count, String string) {
        return count > 1 ? string + "s" : string;
    }

    class Cluster {
        int id;
        List<SubjectSummary> subjects;

        Cluster(int id) {
            this.id = id;
        }
    }

    class StudyAssignment {
        int studySubjectId;
        String localId;
        String studyName;
        java.util.Date dateAddedToStudy;
        int monthsSinceAddedToStudy = 9999;
        int numberOfBookedVisists;

        StudyAssignment() {
        }
    }

    class Match {
        SubjectSummary matchedSubject;
        MatchConfidence matchConfidence;

        Match(SubjectSummary matchedSubject, MatchConfidence matchConfidence) {
            this.matchedSubject = matchedSubject;
            this.matchConfidence = matchConfidence;
        }
    }

    static enum MatchConfidence {
        High,
        PartialHigh,
        PartialLow,
        NoMatch;

    }

    class SubjectSummary {
        int subjectId;
        String mrn;
        String normalizedMrn;
        boolean mrnIsInvalid;
        String lastName;
        String firstName;
        String gender;
        String streetAddress;
        String city;
        String state;
        String zipcode;
        String country;
        String phone;
        java.util.Date birthdate;
        java.util.Date dateCreated;
        boolean mrnNormalizationNeeded;
        Match match;
        int monthsSinceCreation;
        boolean oldCreationDateAndNotOnAnyStudy;
        boolean notAddedToAnyStudyRecentlyAndNeverScheduled;
        boolean hasStaleStudies;
        List<StudyAssignment> studies = new ArrayList<StudyAssignment>(20);

        SubjectSummary() {
        }
    }
}

