#!/bin/bash
#
# Copyright (c) 2015, President and Fellows of Harvard College
# Portions Copyright (c) 2015, VectorC, LLC
# Portions Copyright (c) 2015, Wonder Lake Software, LLC
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#


## This script will load organisms into a repository's published graph.
##
## Use of this script makes the following assumptions about the organism URI:
## - They are most likely human readable
## - Use of random generated URI is not desirable
## - They are already in a resolvable format, e.g. http://global.ei.net/i/something
##
## The organisms will be read from a file that:
## - is in N-Triples form, with each triple on a single line
##   (the script takes the first token of a line as the subject URI)
## -- e.g. (wrapped here only to fit the page)
##			<http://global.dev.eagle-i.net/i/Aconitum>
##			<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
##			<http://purl.obolibrary.org/obo/NCBITaxon_49188> .
## - keeps the N-Triples for each organism grouped together
## -- i.e. N-Triples for different organisms are not interleaved

# Fail on subprocess failures
set -o errexit

# Read the four required positional arguments:
#   $1 - repository username
#   $2 - repository password
#   $3 - repository URL root, e.g. https://repo.mysite.edu:8443
#   $4 - path of the N-Triples file to load
# NOTE(review): the password is taken from the command line and later handed
# to curl via -u, so it is visible in the process list while the script runs.
if [ "$#" -lt 4 ]; then
    echo "Usage: $0  username  password  repo-URL-root  filename, e.g. $0  bigbird passwird https://repo.mysite.edu:8443 loadme.ntriples" >&2
    exit 1
fi

username=$1
creds=$1:$2
remoteHost=$3
fileToLoad=$4

# Stop early on a bad path. The input is only ever read at the head of a
# pipeline, whose exit status is that of its last command, so a missing file
# would otherwise produce an empty run that still exits 0.
if [ ! -f "${fileToLoad}" ] || [ ! -r "${fileToLoad}" ]; then
    echo "$0: cannot read input file: ${fileToLoad}" >&2
    exit 1
fi


# Scratch files for this run:
#   subjectsFile - one subject token per line
#   inputFile    - the cleaned copy of the input triples
subjectsFile=
inputFile=

# Delete whichever scratch files have been created so far.
remove_temp_files() {
    [ -z "${subjectsFile}" ] || rm -f -- "${subjectsFile}"
    [ -z "${inputFile}" ] || rm -f -- "${inputFile}"
}

# Registered before the files exist so that they are removed on every exit
# path, including an abort triggered by errexit part-way through a load.
trap remove_temp_files EXIT

subjectsFile=$(mktemp /tmp/load_global.XXXXXXXXX)
inputFile=$(mktemp /tmp/load_global.XXXXXXXXX)

# Make sure a logs directory exists in the current working directory.
mkdir -p logs

# Print a banner to stdout describing the local Java installation and the
# values this run will use.
# Globals read: username, remoteHost, fileToLoad
print_run_settings() {
    echo "Java info: "
    # Informational only: nothing in this script runs java, so a missing or
    # broken installation is reported here instead of aborting under errexit.
    if command -v java; then
        java -version 2>&1 || echo "(java -version failed)"
    else
        echo "(java not found on PATH)"
    fi
    echo ""
    echo -e "Using values:"
    # printf '%s' prints the values verbatim; 'echo -e' would also expand
    # backslash sequences that happen to occur inside them.
    printf '\tRepository username:\t\t\t \t %s\n' "${username}"
    # The repository root is held in remoteHost (no repoUrl variable is ever
    # assigned in this script).
    printf '\tRepository location:          \t %s\n' "${remoteHost}"
    printf '\tLoad organisms from file:     \t %s\n' "${fileToLoad}"
}

print_run_settings


# Copy N-Triples from stdin to stdout with line endings normalised and
# leading blanks removed.
# Every CR becomes an LF, so LF, CRLF and bare-CR input all end up as one
# triple per LF-terminated line; the empty lines this creates are dropped.
clean_ntriples() {
    tr '\r' '\n' | awk '{ sub(/^[ \t]+/, ""); if ($0 != "") print }'
}

# Read cleaned N-Triples on stdin and print each distinct subject token (the
# first field of a line) once, in order of first appearance.
# Blank lines and lines starting with '#' (N-Triples comments) are skipped.
list_subjects() {
    awk 'NF && $1 !~ /^#/ && !seen[$1]++ { print $1 }'
}

# Clean up the input file: normalise line endings, remove leading whitespace
clean_ntriples < "${fileToLoad}" > "${inputFile}"

# Create a list of the subject URIs contained in the cleaned file
list_subjects < "${inputFile}" > "${subjectsFile}"


# Print the URI held in an N-Triples subject token, i.e. the token without
# its enclosing angle brackets.
#   $1 - subject token, e.g. <http://example.org/i/Aconitum>
subject_uri_of() {
    local token=$1
    token=${token#<}
    printf '%s\n' "${token%>}"
}

# Print every line of a file whose first field is exactly the given subject
# token.
# The comparison is a literal string match on the first field only, so regex
# characters in the URI (such as '.') are not interpreted, and triples that
# merely mention the URI as their object are not picked up.
#   $1 - subject token, including the angle brackets
#   $2 - path of the file to search
triples_for_subject() {
    # Passed through the environment: 'awk -v' would expand backslash escapes.
    subject_token="$1" awk '($1 "") == ENVIRON["subject_token"]' "$2"
}

# For every subject listed in subjectsFile: create the resource from its
# triples, then push it through the two workflow transitions below.
# curl runs without --fail, so an HTTP error response is printed but does not
# stop the run; a transport failure makes curl exit non-zero and aborts the
# script under errexit.
while IFS= read -r line || [ -n "${line}" ]
do
    [ -n "${line}" ] || continue

    # Extract the subject uri, without brackets
    subjectUri=$(subject_uri_of "${line}")

    echo "Loading ${subjectUri}..."
    # Get the matching triples for this subject from the inputFile
    triples=$(triples_for_subject "${line}" "${inputFile}")
    if [ -z "${triples}" ]; then
        echo "No triples found for ${subjectUri}, skipping" >&2
        continue
    fi

    # Do the curl, NOT verbose.
    # The triples are written quoted, one per line, so that whitespace inside
    # literals is preserved and nothing is subject to glob expansion.
    # --form-string sends the uri literally; with -F a ';' inside the value
    # would be parsed by curl as the start of a field option.
    printf '%s\n' "${triples}" | curl -s -u "${creds}" -F "action=create" -F "insert=@-;type=text/rdf+n3" -F "workspace=http://eagle-i.org/ont/repo/1.0/NG_Published" --form-string "uri=${subjectUri}" "${remoteHost}/repository/update"

    # Move resource to curation
    echo "... moving resource to curation ..."
    curl -s -u "${creds}" --form-string "uri=${subjectUri}" -F "transition=http://eagle-i.org/ont/repo/1.0/WFT_1" "${remoteHost}/repository/workflow/push"

    # Now reclaim resource and move to published
    echo "... moving resource to PUBLISH ..."
    curl -s -u "${creds}" --form-string "uri=${subjectUri}" "${remoteHost}/repository/workflow/claim"
    curl -s -u "${creds}" --form-string "uri=${subjectUri}" -F "transition=http://eagle-i.org/ont/repo/1.0/WFT_2" "${remoteHost}/repository/workflow/push"

    # TODO: check the status of each repository response
done < "${subjectsFile}"

# Remove the temp files created for this run.
# Quoted, and preceded by '--', so each path is passed to rm as one operand
# and is never read as an option.
rm -f -- "${inputFile}" "${subjectsFile}"
