#!/bin/bash

# Writes the usage message of this script to stderr and then terminates the
# script. The exit status is always 1, including when help was requested
# explicitly. Takes no arguments and writes nothing to stdout.
print_help_and_exit()
{
	{
		# Quoted delimiter: the text below is emitted literally, without expansion.
		cat << 'EOF'

'voronota-js-receptor-data-graph' script describes a receptor protein structure as an annotated graph.

Options:
    --input                   string  *  path to input protein file
    --probe-min               number     scanning probe radius minimum, default is 2.0
    --probe-max               number     scanning probe radius maximum, default is 30.0
    --buriedness-core         number     buriedness minimum for pocket start, default is 0.7
    --buriedness-rim          number     buriedness maximum for pocket end, default is 0.4
    --subpockets              number     number of sorted subpockets to include, default is 999999
    --output-dir              string  *  output directory path
    --output-naming           string     output files naming mode, default is 'BASENAME/name', other possibilities are 'BASENAME_name' and 'BASENAME/BASENAME_name'
    --help | -h                          flag to display help message and exit
    
Standard output:
    Information messages in stdout, error messages in stderr
    
Examples:
    voronota-js-receptor-data-graph --input "./2zsk.pdb" --output-dir "./output"
    
    voronota-js-receptor-data-graph --input "./2zsk.pdb" --output-dir "./output" --probe-min 1 --probe-max 5 --buriedness-core 0.8 --buriedness-rim 0.7 --subpockets 1
EOF
	} >&2

	exit 1
}

# Remember how the script was invoked; the value is used below to locate
# the directory that contains this script.
readonly ZEROARG="$0"

# Without a (non-empty) first argument there is nothing to do: show the
# usage message, which also terminates the script.
if [ -z "$1" ]
then
	print_help_and_exit
fi

# The argument is quoted so that an invocation path containing whitespace
# or glob characters reaches dirname as a single word.
SELFLOCATION="$(dirname "${ZEROARG}")"

# When the script was started through a path (and not found via PATH),
# prepend the absolute path of its directory to PATH, so that executables
# located next to this script can be found by name.
if [[ $ZEROARG == *"/"* ]]
then
	# The directory is resolved inside a command substitution (a subshell),
	# so the working directory of the script itself never changes. Output of
	# 'cd' is discarded because 'cd' may print the target directory (e.g. when
	# CDPATH is set). If the directory cannot be entered, PATH is left as is.
	if RESOLVED_SELFLOCATION="$(cd "$SELFLOCATION" &> /dev/null && pwd)"
	then
		SELFLOCATION="$RESOLVED_SELFLOCATION"
		export PATH="${SELFLOCATION}:${PATH}"
	fi
fi

# Use the C locale for everything that runs from here on.
export LC_ALL=C

# The whole computation is delegated to 'voronota-js'; stop early if it is
# not available.
command -v voronota-js &> /dev/null || { echo >&2 "Error: 'voronota-js' executable not in binaries path"; exit 1; }

# Default values of all settings that can be changed from the command line.
INFILE=""
PROBE_MIN="2.0"
PROBE_MAX="30.0"
BURIEDNESS_CORE="0.7"
BURIEDNESS_RIM="0.4"
SUBPOCKETS="999999"
TANGENT_LEEWAY="0.5"
TANGENT_RADIUS_MIN="1.4"
TETRAHEDRON_EDGE_MAX="999999"
OUTPUT_DIR=""
OUTPUT_NAMING="BASENAME/name"
HELP_MODE="false"

# Parses the given command line arguments into the settings variables above.
#
# Arguments: the command line arguments of the script ("$@").
# Globals written: INFILE, PROBE_MIN, PROBE_MAX, BURIEDNESS_CORE,
#   BURIEDNESS_RIM, SUBPOCKETS, TANGENT_LEEWAY, TANGENT_RADIUS_MIN,
#   TETRAHEDRON_EDGE_MAX, OUTPUT_DIR, OUTPUT_NAMING, HELP_MODE.
# Errors: an unknown option, or a value-taking option that is the last
#   argument and therefore has no value, is reported on stderr and
#   terminates the script with status 1.
#
# Every option except -h/--help consumes exactly one following argument as
# its value; that value is taken as is, even if it is empty or starts with
# dashes.
parse_command_line_options()
{
	local option=""
	local target=""

	# Arithmetic comparison: '>' inside [[ ]] would compare strings.
	while (( $# > 0 ))
	do
		option="$1"
		shift

		# Map the option to the name of the variable that stores its value.
		case "$option" in
		--input)
			target="INFILE"
			;;
		--probe-min)
			target="PROBE_MIN"
			;;
		--probe-max)
			target="PROBE_MAX"
			;;
		--buriedness-core)
			target="BURIEDNESS_CORE"
			;;
		--buriedness-rim)
			target="BURIEDNESS_RIM"
			;;
		--subpockets)
			target="SUBPOCKETS"
			;;
		--tangent-leeway)
			target="TANGENT_LEEWAY"
			;;
		--tangent-radius-min)
			target="TANGENT_RADIUS_MIN"
			;;
		--tetrahedron-edge-max)
			target="TETRAHEDRON_EDGE_MAX"
			;;
		--output-dir)
			target="OUTPUT_DIR"
			;;
		--output-naming)
			target="OUTPUT_NAMING"
			;;
		-h|--help)
			# A flag without a value.
			HELP_MODE="true"
			continue
			;;
		*)
			echo >&2 "Error: invalid command line option '$option'"
			exit 1
			;;
		esac

		# Without this check a trailing option would silently get an empty
		# value, which for the numeric settings leads to a malformed script
		# being generated later.
		if (( $# == 0 ))
		then
			echo >&2 "Error: missing value for command line option '$option'"
			exit 1
		fi

		printf -v "$target" '%s' "$1"
		shift
	done
}

parse_command_line_options "$@"

# Help requested explicitly: print the usage message and terminate.
if [ "$HELP_MODE" == "true" ]
then
	print_help_and_exit
fi

# Both the input file and the output directory are mandatory.
if [ -z "$INFILE" ]
then
	echo >&2 "Error: no input file provided"
	exit 1
fi

if [ -z "$OUTPUT_DIR" ]
then
	echo >&2 "Error: no output directory provided"
	exit 1
fi

# Only the three naming modes listed in the usage message are accepted.
case "$OUTPUT_NAMING" in
BASENAME/name|BASENAME_name|BASENAME/BASENAME_name)
	;;
*)
	echo >&2 "Error: invalid output naming mode"
	exit 1
	;;
esac

# '-s' requires the file to exist and to be non-empty, so an empty input
# file is rejected here as well.
if [ ! -s "$INFILE" ]
then
	echo >&2 "Error: input structure file '$INFILE' does not exist"
	exit 1
fi

# Name of the input file without directories and without a '.pdb' suffix.
# The path is quoted so that it is not split on whitespace.
BASENAME="$(basename "${INFILE}" .pdb)"

# Prefix that is put in front of every output file name; depending on the
# naming mode it ends with a directory separator or with 'BASENAME_'.
case "$OUTPUT_NAMING" in
BASENAME_name)
	OUTPREFIX="${OUTPUT_DIR}/${BASENAME}_"
	;;
BASENAME/BASENAME_name)
	OUTPREFIX="${OUTPUT_DIR}/${BASENAME}/${BASENAME}_"
	;;
*)
	# 'BASENAME/name', the default mode.
	OUTPREFIX="${OUTPUT_DIR}/${BASENAME}/"
	;;
esac

# Used in the failure message printed after the computation.
DATA_DESCRIPTION="input '$INFILE'"

# Create the directory that will hold the output files. 'name' stands in
# for an output file name so that dirname yields the right directory for
# both kinds of prefix.
OUTPUT_FILES_DIR="$(dirname "${OUTPREFIX}name")"

if ! mkdir -p "$OUTPUT_FILES_DIR"
then
	echo >&2 "Error: failed to create output directory '$OUTPUT_FILES_DIR'"
	exit 1
fi

# Escapes a string for use inside a single-quoted JavaScript string literal:
# every backslash and every single quote gets a backslash in front of it.
# Arguments: $1 - the raw string. Outputs: the escaped string on stdout.
js_single_quoted_escape()
{
	printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e "s/'/\\\\'/g"
}

# The two paths are embedded into the generated script as single-quoted
# string literals, so they are escaped first; otherwise a path containing
# a quote or a backslash would corrupt the generated script.
INFILE_JS="$(js_single_quoted_escape "$INFILE")"
OUTPREFIX_JS="$(js_single_quoted_escape "$OUTPREFIX")"

# Generate a script and feed it to 'voronota-js' through a pipe.
{
# First part: unquoted here-document, shell variables are expanded to define
# the 'params' object from the command line settings.
cat << EOF
var params={}
params.input_structure_file='$INFILE_JS';
params.pocket_probe_min=$PROBE_MIN;
params.pocket_probe_max=$PROBE_MAX;
params.pocket_buriedness_core=$BURIEDNESS_CORE;
params.pocket_buriedness_rim=$BURIEDNESS_RIM;
params.pocket_subpockets=$SUBPOCKETS;
params.pocket_tangent_leeway=$TANGENT_LEEWAY;
params.pocket_tangent_radius_min=$TANGENT_RADIUS_MIN;
params.pocket_tetrahedron_edge_max=$TETRAHEDRON_EDGE_MAX;
params.output_prefix='$OUTPREFIX_JS';
EOF

# Second part: quoted here-document, passed on verbatim. It imports the
# structure, sets per-atom and per-contact adjuncts, and exports the
# atom-level and residue-level ("coarse") graph CSV files and three PDB
# files under params.output_prefix.
cat << 'EOF'
voronota_auto_assert_full_success=true;

voronota_import('-file', params.input_structure_file, '-as-assembly', '-title', 'model');

voronota_restrict_atoms("-use", "[-protein]");

voronota_construct_contacts('-probe', 0.01);

voronota_set_adjunct_of_atoms_by_expression('-expression', '_linear_combo', '-input-adjuncts', 'volume', '-parameters', [1.0, 0.0], '-output-adjunct', 'volume_vdw');

voronota_delete_adjuncts_of_atoms('-adjuncts', ['volume']);

voronota_construct_contacts('-probe', 1.4, '-adjunct-solvent-direction', '-calculate-bounding-arcs', '-force');

voronota_voromqa_global("-adj-atom-sas-potential", "voromqa_sas_potential", "-adj-contact-energy", "voromqa_energy", "-smoothing-window", 0, "-adj-atom-quality", "voromqa_score_a", "-adj-residue-quality", "voromqa_score_r");

voronota_set_adjunct_of_atoms_by_type_number("-name", "atom_type", "-typing-mode", "protein_atom");
voronota_set_adjunct_of_atoms_by_type_number("-name", "residue_type", "-typing-mode", "protein_residue");

voronota_set_adjunct_of_atoms_by_contact_areas("-use [-solvent] -name sas_area");
voronota_set_adjunct_of_atoms_by_contact_adjuncts('[-solvent]', '-source-name', 'solvdir_x', '-destination-name', 'solvdir_x', '-pooling-mode', 'min');
voronota_set_adjunct_of_atoms_by_contact_adjuncts('[-solvent]', '-source-name', 'solvdir_y', '-destination-name', 'solvdir_y', '-pooling-mode', 'min');
voronota_set_adjunct_of_atoms_by_contact_adjuncts('[-solvent]', '-source-name', 'solvdir_z', '-destination-name', 'solvdir_z', '-pooling-mode', 'min');

voronota_set_adjuncts_of_atoms_by_ufsr('[-aname CA]', '-name-prefix', 'mc_ufsr');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_a1 -destination-name ufsr_a1 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_b1 -destination-name ufsr_b1 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_c1 -destination-name ufsr_c1 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_a2 -destination-name ufsr_a2 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_b2 -destination-name ufsr_b2 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_c2 -destination-name ufsr_c2 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_a3 -destination-name ufsr_a3 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_b3 -destination-name ufsr_b3 -pooling-mode max');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name mc_ufsr_c3 -destination-name ufsr_c3 -pooling-mode max');

voronota_describe_exposure("-adj-atom-exposure-value ev14 -probe-min 1.4 -probe-max 30 -expansion 1 -smoothing-iterations 0 -smoothing-depth 0");
voronota_describe_exposure("-adj-atom-exposure-value ev28 -probe-min 2.8 -probe-max 30 -expansion 1 -smoothing-iterations 0 -smoothing-depth 0");
voronota_describe_exposure("-adj-atom-exposure-value ev56 -probe-min 5.6 -probe-max 30 -expansion 1 -smoothing-iterations 0 -smoothing-depth 0");

voronota_auto_assert_full_success=false;
voronota_set_adjunct_of_atoms("-use [-v! sas_area] -name sas_area -value 0");
voronota_set_adjunct_of_atoms("-use [-v! solvdir_x] -name solvdir_x -value 0");
voronota_set_adjunct_of_atoms("-use [-v! solvdir_y] -name solvdir_y -value 0");
voronota_set_adjunct_of_atoms("-use [-v! solvdir_z] -name solvdir_z -value 0");
voronota_set_adjunct_of_atoms("-use [-v! voromqa_sas_potential] -name voromqa_sas_potential -value 0");
voronota_set_adjunct_of_atoms("-use [-v! ev14] -name ev14 -value 2");
voronota_set_adjunct_of_atoms("-use [-v! ev28] -name ev28 -value 2");
voronota_set_adjunct_of_atoms("-use [-v! ev56] -name ev56 -value 2");
voronota_auto_assert_full_success=true;

voronota_set_adjunct_of_atoms_by_expression("-use [] -expression _multiply -input-adjuncts voromqa_sas_potential sas_area -output-adjunct voromqa_sas_energy");

voronota_set_adjunct_of_contacts("-use [] -name seq_sep_class -value 5");
voronota_set_adjunct_of_contacts("-use [] -name covalent_bond -value 0");

voronota_auto_assert_full_success=false;
voronota_set_adjunct_of_contacts("-use [-min-seq-sep 0 -max-seq-sep 0] -name seq_sep_class -value 0");
voronota_set_adjunct_of_contacts("-use [-min-seq-sep 1 -max-seq-sep 1] -name seq_sep_class -value 1");
voronota_set_adjunct_of_contacts("-use [-min-seq-sep 2 -max-seq-sep 2] -name seq_sep_class -value 2");
voronota_set_adjunct_of_contacts("-use [-min-seq-sep 3 -max-seq-sep 3] -name seq_sep_class -value 3");
voronota_set_adjunct_of_contacts("-use [-min-seq-sep 4 -max-seq-sep 4] -name seq_sep_class -value 4");
voronota_set_adjunct_of_contacts("-use ([-max-seq-sep 0 -max-dist 1.8] or [-min-seq-sep 1 -max-seq-sep 1 -a1 [-aname N] -a2 [-aname C] -max-dist 1.8]) -name covalent_bond -value 1");
voronota_set_adjunct_of_contacts("-use [-v! voromqa_energy] -name voromqa_energy -value 0");
voronota_auto_assert_full_success=true;

voronota_describe_exposure("-probe-min", params.pocket_probe_min, "-probe-max", params.pocket_probe_max, "-adj-atom-exposure-value", "buriedness", "-weight-power", 3, "-expansion", params.pocket_tangent_leeway, "-smoothing-iterations", 3, "-smoothing-depth", 1);
voronota_find_connected_components("-atoms-core-use", "[-v buriedness="+params.pocket_buriedness_core+":1.0]", "-atoms-all-use", "[-v buriedness="+params.pocket_buriedness_rim+":1.0]", "-adj-component-number", "pocket_id", "-contacts-use", "[-t peripherial]");
voronota_select_atoms("-use", "[-v pocket_id=1:"+params.pocket_subpockets+"]", "-name", "pocket_atoms_main");
voronota_select_atoms_by_triangulation_query("-use", "[pocket_atoms_main]", "-expansion", params.pocket_tangent_leeway, "-min-radius", params.pocket_tangent_radius_min, "-max-edge", params.pocket_tetrahedron_edge_max, "-name", "pocket_atoms_all");

voronota_auto_assert_full_success=false;
voronota_set_adjunct_of_atoms("-use [-v! buriedness] -name buriedness -value 2");
voronota_set_adjunct_of_atoms("-name pocketness -value 0");
voronota_set_adjunct_of_atoms("-use [pocket_atoms_all] -name pocketness -value 1");
voronota_auto_assert_full_success=true;


voronota_auto_assert_full_success=false;

voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name sas_area -destination-name residue_sas_area -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name volume -destination-name residue_volume -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name volume_vdw -destination-name residue_volume_vdw -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name voromqa_sas_energy -destination-name residue_voromqa_sas_energy -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name voromqa_depth -destination-name residue_mean_voromqa_depth -pooling-mode mean');

voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name buriedness -destination-name residue_min_buriedness -pooling-mode min');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-atoms-use [-v buriedness=0:1] -destination-atoms-use [] -source-name buriedness -destination-name residue_max_buriedness -pooling-mode max');
voronota_set_adjunct_of_atoms("-use [-v! residue_max_buriedness] -name residue_max_buriedness -value 2");
voronota_set_adjunct_of_atoms_by_expression("-use [-v buriedness=0:1] -expression _multiply -input-adjuncts buriedness sas_area -output-adjunct weighted_buriedness");
voronota_set_adjunct_of_atoms("-use [-v! weighted_buriedness] -name weighted_buriedness -value 0");
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name weighted_buriedness -destination-name residue_weighted_buriedness -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_expression("-use [-v! residue_sas_area=0] -expression _divide -input-adjuncts residue_weighted_buriedness residue_sas_area -output-adjunct residue_mean_buriedness");
voronota_set_adjunct_of_atoms("-use ([-v residue_sas_area=0] or [-v residue_mean_buriedness=0]) -name residue_mean_buriedness -value 2");

voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name ev14 -destination-name residue_min_ev14 -pooling-mode min');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-atoms-use [-v ev14=0:1] -destination-atoms-use [] -source-name ev14 -destination-name residue_max_ev14 -pooling-mode max');
voronota_set_adjunct_of_atoms("-use [-v! residue_max_ev14] -name residue_max_ev14 -value 2");
voronota_set_adjunct_of_atoms_by_expression("-use [-v ev14=0:1] -expression _multiply -input-adjuncts ev14 sas_area -output-adjunct weighted_ev14");
voronota_set_adjunct_of_atoms("-use [-v! weighted_ev14] -name weighted_ev14 -value 0");
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name weighted_ev14 -destination-name residue_weighted_ev14 -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_expression("-use [-v! residue_sas_area=0] -expression _divide -input-adjuncts residue_weighted_ev14 residue_sas_area -output-adjunct residue_mean_ev14");
voronota_set_adjunct_of_atoms("-use ([-v residue_sas_area=0] or [-v residue_mean_ev14=0]) -name residue_mean_ev14 -value 2");

voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name ev28 -destination-name residue_min_ev28 -pooling-mode min');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-atoms-use [-v ev28=0:1] -destination-atoms-use [] -source-name ev28 -destination-name residue_max_ev28 -pooling-mode max');
voronota_set_adjunct_of_atoms("-use [-v! residue_max_ev28] -name residue_max_ev28 -value 2");
voronota_set_adjunct_of_atoms_by_expression("-use [-v ev28=0:1] -expression _multiply -input-adjuncts ev28 sas_area -output-adjunct weighted_ev28");
voronota_set_adjunct_of_atoms("-use [-v! weighted_ev28] -name weighted_ev28 -value 0");
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name weighted_ev28 -destination-name residue_weighted_ev28 -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_expression("-use [-v! residue_sas_area=0] -expression _divide -input-adjuncts residue_weighted_ev28 residue_sas_area -output-adjunct residue_mean_ev28");
voronota_set_adjunct_of_atoms("-use ([-v residue_sas_area=0] or [-v residue_mean_ev28=0]) -name residue_mean_ev28 -value 2");

voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name ev56 -destination-name residue_min_ev56 -pooling-mode min');
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-atoms-use [-v ev56=0:1] -destination-atoms-use [] -source-name ev56 -destination-name residue_max_ev56 -pooling-mode max');
voronota_set_adjunct_of_atoms("-use [-v! residue_max_ev56] -name residue_max_ev56 -value 2");
voronota_set_adjunct_of_atoms_by_expression("-use [-v ev56=0:1] -expression _multiply -input-adjuncts ev56 sas_area -output-adjunct weighted_ev56");
voronota_set_adjunct_of_atoms("-use [-v! weighted_ev56] -name weighted_ev56 -value 0");
voronota_set_adjunct_of_atoms_by_residue_pooling('-source-name weighted_ev56 -destination-name residue_weighted_ev56 -pooling-mode sum');
voronota_set_adjunct_of_atoms_by_expression("-use [-v! residue_sas_area=0] -expression _divide -input-adjuncts residue_weighted_ev56 residue_sas_area -output-adjunct residue_mean_ev56");
voronota_set_adjunct_of_atoms("-use ([-v residue_sas_area=0] or [-v residue_mean_ev56=0]) -name residue_mean_ev56 -value 2");

voronota_auto_assert_full_success=true;


voronota_select_atoms('-use', '[-v voromqa_depth]', '-name', 'tessellated'); 

voronota_export_adjuncts_of_atoms('-file', params.output_prefix+'graph_nodes.csv', '-use', '[tessellated]', '-no-serial', '-adjuncts', ['atom_index', 'residue_index', 'atom_type', 'residue_type', 'center_x', 'center_y', 'center_z', 'radius', 'sas_area', 'solvdir_x', 'solvdir_y', 'solvdir_z', 'voromqa_sas_energy', 'voromqa_depth', 'voromqa_score_a', 'voromqa_score_r', 'volume', 'volume_vdw', 'ev14', 'ev28', 'ev56', 'ufsr_a1', 'ufsr_a2', 'ufsr_a3', 'ufsr_b1', 'ufsr_b2', 'ufsr_b3', 'ufsr_c1', 'ufsr_c2', 'ufsr_c3', 'buriedness', 'pocketness'], '-sep', ',', '-expand-ids', true);

voronota_export_adjuncts_of_contacts('-file', params.output_prefix+'graph_links.csv', '-atoms-use', '[tessellated]', '-contacts-use', '[-no-solvent]', '-no-serial', '-adjuncts', ['atom_index1', 'atom_index2', 'area', 'boundary', 'distance', 'voromqa_energy', 'seq_sep_class', 'covalent_bond'], '-sep', ',', '-expand-ids', true);

voronota_export_adjuncts_of_atoms('-file', params.output_prefix+'coarse_graph_nodes.csv', '-use', '(([tessellated]) and ([-aname CA]))', '-no-serial', '-no-name', '-adjuncts', ['residue_index', 'residue_type', 'residue_sas_area', 'residue_volume', 'residue_voromqa_sas_energy', 'residue_mean_voromqa_depth', 'voromqa_score_r', 'residue_min_ev14', 'residue_max_ev14', 'residue_mean_ev14', 'residue_min_ev28', 'residue_max_ev28', 'residue_mean_ev28', 'residue_min_ev56', 'residue_max_ev56', 'residue_mean_ev56', 'residue_min_buriedness', 'residue_max_buriedness', 'residue_mean_buriedness'], '-sep', ',', '-expand-ids', true);

voronota_export_adjuncts_of_contacts('-inter-residue', true, '-file', params.output_prefix+'coarse_graph_links.csv', '-atoms-use', '[tessellated]', '-contacts-use', '[-no-solvent -min-seq-sep 1]', '-no-serial', '-adjuncts', ['area', 'boundary', 'distance', 'voromqa_energy'], '-sep', ',', '-expand-ids', true);

voronota_export_atoms("-use", "[tessellated]", "-as-pdb", "-pdb-b-factor", "buriedness", "-file", params.output_prefix+"buriedness.pdb");

voronota_export_atoms("-use", "[tessellated]", "-as-pdb", "-pdb-b-factor", "residue_mean_buriedness", "-file", params.output_prefix+"coarse_mean_buriedness.pdb");

voronota_export_atoms("-use", "[tessellated]", "-as-pdb", "-pdb-b-factor", "pocketness", "-file", params.output_prefix+"pocketness.pdb");
EOF
} \
| voronota-js

# The run counts as successful only if each of these output files exists and
# is non-empty; the first one that does not ends the script with an error.
for REQUIRED_OUTPUT in "graph_nodes.csv" "graph_links.csv" "buriedness.pdb" "pocketness.pdb"
do
	if [ ! -s "${OUTPREFIX}${REQUIRED_OUTPUT}" ]
	then
		echo >&2 "Failed: graph data for $DATA_DESCRIPTION"
		exit 1
	fi
done

# Convert every intermediate comma-separated table into a tab-separated
# table under its final name. Each entry is 'csv_name:tsv_name'.
INTERMEDIATE_TABLES=()

for NAME_PAIR in \
	"graph_nodes:atom_graph_nodes" \
	"graph_links:atom_graph_links" \
	"coarse_graph_nodes:residue_graph_nodes" \
	"coarse_graph_links:residue_graph_links"
do
	CSV_TABLE="${OUTPREFIX}${NAME_PAIR%%:*}.csv"
	TSV_TABLE="${OUTPREFIX}${NAME_PAIR##*:}.tsv"
	cat "$CSV_TABLE" | tr ',' '\t' > "$TSV_TABLE"
	INTERMEDIATE_TABLES+=("$CSV_TABLE")
done

# The comma-separated tables are only intermediate products.
rm -f "${INTERMEDIATE_TABLES[@]}"

exit 0

