#!/bin/bash
#
# This shell script prepares the data so that Proper can handle it better
#
# use -h to see options
#
# FracPete

# the usage of this script
function usage()
{
   echo
   echo "usage: ${0##*/} -i <input-dir> -o <output-dir> [-h]"
   echo 
   echo "creates 'relational' data from the CSV files, i.e. it creates a file"
   echo "that contains only the bag-id and the class, whereas the other file"
   echo "contains the rest"
   echo
   echo " -h   this help"
   echo " -i   <input-dir>"
   echo "      the directory with the unprocessed files"
   echo "      default: $SRC"
   echo " -o   <output-dir>"
   echo "      where to put the processed files"
   echo "      default: $DEST"
   echo
}

# cleans up temporary files
function clean_up
{
   rm -f *_
   rm -f xx*
}

# returns the count of columns in TMP
function count_cols()
{
   TMP=`head -n1 $SRCFILE | sed s/" "/_/g | sed s/","/" "/g | wc -w | sed s/" "*//g`
}

# creates relational data from the file, i.e. it splits the file into two, 
# where one contains the bag and the class and the other rest of it (bag-id is
# the reference)
function create_relational()
{
   SRCFILE=$TMP
   TMPFILE=`echo $SRCFILE | sed s/".csv"//g`
   count_cols;COUNT=$TMP
   cat $SRCFILE | sed s/" "*//g | sed s/"."$//g | cut -f1,$COUNT -d"," | sort -u > $TMPFILE-bag.txt
   cat $SRCFILE | sed s/" "*//g | sed s/"."$//g | cut -f1-$(($COUNT-1)) -d"," > $TMPFILE-data.txt
}

# variables
ROOT=`expr "$0" : '\(.*\)/'`
SRC="$ROOT/original"
DEST="$ROOT"

# interprete parameters
while getopts ":hi:o:" flag
do
   case $flag in
      i) SRC=$OPTARG
         ;;
      o) DEST=$OPTARG
         ;;
      h) usage
         exit 0
         ;;
      *) usage
         exit 1
         ;;
   esac
done

# copy files
echo "copying files..."
cp $SRC/*.csv $DEST

# create relational data
echo "creating relational data..."
TMP="$DEST/atoms.csv";create_relational
TMP="$DEST/bonds.csv";create_relational
TMP="$DEST/chains.csv";create_relational

# delete files
echo "deleting files..."
rm $DEST/*.csv

# clean up
clean_up

