#!/bin/bash
# script to convert pdf bank statements to
# csv files

# The statements may arrive with white space in their file names; replace
# every whitespace character in a file name with an underscore.
#
# Arguments: $1 - directory to scan recursively. Only regular files are
#                 renamed; directory names are left untouched.
# Outputs:   mv -v reports each rename on stdout.
# Notes:     mv -n never overwrites, so a file whose cleaned-up name is
#            already taken keeps its original name.
rename_whitespace_files() {
  local dir=$1 path base
  # Match on [[:space:]] (not just a literal space) so the search agrees
  # with the replacement below and names containing only tabs are found too.
  while IFS= read -r -d '' path; do
    base=${path##*/}
    mv -v -n -- "${path}" "${path%/*}/${base//[[:space:]]/_}"
  done < <(find "${dir}" -type f -name '*[[:space:]]*' -print0)
}

rename_whitespace_files "$(pwd)"

# Run ps2ascii over every *.pdf in the current directory and append the
# extracted text to one file.
#
# Arguments: $1 - output file; any previous copy is replaced.
# Outputs:   a "processing <name>" progress line per PDF on stdout.
# Notes:     the output file is always created, even when no PDF is found.
collect_pdf_text() {
  local out=$1 pdf
  rm -f -- "${out}"
  : > "${out}"
  # Iterate over the glob directly instead of parsing `ls`, so names with
  # unusual characters are passed to ps2ascii intact.
  for pdf in *.pdf; do
    # With no match the pattern stays literal; skip it (and non-files).
    [[ -f "${pdf}" ]] || continue
    printf 'processing %s\n' "${pdf}"
    ps2ascii "${pdf}" >> "${out}"
  done
}

collect_pdf_text dd.txt
# Open the raw text for inspection only when attached to a terminal, so
# the script does not stall in the viewer when run unattended.
if [[ -t 0 && -t 1 ]]; then
  view dd.txt
fi
# Keep only lines containing two digits directly followed by one upper-case
# and two lower-case letters (e.g. "26Jun"), and delete every comma on the
# lines that are kept (presumably so commas in the text cannot clash with
# the comma delimiters added later -- TODO confirm).
#
# Arguments: optional input files; reads stdin when none are given.
# Outputs:   the filtered lines on stdout.
select_dated_lines() {
  # LC_ALL=C keeps the [A-Z]/[a-z] ranges ASCII-only whatever the caller's
  # locale is. One sed does both the filtering and the comma removal.
  LC_ALL=C sed -e '/[0-9][0-9][A-Z][a-z][a-z]/!d' -e 's/,//g' "$@"
}

select_dated_lines dd.txt > ddd.txt
# Open the filtered text for inspection only when attached to a terminal.
if [[ -t 0 && -t 1 ]]; then
  view ddd.txt
fi
# Sample lines that contain the date pattern without being ordinary
# transaction rows (presumably copied from a numbered listing of
# m_spreadsheet.txt -- TODO confirm):
#27SidlawSt,Facsimile: (04) 387 1639
#  35 HomeLoanFixedRate,0501-070235084-1004,26Jun2036,7.90%,97590.29 DR
#  36 HomeLoanFixedRate,0501-070235084-1005,27Jun2021,6.99%,9912.63 DR
#  37 StatementPeriod,07Feb09to06Mar09

# Turn column gaps into CSV delimiters: every run of four or more spaces
# becomes a single comma, and a comma produced by leading indentation is
# removed.
#
# Arguments: optional input files; reads stdin when none are given.
space_runs_to_commas() {
  sed -e 's/ \{4,\}/,/g' -e 's/^,//' "$@"
}

# Insert empty fields in front of the first "HomeLoanFixedRate" or
# "StatementPeriod" on a line, moving those rows to the right.
#
# Arguments: optional input files; reads stdin when none are given.
# NOTE(review): the StatementPeriod substitution is applied twice, so those
# rows receive 14 commas while HomeLoanFixedRate rows receive 7. This is kept
# as found because the intended column layout is not visible here -- confirm
# whether the duplicate is deliberate.
shift_summary_rows() {
  sed \
    -e 's/HomeLoanFixedRate/,,,,,,,&/' \
    -e 's/StatementPeriod/,,,,,,,&/' \
    -e 's/StatementPeriod/,,,,,,,&/' \
    "$@"
}

space_runs_to_commas ddd.txt > m_spreadsheet.txt
shift_summary_rows m_spreadsheet.txt > m_spreadsheet.csv

# Open the result for inspection only when attached to a terminal.
if [[ -t 0 && -t 1 ]]; then
  view m_spreadsheet.csv
fi