#!/bin/bash
# pdf_to_csv_mt.sh
#
# Script to convert PDF bank statements to CSV files.
#
# Works entirely in the current directory:
#   1. Renames every regular file below the current directory whose name
#      contains a space, replacing each whitespace character with "_".
#   2. Runs ps2ascii on every *.pdf in the current directory and collects
#      the text in dd.txt.
#   3. Keeps only the lines that contain two digits followed by a capital
#      letter and two lowercase letters (e.g. "26Jun"), strips commas, and
#      writes the result to ddd.txt.
#   4. Turns the column gaps into commas (m_spreadsheet.txt) and pads the
#      HomeLoanFixedRate / StatementPeriod rows with leading empty columns
#      (m_spreadsheet.csv).
# After steps 2, 3 and 4 the corresponding file is opened in `view` so it
# can be inspected by hand.
#
# Requires: find (with -print0), mv (with -n), ps2ascii, sed, view.
# No `set -e`: the steps are deliberately best-effort so that one bad PDF
# does not stop the whole run.

# Print a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Replace whitespace in file names with underscores.
# Input files may have white space in their names, which makes every later
# step harder, so they are renamed up front. `mv -n` never overwrites an
# existing file.
rename_spaced_files() {
  local path dir base
  while IFS= read -r -d '' path; do
    dir=${path%/*}
    base=${path##*/}
    mv -v -n -- "${path}" "${dir}/${base//[[:space:]]/_}"
  done < <(find "$(pwd)" -name '* *' -type f -print0)
}

# Convert every PDF in the current directory to text, appended to dd.txt.
# dd.txt is recreated on every run so stale text never leaks into the CSV.
extract_text() {
  local pdf found=0
  rm -rf -- dd.txt
  : > dd.txt
  # Glob directly instead of parsing `ls`: names are never word-split and
  # a directory called something.pdf is skipped rather than listed.
  for pdf in *.pdf; do
    [[ -f "${pdf}" ]] || continue
    found=1
    printf 'processing %s\n' "${pdf}"
    ps2ascii "${pdf}" >> dd.txt \
      || warn "warning: ps2ascii failed for ${pdf}"
  done
  (( found )) || warn "warning: no *.pdf files found in $(pwd)"
}

# Keep only the lines carrying a date-like token (two digits, one capital,
# two lowercase letters) and drop any commas they contain, so commas can be
# used as the field separator afterwards. Reads dd.txt, writes ddd.txt.
filter_statement_lines() {
  sed -n '/.*[0-9][0-9][A-Z][a-z][a-z]/{s/,//g;p;}' dd.txt > ddd.txt
}

# Build the CSV from ddd.txt.
#
# Sample lines kept from the original script (presumably m_spreadsheet.txt
# as shown by the viewer, with its line numbers):
#27SidlawSt,Facsimile: (04) 387 1639
# 35 HomeLoanFixedRate,0501-070235084-1004,26Jun2036,7.90%,97590.29 DR
# 36 HomeLoanFixedRate,0501-070235084-1005,27Jun2021,6.99%,9912.63 DR
# 37 StatementPeriod,07Feb09to06Mar09
build_csv() {
  # NOTE(review): the script as received used 's/ */,/g', which also
  # matches the empty string and therefore puts a comma between every
  # character, so the HomeLoanFixedRate / StatementPeriod rules below could
  # never match. The sample lines above keep single spaces ("97590.29 DR"),
  # so a run of two or more spaces is treated as the column gap here.
  # Confirm against the original file.
  sed -e 's/ \{2,\}/,/g' -e 's/^,//' ddd.txt > m_spreadsheet.txt

  # NOTE(review): the StatementPeriod rule is applied twice, exactly as in
  # the original, so those rows end up with 14 leading commas while
  # HomeLoanFixedRate rows get 7. This may be a copy-paste duplicate;
  # confirm before removing.
  sed -e 's/\(HomeLoanFixedRate\)/,,,,,,,\1/' \
      -e 's/StatementPeriod/,,,,,,,StatementPeriod/' \
      -e 's/StatementPeriod/,,,,,,,StatementPeriod/' \
      m_spreadsheet.txt > m_spreadsheet.csv
}

main() {
  rename_spaced_files

  extract_text
  view dd.txt

  filter_statement_lines
  view ddd.txt

  build_csv
  view m_spreadsheet.csv
}

main "$@"