#!/bin/bash
#
# bank_statement_pdf_to_csv.sh - convert PDF bank statements to a CSV file.
#
# Origin: commit 4c38218342a742ed1124cc9993b65204e3edc9b0,
#         "for nz bank statements", Robin. P. Clark, 2022-12-23.
#
# Usage: run with no arguments from the directory holding the statements.
#
# What it does, in order:
#   1. Renames every regular file below the current directory whose name
#      contains whitespace, replacing each whitespace character with "_".
#   2. Runs ps2ascii on every *.pdf in the current directory and collects
#      the text in dd.txt.
#   3. Copies to ddd.txt only the lines containing a date-like pattern
#      (two digits, spaces, a capitalised three-letter word, a space and
#      two capitals, e.g. "23 Dec AP"), with all commas removed so they
#      cannot be confused with CSV separators.
#   4. Writes spreadsheet.csv, where every run of spaces is one comma.
#   5. Opens spreadsheet.csv in view(1) when attached to a terminal.
#
# Requires: ps2ascii, find, sed, mv; view is optional.

set -euo pipefail

readonly RAW_TEXT='dd.txt'
readonly MATCHED_LINES='ddd.txt'
readonly CSV_OUT='spreadsheet.csv'

#######################################
# Replace whitespace in file names with underscores.
# Arguments: $1 - directory to search recursively
# Outputs:   mv -v reports each rename on stdout; warnings go to stderr
# Returns:   0 (a failed rename is reported but is not fatal)
#######################################
sanitise_filenames() {
  local root=$1
  local path dir base

  # NUL-delimited so names with any character, even newlines, survive.
  while IFS= read -r -d '' path; do
    dir=${path%/*}
    base=${path##*/}
    # -n: never overwrite an existing file that already has the new name.
    mv -v -n -- "${path}" "${dir}/${base//[[:space:]]/_}" \
      || printf 'warning: could not rename %s\n' "${path}" >&2
  done < <(find "${root}" -type f -name '*[[:space:]]*' -print0)
}

#######################################
# Extract the text of every *.pdf in the current directory.
# Arguments: $1 - output file; replaced, then appended to per PDF
# Outputs:   "processing NAME" per PDF on stdout; warnings on stderr
# Returns:   0 if at least one PDF was found, 1 if there were none
#            (in which case the output file is left untouched)
#######################################
extract_pdf_text() {
  local out=$1
  local pdf
  local -a pdfs=()

  for pdf in *.pdf; do
    # An unmatched glob stays as the literal "*.pdf"; skip it.
    if [[ -f "${pdf}" ]]; then
      pdfs+=("${pdf}")
    fi
  done
  if (( ${#pdfs[@]} == 0 )); then
    return 1
  fi

  rm -f -- "${out}"
  for pdf in "${pdfs[@]}"; do
    printf 'processing %s\n' "${pdf}"
    # The ./ prefix stops a name starting with "-" being read as an option.
    if ! ps2ascii "./${pdf}" >> "${out}"; then
      printf 'warning: ps2ascii failed on %s\n' "${pdf}" >&2
    fi
  done
}

#######################################
# Filter stdin down to the lines holding a "NN Mon XX" pattern and strip
# every comma from them.
# Outputs: the selected lines on stdout
#######################################
select_statement_lines() {
  # LC_ALL=C keeps [A-Z] and [a-z] strictly ASCII upper/lower case.
  LC_ALL=C sed -n \
    '/[0-9][0-9] \{1,\}[A-Z][a-z][a-z] [A-Z][A-Z]/{s/,//g;p;}'
}

#######################################
# Turn space-separated columns on stdin into CSV on stdout.
# Each run of one or more spaces becomes a single comma; the comma that
# leading indentation produces is dropped.
#######################################
columns_to_csv() {
  # \{1,\} rather than "*": a pattern that can match the empty string
  # would insert a comma between every pair of characters.
  LC_ALL=C sed -e 's/ \{1,\}/,/g' -e 's/^,//'
}

#######################################
# Entry point: run the whole conversion in the current directory.
# Returns: 0 on success or when there is nothing to convert
#######################################
main() {
  sanitise_filenames "${PWD}"

  if ! extract_pdf_text "${RAW_TEXT}"; then
    printf 'no PDF files found in %s; nothing to do\n' "${PWD}" >&2
    return 0
  fi

  select_statement_lines < "${RAW_TEXT}" > "${MATCHED_LINES}"
  columns_to_csv < "${MATCHED_LINES}" > "${CSV_OUT}"

  # Only start the interactive viewer when a user is actually attached.
  if [[ -t 0 && -t 1 ]]; then
    view "${CSV_OUT}"
  fi
}

main "$@"