useful_linux_scripts/pdf_to_csv_mt.sh
Robin P. Clark fecd8dbbd8 rescale
2023-09-01 16:35:48 +01:00

33 lines
1.0 KiB
Bash

#!/bin/bash
# Convert PDF bank statements in the current directory to a CSV file.
#
# Pipeline (all files are written to the current directory):
#   1. Every regular file below the current directory whose name contains a
#      space is renamed, each whitespace character becoming "_".
#   2. Each *.pdf in the current directory is run through ps2ascii and the
#      text is appended to dd.txt.
#   3. Lines containing a date-like token (two digits followed by a
#      capitalised three-letter word, e.g. "26Jun") are kept, commas are
#      removed, and the result is written to ddd.txt.
#   4. Runs of spaces become single commas            -> m_spreadsheet.txt
#   5. Marker rows are padded with leading commas     -> m_spreadsheet.csv
#
# When run on a terminal, dd.txt, ddd.txt and m_spreadsheet.csv are each
# opened in `view` for inspection, as the original script did.
#
# If there is no *.pdf in the current directory the script does nothing
# (no renames, no output files) and exits successfully.
#
# Requires: find, sed, ps2ascii (shipped with Ghostscript), view.

set -euo pipefail

# Rename every regular file below directory $1 whose name contains a space,
# replacing each whitespace character in the file name (not the directory
# part) with "_". Existing targets are never overwritten (mv -n).
sanitize_filenames() {
  local root=$1 path dir base
  while IFS= read -r -d '' path; do
    dir=${path%/*}
    base=${path##*/}
    mv -v -n -- "${path}" "${dir}/${base//[[:space:]]/_}" \
      || printf 'warning: could not rename %s\n' "${path}" >&2
  done < <(find "${root}" -name '* *' -type f -print0)
}

# stdin -> stdout: keep only lines containing two digits followed by an
# upper-case letter and two lower-case letters, and delete every comma on
# those lines (commas would otherwise clash with the CSV separator).
select_dated_lines() {
  sed -n -e '/[0-9][0-9][A-Z][a-z][a-z]/{' -e 's/,//g' -e 'p' -e '}'
}

# stdin -> stdout: turn each run of ONE OR MORE spaces into a single comma
# and drop a comma left at the start of the line by leading spaces.
# The pattern must not be able to match the empty string: with " *" sed
# would insert a comma between every character.
spaces_to_commas() {
  sed -e 's/ \{1,\}/,/g' -e 's/^,//'
}

# stdin -> stdout: prefix marker rows with empty columns.
# HomeLoanFixedRate receives 7 leading commas. The StatementPeriod
# substitution is applied twice, exactly as in the original script, so it
# ends up with 14 leading commas.
# NOTE(review): the duplicated StatementPeriod rule may be a copy-paste
# slip; confirm the intended column before removing it.
pad_columns() {
  sed -e 's/\(HomeLoanFixedRate\)/,,,,,,,\1/' \
      -e 's/StatementPeriod/,,,,,,,StatementPeriod/' \
      -e 's/StatementPeriod/,,,,,,,StatementPeriod/'
}

# Open file $1 read-only in `view`, but only when both stdin and stdout are
# terminals, so the script can also run unattended.
preview() {
  if [[ -t 0 && -t 1 ]]; then
    view "$1" || true
  fi
}

# Sample lines left by the original author (the stage they were taken from
# is not recorded):
#27SidlawSt,Facsimile: (04) 387 1639
# 35 HomeLoanFixedRate,0501-070235084-1004,26Jun2036,7.90%,97590.29 DR
# 36 HomeLoanFixedRate,0501-070235084-1005,27Jun2021,6.99%,9912.63 DR
# 37 StatementPeriod,07Feb09to06Mar09

main() {
  local pdf
  local -a pdfs

  # An unmatched glob stays literal, so test that the first entry exists.
  pdfs=(*.pdf)
  if [[ ! -e ${pdfs[0]} && ! -L ${pdfs[0]} ]]; then
    printf 'no PDF files found in %s; nothing to do\n' "${PWD}" >&2
    return 0
  fi

  sanitize_filenames "${PWD}"
  # Re-read the list: the rename step may have changed PDF names.
  pdfs=(*.pdf)

  # Start from an empty dd.txt so reruns do not accumulate old text.
  rm -rf -- dd.txt
  : > dd.txt
  for pdf in "${pdfs[@]}"; do
    printf 'processing %s\n' "${pdf}"
    # One unreadable statement should not abort the whole batch.
    ps2ascii "${pdf}" >> dd.txt \
      || printf 'warning: ps2ascii failed on %s\n' "${pdf}" >&2
  done
  preview dd.txt

  select_dated_lines < dd.txt > ddd.txt
  preview ddd.txt

  spaces_to_commas < ddd.txt > m_spreadsheet.txt
  pad_columns < m_spreadsheet.txt > m_spreadsheet.csv
  preview m_spreadsheet.csv
}

main