# Package requirements for this script.
# NOTE(review): this looks like a PyScript package list (it names a local
# wheel file) -- confirm it is meant to live in the Python source.
packages = [
    "pandas",
    "setuptools",
    "pdfreader-0.1.13.dev0-py3-none-any.whl",
]
from pdfreader import SimplePDFViewer, PDFDocument
import pandas as pd
from pyodide.ffi.wrappers import add_event_listener
from pyodide.ffi import to_js
from js import document, window, Blob, URL
# Most recently parsed statement, held as a pandas DataFrame; stays None until
# process_file has completed once. Written by process_file, read by
# download_file.
df = None
async def get_bytes_from_file(file):
    """Return the full contents of *file* as Python ``bytes``.

    *file* must provide an awaitable ``arrayBuffer()`` whose result offers a
    ``to_bytes()`` method; the buffer is awaited and converted in one step.
    """
    return (await file.arrayBuffer()).to_bytes()
# Substrings that mark the start of the transaction table on a page.
_HEADER_KEYWORDS = ("NEW TRANSACTIONS", "AMOUNT(S$)")
# Substrings the backwards scan stops on when looking for the table's end.
_FOOTER_KEYWORDS = ("Total :", "CR", "DB")
# Each transaction occupies this many consecutive strings.
_ROW_WIDTH = 5
_ROW_COLUMNS = ["Date", "Merchant", "Ref", "Amount", "CR/DB"]


def _contains_any(text, keywords):
    """Return True when at least one of *keywords* is a substring of *text*."""
    return any(keyword in text for keyword in keywords)


def _transaction_strings(strings):
    """Return the part of one page's strings that lies inside the table.

    The start is the string after the first header keyword (and after a
    following "PayLah! Wallet No." string, if present). The end is found by
    scanning backwards for a footer keyword; when a "Total :" string sits
    four positions before the stop point, the slice ends on that string.
    """
    if not strings:
        # A page without any text has nothing to extract; indexing it would
        # raise IndexError.
        return []

    count = len(strings)
    start, end = 0, count - 1

    while start < end and not _contains_any(strings[start], _HEADER_KEYWORDS):
        start += 1
    if start + 1 < count and "PayLah! Wallet No." in strings[start + 1]:
        start += 1
    start += 1

    if strings[end] not in ("CR", "DB"):
        while start < end and not _contains_any(strings[end], _FOOTER_KEYWORDS):
            end -= 1
        # Guard the look-back: with end < 4 a negative index would silently
        # wrap around to the end of the list.
        if end >= 4 and "Total :" in strings[end - 4]:
            end -= 4

    # The end position is inclusive, hence the + 1.
    return list(strings[start:end + 1])


def _signed_amount(amount, direction):
    """Convert *amount* to float: positive for "CR", negative otherwise."""
    if isinstance(amount, str):
        # float() rejects thousands separators such as "1,234.56".
        amount = amount.replace(",", "")
    value = float(amount)
    return value if direction == "CR" else -value


def _build_frame(strings):
    """Group *strings* into five-field rows and return Date/Merchant/Amount."""
    rows = [
        strings[i:i + _ROW_WIDTH] for i in range(0, len(strings), _ROW_WIDTH)
    ]
    # The final chunk is always discarded, as before.
    # NOTE(review): presumably it holds the trailing "Total :" residue rather
    # than a transaction -- confirm against a real statement.
    if rows:
        rows.pop()

    frame = pd.DataFrame(rows, columns=_ROW_COLUMNS)
    frame["Amount"] = [
        _signed_amount(amount, direction)
        for amount, direction in zip(frame["Amount"], frame["CR/DB"])
    ]
    return frame[["Date", "Merchant", "Amount"]]


async def process_file(e):
    """Parse the PDF selected in a file input and store the result in ``df``.

    Reads the first file of ``e.target.files``, renders every page with
    ``SimplePDFViewer``, collects the strings of each page's transaction
    table and replaces the module-level ``df`` with a DataFrame holding the
    columns Date, Merchant and Amount (negative unless marked "CR").

    Returns without touching ``df`` when no file is selected.
    """
    global df

    file_list = e.target.files
    if file_list.length == 0:
        # The change event can fire with an empty selection.
        return

    fd: bytes = await get_bytes_from_file(file_list.item(0))
    viewer = SimplePDFViewer(fd)
    page_count = sum(1 for _ in PDFDocument(fd).pages())

    extracted = []
    # Pages are navigated by 1-based number.
    for page_number in range(1, page_count + 1):
        viewer.navigate(page_number)
        viewer.render()
        extracted += _transaction_strings(viewer.canvas.strings)

    df = _build_frame(extracted)
def download_file(*args):
    """Offer the parsed statement to the browser as a ``data.csv`` download.

    Written to be used as an event handler: any positional arguments are
    accepted and ignored. Does nothing while the module-level ``df`` is
    still ``None``.
    """
    # Imported here so the block does not depend on a change to the
    # file-level import list.
    from js import Object

    if df is None:
        return

    encoded_data = df.to_csv(index=False).encode("utf-8")
    content = to_js(encoded_data)
    # A Python dict reaches JavaScript as a proxy whose keys are not plain
    # object properties, so the Blob constructor would not see "type".
    # Convert it to a real JS object to keep the MIME type.
    options = to_js({"type": "text/csv"}, dict_converter=Object.fromEntries)
    file = Blob.new([content], options)
    url = URL.createObjectURL(file)

    # A detached anchor with a download attribute triggers the save dialog.
    hidden_link = document.createElement("a")
    hidden_link.setAttribute("download", "data.csv")
    hidden_link.setAttribute("href", url)
    hidden_link.click()
# Wire the page controls to their handlers: (element id, DOM event, callback).
for _element_id, _event_name, _handler in (
    ("file-upload", "change", process_file),
    ("download", "click", download_file),
):
    add_event_listener(
        document.getElementById(_element_id), _event_name, _handler
    )