summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--importers/.gitignore1
-rw-r--r--importers/__init__.py0
-rw-r--r--importers/conftest.py2
-rw-r--r--importers/cragr/__init__.py0
-rw-r--r--importers/cragr/cragr_mhtml.py178
-rw-r--r--importers/cragr/cragr_ofx.py48
6 files changed, 229 insertions, 0 deletions
diff --git a/importers/.gitignore b/importers/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/importers/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/importers/__init__.py b/importers/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/importers/__init__.py
diff --git a/importers/conftest.py b/importers/conftest.py
new file mode 100644
index 0000000..a763223
--- /dev/null
+++ b/importers/conftest.py
@@ -0,0 +1,2 @@
+# This adds the --generate option.
+pytest_plugins = "beancount.ingest.regression_pytest"
diff --git a/importers/cragr/__init__.py b/importers/cragr/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/importers/cragr/__init__.py
diff --git a/importers/cragr/cragr_mhtml.py b/importers/cragr/cragr_mhtml.py
new file mode 100644
index 0000000..280d25c
--- /dev/null
+++ b/importers/cragr/cragr_mhtml.py
@@ -0,0 +1,178 @@
+"""Importer for Crédit Agricole webpages."""
+#
+# Copyright (C) 2019 Nicolas Schodet
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+import datetime
+import decimal
+import email.parser
+import email.policy
+import os.path
+import re
+from lxml import etree
+
+from beancount.ingest import importer
+from beancount.core import data, amount
+
+
class Importer(importer.ImporterProtocol):
    """An importer for Crédit Agricole webpages saved as MHTML."""

    def __init__(self, account_number, account, basename=None):
        """Create a new importer posting to the given account.

        Args:
          account_number: Account number, as a string, expected in the page
            header after the "N°" marker.
          account: An account string, the account onto which to post all the
            amounts parsed.
          basename: An optional string, the name of the new files.
        """
        self.account_number = account_number
        self.account = account
        self.basename = basename

    def name(self):
        """Include the filing account in the name."""
        return '{}: "{}"'.format(super().name(), self.file_account(None))

    def identify(self, file):
        """Check whether a file is handled by this importer.

        Returns:
          True when the file is a message/rfc822 document whose account
          header starts with "N°" followed by this importer's account
          number, False otherwise.
        """
        if file.mimetype() != 'message/rfc822':
            return False
        tree = file.convert(gettree)
        try:
            header = get_header(tree)
        except IndexError:
            # The page has no account header: it is some other saved
            # message, so decline it instead of crashing the identification.
            return False
        accounts = header.xpath(
            ".//div[contains(@class,'OperationMainAccount-headerNumber')]"
            "/text()")
        if not accounts:
            return False
        # Escape the account number so that it is matched literally.
        pattern = r' *N° +' + re.escape(self.account_number)
        return re.match(pattern, accounts[0]) is not None

    def file_account(self, _):
        """Return the account against which we post transactions."""
        return self.account

    def file_name(self, file):
        """Return the optional renamed account filename.

        The original file extension is kept. Returns None when no basename
        was configured.
        """
        if self.basename:
            return self.basename + os.path.splitext(file.name)[1]
        return None

    def file_date(self, file):
        """Return the file date.

        This is the date of the first operation found in the document, or
        None when the document lists no operation.
        """
        tree = file.convert(gettree)
        operations = get_operations(tree)
        if not operations:
            return None
        first = build_transaction(operations[0], file.name, 0, self.account,
                                  self.FLAG)
        return first.date

    def extract(self, file, existing_entries=None):
        """Extract a list of partially complete transactions from the file.

        Returns:
          The transactions in reverse document order, followed by a balance
          assertion dated one day after the last returned transaction. An
          empty list is returned when the document lists no operation, as
          no date is then available for the balance.
        """
        tree = file.convert(gettree)
        entries = [
            build_transaction(op, file.name, i, self.account, self.FLAG)
            for i, op in enumerate(get_operations(tree))
        ]
        if not entries:
            return []
        # NOTE(review): presumably the page lists the newest operation first,
        # hence the reversal -- confirm against a saved page.
        entries.reverse()
        baldate = entries[-1].date + datetime.timedelta(days=1)
        balmeta = data.new_metadata(file.name, len(entries))
        balentry = data.Balance(balmeta, baldate, self.account,
                                get_balance(tree), None, None)
        entries.append(balentry)
        return entries
+
+
def gettree(filename):
    """Extract the HTML attachment and parse it.

    Args:
      filename: Path of the MHTML file to read.
    Returns:
      The root element of the HTML body, as parsed by lxml's HTML parser.
    """
    email_parser = email.parser.BytesParser(policy=email.policy.default)
    # Use a context manager so the file is closed once the message has been
    # parsed, instead of leaking the handle.
    with open(filename, 'rb') as stream:
        content = email_parser.parse(stream)
    body = content.get_body(('html',))
    # NOTE(review): presumably forces the part to be read as UTF-8 whatever
    # charset it declares -- confirm.
    body.set_charset('UTF-8')
    html = body.get_content()
    html_parser = etree.HTMLParser()
    return etree.fromstring(html, html_parser)
+
+
def get_header(tree):
    """Return the first header div holding the account details and balance.

    Raises IndexError when the document contains no such div.
    """
    query = "//div[@class='OperationMainAccount-headerContentDescription']"
    matches = tree.xpath(query)
    return matches[0]
+
+
def get_balance(tree):
    """Return the balance shown in the page header, as an amount in EUR."""
    value_divs = get_header(tree).xpath(
        ".//div[contains(@class,'OperationMainAccount-headerAmountValue')]"
    )
    # Only the first matching div is used.
    balance_text = value_divs[0].text
    number = parse_amount(balance_text)
    return amount.Amount(number, 'EUR')
+
+
def get_operations(tree):
    """Return the <li> operation elements of the operations list."""
    query = ("//ul[@id='bloc-operations']"
             "/li[contains(@id,'operation-detail-')]")
    operations = tree.xpath(query)
    return operations
+
+
def parse_date(datestr):
    """Parse a date, ignore the time which is always the same.

    The expected format is like "Jan 5, 2019 12:00:00 AM", with an English
    three letter month abbreviation. The month is looked up in a fixed table
    so that the result does not depend on the locale of the process.

    Args:
      datestr: The date string to parse.
    Returns:
      A datetime.date.
    Raises:
      ValueError: If the string does not have the expected format or does
        not name a valid date.
    """
    months = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
              'jul', 'aug', 'sep', 'oct', 'nov', 'dec')
    m = re.fullmatch(r'(\w{3}) (\d{1,2}), (\d{4}) 12:00:00 AM', datestr)
    if m is None:
        raise ValueError('unexpected date format: {!r}'.format(datestr))
    month_name, day, year = m.groups()
    try:
        month = months.index(month_name.lower()) + 1
    except ValueError:
        raise ValueError(
            'unknown month in date: {!r}'.format(datestr)) from None
    return datetime.date(int(year), month, int(day))
+
+
def parse_amount(amountstr):
    """Parse an amount in french format.

    Accepted strings look like "+ 1 234,56 €" or "-12,34 €", where the
    thousands separator and the space before the euro sign are no-break
    spaces (U+00A0).

    Args:
      amountstr: The amount string to parse.
    Returns:
      A decimal.Decimal.
    Raises:
      ValueError: If the string does not have the expected format.
    """
    m = re.fullmatch('(?:\\+ )?(-?(?:\\d|\xa0)+),(\\d\\d)\xa0€', amountstr)
    if m is None:
        # Report the offending text instead of failing on a None match.
        raise ValueError('unexpected amount format: {!r}'.format(amountstr))
    # Drop the thousands separators from the integer part.
    ip = m.group(1).replace('\xa0', '')
    fp = m.group(2)
    return decimal.Decimal('{}.{}'.format(ip, fp))
+
+
def build_transaction(op, filename, i, account, flag):
    """Build a beancount transaction from a <li>.

    Args:
      op: The <li> element describing the operation.
      filename: Name of the source file, recorded in the entry metadata.
      i: Position of the operation in the document, recorded in the entry
        metadata and checked against the number found in the element id.
      account: The account onto which to post the amount.
      flag: The transaction flag.
    Returns:
      A data.Transaction with a single posting in EUR and no payee.
    Raises:
      ValueError: If the element id is not of the form
        "operation-detail-<number>" or if its number is not i.
    """
    # Parse operation.
    op_id = op.get('id')
    m = re.fullmatch(r'operation-detail-(\d+)', op_id)
    if m is None:
        raise ValueError('unexpected operation id: {!r}'.format(op_id))
    op_i = int(m.group(1))
    # Checked with an explicit raise rather than an assert, so that the
    # check is kept when running python with -O.
    if op_i != i:
        raise ValueError(
            'operation id {!r} does not match position {}'.format(op_id, i))
    op_date = op.xpath("./a/div[@id='dateOperation']")[0].get('aria-label')
    op_date = parse_date(op_date)
    op_name = op.xpath(".//div[@id='libelleOperation']")[0].text
    op_name = op_name.replace("'", ' ')
    op_memo = op.xpath(".//div[@class='Operation-descriptionLine']")
    if op_memo and op_memo[0].text is not None:
        op_memo = op_memo[0].text
        op_memo = op_memo.replace("'", ' ')
        # Collapse runs of whitespace into single spaces.
        op_memo = ' '.join(op_memo.split())
    else:
        op_memo = None
    op_amount = op.xpath(".//div[@id='montant']")[0].text
    op_amount = parse_amount(op_amount)
    # Prepare beancount transaction.
    # The memo is left out of the narration when missing or empty.
    narration = ' / '.join(filter(None, [op_name, op_memo]))
    units = amount.Amount(op_amount, 'EUR')
    posting = data.Posting(account, units, None, None, None, None)
    metadata = data.new_metadata(filename, i)
    payee = None
    return data.Transaction(metadata, op_date, flag, payee, narration,
                            data.EMPTY_SET, data.EMPTY_SET, [posting])
diff --git a/importers/cragr/cragr_ofx.py b/importers/cragr/cragr_ofx.py
new file mode 100644
index 0000000..25abb30
--- /dev/null
+++ b/importers/cragr/cragr_ofx.py
@@ -0,0 +1,48 @@
+"""Importer for Crédit Agricole OFX files.
+
+Based on the OFX importer; cleans up the narration field."""
+#
+# Copyright (C) 2019 Nicolas Schodet
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+from beancount.ingest.importers import ofx
+from beancount.core import data
+import re
+
+
class Importer(ofx.Importer):
    """An importer for Crédit Agricole OFX files."""

    def extract(self, file, existing_entries=None):
        """Extract the entries, tidying up the narration of transactions.

        Narrations of the form "<name> / <memo> / OTHER" lose the trailing
        "OTHER" part. The memo is dropped when it is a lone "." and has its
        whitespace runs collapsed otherwise. Other entries are passed
        through unchanged.
        """
        cleaned = []
        for item in super().extract(file):
            if isinstance(item, data.Transaction):
                parts = re.fullmatch(r'(.*) / (.*) / OTHER', item.narration)
                if parts is not None:
                    label = parts.group(1)
                    detail = parts.group(2)
                    detail = None if detail == '.' else ' '.join(detail.split())
                    kept = [piece for piece in (label, detail) if piece]
                    item = item._replace(narration=' / '.join(kept))
            cleaned.append(item)
        return cleaned