#!/usr/bin/env python3 """Fetch OFAC SDN list (CSV format) and normalize. Public endpoint: https://www.treasury.gov/ofac/downloads/sdn.csv Format reference: https://ofac.treasury.gov/specially-designated-nationals-and-blocked-persons-list-sdn-human-readable-lists The SDN CSV uses a specific 12-column format with no header row: ent_num, sdn_name, sdn_type, program, title, call_sign, vess_type, tonnage, grt, vess_flag, vess_owner, remarks Address and AKA records live in separate files. We fetch all three and join. """ from __future__ import annotations import argparse import csv import io import sys from collections import defaultdict from pathlib import Path sys.path.insert(0, str(Path(__file__).parent)) from _http import get # noqa: E402 SDN_URL = "https://www.treasury.gov/ofac/downloads/sdn.csv" ADD_URL = "https://www.treasury.gov/ofac/downloads/add.csv" ALT_URL = "https://www.treasury.gov/ofac/downloads/alt.csv" SDN_COLS = [ "ent_num", "sdn_name", "sdn_type", "program", "title", "call_sign", "vess_type", "tonnage", "grt", "vess_flag", "vess_owner", "remarks", ] ADD_COLS = [ "ent_num", "add_num", "address", "city_state_zip", "country", "add_remarks", ] ALT_COLS = [ "ent_num", "alt_num", "alt_type", "alt_name", "alt_remarks", ] COLUMNS = [ "entity_id", "name", "entity_type", "program_list", "title", "nationalities", "aka_list", "addresses", "dob", "pob", "remarks", "last_updated", ] _TYPE_MAP = { "individual": "individual", "entity": "entity", "vessel": "vessel", "aircraft": "aircraft", } def _read_csv(url: str, columns: list[str]) -> list[dict[str, str]]: body = get(url, timeout=60).decode("latin-1", errors="replace") reader = csv.reader(io.StringIO(body)) out = [] for row in reader: if not row: continue # Pad/truncate to expected width. row = row[: len(columns)] + [""] * (len(columns) - len(row)) out.append(dict(zip(columns, row))) return out def _strip_quotes(s: str) -> str: s = s.strip() if s.startswith('"') and s.endswith('"'): s = s[1:-1] if s == "-0-": return "" return s def fetch( program: str | None, entity_type: str | None, out_path: str, ) -> int: sdn = _read_csv(SDN_URL, SDN_COLS) addresses = _read_csv(ADD_URL, ADD_COLS) akas = _read_csv(ALT_URL, ALT_COLS) addr_by_ent: dict[str, list[str]] = defaultdict(list) for a in addresses: ent = _strip_quotes(a["ent_num"]) parts = [ _strip_quotes(a[c]) for c in ("address", "city_state_zip", "country") if _strip_quotes(a[c]) ] if parts: addr_by_ent[ent].append(", ".join(parts)) aka_by_ent: dict[str, list[str]] = defaultdict(list) for k in akas: ent = _strip_quotes(k["ent_num"]) name = _strip_quotes(k["alt_name"]) if name: aka_by_ent[ent].append(name) rows: list[dict[str, str]] = [] for r in sdn: ent_num = _strip_quotes(r["ent_num"]) if not ent_num: continue sdn_type = _TYPE_MAP.get(_strip_quotes(r["sdn_type"]).lower(), _strip_quotes(r["sdn_type"])) if entity_type and sdn_type != entity_type: continue progs = _strip_quotes(r["program"]) if program and program.upper() not in progs.upper().split(";"): continue remarks = _strip_quotes(r["remarks"]) # DOB / POB are commonly embedded in remarks for individuals. dob = "" pob = "" if sdn_type == "individual" and remarks: for chunk in remarks.split(";"): ch = chunk.strip() if ch.upper().startswith("DOB"): dob = ch.split(maxsplit=1)[1] if " " in ch else "" elif ch.upper().startswith("POB"): pob = ch.split(maxsplit=1)[1] if " " in ch else "" rows.append( { "entity_id": ent_num, "name": _strip_quotes(r["sdn_name"]), "entity_type": sdn_type, "program_list": "; ".join(p.strip() for p in progs.split(";") if p.strip()), "title": _strip_quotes(r["title"]), "nationalities": "", # not in this CSV; available in XML format "aka_list": "; ".join(aka_by_ent.get(ent_num, [])), "addresses": "; ".join(addr_by_ent.get(ent_num, [])), "dob": dob, "pob": pob, "remarks": remarks, "last_updated": "", } ) Path(out_path).parent.mkdir(parents=True, exist_ok=True) with open(out_path, "w", newline="", encoding="utf-8") as fh: w = csv.DictWriter(fh, fieldnames=COLUMNS) w.writeheader() w.writerows(rows) return len(rows) def main() -> int: p = argparse.ArgumentParser(description=__doc__) p.add_argument("--program", help="Filter to specific sanctions program (e.g. SDGT, IRAN)") p.add_argument( "--entity-type", choices=["individual", "entity", "vessel", "aircraft"], help="Filter to a specific entity type", ) p.add_argument("--out", required=True) a = p.parse_args() n = fetch(program=a.program, entity_type=a.entity_type, out_path=a.out) print(f"Wrote {n} OFAC SDN rows to {a.out}") return 0 if __name__ == "__main__": raise SystemExit(main())