#!/home/jevans/audit_reports/okta_system_logs/.venv/bin/python
"""Export Okta users to a CSV file.

With no argument (or an argument that does not end in ``@gallaudet.edu``) the
script exports every user whose status is STAGED, DEPROVISIONED, PROVISIONED
or RECOVERY. With a ``@gallaudet.edu`` login as the first argument it exports
only that user, to a file named after the login.

Configuration is read from the environment, optionally seeded from a ``.env``
file located next to the script or in the current working directory.
"""
import csv
import os
import re
import sys

import requests

# ---------------- .env loading (KEY=VALUE; quotes supported) ----------------
_ENV_LINE_RE = re.compile(r'^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)\s*$')


def _strip_quotes(val: str) -> str:
    """Return *val* stripped of whitespace and of one pair of matching quotes."""
    val = val.strip()
    if len(val) >= 2 and (val[0] == val[-1]) and val[0] in ("'", '"'):
        return val[1:-1]
    return val


def load_env():
    """Load KEY=VALUE pairs from .env in script dir or cwd.

    Blank lines, lines starting with ``#`` and lines that do not look like
    ``KEY=VALUE`` are skipped. A variable that is already present in the
    environment is never overwritten, so real environment variables win over
    both files and the script-directory file wins over the cwd file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = [
        os.path.join(script_dir, ".env"),
        os.path.join(os.getcwd(), ".env"),
    ]
    for path in candidates:
        if not os.path.exists(path):
            continue
        with open(path, "r", encoding="utf-8") as f:
            for raw in f:
                line = raw.strip()
                if not line or line.startswith("#"):
                    continue
                m = _ENV_LINE_RE.match(line)
                if not m:
                    continue
                # setdefault keeps any value that is already set.
                os.environ.setdefault(m.group(1), _strip_quotes(m.group(2)))


load_env()

# ---------------- Config ----------------
OKTA_DOMAIN = os.getenv("OKTA_DOMAIN", "gallaudet.okta.com")
API_TOKEN = os.getenv("OKTA_API_TOKEN")
if not API_TOKEN:
    sys.stderr.write("ERROR: Missing OKTA_API_TOKEN in .env\n")
    sys.exit(1)

BASE_URL = f"https://{OKTA_DOMAIN}"
USERS_URL = f"{BASE_URL}/api/v1/users"
HEADERS = {
    "Authorization": f"SSWS {API_TOKEN}",
    "Accept": "application/json",
}
CSV_FILENAME = "okta_deprovisioned_users.csv"

# Output columns, in CSV order.
COLUMNS = [
    "firstName", "lastName", "email", "title", "organization",
    "wdEmployeeRole", "wdJobProfile", "status", "employeeStatus", "wdHireDate",
    "wdTerminated", "wdTerminationDate", "wdHasAcademicAppointment", "wdFutureHire",
    "InstructorKeepActiveTo", "wdTerminatedWorkerKeepActiveTo", "department",
    "wdJobFamilyGroup", "wdEmployeeType", "userRole", "wdIsWorkerActiveStudent",
    "created", "activated", "statusChanged", "lastLogin", "lastUpdated",
    "passwordChanged", "ADpwdLastSet", "displayName", "login", "secondEmail",
]

# Columns read from the top level of the user object; every other column is
# read from the nested "profile" object.
_USER_LEVEL_FIELDS = frozenset({
    "status", "created", "activated", "statusChanged",
    "lastLogin", "lastUpdated", "passwordChanged",
})


# ---------------- Helpers ----------------
def get_all_users():
    """Return users whose status is one of STAGED, DEPROVISIONED, PROVISIONED,
    or RECOVERY.

    Handles Okta Link header pagination by following the ``rel="next"`` link
    until none is returned.

    Returns:
        A list of user objects as decoded from the JSON responses. If a
        request fails (network error or non-200 status) the error is printed
        and the users collected so far are returned, so the list may be
        partial.
    """
    users = []
    url = USERS_URL
    params = {
        "limit": 200,
        "filter": 'status eq "STAGED" or status eq "DEPROVISIONED" or status eq "PROVISIONED" or status eq "RECOVERY"',
    }
    while url:
        try:
            resp = requests.get(url, headers=HEADERS, params=params, timeout=15)
        except requests.RequestException as exc:
            # Timeouts/connection errors follow the same path as a bad status.
            print(f"Failed to fetch users: {exc}")
            break
        if resp.status_code != 200:
            print(f"Failed to fetch users: {resp.status_code}")
            print(resp.text)
            break
        data = resp.json()
        if isinstance(data, list):
            users.extend(data)
        # requests parses the Link header(s) into a dict keyed by rel.
        url = resp.links.get("next", {}).get("url")
        # The next-page URL already carries its own query string.
        params = None
    return users


def get_user_by_login(user_login: str):
    """Return a list with the matching user (or empty list if not found).

    Args:
        user_login: Value compared against ``profile.login`` in the filter.

    Returns:
        The decoded JSON list of matches, or ``[]`` when the request fails or
        nothing matches (a message is printed in either case).
    """
    params = {"filter": f'profile.login eq "{user_login}"'}
    try:
        resp = requests.get(USERS_URL, headers=HEADERS, params=params, timeout=15)
    except requests.RequestException as exc:
        print(f"Failed to fetch user: {exc}")
        return []
    if resp.status_code != 200:
        print(f"Failed to fetch user: {resp.status_code}")
        print(resp.text)
        return []
    users = resp.json()
    if users:
        return users
    print(f"No user found with login: {user_login}")
    return []


def format_user(user: dict) -> dict:
    """Flatten user object into the CSV field set.

    Pull top-level lifecycle fields from user, everything else from profile.
    Missing fields become empty strings.
    """
    # "profile" may be absent or null; treat both as an empty mapping.
    profile = user.get("profile", {}) or {}
    row = {}
    for field in COLUMNS:
        source = user if field in _USER_LEVEL_FIELDS else profile
        row[field] = source.get(field, "")
    return row


def save_to_csv(users: list, filename: str):
    """Write *users* to *filename* as CSV with the COLUMNS header.

    Only users with a non-empty ``title`` are written; the header row is
    written even when no user qualifies.
    """
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=COLUMNS, extrasaction="ignore")
        writer.writeheader()
        for user in users:
            row = format_user(user)
            # Only write users with a non-empty "title".
            title = row.get("title")
            if title and str(title).strip():
                writer.writerow(row)
    print(f"User data saved to {filename}")


# ---------------- Main ----------------
def main():
    """Export a single user when given a @gallaudet.edu login, else all users."""
    if len(sys.argv) > 1 and sys.argv[1].endswith("@gallaudet.edu"):
        user_login = sys.argv[1]
        users = get_user_by_login(user_login)
        out_file = f"okta_user_{user_login.replace('@', '_at_').replace('.', '_')}.csv"
        save_to_csv(users, out_file)
    else:
        users = get_all_users()
        save_to_csv(users, CSV_FILENAME)


if __name__ == "__main__":
    main()