Generate synthetic data for Accounts Receivable
For those who would like to generate dummy/synthetic data for Accounts Receivable, use Python script below.
It can be run in Google Collab notebook if you don’t have local Python environment setup:
Edit variables - date period, # of customers and transactions, reporting date (default is today).
Run the code
Navigate to Files and download the 3 CSV files generated.
import pandas as pd import numpy as np from datetime import datetime, timedelta import random # Set random seed for reproducibility np.random.seed(42) # Configuration start_date = datetime(2023, 1, 1) end_date = datetime(2025, 3, 31) num_customers = 75 num_transactions = 10000 # Create customer segments customer_segments = ['Enterprise', 'Mid-Market', 'Small Business', 'Government'] customer_credit_terms = [30, 45, 60, 90, 180] # Create customers customers = [] for i in range(1, num_customers + 1): segment = random.choice(customer_segments) credit_terms = random.choice(customer_credit_terms) customers.append({ 'CustomerID': f'CUST{i:04d}', 'CustomerName': f'Customer {i}', 'Segment': segment, 'CreditTerms': credit_terms }) customers_df = pd.DataFrame(customers) # Create invoices with seasonal patterns invoices = [] transaction_id = 1 for _ in range(num_transactions): # Select a random customer customer = random.choice(customers) # Create random invoice date days_range = (end_date - start_date).days random_days = random.randint(0, days_range) invoice_date = start_date + timedelta(days=random_days) # Add seasonality to invoice amounts month = invoice_date.month # Higher amounts in Q4 (Oct-Dec) if month in [10, 11, 12]: base_amount = np.random.normal(10000, 3000) # Lower amounts in Q1 (Jan-Mar) elif month in [1, 2, 3]: base_amount = np.random.normal(5000, 1500) # Medium amounts rest of year else: base_amount = np.random.normal(7500, 2000) # Adjust amount based on customer segment if customer['Segment'] == 'Enterprise': amount = base_amount * 2.5 elif customer['Segment'] == 'Mid-Market': amount = base_amount * 1.5 elif customer['Segment'] == 'Government': amount = base_amount * 1.8 else: # Small Business amount = base_amount * 0.7 # Round to 2 decimal places invoice_amount = round(max(amount, 100), 2) # Minimum $100 # Calculate due date based on credit terms due_date = invoice_date + timedelta(days=customer['CreditTerms']) # Determine if paid and payment date with seasonal collection patterns is_paid = random.random() < 0.85 # 85% of invoices eventually get paid if is_paid: # Create seasonal payment behavior # December has faster payments (bonus incentives) if due_date.month == 12: days_to_pay = max(int(np.random.normal(-5, 10)), -15) # More likely to pay early # January has slower payments elif due_date.month == 1: days_to_pay = max(int(np.random.normal(15, 12)), -5) # More likely to pay late # Quarter-end months have slightly faster payments elif due_date.month in [3, 6, 9]: days_to_pay = int(np.random.normal(0, 15)) # Normal months else: days_to_pay = int(np.random.normal(5, 20)) # Adjust by customer segment if customer['Segment'] == 'Enterprise': days_to_pay += int(np.random.normal(0, 5)) elif customer['Segment'] == 'Small Business': days_to_pay += int(np.random.normal(10, 8)) elif customer['Segment'] == 'Government': days_to_pay += int(np.random.normal(15, 10)) payment_date = due_date + timedelta(days=days_to_pay) # Ensure payment date is not before invoice date if payment_date < invoice_date: payment_date = invoice_date + timedelta(days=random.randint(0, 5)) # Ensure payment date is not in the future if payment_date > datetime.now(): days_to_pay = None payment_date = None is_paid = False else: days_to_pay = None payment_date = None invoices.append({ 'TransactionID': f'INV{transaction_id:06d}', 'CustomerID': customer['CustomerID'], 'InvoiceDate': invoice_date, 'DueDate': due_date, 'InvoiceAmount': invoice_amount, 'IsPaid': is_paid, 'PaymentDate': payment_date, 'DaysToPayment': days_to_pay }) transaction_id += 1 # Create DataFrame invoices_df = pd.DataFrame(invoices) # Calculate aging buckets using current date for reporting reporting_date = datetime.now() # Use current date for aging snapshot def calculate_aging(row): if row['IsPaid']: if row['PaymentDate'] <= reporting_date: return 'Paid' days_outstanding = (reporting_date - row['DueDate']).days if days_outstanding <= 0: return 'Current' elif days_outstanding <= 30: return '1-30 Days' elif days_outstanding <= 60: return '31-60 Days' elif days_outstanding <= 90: return '61-90 Days' else: return '90+ Days' invoices_df['AgingBucket'] = invoices_df.apply(calculate_aging, axis=1) # Create monthly sales summary for forecasting invoices_df['YearMonth'] = invoices_df['InvoiceDate'].dt.strftime('%Y-%m') monthly_sales = invoices_df.groupby('YearMonth')['InvoiceAmount'].sum().reset_index() monthly_sales = monthly_sales.sort_values('YearMonth') # Save to CSV invoices_df.to_csv('synthetic_ar_transactions.csv', index=False) customers_df.to_csv('synthetic_customers.csv', index=False) monthly_sales.to_csv('synthetic_monthly_sales.csv', index=False) print("Generated synthetic AR dataset with:") print(f"- {len(customers_df)} customers") print(f"- {len(invoices_df)} invoice transactions") print(f"- Data spanning from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}") print("Files saved: synthetic_ar_transactions.csv, synthetic_customers.csv, synthetic_monthly_sales.csv")