Test Data Generation Recipe
Implementation Reference
Source Files:
- packages/core/src/operations/file.ts - File operations
- packages/core/src/utils/data.ts - Data utilities
- packages/core/examples/data-generation.ts - Generation examples
Key Functions:
- fs.writeFile() - Write generated data to files
- JSON.stringify() - Serialize data structures
- CSV.generate() - CSV data generation
Overview
This recipe demonstrates how to generate realistic test data for development and testing using Faker.js, custom generators, and data transformation utilities with Xec.
Basic Data Generation
User Data Generation
// generate-users.ts
import { $ } from '@xec-sh/core';
import { faker } from '@faker-js/faker';
import { writeFile } from 'fs/promises';
/**
 * A generated user account record.
 */
interface User {
  /** Unique identifier of the user. */
  id: string;
  /** Login handle. */
  username: string;
  /** Contact e-mail address. */
  email: string;
  firstName: string;
  lastName: string;
  /** Avatar image reference, stored as a string. */
  avatar: string;
  birthDate: Date;
  /** When the account was registered. */
  registeredAt: Date;
  /** Whether the account is currently active. */
  isActive: boolean;
  /** Permission level of the account. */
  role: 'admin' | 'moderator' | 'user';
  /** Per-user settings. */
  preferences: {
    theme: 'dark' | 'light';
    notifications: boolean;
    /** Language code such as `en`. */
    language: string;
  };
}
/**
 * Builds one fake user. The username and e-mail address are derived from the
 * same randomly chosen first and last name so the record is self-consistent.
 *
 * @returns A fully populated `User`.
 */
function generateUser(): User {
  const firstName = faker.person.firstName();
  const lastName = faker.person.lastName();
  const nameParts = { firstName, lastName };

  // Values are drawn in field order so a seeded Faker instance keeps
  // producing the same record.
  const id = faker.string.uuid();
  const username = faker.internet.userName(nameParts);
  const email = faker.internet.email(nameParts);
  const avatar = faker.image.avatar();
  const birthDate = faker.date.birthdate({ min: 18, max: 65, mode: 'age' });
  const registeredAt = faker.date.past({ years: 2 });
  const isActive = faker.datatype.boolean({ probability: 0.9 });
  const role: User['role'] = faker.helpers.arrayElement(['admin', 'user', 'moderator']);
  const theme: User['preferences']['theme'] = faker.helpers.arrayElement(['light', 'dark']);
  const notifications = faker.datatype.boolean({ probability: 0.7 });
  const language = faker.helpers.arrayElement(['en', 'es', 'fr', 'de', 'ja']);

  return {
    id,
    username,
    email,
    firstName,
    lastName,
    avatar,
    birthDate,
    registeredAt,
    isActive,
    role,
    preferences: { theme, notifications, language }
  };
}
/**
 * Formats one value as a CSV cell. A value containing a comma, double quote,
 * or line break is wrapped in double quotes with embedded quotes doubled, so
 * it cannot shift the columns of its row.
 */
function toCsvCell(value: string | boolean): string {
  const text = String(value);
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}

/**
 * Generates fake users and writes them to `users.json` (pretty-printed JSON)
 * and `users.csv` in the current working directory.
 *
 * When at least one user is generated, the first one is forced to be an
 * active admin so the data set always contains an admin account.
 *
 * @param count - Number of users to generate (default 100). Zero or a
 *   negative number yields empty output files instead of an error.
 * @returns The generated users.
 */
async function generateUsers(count: number = 100): Promise<User[]> {
  console.log(`Generating ${count} users...`);
  const users: User[] = [];
  for (let i = 0; i < count; i++) {
    users.push(generateUser());
  }

  // Ensure at least one admin. Guarded because users[0] does not exist for
  // an empty batch and assigning to it would throw.
  const firstUser = users[0];
  if (firstUser) {
    firstUser.role = 'admin';
    firstUser.isActive = true;
  }

  // Save as JSON
  await writeFile('users.json', JSON.stringify(users, null, 2));
  console.log(`✅ Generated ${users.length} users in users.json`);

  // Save as CSV, quoting any cell that contains a delimiter.
  const header = 'id,username,email,firstName,lastName,role,isActive';
  const rows = users.map(u =>
    [u.id, u.username, u.email, u.firstName, u.lastName, u.role, u.isActive]
      .map(toCsvCell)
      .join(',')
  );
  await writeFile('users.csv', [header, ...rows].join('\n'));
  console.log(`✅ Generated users.csv`);

  return users;
}
// Script entry point: generate 100 users. A failure is logged and reflected
// in the process exit status so shells and CI do not treat it as success.
generateUsers(100).catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Product Data Generation
// generate-products.ts
import { $ } from '@xec-sh/core';
import { faker } from '@faker-js/faker';
import { writeFile } from 'fs/promises';
/**
 * A generated catalogue product record.
 */
interface Product {
  /** Unique identifier of the product. */
  id: string;
  /** Stock-keeping unit code. */
  sku: string;
  name: string;
  description: string;
  /** Top-level category name. */
  category: string;
  /** Subcategory within `category`. */
  subcategory: string;
  /** Selling price. */
  price: number;
  /** Acquisition cost. */
  cost: number;
  /** Units in stock. */
  stock: number;
  /** Image references, stored as strings. */
  images: Array<string>;
  tags: Array<string>;
  /** Free-form specification name/value pairs. */
  specifications: { [key: string]: string };
  /** Aggregate customer rating. */
  ratings: {
    average: number;
    count: number;
  };
  createdAt: Date;
  updatedAt: Date;
}
/**
 * Product taxonomy: each top-level category name maps to the list of
 * subcategories that may be chosen for it.
 */
const categories = {
  Electronics: ['Smartphones', 'Laptops', 'Tablets', 'Accessories'],
  Clothing: ['Men', 'Women', 'Kids', 'Shoes'],
  Home: ['Furniture', 'Decor', 'Kitchen', 'Garden'],
  Books: ['Fiction', 'Non-Fiction', 'Technical', 'Comics'],
  Sports: ['Equipment', 'Apparel', 'Footwear', 'Accessories']
};
/**
 * Builds one fake product with a category/subcategory pair taken from
 * `categories`, a price derived from cost times a random margin, and
 * timestamps where `updatedAt` never precedes `createdAt`.
 *
 * @returns A fully populated `Product`.
 */
function generateProduct(): Product {
  const category = faker.helpers.objectKey(categories);
  const subcategory = faker.helpers.arrayElement(categories[category]);
  // NOTE(review): the `precision` option of faker.number.float appears to be
  // deprecated/removed in newer Faker releases in favour of `multipleOf` /
  // `fractionDigits` — confirm against the installed Faker version.
  const cost = faker.number.float({ min: 10, max: 500, precision: 0.01 });
  const margin = faker.number.float({ min: 1.2, max: 2.5, precision: 0.01 });

  // The two dates are drawn independently, so the "recent" one can fall
  // before the creation date when the product was created within the last
  // 30 days. Clamp it so the record stays chronologically consistent.
  const createdAt = faker.date.past({ years: 1 });
  const recentChange = faker.date.recent({ days: 30 });
  const updatedAt =
    recentChange.getTime() < createdAt.getTime()
      ? new Date(createdAt.getTime())
      : recentChange;

  return {
    id: faker.string.uuid(),
    sku: faker.string.alphanumeric(8).toUpperCase(),
    name: faker.commerce.productName(),
    description: faker.commerce.productDescription(),
    category,
    subcategory,
    // Round to cents; cost * margin generally has more than two decimals.
    price: parseFloat((cost * margin).toFixed(2)),
    cost,
    stock: faker.number.int({ min: 0, max: 1000 }),
    images: Array.from({ length: faker.number.int({ min: 1, max: 5 }) },
      () => faker.image.url()),
    tags: faker.helpers.multiple(
      () => faker.commerce.productAdjective(),
      { count: { min: 2, max: 5 } }
    ),
    specifications: {
      weight: `${faker.number.float({ min: 0.1, max: 10, precision: 0.1 })} kg`,
      dimensions: `${faker.number.int({ min: 10, max: 100 })}x${faker.number.int({ min: 10, max: 100 })}x${faker.number.int({ min: 10, max: 100 })} cm`,
      material: faker.commerce.productMaterial(),
      color: faker.color.human(),
      warranty: `${faker.number.int({ min: 1, max: 3 })} years`
    },
    ratings: {
      average: faker.number.float({ min: 1, max: 5, precision: 0.1 }),
      count: faker.number.int({ min: 0, max: 500 })
    },
    createdAt,
    updatedAt
  };
}
/**
 * Generates fake products, writes them to `products.json` in the current
 * working directory, and prints how many products landed in each category.
 *
 * @param count - Number of products to generate (default 500).
 * @returns The generated products.
 */
async function generateProducts(count: number = 500) {
  console.log(`Generating ${count} products...`);

  const products = Array.from({ length: count }, () => generateProduct());
  const serialized = JSON.stringify(products, null, 2);
  await writeFile('products.json', serialized);
  console.log(`✅ Generated ${count} products in products.json`);

  // Tally products per category for the summary printout.
  const perCategory: Record<string, number> = {};
  for (const item of products) {
    perCategory[item.category] = (perCategory[item.category] || 0) + 1;
  }

  console.log('\nCategory Distribution:');
  for (const [name, total] of Object.entries(perCategory)) {
    console.log(` ${name}: ${total} products`);
  }

  return products;
}
// Script entry point: generate 500 products. A failure is logged and
// reflected in the process exit status so shells and CI notice it.
generateProducts(500).catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Relational Data Generation
Order and Transaction Data
// generate-orders.ts
import { $ } from '@xec-sh/core';
import { faker } from '@faker-js/faker';
import { writeFile, readFile } from 'fs/promises';
/**
 * A generated customer order with its line items, monetary totals,
 * addresses, and payment details.
 */
interface Order {
  /** Unique identifier of the order. */
  id: string;
  /** Human-readable order number. */
  orderNumber: string;
  /** Identifier of the user who placed the order. */
  userId: string;
  /** Fulfilment state of the order. */
  status: 'cancelled' | 'delivered' | 'pending' | 'processing' | 'shipped';
  items: Array<OrderItem>;
  /** Sum of the item totals, before tax and shipping. */
  subtotal: number;
  tax: number;
  shipping: number;
  /** Subtotal plus tax plus shipping. */
  total: number;
  shippingAddress: Address;
  billingAddress: Address;
  paymentMethod: PaymentMethod;
  createdAt: Date;
  updatedAt: Date;
}
/**
 * One line of an order: a product, how many were bought, and the amounts.
 */
interface OrderItem {
  /** Identifier of the purchased product. */
  productId: string;
  /** Number of units purchased. */
  quantity: number;
  /** Price of a single unit. */
  price: number;
  /** Amount for the whole line. */
  total: number;
}
/**
 * A postal address, used for both shipping and billing.
 */
interface Address {
  /** Street line of the address. */
  street: string;
  city: string;
  /** State or region name. */
  state: string;
  country: string;
  /** Postal or ZIP code. */
  postalCode: string;
}
/**
 * How an order was paid.
 */
interface PaymentMethod {
  /** Payment channel. */
  type: 'bank_transfer' | 'credit_card' | 'paypal';
  /** Optional last four characters of the card number. */
  last4?: string;
  /** Optional card brand name. */
  brand?: string;
}
/** Rounds a monetary amount to two decimal places. */
function roundMoney(amount: number): number {
  return parseFloat(amount.toFixed(2));
}

/** Builds one random postal address. */
function generateAddress(): Address {
  return {
    street: faker.location.streetAddress(),
    city: faker.location.city(),
    state: faker.location.state(),
    country: faker.location.country(),
    postalCode: faker.location.zipCode()
  };
}

/**
 * Generates between 100 and 500 orders that reference the users and products
 * previously written to `users.json` and `products.json`, writes them to
 * `orders.json` (newest first), and prints summary statistics.
 *
 * @returns The generated orders, sorted by `createdAt` descending.
 * @throws Error when either input file does not contain a non-empty array.
 *   Errors from reading or parsing the files propagate unchanged.
 */
async function generateOrders(): Promise<Order[]> {
  // Load existing users and products. Only the fields used below are typed.
  const users: Array<{ id: string }> = JSON.parse(await readFile('users.json', 'utf-8'));
  const products: Array<{ id: string; price: number }> = JSON.parse(
    await readFile('products.json', 'utf-8')
  );
  if (
    !Array.isArray(users) || !Array.isArray(products) ||
    users.length === 0 || products.length === 0
  ) {
    throw new Error('Please generate users and products first');
  }

  const orders: Order[] = [];
  const orderCount = faker.number.int({ min: 100, max: 500 });
  console.log(`Generating ${orderCount} orders...`);

  for (let i = 0; i < orderCount; i++) {
    const user = faker.helpers.arrayElement(users);
    const itemCount = faker.number.int({ min: 1, max: 5 });
    const items: OrderItem[] = [];
    for (let j = 0; j < itemCount; j++) {
      const product = faker.helpers.arrayElement(products);
      const quantity = faker.number.int({ min: 1, max: 3 });
      items.push({
        productId: product.id,
        quantity,
        price: product.price,
        // Round per line: price * quantity can yield float noise such as
        // 59.97000000000001, which would otherwise be written to the file.
        total: roundMoney(product.price * quantity)
      });
    }

    const subtotal = items.reduce((sum, item) => sum + item.total, 0);
    const tax = subtotal * 0.08; // 8% tax
    const shipping = subtotal > 100 ? 0 : 10; // Free shipping over $100
    const total = subtotal + tax + shipping;

    // Card details only make sense for card payments; other payment types
    // leave the optional fields unset.
    const paymentType: PaymentMethod['type'] = faker.helpers.arrayElement([
      'credit_card',
      'paypal',
      'bank_transfer'
    ]);
    const paymentMethod: PaymentMethod =
      paymentType === 'credit_card'
        ? {
            type: paymentType,
            last4: faker.finance.creditCardNumber().slice(-4),
            brand: faker.helpers.arrayElement(['Visa', 'MasterCard', 'Amex'])
          }
        : { type: paymentType };

    // The two dates are drawn independently; clamp so an order is never
    // "updated" before it was created.
    const createdAt = faker.date.past({ years: 1 });
    const recentChange = faker.date.recent({ days: 7 });
    const updatedAt =
      recentChange.getTime() < createdAt.getTime()
        ? new Date(createdAt.getTime())
        : recentChange;

    const order: Order = {
      id: faker.string.uuid(),
      orderNumber: `ORD-${faker.string.numeric(8)}`,
      userId: user.id,
      status: faker.helpers.weightedArrayElement([
        { weight: 10, value: 'pending' },
        { weight: 15, value: 'processing' },
        { weight: 20, value: 'shipped' },
        { weight: 50, value: 'delivered' },
        { weight: 5, value: 'cancelled' }
      ]),
      items,
      subtotal: roundMoney(subtotal),
      tax: roundMoney(tax),
      shipping: roundMoney(shipping),
      total: roundMoney(total),
      shippingAddress: generateAddress(),
      billingAddress: generateAddress(),
      paymentMethod,
      createdAt,
      updatedAt
    };
    orders.push(order);
  }

  // Sort by date, newest first
  orders.sort((a, b) => b.createdAt.getTime() - a.createdAt.getTime());
  await writeFile('orders.json', JSON.stringify(orders, null, 2));
  console.log(`✅ Generated ${orders.length} orders in orders.json`);

  // Generate summary statistics (revenue is summed once and reused)
  const totalRevenue = orders.reduce((sum, o) => sum + o.total, 0);
  const stats = {
    totalOrders: orders.length,
    totalRevenue,
    averageOrderValue: totalRevenue / orders.length,
    statusBreakdown: orders.reduce((acc, o) => {
      acc[o.status] = (acc[o.status] || 0) + 1;
      return acc;
    }, {} as Record<string, number>)
  };

  console.log('\nOrder Statistics:');
  console.log(` Total Orders: ${stats.totalOrders}`);
  console.log(` Total Revenue: $${stats.totalRevenue.toFixed(2)}`);
  console.log(` Average Order Value: $${stats.averageOrderValue.toFixed(2)}`);
  console.log(' Status Breakdown:');
  Object.entries(stats.statusBreakdown).forEach(([status, count]) => {
    console.log(` ${status}: ${count}`);
  });

  return orders;
}
// Script entry point: generate orders. A failure (for example missing input
// files) is logged and reflected in the process exit status.
generateOrders().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Time Series Data
Metrics and Analytics Data
// generate-metrics.ts
import { $ } from '@xec-sh/core';
import { faker } from '@faker-js/faker';
import { writeFile } from 'fs/promises';
/**
 * One sample of simulated system and traffic metrics.
 */
interface Metric {
  /** Moment the sample refers to. */
  timestamp: Date;
  /** CPU usage figure. */
  cpu: number;
  /** Memory usage figure. */
  memory: number;
  /** Disk usage figure. */
  disk: number;
  /** Network traffic counters for the sample. */
  network: {
    in: number;
    out: number;
  };
  /** Number of requests in the sample. */
  requests: number;
  /** Number of errors in the sample. */
  errors: number;
  responseTime: number;
}
/**
 * Produces one simulated metric sample per minute for the `hours` leading up
 * to the current time, oldest sample first.
 *
 * CPU and memory follow a clamped random walk. Request volume depends on the
 * local hour of each sample: 9–17 counts as business hours, with 12 and 15
 * treated as peak hours.
 *
 * @param hours - Length of the simulated window in hours (default 24).
 * @returns The samples in chronological order; empty when `hours` is zero
 *   or negative.
 */
function generateMetrics(hours: number = 24): Metric[] {
  const SECONDS_PER_SAMPLE = 60; // 1 minute intervals
  const totalSamples = hours * 60;
  const endMs = new Date().getTime();
  const clamp = (value: number, low: number, high: number): number =>
    Math.max(low, Math.min(high, value));

  const samples: Metric[] = [];
  let cpuBase = 30;
  let memoryBase = 50;

  for (let idx = 0; idx < totalSamples; idx += 1) {
    const timestamp = new Date(endMs - (totalSamples - idx) * SECONDS_PER_SAMPLE * 1000);
    const hour = timestamp.getHours();

    // Simulate daily patterns
    const inBusinessHours = hour >= 9 && hour <= 17;
    const atPeak = hour === 12 || hour === 15;

    // Drift the baselines a little each minute, keeping them in range.
    cpuBase = clamp(cpuBase + faker.number.float({ min: -2, max: 2 }), 10, 90);
    memoryBase = clamp(memoryBase + faker.number.float({ min: -1, max: 1 }), 30, 80);

    let requests: number;
    if (!inBusinessHours) {
      requests = faker.number.int({ min: 50, max: 150 });
    } else {
      requests = faker.number.int({ min: 150, max: 300 });
      if (atPeak) {
        // The peak draw replaces the business-hours value drawn just above.
        requests = faker.number.int({ min: 300, max: 500 });
      }
    }

    // Per-sample noise, drawn in the same order as the fields are emitted.
    const cpu = cpuBase + faker.number.float({ min: -5, max: 5 });
    const memory = memoryBase + faker.number.float({ min: -3, max: 3 });
    const disk = faker.number.float({ min: 40, max: 60 });
    const networkIn = faker.number.int({ min: 1000, max: 10000 });
    const networkOut = faker.number.int({ min: 5000, max: 20000 });
    const errors = faker.number.int({ min: 0, max: requests * 0.05 });
    const responseTime = faker.number.float({ min: 50, max: 500 });

    samples.push({
      timestamp,
      cpu,
      memory,
      disk,
      network: { in: networkIn, out: networkOut },
      requests,
      errors,
      responseTime
    });
  }

  return samples;
}
/**
 * Generates 24 hours of metric samples, writes them to `metrics.json` and
 * `metrics.csv` in the current working directory, and prints averages,
 * totals, and the overall error rate.
 */
async function generateMetricsData() {
  console.log('Generating metrics data...');
  const metrics = generateMetrics(24);

  // Save as JSON
  const jsonPayload = JSON.stringify(metrics, null, 2);
  await writeFile('metrics.json', jsonPayload);
  console.log(`✅ Generated ${metrics.length} metric data points`);

  // Save as CSV for analysis: a header line followed by one line per sample.
  const csvLines: string[] = [
    'timestamp,cpu,memory,disk,network_in,network_out,requests,errors,response_time'
  ];
  for (const sample of metrics) {
    const cells = [
      sample.timestamp.toISOString(),
      sample.cpu.toFixed(2),
      sample.memory.toFixed(2),
      sample.disk.toFixed(2),
      sample.network.in,
      sample.network.out,
      sample.requests,
      sample.errors,
      sample.responseTime.toFixed(2)
    ];
    csvLines.push(cells.join(','));
  }
  await writeFile('metrics.csv', csvLines.join('\n'));
  console.log('✅ Generated metrics.csv');

  // Accumulate everything the summary needs in a single pass.
  let cpuSum = 0;
  let memorySum = 0;
  let totalRequests = 0;
  let totalErrors = 0;
  for (const sample of metrics) {
    cpuSum += sample.cpu;
    memorySum += sample.memory;
    totalRequests += sample.requests;
    totalErrors += sample.errors;
  }
  const avgCpu = cpuSum / metrics.length;
  const avgMemory = memorySum / metrics.length;

  console.log('\nMetrics Summary:');
  console.log(` Average CPU: ${avgCpu.toFixed(2)}%`);
  console.log(` Average Memory: ${avgMemory.toFixed(2)}%`);
  console.log(` Total Requests: ${totalRequests}`);
  console.log(` Total Errors: ${totalErrors}`);
  console.log(` Error Rate: ${((totalErrors / totalRequests) * 100).toFixed(2)}%`);
}
// Script entry point: generate the metrics files. A failure is logged and
// reflected in the process exit status so shells and CI notice it.
generateMetricsData().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
Database Seeding
PostgreSQL Seeding
// seed-postgres.ts
import { $ } from '@xec-sh/core';
import { readFile } from 'fs/promises';
/** Maximum number of users and of products inserted by one seeding run. */
const SEED_ROW_LIMIT = 100;

/**
 * Renders a value as a single-quoted SQL string literal, doubling embedded
 * single quotes so values such as `O'Connor` neither break the statement nor
 * terminate the literal early.
 *
 * NOTE(review): assumes the server runs with `standard_conforming_strings`
 * on, so backslashes need no extra escaping — confirm for the target database.
 */
function sqlLiteral(value: unknown): string {
  return `'${String(value).replace(/'/g, "''")}'`;
}

/**
 * Pipes one SQL statement into psql inside the `postgres-dev` container.
 * `ON_ERROR_STOP` makes psql exit non-zero on a SQL error, which is what
 * allows a failed insert to be detected here.
 *
 * @returns `true` when psql exited with status 0.
 */
async function runSeedStatement(sql: string): Promise<boolean> {
  // Best effort: a failing row must not abort the whole seeding run.
  // NOTE(review): relies on the xec result exposing `exitCode` — confirm.
  const result = await $`docker exec -i postgres-dev psql -v ON_ERROR_STOP=1 -U developer -d myapp`
    .stdin(sql)
    .nothrow();
  return result.exitCode === 0;
}

/**
 * Seeds the `myapp` database in the `postgres-dev` container: creates the
 * `users` and `products` tables if missing, then inserts up to
 * `SEED_ROW_LIMIT` rows of each from `users.json` and `products.json`.
 *
 * Rows are inserted one statement at a time. A row that fails is reported
 * and skipped, and the final log lines state how many statements succeeded.
 * A statement skipped by `ON CONFLICT (id) DO NOTHING` counts as a success.
 */
async function seedPostgres() {
  const users = JSON.parse(await readFile('users.json', 'utf-8'));
  const products = JSON.parse(await readFile('products.json', 'utf-8'));
  console.log('Seeding PostgreSQL database...');

  // Create tables
  const schema = `
CREATE TABLE IF NOT EXISTS users (
id UUID PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
first_name VARCHAR(50),
last_name VARCHAR(50),
role VARCHAR(20),
is_active BOOLEAN,
created_at TIMESTAMP
);
CREATE TABLE IF NOT EXISTS products (
id UUID PRIMARY KEY,
sku VARCHAR(20) UNIQUE NOT NULL,
name VARCHAR(200) NOT NULL,
description TEXT,
category VARCHAR(50),
price DECIMAL(10, 2),
stock INTEGER,
created_at TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_users_email ON users(email);
CREATE INDEX IF NOT EXISTS idx_products_category ON products(category);
`;
  await $`docker exec -i postgres-dev psql -U developer -d myapp`.stdin(schema);
  console.log('✅ Tables created');

  // Insert users
  const userBatch = users.slice(0, SEED_ROW_LIMIT);
  let seededUsers = 0;
  for (const user of userBatch) {
    const sql = `
INSERT INTO users (id, username, email, first_name, last_name, role, is_active, created_at)
VALUES (${sqlLiteral(user.id)}, ${sqlLiteral(user.username)}, ${sqlLiteral(user.email)}, ${sqlLiteral(user.firstName)},
${sqlLiteral(user.lastName)}, ${sqlLiteral(user.role)}, ${user.isActive ? 'TRUE' : 'FALSE'}, ${sqlLiteral(user.registeredAt)})
ON CONFLICT (id) DO NOTHING;
`;
    if (await runSeedStatement(sql)) {
      seededUsers++;
    } else {
      console.warn(`⚠️ Failed to insert user ${user.username}`);
    }
  }
  console.log(`✅ Inserted ${seededUsers} of ${userBatch.length} users`);

  // Insert products
  const productBatch = products.slice(0, SEED_ROW_LIMIT);
  let seededProducts = 0;
  for (const product of productBatch) {
    const sql = `
INSERT INTO products (id, sku, name, description, category, price, stock, created_at)
VALUES (${sqlLiteral(product.id)}, ${sqlLiteral(product.sku)}, ${sqlLiteral(product.name)},
${sqlLiteral(product.description)}, ${sqlLiteral(product.category)},
${Number(product.price)}, ${Number(product.stock)}, ${sqlLiteral(product.createdAt)})
ON CONFLICT (id) DO NOTHING;
`;
    if (await runSeedStatement(sql)) {
      seededProducts++;
    } else {
      console.warn(`⚠️ Failed to insert product ${product.sku}`);
    }
  }
  console.log(`✅ Inserted ${seededProducts} of ${productBatch.length} products`);

  // Verify
  const count = await $`docker exec postgres-dev psql -U developer -d myapp -t -c "SELECT COUNT(*) FROM users"`;
  console.log(`\nDatabase seeded: ${count.stdout.trim()} users in database`);
}
// Script entry point: seed the database. A failure is logged and reflected
// in the process exit status so shells and CI do not treat it as success.
seedPostgres().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});