1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
| import numpy as np import pandas as pd import matplotlib.pyplot as plt
# Seed NumPy's global generator so every run draws the same synthetic rows.
np.random.seed(42)
n = 1000  # number of synthetic records, one per calendar day

# Synthetic sales records. The random columns are drawn in a fixed order
# (product, region, sales, quantity, customer_age); keep that order, because
# reordering the draws changes which values each column gets for this seed.
data = pd.DataFrame({
    'date': pd.date_range('2024-01-01', periods=n, freq='D'),
    'product': np.random.choice(['手机', '电脑', '平板', '耳机'], n),
    'region': np.random.choice(['华东', '华南', '华北', '西部'], n),
    'sales': np.random.randint(1000, 10000, n),      # upper bound is exclusive
    'quantity': np.random.randint(1, 50, n),         # lower bound 1: never zero
    'customer_age': np.random.randint(18, 65, n),    # upper bound is exclusive
})
# Print the number of missing values found in each column.
print(data.isna().sum())

# Add the price per unit for every record (row-wise sales / quantity).
data['unit_price'] = data['sales'].div(data['quantity'])
# Per-product summary: sum / mean / count of sales plus total quantity,
# rounded to two decimals. Because 'sales' is given a list of aggregates,
# the result has two-level columns: (source column, aggregate name).
product_stats = (
    data.groupby('product')
    .agg({
        'sales': ['sum', 'mean', 'count'],
        'quantity': 'sum',
    })
    .round(2)
)
print(product_stats)
# Total sales per region, ordered from the largest total to the smallest.
region_stats = (
    data.groupby('region')['sales']
    .sum()
    .sort_values(ascending=False)
)
print(region_stats)
# The chart titles and axis labels in this script are Chinese, but
# Matplotlib's default sans-serif font has no CJK glyphs, so they would be
# drawn as empty boxes. Put common CJK-capable fonts (Windows, macOS, Linux)
# ahead of the existing sans-serif candidates; the existing entries stay at
# the end as the fallback when none of these fonts is installed.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'Arial Unicode MS',
    'PingFang SC',
    'Noto Sans CJK SC',
    'Noto Sans CJK JP',
    'WenQuanYi Micro Hei',
    *plt.rcParams['font.sans-serif'],
]
# Some CJK fonts lack the Unicode minus sign (U+2212); use the ASCII hyphen
# for negative tick labels instead.
plt.rcParams['axes.unicode_minus'] = False

# 2x2 grid of panels; `axes` is indexed as axes[row, col].
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
# Top-left panel: bar chart of total sales per product.
product_sales = data.groupby('product')['sales'].sum()
bar_ax = axes[0, 0]
bar_ax.bar(product_sales.index, product_sales.values, color='skyblue')
bar_ax.set_title('各产品销售额')
bar_ax.set_ylabel('销售额')
# Top-right panel: each region's share of total sales, with the percentage
# printed on every wedge to one decimal place.
pie_ax = axes[0, 1]
pie_ax.pie(region_stats.values, labels=region_stats.index, autopct='%1.1f%%')
pie_ax.set_title('各地区销售占比')
# Bottom-left panel: total sales per calendar month.
data['month'] = data['date'].dt.to_period('M')
monthly_sales = data.groupby('month')['sales'].sum()

trend_ax = axes[1, 0]
month_positions = list(range(len(monthly_sales)))
trend_ax.plot(month_positions, monthly_sales.values, marker='o')

# Label the x-axis with the actual months ('YYYY-MM') rather than bare
# positions; only every third month is labelled so the text does not overlap.
tick_step = 3
month_labels = list(monthly_sales.index.astype(str))
trend_ax.set_xticks(month_positions[::tick_step])
trend_ax.set_xticklabels(month_labels[::tick_step], rotation=45, ha='right')

# NOTE: if the data ends part-way through a month, the last point sums fewer
# days than the others and will look like a drop.
trend_ax.set_title('月度销售趋势')
trend_ax.set_ylabel('销售额')
# Bottom-right panel: histogram of customer ages in 20 bins.
age_ax = axes[1, 1]
age_ax.hist(data['customer_age'], bins=20, edgecolor='black', alpha=0.7)
age_ax.set_title('客户年龄分布')
age_ax.set_xlabel('年龄')
age_ax.set_ylabel('频数')
# Tidy the spacing between panels, write the figure to disk at 300 dpi, and
# only then open the interactive window, so the saved file does not depend on
# what happens to the figure once the window is closed.
plt.tight_layout()
plt.savefig('sales_analysis.png', dpi=300)
plt.show()
|