9/10/2021 Wholesale_Customer_Data.pdf

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's default plot styling; color_codes=True is passed so that the
# matplotlib shorthand color codes ("b", "g", "r", ...) map to the seaborn palette.
sns.set(color_codes=True)
%matplotlib inline
import scipy.stats as stats
from scipy.stats import ttest_1samp, ttest_ind, mannwhitneyu, levene, shapiro, wilcoxon
from statsmodels.stats.power import ttest_power
# Load the wholesale customers CSV into a DataFrame. The path is relative, so the
# file must be in the notebook's current working directory.
mydata = pd.read_csv('Wholesale_customers_data.csv')

Buyer/Spender Channel Region Fresh Milk Grocery Frozen Detergents_Paper Delicatessen
# Check data consistency and gather initial basic information. No null values observed.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 440 entries, 0 to 439
Buyer/Spender 440 non-null int64
Channel 440 non-null object
Fresh 440 non-null int64
Milk 440 non-null int64
Grocery 440 non-null int64
Frozen 440 non-null int64
Detergents_Paper 440 non-null int64
Delicatessen 440 non-null int64
Buyer/Spender Channel Region Fresh Milk Grocery Frozen Detergents_Paper Delica
count 440.000000 440 440 440.000000 440.000000 440.000000 440.000000 440.000000 440.

unique NaN 2 3 NaN NaN NaN NaN NaN NaN
mean 220.500000 NaN NaN 12000.297727 5796.265909 7951.277273 3071.931818 2881.493182 1524
std 127.161315 NaN NaN 12647.328865 7380.377175 9503.162829 4854.673333 4767.854448 2820
min 1.000000 NaN NaN 3.000000 55.000000 3.000000 25.000000 3.000000 3

# Total spend per customer: add a new column holding the row-wise sum of the six
# product-category spend columns (Fresh, Milk, Grocery, Frozen, Detergents_Paper,
# Delicatessen).
mydata['Sum of All Spends'] = (
    mydata['Fresh']
    + mydata['Milk']
    + mydata['Grocery']
    + mydata['Frozen']
    + mydata['Detergents_Paper']
    + mydata['Delicatessen']
)
1.1. Use methods of descriptive statistics to summarize data.

Which Region and which Channel seem to spend more?

