Data Summary
[ ]:
import pandas as pd
import datetime
from data_describe import data_summary
from datetime import datetime
[2]:
# Load the Boston housing data into a DataFrame with a trailing 'target' column.
#
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# import below raises ImportError on current installs. In that case the same
# table is rebuilt from the original data file using pandas only.
BOSTON_COLUMNS = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                  'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'target']

try:
    from sklearn.datasets import load_boston
    data = load_boston()
    df = pd.DataFrame(data.data, columns=list(data.feature_names))
    df['target'] = data.target
except ImportError:
    # NOTE: `data` is not defined on this path; the cells visible in this
    # notebook only read `df`.
    # The raw file has a 22-line preamble, then every record is split over two
    # physical lines: 11 values on the first, 3 (B, LSTAT, target) on the second.
    raw = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston",
                      sep=r"\s+", skiprows=22, header=None)
    first_halves = raw.iloc[::2].reset_index(drop=True)
    second_halves = raw.iloc[1::2, :3].reset_index(drop=True)
    df = pd.concat([first_halves, second_halves], axis=1)
    df.columns = BOSTON_COLUMNS
df.head(1)
[2]:
| | CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT | target |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.00632 | 18.0 | 2.31 | 0.0 | 0.538 | 6.575 | 65.2 | 4.09 | 1.0 | 296.0 | 15.3 | 396.9 | 4.98 | 24.0 |
[3]:
# Recast a few columns so the summary has a mix of dtypes to report
# (string labels, bool, integer and datetime next to the float columns).
# NOTE: AGE is overwritten with string labels, so running this cell a second
# time on the already-converted frame raises on the numeric comparison.
age_is_young = df['AGE'].lt(29)
df['AGE'] = age_is_young.map({True: "young", False: "old"})
# Both labels are non-empty strings, so this cast is True for every row.
df["AgeFlag"] = df['AGE'].astype(bool)
df['ZN'] = df['ZN'].astype(int)
# A single timestamp, broadcast to every row of the new column.
demo_timestamp = datetime.strptime('1/1/2008 1:30 PM', '%m/%d/%Y %I:%M %p')
df['Date'] = demo_timestamp
[4]:
# Summarise every column of the modified frame. The rendered table has one row
# per statistic: data type, mean, standard deviation, median, min, max,
# number of zeros, number of nulls, and % share of the most frequent value.
data_summary(df)
[4]:
| | CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT | target | AgeFlag | Date |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Data Type | float64 | int32 | float64 | float64 | float64 | float64 | object | float64 | float64 | float64 | float64 | float64 | float64 | float64 | bool | datetime64[ns] |
Mean | 3.61352 | 11.3478 | 11.1368 | 0.06917 | 0.554695 | 6.28463 | | 3.79504 | 9.54941 | 408.237 | 18.4555 | 356.674 | 12.6531 | 22.5328 | 1 | 2008-01-01 13:30:00 |
Standard Deviation | 8.60155 | 23.3106 | 6.86035 | 0.253994 | 0.115878 | 0.702617 | | 2.10571 | 8.70726 | 168.537 | 2.16495 | 91.2949 | 7.14106 | 9.1971 | 0 | |
Median | 0.25651 | 0 | 9.69 | 0 | 0.538 | 6.2085 | | 3.20745 | 5 | 330 | 19.05 | 391.44 | 11.36 | 21.2 | 1 | |
Min | 0.00632 | 0 | 0.46 | 0 | 0.385 | 3.561 | | 1.1296 | 1 | 187 | 12.6 | 0.32 | 1.73 | 5 | | |
Max | 88.9762 | 100 | 27.74 | 1 | 0.871 | 8.78 | | 12.1265 | 24 | 711 | 22 | 396.9 | 37.97 | 50 | | |
# Zeros | 0 | 372 | 0 | 471 | 0 | 0 | | 0 | 0 | 0 | 0 | 0 | 0 | 0 | | |
# Nulls | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
% Most Frequent Value | 0.4 | 73.52 | 26.09 | 93.08 | 4.55 | 0.59 | 88.14 | 0.99 | 26.09 | 26.09 | 27.67 | 23.91 | 0.59 | 3.16 | 100 | 100 |