{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Distributions" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import data_describe as dd\n", "\n", "# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,\n", "# so this notebook requires scikit-learn < 1.2 to run.\n", "from sklearn.datasets import load_boston" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Build a DataFrame with one column per feature plus a 'target' column\n", "data = load_boston()\n", "df = pd.DataFrame(data.data, columns=list(data.feature_names))\n", "df['target'] = data.target\n", "\n", "# Create categorical (bin) features to demonstrate count plots\n", "df['AGE'] = df['AGE'].map(lambda x: \"young\" if x < 29 else \"old\")\n", "\n", "# Compute the median once, before binning, rather than once per row inside the lambda\n", "crim_median = df['CRIM'].median()\n", "df['CRIM'] = df['CRIM'].map(lambda x: \"low\" if x < crim_median else \"high\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | CRIM | \n", "ZN | \n", "INDUS | \n", "CHAS | \n", "NOX | \n", "RM | \n", "AGE | \n", "DIS | \n", "RAD | \n", "TAX | \n", "PTRATIO | \n", "B | \n", "LSTAT | \n", "target | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "low | \n", "18.0 | \n", "2.31 | \n", "0.0 | \n", "0.538 | \n", "6.575 | \n", "old | \n", "4.0900 | \n", "1.0 | \n", "296.0 | \n", "15.3 | \n", "396.9 | \n", "4.98 | \n", "24.0 | \n", "
1 | \n", "low | \n", "0.0 | \n", "7.07 | \n", "0.0 | \n", "0.469 | \n", "6.421 | \n", "old | \n", "4.9671 | \n", "2.0 | \n", "242.0 | \n", "17.8 | \n", "396.9 | \n", "9.14 | \n", "21.6 | \n", "