{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "5c805c2f-73c8-4308-916c-e7c56b3b5c1f", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import missingno as msno\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from scipy.stats import mannwhitneyu\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.ensemble import GradientBoostingClassifier" ] }, { "cell_type": "code", "execution_count": 2, "id": "912d67c8-0d78-476d-99ce-13964af3e8ef", "metadata": {}, "outputs": [], "source": [ "def check(df):\n", " # Проверка данных на валидность\n", " for column in df.columns:\n", " print(f\"{column}: {X[column].sum()}; \")" ] }, { "cell_type": "code", "execution_count": 3, "id": "069dbbdb-42fd-4011-8cc2-cdd7053d1a57", "metadata": {}, "outputs": [], "source": [ "data_dir = \"/home/user/dev/dataScience/titanic/data/\"\n", "df = pd.read_csv (data_dir + 'gender_submission.csv')\n", "test = pd.read_csv (data_dir + 'test.csv')\n", "train = pd.read_csv (data_dir + 'train.csv')" ] }, { "cell_type": "code", "execution_count": 4, "id": "fa4461d6-b183-4245-8d77-3b4fca232d84", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 12 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 PassengerId 891 non-null int64 \n", " 1 Survived 891 non-null int64 \n", " 2 Pclass 891 non-null int64 \n", " 3 Name 891 non-null object \n", " 4 Sex 891 non-null object \n", " 5 Age 714 non-null float64\n", " 6 SibSp 891 non-null int64 \n", " 7 Parch 891 non-null int64 \n", " 8 Ticket 891 non-null object \n", " 9 Fare 891 non-null float64\n", " 10 Cabin 204 non-null object \n", " 11 Embarked 889 non-null object \n", "dtypes: float64(2), int64(5), object(5)\n", "memory usage: 83.7+ KB\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassAgeSibSpParchFare
count891.000000891.000000891.000000714.000000891.000000891.000000891.000000
mean446.0000000.3838382.30864229.6991180.5230080.38159432.204208
std257.3538420.4865920.83607114.5264971.1027430.80605749.693429
min1.0000000.0000001.0000000.4200000.0000000.0000000.000000
25%223.5000000.0000002.00000020.1250000.0000000.0000007.910400
50%446.0000000.0000003.00000028.0000000.0000000.00000014.454200
75%668.5000001.0000003.00000038.0000001.0000000.00000031.000000
max891.0000001.0000003.00000080.0000008.0000006.000000512.329200
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass Age SibSp \\\n", "count 891.000000 891.000000 891.000000 714.000000 891.000000 \n", "mean 446.000000 0.383838 2.308642 29.699118 0.523008 \n", "std 257.353842 0.486592 0.836071 14.526497 1.102743 \n", "min 1.000000 0.000000 1.000000 0.420000 0.000000 \n", "25% 223.500000 0.000000 2.000000 20.125000 0.000000 \n", "50% 446.000000 0.000000 3.000000 28.000000 0.000000 \n", "75% 668.500000 1.000000 3.000000 38.000000 1.000000 \n", "max 891.000000 1.000000 3.000000 80.000000 8.000000 \n", "\n", " Parch Fare \n", "count 891.000000 891.000000 \n", "mean 0.381594 32.204208 \n", "std 0.806057 49.693429 \n", "min 0.000000 0.000000 \n", "25% 0.000000 7.910400 \n", "50% 0.000000 14.454200 \n", "75% 0.000000 31.000000 \n", "max 6.000000 512.329200 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Анализ датасета\n", "\n", "#Первые несколько строк\n", "train.head()\n", "# Информация о датасете\n", "train.info()\n", "# Описательная статистика\n", "train.describe()" ] }, { "cell_type": "code", "execution_count": 5, "id": "e0c46663-f8e8-4cc9-965b-b6f25e8f9c53", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Построение гистограммы выживших и погибших\n", "sns.countplot(x='Survived', data=train)\n", "plt.title('Выживаемость на Титанике')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 6, "id": "16e5e321-e738-4606-b0dd-1bf619fab970", "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHFCAYAAAAOmtghAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3k0lEQVR4nO3de1wWZf7/8fctRxO5TQTEDRE7GEZmghmeTWM1dVNrPeUpsSK1NNZM176m2HexdE1bFdM8fKk8ZKZtrifWPFB2UBKr1exkQoaKVKCmNwLz+8Pl/nl3gwKSg+Pr+XjM4+Fc93XNfObmbu/3XjNzj80wDEMAAAAWUcPsAgAAAKoS4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QbXvGXLlslms7ksgYGB6tixo9avX292eQCACiLcAP+1dOlSffjhh9q1a5cWLlwoDw8P9ezZU++++67ZpQEAKsDT7AKA6iIyMlLR0dHO9a5du+r666/XihUr1LNnTxMrAwBUBDM3QBl8fX3l7e0tLy8vZ9v3338vm82mmTNnljluypQpstlszvXMzEyFhYXpvvvu07lz55ztHTt2VMeOHZ3r586d03333aewsDBlZWU52202m6ZMmeKyj2nTpslms7mM3759u8upNR8fH914442aPHmyioqKnP1ycnI0cuRINW3aVH5+fgoKCtI999yjtLQ0t2MpKCjQ888/r1tvvVU+Pj4KDAzUww8/rJycHJd+jRo1ks1m06hRo9y20alTJ9lsNvXo0cOlPTMzU4MGDVJQUJB8fHwUERGhv//97youLnbp53A4lJiYqIiICPn6+iogIECdOnXSrl27nO/PxZaS96jk/XnrrbfcaryUkr/7pfZR0WMrTaNGjTRs2DDnelFRkQYNGqTatWvr/fffL3ddF5o6dapatWqlunXryt/fXy1atNDixYtV2nOTly9frpiYGPn5+cnPz0/NmzfX4sWLXfps2rRJnTt3lt1u13XXXaeIiAglJSU5X9+zZ4/69++vRo0aqWbNmmrUqJEGDBigw4cPu2yn5JSwl5eXfvzxR5fXduzY4TyWPXv2XPJ9Ay7EzA3wX0VFRSosLJRhGDp27JhmzJih06dPa+DAgZe13YYNG+rf//632rVrp0GDBmnFihWqUcP1/1cUFxfroYce0qeffqq0tDSFhoaWub3Dhw8rKSlJHh4epb4+b948tWjRQmfOnNHq1as1bdo0+fn5afz48ZKkn376SZL03HPPqX79+jp16pTWrl2rjh07auvWrc4v6uLiYt1///1KS0vT+PHj1bp1ax0+fFjPPfecOnbsqD179qhmzZrO/datW1cpKSlKSkqSv7+/JOk///mPPvjgA+d6iZycHLVu3VoFBQWaNm2aGjVqpPXr12vcuHH69ttvNX/+fElSYWGhunXrprS0NI0dO1b33HOPCgsL9dFHHykzM1OtW7fWhx9+6Nzuv/71Lz3//PN6++23FRISIklu+74cTzzxhNvn4aGHHqrUsZVHcXGxhg4dqnfeeUcbN25U27ZtS+337LPPqnv37pKkV1991S2MfP/993rsscfUsGFDSdJHH32kJ554
QkeOHNHkyZOd/SZPnqxp06apT58++stf/iK73a4vvvjCJZQsXrxYjzzyiDp06KAFCxYoKChIX331lb744guX/TVp0kT9+/dX3bp1lZ2dreTkZLVs2VL79+9XvXr1XOrz9/fXggULlJiY6GybO3euAgIClJubW+73C3AygGvc0qVLDUlui4+PjzF//nyXvocOHTIkGTNmzChze88995xR2n9a+/btM66//nojLi7OKC4uNjp06GB06NDBKC4uNoYPH25cf/31xr59+9zGSTKee+4553qvXr2MO++802jXrp3RoUMHZ/u2bdsMSca2bdtcxtepU8fo27dvmfUWFhYa586dMzp37mz07t3b2b5ixQpDkrFmzRqX/rt37zYkubw3YWFhRvfu3Y2mTZsac+bMcbbHx8cbffv2db5eYsKECYYk4+OPP3bZ9uOPP27YbDbj4MGDhmEYRkpKiiHJWLRoUZn1X6jkb3no0CG310ren9WrV5drWxe62N/9tttuc/k7lPfYyhIWFmYMHTrUKCoqMgYNGmT4+fkZaWlppfY9ePCgIcl47bXXnG1lff5KFBUVGefOnTMSExONgIAAo7i42DAMw/juu+8MDw8P46GHHipz7MmTJw1/f3+jbdu2znHlUVhYaJw6dcqoVauWy+ej5O/19NNPG8HBwYbD4TAMwzB++OEHw8vLy3j66acNScbu3bvLvS/AMAyD01LAf6WkpGj37t3avXu3Nm7cqKFDh2rUqFGaO3euW9/i4mIVFhaW6zRDiWbNmmn69OlavHix/vKXvzjbExIStGTJEk2fPl3NmjW76DY2bdqkd955R/PmzXOb/SlRMgN18uRJLV68WL/88os6d+7s0mfBggVq0aKFfH195enpKS8vL23dulUHDhxw9lm/fr3q1Kmjnj17qrCw0Lk0b95c9evX1/bt2932PXr0aM2bN0+GYSgvL0+vvfZaqaeq3nvvPTVt2lR33XWXS/uwYcNkGIbee+89SdLGjRvl6+ur4cOHX/R9qYjK/O0qorzHdqkahw0bptdff10vvPBCmTM2Z86ckXT+FOqlaurSpYvsdrs8PDzk5eWlyZMnKzc3V8ePH5ckpaamqqioqNS/V4ldu3YpPz9fI0eOdDv1daFTp07pmWee0U033SRPT095enrKz89Pp0+fdvmMlejTp488PT21evVqSVJycrLatm2rpk2bXvS4gLIQboD/ioiIUHR0tKKjo9W1a1e98sorio2N1fjx4/XLL7+49H3mmWfk5eUlDw8P1atXTz169NC+ffsuuv1ff/1VM2bMUMuWLZWcnKwPP/xQH374oRYsWKCWLVtqxowZ+vXXX8sc73A49OSTT2rYsGGKiYkps1+XLl3k5eUlf39/jRgxQnFxcYqLi3O+PmvWLD3++ONq1aqV1qxZo48++ki7d+9W165dnV+WknTs2DH98ssvzuuOLlyOHj2qEydOuO17yJAhOnbsmLZs2aKlS5fqxhtvVPv27d365ebmOk8bXahBgwbO16Xzp3gaNGhQZpCrjH79+snLy0uenp4KDg5W//799f3331fZ9st7bBezatUqrV27VtHR0Zo5c6by8/NL7VfyN/jtaZ4LffLJJ4qNjZUkLVq0SB988IF2796tSZMmSfr/AankOqobbrihzG2Vp48kDRw4UHPnztWIESO0efNmffLJJ9q9e7cCAwNdPmMlPD09FR8fr7lz56qgoECLFi3S6NGjL7oP4GK45ga4iGbNmmnz5s366quvXP6f+JgxYzRo0CAVFxfr0KFDevbZZ9WlSxe3iyIv9Mwzz+inn37Szp07tWfPHvXp00eStGbNGkVFRSkyMlITJkzQyy+/XOr4mTNnKicnRy+88MJFa16wYIGioqJUWFioL7/8Us8884zy8/P15ptvSpJef/11dezYUcnJyS7jTp486bJer149BQQEaNOmTaXup3bt2m5ttWrV0rBhw/Tyyy/r66+/1rhx40odGxAQoOzs
bLf2kvev5Ms6MDBQ77//voqLi6ss4Lzwwgu65557VFRUpAMHDmj8+PHq1auXMjIyqmT75T22i/H29tbGjRt144036vbbb9eoUaP02muvufX7+uuvJUk33XRTmdtauXKlvLy8tH79epcZnnXr1rn0CwwMlCT98MMPZV7zdWGfsuTl5Wn9+vV67rnnNGHCBGe7w+FwXu9VmkcffVTTpk3T+PHj5ePjo/vvv7/UYwbKg5kb4CJKvvBK/ke9xA033KDo6Gjddddd6tevn0aPHq0TJ07o0KFDpW5n27ZtmjdvnubNm6eQkBD17NlTbdq0UZs2bdSjRw+FhIRo7ty5mjt3rrZt2+Y2PjMzU3/729/0/PPPu9XyW02aNFF0dLTuvvtuDRs2TAMGDNBbb70lh8MhSc47qS702WefuVyYK0k9evRQbm6uioqKnDNaFy5NmjQpdf+jRo3Sxo0blZOTo0GDBpXap3Pnztq/f78+/fRTl/aUlBTZbDZ16tRJktStWzedPXtWy5Ytu+gxV0Tjxo0VHR2tVq1aadiwYXrooYe0b98+5/tzucp7bBfzwAMPqG3btgoJCdGiRYv0+uuva/ny5W793nnnHYWHh190JsVms8nT09PlAvQzZ864BYfY2Fh5eHi4hd4LtW7dWna7XQsWLCj1TquS/RmG4fYZe/XVV13u2vutoKAg9e3bV3PmzFF8fHyZF8wD5cHMDfBfX3zxhQoLCyWdP3Xw9ttvKzU1Vb1791Z4eLhL35ycHH355ZcqLi7W4cOH9eqrryowMNCtn3R+RmT48OF64IEH1L9//zL3P2DAAK1Zs0bDhw/X559/Lj8/P+drKSkpatasmeLj4y95HPv375evr68KCwt18OBBLV++XBEREc4vmx49emjatGl67rnn1KFDBx08eFCJiYkKDw93Hr8k9e/fX2+88Ybuu+8+jRkzRnfddZe8vLz0ww8/aNu2bbr//vvVu3dvt/3ffPPNSktLU61atXTdddeVWuNTTz2llJQUde/eXYmJiQoLC9O//vUvzZ8/X48//rhuueUW53uydOlSxcfH6+DBg+rUqZOKi4v18ccfKyIi4qLvZ1l+/PFHffnllyoqKtJXX32l1atXq3nz5m5fxpVV3mMrr969eysuLk6PP/64WrdurUaNGunTTz/Viy++qE2bNmnhwoUXHd+9e3fNmjVLAwcO1KOPPqrc3FzNnDnT7XgbNWqkv/71r5o2bZrOnDmjAQMGyG63a//+/Tpx4oSmTp0qPz8//f3vf9eIESPUpUsXPfLIIwoODtY333yjffv2ae7cufL391f79u01Y8YM1atXT40aNdKOHTu0ePFi1alT56K1vvjiixo6dKjb9UpAhZl6OTNQDZR2t5TdbjeaN29uzJo1yzh79qyzb8ldMyVLjRo1jKCgIKNnz55GRkaGYRjud6s88sgjRlBQkHH8+HGX/ZbcLXWh48ePG0FBQcajjz7qbJNk2Gw2Y9euXRcdX3I3UMni4eFhhISEGAMGDDC+++47Zz+Hw2GMGzfO+MMf/mD4+voaLVq0MNatW2cMHTrUCAsLc9nHuXPnjJkzZxp33HGH4evra/j5+Rm33nqr8dhjjxlff/21s99v74b6rdJeP3z4sDFw4EAjICDA8PLyMpo0aWLMmDHDKCoqcul35swZY/LkycbNN99seHt7GwEBAcY999zj9n4YRvnulirt/Smt/4UqcrdURY6tNCV3S13o1KlTxk033WS0adPGKCwsNEaPHm3cfffdxsqVK93Gl3a31JIlS4wmTZoYPj4+RuPGjY2kpCRj8eLFpb5XKSkpRsuWLZ1/7zvvvNNYunSpS58NGzYYHTp0MGrVqmVcd911RtOmTY0XXnjB+foPP/xgPPDAA8b1119v1K5d2+jatavxxRdfuB1byd+rrLuhLvU6UBabYZQxtwgAAHAV4pobAABgKYQbAABgKYQbAABgKYQbAABgKYQbAABgKYQbAABgKdfcj/gV
Fxfrxx9/VO3atS/64DcAAFB9GIahkydPlut5c9dcuPnxxx/LfG4KAACo3rKysi758NZrLtyUPOwvKytL/v7+JlcDAADKIz8/X6GhoaU+tPe3rrlwU3Iqyt/fn3ADAMBVpjyXlHBBMQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTTw838+fMVHh4uX19fRUVFKS0trcy+w4YNk81mc1tuu+22K1gxAACozkwNN6tWrdLYsWM1adIk7d27V+3atVO3bt2UmZlZav85c+YoOzvbuWRlZalu3br685//fIUrBwAA1ZXNMAzDrJ23atVKLVq0UHJysrMtIiJCvXr1UlJS0iXHr1u3Tn369NGhQ4cUFhZWrn3m5+fLbrcrLy+P37kBAOAqUZHvb9NmbgoKCpSenq7Y2FiX9tjYWO3atatc21i8eLG6dOlS7mADAACsz7RfKD5x4oSKiooUHBzs0h4cHKyjR49ecnx2drY2btyo5cuXX7Sfw+GQw+Fwrufn51euYAAAcFUw/YLi3/6MsmEY5fpp5WXLlqlOnTrq1avXRfslJSXJbrc7Fx6aCQCAtZkWburVqycPDw+3WZrjx4+7zeb8lmEYWrJkiQYPHixvb++L9p04caLy8vKcS1ZW1mXXDgAAqi/Two23t7eioqKUmprq0p6amqrWrVtfdOyOHTv0zTffKC4u7pL78fHxcT4kk4dlAgBgfaY+FTwhIUGDBw9WdHS0YmJitHDhQmVmZio+Pl7S+VmXI0eOKCUlxWXc4sWL1apVK0VGRppRNiSNGTNGOTk5kqTAwEDNmTPH5IoAADjP1HDTr18/5ebmKjExUdnZ2YqMjNSGDRucdz9lZ2e7/eZNXl6e1qxZw5epyXJycnTs2DGzywAAwI2p4UaSRo4cqZEjR5b62rJly9za7Ha7fv3119+5KgAAcLUy/W4pAACAqkS4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAlkK4AQAAluJpdgFXq6inU8wuwVT+P59yJuPsn09d8+9H+owhZpcAAPgvZm4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClEG4AAIClmB5u5s+fr/DwcPn6+ioqKkppaWkX7e9wODRp0iSFhYXJx8dHN954o5YsWXKFqgUAANWdqc+WWrVqlcaOHav58+erTZs2euWVV9StWzft379fDRs2LHVM3759dezYMS1evFg33XSTjh8/rsLCwitcOQAAqK5MDTezZs1SXFycRowYIUmaPXu2Nm/erOTkZCUlJbn137Rpk3bs2KHvvvtOdevWlSQ1atToSpYMAACqOdNOSxUUFCg9PV2xsbEu7bGxsdq1a1epY/75z38qOjpaL774ov7whz/olltu0bhx43TmzJkrUTIAALgKmDZzc+LECRUVFSk4ONilPTg4WEePHi11zHfffaf3339fvr6+Wrt2rU6cOKGRI0fqp59+KvO6G4fDIYfD4VzPz8+vuoMAAADVjukXFNtsNpd1wzDc2koUFxfLZrPpjTfe0F133aX77rtPs2bN0rJly8qcvUlKSpLdbncuoaGhVX4MAACg+jAt3NSrV08eHh5uszTHjx93m80pERISoj/84Q+y2+3OtoiICBmGoR9++KHUMRMnTlReXp5zycrKqrqDAAAA1Y5p4cbb21tRUVFKTU11aU9NTVXr1q1LHdOmTRv9+OOPOnXqlLPtq6++Uo0aNXTDDTeUOsbHx0f+/v4uCwAAsC5TT0slJCTo1Vdf1ZIlS3TgwAE99dRTyszMVHx8vKTz
sy5Dhgxx9h84cKACAgL08MMPa//+/dq5c6eefvppDR8+XDVr1jTrMAAAQDVi6q3g/fr1U25urhITE5Wdna3IyEht2LBBYWFhkqTs7GxlZmY6+/v5+Sk1NVVPPPGEoqOjFRAQoL59++r555836xAAAEA1YzMMwzC7iCspPz9fdrtdeXl5l3WKKurplCqs6urj/8VbqlFwWpJU7F1L+ZEPmlyRudJnDLl0JwBApVXk+9v0u6UAAACqEuEGAABYCuEGAABYCuEGAABYCuEGAABYCuEGAABYCuEGAABYCuEGAABYCuEGAABYCuEGAABYiqnPlsLVq9irVqn/BgDAbIQbVMqpJt3MLgEAgFJxWgoAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgKP+IHwBLGjBmjnJwcSVJgYKDmzJljckUAzEK4AWAJOTk5OnbsmNllAKgGOC0FAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAshXADAAAsxfRwM3/+fIWHh8vX11dRUVFKS0srs+/27dtls9ncli+//PIKVgwAAKozU8PNqlWrNHbsWE2aNEl79+5Vu3bt1K1bN2VmZl503MGDB5Wdne1cbr755itUMQAAqO5MDTezZs1SXFycRowYoYiICM2ePVuhoaFKTk6+6LigoCDVr1/fuXh4eFyhigEAQHVnWrgpKChQenq6YmNjXdpjY2O1a9eui4698847FRISos6dO2vbtm2/Z5kAAOAq42nWjk+cOKGioiIFBwe7tAcHB+vo0aOljgkJCdHChQsVFRUlh8Oh1157TZ07d9b27dvVvn37Usc4HA45HA7nen5+ftUdBAAAqHZMCzclbDaby7phGG5tJZo0aaImTZo412NiYpSVlaWZM2eWGW6SkpI0derUqisYAABUa6adlqpXr548PDzcZmmOHz/uNptzMXfffbe+/vrrMl+fOHGi8vLynEtWVlalawYAANWfaeHG29tbUVFRSk1NdWlPTU1V69aty72dvXv3KiQkpMzXfXx85O/v77IAAADrMvW0VEJCggYPHqzo6GjFxMRo4cKFyszMVHx8vKTzsy5HjhxRSkqKJGn27Nlq1KiRbrvtNhUUFOj111/XmjVrtGbNGjMPAwAAVCOmhpt+/fopNzdXiYmJys7OVmRkpDZs2KCwsDBJUnZ2tstv3hQUFGjcuHE6cuSIatasqdtuu03/+te/dN9995l1CAAAoJqxGYZhmF3ElZSfny+73a68vLzLOkUV9XRKFVaFq136jCFml3DNGzhwoI4dOybp/F2Xy5cvN7kiAFWpIt/fpj9+AQAAoCoRbgAAgKUQbgAAgKUQbgAAgKUQbgAAgKUQbgAAgKUQbgAAgKWY/uBMAFUjM/F2s0swVeEvAZI8/vvvH6/590OSGk7+3OwSAFMwcwMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACzFs7wd+/TpU+6Nvv3225UqBgAA4HKVe+bGbrc7F39/f23dulV79uxxvp6enq6tW7fKbrf/LoUCAACUR7lnbpYuXer89zPPPKO+fftqwYIF8vDwkCQVFRVp5MiR8vf3r/oqAQAAyqlS19wsWbJE48aNcwYbSfLw8FBCQoKWLFlSoW3Nnz9f4eHh8vX1VVRUlNLS0so17oMPPpCnp6eaN29eof0BAABrq1S4KSws
1IEDB9zaDxw4oOLi4nJvZ9WqVRo7dqwmTZqkvXv3ql27durWrZsyMzMvOi4vL09DhgxR586dK1w7AACwtnKflrrQww8/rOHDh+ubb77R3XffLUn66KOPNH36dD388MPl3s6sWbMUFxenESNGSJJmz56tzZs3Kzk5WUlJSWWOe+yxxzRw4EB5eHho3bp1lTkEAABgUZUKNzNnzlT9+vX10ksvKTs7W5IUEhKi8ePH6y9/+Uu5tlFQUKD09HRNmDDBpT02Nla7du0qc9zSpUv17bff6vXXX9fzzz9fmfIBAICFVSrc1KhRQ+PHj9f48eOVn58vSRW+kPjEiRMqKipScHCwS3twcLCOHj1a6pivv/5aEyZMUFpamjw9y1e6w+GQw+FwrpfUCwAArKnSP+JXWFiof//731qxYoVsNpsk6ccff9SpU6cqtJ2SsSUMw3Brk87fjTVw4EBNnTpVt9xyS7m3n5SU5HIbe2hoaIXqA3B1qOtTpID/LnV9iswuB4CJKjVzc/jwYXXt2lWZmZlyOBy69957Vbt2bb344os6e/asFixYcMlt1KtXTx4eHm6zNMePH3ebzZGkkydPas+ePdq7d69Gjx4tSSouLpZhGPL09NSWLVt0zz33uI2bOHGiEhISnOv5+fkEHMCC/nrnL2aXAKCaqNTMzZgxYxQdHa2ff/5ZNWvWdLb37t1bW7duLdc2vL29FRUVpdTUVJf21NRUtW7d2q2/v7+/Pv/8c2VkZDiX+Ph4NWnSRBkZGWrVqlWp+/Hx8ZG/v7/LAgAArKtSMzfvv/++PvjgA3l7e7u0h4WF6ciRI+XeTkJCggYPHqzo6GjFxMRo4cKFyszMVHx8vKTzsy5HjhxRSkqKatSoocjISJfxQUFB8vX1dWsHAADXrkqFm+LiYhUVuZ/T/uGHH1S7du1yb6dfv37Kzc1VYmKisrOzFRkZqQ0bNigsLEySlJ2dfcnfvAEAALiQzTAMo6KD+vXrJ7vdroULF6p27dr67LPPFBgYqPvvv18NGzZ0eVRDdZOfny+73a68vLzLOkUV9XRKFVaFq136jCFml6DMxNvNLgHVTMPJn5tdAlBlKvL9XamZm5deekmdOnVS06ZNdfbsWQ0cOFBff/216tWrpxUrVlSqaAAAgKpQqXDToEEDZWRkaMWKFfr0009VXFysuLg4PfTQQy4XGAMAAFxplQo3v/76q6677joNHz5cw4cPr+qaAAAAKq1St4IHBQVp0KBB2rx5c4UelAkAAPB7q1S4SUlJkcPhUO/evdWgQQONGTNGu3fvruraAAAAKqxS4aZPnz5avXq1jh07pqSkJB04cECtW7fWLbfcosTExKquEQAAoNwq/WwpSapdu7YefvhhbdmyRfv27VOtWrU0derUqqoNAACgwi4r3Jw9e1ZvvvmmevXqpRYtWig3N1fjxo2rqtoAAAAqrFJ3S23ZskVvvPGG1q1bJw8PDz344IPavHmzOnToUNX1AQAAVEilwk2vXr3UvXt3/d///Z+6d+8uLy+vqq4LAACgUioVbo4ePcrTtQEAQLVU7nCTn5/vEmjy8/PL7EvwAQAAZil3uLn++uuVnZ2toKAg1alTRzabza2PYRiy2WylPjEcAADgSih3uHnvvfdUt25d579LCzcAAABmK3e4ufBOqI4dO/4etQAAAFy2Sv3OTePGjfU///M/OnjwYFXXAwAAcFkqFW5Gjx6tTZs2KSIiQlFRUZo9e7ays7OrujYAAIAKq1S4SUhI0O7du/Xll1+qR48eSk5OVsOGDRUbG6uUlJSqrhEAAKDcLuvxC7fccoumTp2qgwcPKi0tTTk5OXr44YerqjYAAIAKq9SP+F3ok08+0fLly7Vq1Srl5eXpwQcfrIq6AAAAKqVS4earr77SG2+8oeXLl+v7779Xp06dNH36dPXp00e1a9eu6hoBAADKrVLh5tZbb1V0dLRGjRql/v37q379+lVdFwAAQKVUONwUFRVpwYIFevDB
B50/6gcAAFBdVPiCYg8PDz355JPKy8v7PeoBAAC4LJW6W+r222/Xd999V9W1AAAAXLZKhZv//d//1bhx47R+/XplZ2crPz/fZQEAADBLpS4o7tq1qyTpT3/6k8sDNHkqOAAAMFulws22bduqug4AAIAqUalwc+ETwgEAAKqTSoWbnTt3XvT19u3bV6oYAACAy1WpcNOxY0e3tguvveGaGwAAYJZK3S31888/uyzHjx/Xpk2b1LJlS23ZsqWqawQAACi3Ss3c2O12t7Z7771XPj4+euqpp5Senn7ZhQEAAFRGpWZuyhIYGKiDBw9W5SYBAAAqpFIzN5999pnLumEYys7O1vTp03XHHXdUSWEAAACVUalw07x5c9lsNhmG4dJ+9913a8mSJVVSGAAAQGVUKtwcOnTIZb1GjRoKDAyUr69vlRQFAABQWRW65ubjjz/Wxo0bFRYW5lx27Nih9u3bq2HDhnr00UflcDh+r1oBAAAuqULhZsqUKS7X23z++eeKi4tTly5dNGHCBL377rtKSkqq8iIBAADKq0LhJiMjQ507d3aur1y5Uq1atdKiRYuUkJCgl19+WW+++WaVFwkAAFBeFQo3P//8s4KDg53rO3bscD4hXJJatmyprKysChUwf/58hYeHy9fXV1FRUUpLSyuz7/vvv682bdooICBANWvW1K233qqXXnqpQvsDAADWVqFwExwc7LyYuKCgQJ9++qliYmKcr588eVJeXl7l3t6qVas0duxYTZo0SXv37lW7du3UrVs3ZWZmltq/Vq1aGj16tHbu3KkDBw7o2Wef1bPPPquFCxdW5DAAAICFVSjcdO3aVRMmTFBaWpomTpyo6667Tu3atXO+/tlnn+nGG28s9/ZmzZqluLg4jRgxQhEREZo9e7ZCQ0OVnJxcav8777xTAwYM0G233aZGjRpp0KBB+uMf/3jR2R4AAHBtqVC4ef755+Xh4aEOHTpo0aJFWrRokby9vZ2vL1myRLGxseXaVkFBgdLT0936x8bGateuXeXaxt69e7Vr1y516NCh/AcBAAAsrUK/cxMYGKi0tDTl5eXJz89PHh4eLq+vXr1afn5+5drWiRMnVFRU5HINj3T+1NfRo0cvOvaGG25QTk6OCgsLNWXKFI0YMaLMvg6Hw+X29Pz8/HLVBwAArk6VeraU3W53CzaSVLduXZeZnPKw2Wwu64ZhuLX9Vlpamvbs2aMFCxZo9uzZWrFiRZl9k5KSZLfbnUtoaGiF6gMAAFeXSv1CcVWoV6+ePDw83GZpjh8/7jab81vh4eGSpNtvv13Hjh3TlClTNGDAgFL7Tpw4UQkJCc71/Px8Ag4AABZWpU8Frwhvb29FRUUpNTXVpT01NVWtW7cu93YMw7joryL7+PjI39/fZQEAANZl2syNJCUkJGjw4MGKjo5WTEyMFi5cqMzMTMXHx0s6P+ty5MgRpaSkSJLmzZunhg0b6tZbb5V0/ndvZs6cqSeeeMK0YwAAANWLqeGmX79+ys3NVWJiorKzsxUZGakNGzYoLCxMkpSdne3ymzfFxcWaOHGiDh06JE9PT914442aPn26HnvsMbMOAQAAVDM2wzAMs4u4kvLz82W325WXl3dZp6iink6pwqpwtUufMcTsEpSZeLvZJaCaaTj5c7NLAKpMRb6/TbvmBgAA4PdAuAEAAJZi6jU3AABY2ZgxY5STkyPp/A/hzpkzx+SKrg2EGwAAfic5OTk6duyY2WVcczgtBQAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALIVwAwAALMXT7AIAANbV5h9tzC7BVD75PrLJJkk6mn/0mn8/PnjigyuyH2ZuAACApRBuAACApRBuAACApRBuAACApRBuAACApRBuAACApRBuAACApRBu
AACApRBuAACApRBuAACApRBuAACApRBuAACApRBuAACApRBuAACApRBuAACApRBuAACApZgebubPn6/w8HD5+voqKipKaWlpZfZ9++23de+99yowMFD+/v6KiYnR5s2br2C1AACgujM13KxatUpjx47VpEmTtHfvXrVr107dunVTZmZmqf137type++9Vxs2bFB6ero6deqknj17au/evVe4cgAAUF2ZGm5mzZqluLg4jRgxQhEREZo9e7ZCQ0OVnJxcav/Zs2dr/PjxatmypW6++Wb97W9/080336x33333ClcOAACqK9PCTUFBgdLT0xUbG+vSHhsbq127dpVrG8XFxTp58qTq1q37e5QIAACuQp5m7fjEiRMqKipScHCwS3twcLCOHj1arm38/e9/1+nTp9W3b98y+zgcDjkcDud6fn5+5QoGAKCCjJpGqf/G78u0cFPCZrO5rBuG4dZWmhUrVmjKlCl65513FBQUVGa/pKQkTZ069bLrBACgograF5hdwjXJtNNS9erVk4eHh9sszfHjx91mc35r1apViouL05tvvqkuXbpctO/EiROVl5fnXLKysi67dgAAUH2ZFm68vb0VFRWl1NRUl/bU1FS1bt26zHErVqzQsGHDtHz5cnXv3v2S+/Hx8ZG/v7/LAgAArMvU01IJCQkaPHiwoqOjFRMTo4ULFyozM1Px8fGSzs+6HDlyRCkpKZLOB5shQ4Zozpw5uvvuu52zPjVr1pTdbjftOAAAQPVharjp16+fcnNzlZiYqOzsbEVGRmrDhg0KCwuTJGVnZ7v85s0rr7yiwsJCjRo1SqNGjXK2Dx06VMuWLbvS5QMAgGrI9AuKR44cqZEjR5b62m8Dy/bt23//ggAAwFXN9McvAAAAVCXCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTTw838+fMVHh4uX19fRUVFKS0trcy+2dnZGjhwoJo0aaIaNWpo7NixV65QAABwVTA13KxatUpjx47VpEmTtHfvXrVr107dunVTZmZmqf0dDocCAwM1adIk3XHHHVe4WgAAcDUwNdzMmjVLcXFxGjFihCIiIjR79myFhoYqOTm51P6NGjXSnDlzNGTIENnt9itcLQAAuBqYFm4KCgqUnp6u2NhYl/bY2Fjt2rXLpKoAAMDVztOsHZ84cUJFRUUKDg52aQ8ODtbRo0erbD8Oh0MOh8O5np+fX2XbBgAA1Y/pFxTbbDaXdcMw3NouR1JSkux2u3MJDQ2tsm0DAIDqx7RwU69ePXl4eLjN0hw/ftxtNudyTJw4UXl5ec4lKyuryrYNAACqH9PCjbe3t6KiopSamurSnpqaqtatW1fZfnx8fOTv7++yAAAA6zLtmhtJSkhI0ODBgxUdHa2YmBgtXLhQmZmZio+Pl3R+1uXIkSNKSUlxjsnIyJAknTp1Sjk5OcrIyJC3t7eaNm1qxiEAAIBqxtRw069fP+Xm5ioxMVHZ2dmKjIzUhg0bFBYWJun8j/b99jdv7rzzTue/09PTtXz5coWFhen777+/kqUDAIBqytRwI0kjR47UyJEjS31t2bJlbm2GYfzOFQEAgKuZ6XdLAQAAVCXCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAA
sBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTCDQAAsBTTw838+fMVHh4uX19fRUVFKS0t7aL9d+zYoaioKPn6+qpx48ZasGDBFaoUAABcDUwNN6tWrdLYsWM1adIk7d27V+3atVO3bt2UmZlZav9Dhw7pvvvuU7t27bR371799a9/1ZNPPqk1a9Zc4coBAEB1ZWq4mTVrluLi4jRixAhFRERo9uzZCg0NVXJycqn9FyxYoIYNG2r27NmKiIjQiBEjNHz4cM2cOfMKVw4AAKor08JNQUGB0tPTFRsb69IeGxurXbt2lTrmww8/dOv/xz/+UXv27NG5c+d+t1oBAMDVw9OsHZ84cUJFRUUKDg52aQ8ODtbRo0dLHXP06NFS+xcWFurEiRMKCQlxG+NwOORwOJzreXl5kqT8/PzLqr/IceayxsNaLvfzVBVOni0yuwRUM9Xhc1l4ptDsElCNXM5nsmSsYRiX7GtauClhs9lc1g3DcGu7VP/S2kskJSVp6tSpbu2hoaEVLRUok/0f8WaXALhLsptdAeDC/szlfyZPnjwpu/3i2zEt3NSrV08eHh5uszTHjx93m50pUb9+/VL7e3p6KiAgoNQxEydOVEJCgnO9uLhYP/30kwICAi4aonBp+fn5Cg0NVVZWlvz9/c0uB+AziWqJz2XVMAxDJ0+eVIMGDS7Z17Rw4+3traioKKWmpqp3797O9tTUVN1///2ljomJidG7777r0rZlyxZFR0fLy8ur1DE+Pj7y8fFxaatTp87lFQ8X/v7+/AeLaoXPJKojPpeX71IzNiVMvVsqISFBr776qpYsWaIDBw7oqaeeUmZmpuLjz0/xT5w4UUOGDHH2j4+P1+HDh5WQkKADBw5oyZIlWrx4scaNG2fWIQAAgGrG1Gtu+vXrp9zcXCUmJio7O1uRkZHasGGDwsLCJEnZ2dkuv3kTHh6uDRs26KmnntK8efPUoEEDvfzyy3rggQfMOgQAAFDN2IzyXHYMlMLhcCgpKUkTJ050O/UHmIHPJKojPpdXHuEGAABYiunPlgIAAKhKhBsAAGAphBsAAGAphBsAAGAphBtU2M6dO9WzZ081aNBANptN69atM7skXOOSkpLUsmVL1a5dW0FBQerVq5cOHjxodlm4hiUnJ6tZs2bOH+6LiYnRxo0bzS7rmkG4QYWdPn1ad9xxh+bOnWt2KYAkaceOHRo1apQ++ugjpaamqrCwULGxsTp9+rTZpeEadcMNN2j69Onas2eP9uzZo3vuuUf333+//vOf/5hd2jWBW8FxWWw2m9auXatevXqZXQrglJOTo6CgIO3YsUPt27c3uxxAklS3bl3NmDFDcXFxZpdieaY/FRwAqlpeXp6k818mgNmKioq0evVqnT59WjExMWaXc00g3ACwFMMwlJCQoLZt2yoyMtLscnAN+/zzzxUTE6OzZ8/Kz89Pa9euVdOmTc0u65pAuAFgKaNHj9Znn32m999/3+xScI1r0qSJMjIy9Msvv2jNmjUaOnSoduzYQcC5Agg3ACzjiSee0D//+U/t3LlTN9xwg9nl4Brn7e2tm266SZIUHR2t3bt3a86cOXrllVdMrsz6CDcArnqGYeiJJ57Q2rVrtX37doWHh5tdEuDGMAw5HA6zy7gmEG5QYadOndI333zjXD906JAyMjJUt25dNWzY0MTKcK0aNWqUli9frnfeeUe1a9fW0aNHJUl2u101a9Y0uTpci/7617+qW7duCg0N1cmTJ7Vy5Upt375dmzZtMru0awK3gqPCtm/frk6dOrm1Dx06VMuWLbvyBeGaZ7PZSm1funSphg0bdmWLASTFxcVp69atys7Olt1uV7NmzfTMM8/o
3nvvNbu0awLhBgAAWAq/UAwAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAMAACyFcAPgqjVs2DD16tXL7DIAVDOEGwCmGjZsmGw2m2w2m7y8vNS4cWONGzdOp0+fNrs0AFcpni0FwHRdu3bV0qVLde7cOaWlpWnEiBE6ffq0kpOTzS4NwFWImRsApvPx8VH9+vUVGhqqgQMH6qGHHtK6deskSf/5z3/UvXt3+fv7q3bt2mrXrp2+/fbbUrezadMmtW3bVnXq1FFAQIB69Ojh0regoECjR49WSEiIfH191ahRIyUlJTlfnzJliho2bCgfHx81aNBATz755O963AB+H8zcAKh2atasqXPnzunIkSNq3769OnbsqPfee0/+/v764IMPVFhYWOq406dPKyEhQbfffrtOnz6tyZMnq3fv3srIyFCNGjX08ssv65///KfefPNNNWzYUFlZWcrKypIkvfXWW3rppZe0cuVK3XbbbTp69Kj27dt3JQ8bQBUh3ACoVj755BMtX75cnTt31rx582S327Vy5Up5eXlJkm655ZYyxz7wwAMu64sXL1ZQUJD279+vyMhIZWZm6uabb1bbtm1ls9kUFhbm7JuZman69eurS5cu8vLyUsOGDXXXXXf9PgcJ4HfFaSkAplu/fr38/Pzk6+urmJgYtW/fXv/4xz+UkZGhdu3aOYPNpXz77bcaOHCgGjduLH9/f4WHh0s6H1yk8xcvZ2RkqEmTJnryySe1ZcsW59g///nPOnPmjBo3bqxHHnlEa9euLXOGCED1RrgBYLpOnTopIyNDBw8e1NmzZ/X2228rKChINWvWrNB2evbsqdzcXC1atEgff/yxPv74Y0nnr7WRpBYtWujQoUOaNm2azpw5o759++rBBx+UJIWGhurgwYOaN2+eatasqZEjR6p9+/Y6d+5c1R4sgN8d4QaA6WrVqqWbbrpJYWFhLrM0zZo1U1paWrkCRm5urg4cOKBnn31WnTt3VkREhH7++We3fv7+/urXr58WLVqkVatWac2aNfrpp58knb/W509/+pNefvllbd++XR9++KE+//zzqjtQAFcE19wAqLZGjx6tf/zjH+rfv78mTpwou92ujz76SHfddZeaNGni0vf6669XQECAFi5cqJCQEGVmZmrChAkufV566SWFhISoefPmqlGjhlavXq369eurTp06WrZsmYqKitSqVStdd911eu2111SzZk2X63IAXB2YuQFQbQUEBOi9997TqVOn1KFDB0VFRWnRokWlXoNTo0YNrVy5Uunp6YqMjNRTTz2lGTNmuPTx8/PTCy+8oOjoaLVs2VLff/+9NmzYoBo1aqhOnTpatGiR2rRpo2bNmmnr1q169913FRAQcKUOF0AVsRmGYZhdBAAAQFVh5gYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFgK4QYAAFjK/wOjciIexYlFKgAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Построение гистограммы по классам\n", "sns.barplot(x='Pclass', y='Survived', data=train)\n", "plt.title('Выживаемость по классам')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 21, "id": "1bdde2bc-d842-4eab-a217-f5a2ed5537e9", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_7201/3630563121.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.\n", " corr_matrix = train.corr()\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Построение матрицы корреляции\n", "corr_matrix = train.corr()\n", "sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')\n", "plt.title('Матрица корреляции')\n", "plt.show()\n", "\n", "# Матрица показывает, что явной корреляции между элементами не имеется." ] }, { "cell_type": "code", "execution_count": 26, "id": "595a8bdd-9699-496a-8810-e94f66d406e0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "T-статистика: 3.178614052332485, p-значение: 0.0015954656668372637\n" ] } ], "source": [ "# t-test\n", "\n", "from scipy.stats import ttest_ind\n", "\n", "group1 = train[train['Pclass'] == 1]['Survived']\n", "group2 = train[train['Pclass'] == 2]['Survived']\n", "\n", "t_stat, p_value = ttest_ind(group1, group2)\n", "print(f'T-статистика: {t_stat}, p-значение: {p_value}')" ] }, { "cell_type": "code", "execution_count": 28, "id": "a8ae9118-a267-414b-8bed-8ea74fc8ea8a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Хи-квадрат статистика: 102.88898875696056, p-значение: 4.549251711298793e-23\n" ] } ], "source": [ "# Хи-квадрат\n", "\n", "from scipy.stats import chi2_contingency\n", "\n", "contingency_table = pd.crosstab(train['Pclass'], train['Survived'])\n", "\n", "chi2_stat, p_value, _, _ = chi2_contingency(contingency_table)\n", "print(f'Хи-квадрат статистика: {chi2_stat}, p-значение: {p_value}')" ] }, { "cell_type": "code", "execution_count": 31, "id": "007f2b1c-c93b-4b8b-b550-d0df90897a0e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "F-статистика: 57.964817590910116, p-значение: 2.1832474151179777e-24\n" ] } ], "source": [ "# ANOVA\n", "\n", "from scipy.stats import f_oneway\n", "\n", "class1 = train[train['Pclass'] == 1]['Survived']\n", "class2 = train[train['Pclass'] == 2]['Survived']\n", "class3 = train[train['Pclass'] == 3]['Survived']\n", "\n", "f_stat, p_value = 
f_oneway(class1, class2, class3)\n", "print(f'F-статистика: {f_stat}, p-значение: {p_value}')" ] }, { "cell_type": "code", "execution_count": 24, "id": "9ec2c264-344a-4f47-88e6-926855779c23", "metadata": {}, "outputs": [], "source": [ "# Count missing values per column\n", "train.isnull().sum()\n", "\n", "# Handle missing values (here: fill Age with the column mean).\n", "# Assign the result back instead of calling fillna(inplace=True) on the selected column (chained in-place update).\n", "# NOTE(review): in top-to-bottom order this runs before X is built, so the median fill of X['Age'] further down finds nothing to fill; keep only one strategy.\n", "train['Age'] = train['Age'].fillna(train['Age'].mean())" ] }, { "cell_type": "code", "execution_count": 7, "id": "b913858e-4503-419e-abc7-4614129616a7", "metadata": {}, "outputs": [], "source": [ "# Target column and feature columns\n", "columns_target = ['Survived']\n", "columns_train = ['Pclass', 'Sex', 'Age', 'Fare']" ] }, { "cell_type": "code", "execution_count": 8, "id": "a1e6b5a7-002b-4903-8c9a-47fd4d94179c", "metadata": {}, "outputs": [], "source": [ "# Feature matrix and target; .copy() gives X its own data so the column assignments below do not hit SettingWithCopyWarning.\n", "X = train[columns_train].copy()\n", "Y = train[columns_target]" ] }, { "cell_type": "code", "execution_count": 9, "id": "c2dd3353-a663-47e9-895a-91cd0b9a802c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pclass: 0; \n", "Sex: 0; \n", "Age: 177; \n", "Fare: 0; \n" ] } ], "source": [ "# Count missing values in each feature column\n", "for column in columns_train:\n", "    print(f\"{column}: {X[column].isnull().sum()}; \")" ] }, { "cell_type": "code", "execution_count": 10, "id": "181eab63-2b36-4c07-84ca-695862395333", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_7201/4142239557.py:6: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " X['Age'] = X['Age'].fillna(X['Age'].median())\n" ] } ], "source": [ "# Data correction\n", "\n", "# 
Заметили, что у Age много пустых значений, но удалить эти записи может быть не совсем правильным\n", "# ибо это может повлиять на выборку, поэтому мы просто заполним их медианными значениями по возрасту\n", "\n", "X['Age'] = X['Age'].fillna(X['Age'].median())" ] }, { "cell_type": "code", "execution_count": 11, "id": "851a570f-7e58-427b-86df-683ad463e308", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pclass: 0; \n", "Sex: 0; \n", "Age: 0; \n", "Fare: 0; \n" ] } ], "source": [ "# Проверка данных на валидность\n", "for column in columns_train:\n", " print(f\"{column}: {X[column].isnull().sum()}; \")" ] }, { "cell_type": "code", "execution_count": 12, "id": "244eadb7-633a-4d36-b72d-430cc25c0cf4", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_7201/4159695936.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " X['Sex'] = X['Sex'].apply(lambda x:d[x])\n" ] } ], "source": [ "# Корректировка данных (замена значений категориальных значений числовыми вместо строк)\n", "d={'male':1, 'female':0}\n", "X['Sex'] = X['Sex'].apply(lambda x:d[x])" ] }, { "cell_type": "code", "execution_count": 13, "id": "ecd37e6d-2646-40be-8490-ad5220b63ef3", "metadata": {}, "outputs": [], "source": [ "# Разделение выборки на обучающую и тестовую\n", "from sklearn.model_selection import train_test_split\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42, shuffle=False)" ] }, { "cell_type": "code", "execution_count": 14, "id": "42154582-dfb4-4518-86cc-395d7fa378f5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Pclass: 2057; \n", "Sex: 577; \n", "Age: 26161.17; 
\n", "Fare: 28693.9493; \n", "Pclass: 2057; \n", "Sex: 577; \n", "Age: 26161.17; \n", "Fare: 28693.9493; \n" ] } ], "source": [ "check(X_train)\n", "check(X_test)" ] }, { "cell_type": "code", "execution_count": 15, "id": "e73c6614-d395-4ee2-9a53-dcd85575d69a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/lib/python3.11/site-packages/sklearn/utils/validation.py:1183: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", " y = column_or_1d(y, warn=True)\n", "/usr/lib/python3.11/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning.\n", " warnings.warn(\n", "/usr/lib/python3.11/site-packages/sklearn/svm/_base.py:1250: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", " warnings.warn(\n" ] }, { "data": { "text/html": [ "
LinearSVC()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "LinearSVC()" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn import svm\n", "predmodel = svm.LinearSVC()\n", "predmodel.fit(X_train, Y_train)" ] }, { "cell_type": "code", "execution_count": 16, "id": "21651a18-0919-4486-83e0-41b26c52428f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6779661016949152" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Проверяем точность предсказаний\n", "predmodel.score(X_test, Y_test)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }