2024-09-23 23:22:33 +00:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"# Рабочая тетрадь № 3"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 3,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"import seaborn as sns\n",
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
"from sklearn.metrics import accuracy_score\n",
|
|
|
|
|
"from sklearn.neighbors import KNeighborsClassifier\n",
|
|
|
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
|
|
|
"from sklearn.feature_extraction import DictVectorizer"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.3.1 Задание\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Задайте 4 точки в трехмерном пространстве, рассчитайте между ними \n",
|
|
|
|
|
"расстояния по описанным в примере выше метрикам. Отобразите точки \n",
|
|
|
|
|
"в трехмерном пространстве."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 4,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"first_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|
|
|
|
"second_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|
|
|
|
"third_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|
|
|
|
"fourth_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"dots = [first_dot, second_dot, third_dot, fourth_dot]"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 5,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[2 4 8] [8 6 2]\n",
|
|
|
|
|
"Расстояние Евклида 8.717797887081348\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 76.00000000000001\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"Расстояние Чебышева 6.0\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"Расстояние Хемминга 14.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[2 4 8] [6 6 1]\n",
|
|
|
|
|
"Расстояние Евклида 8.306623862918075\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 69.0\n",
|
|
|
|
|
"Расстояние Чебышева 7.0\n",
|
|
|
|
|
"Расстояние Хемминга 13.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[2 4 8] [7 6 1]\n",
|
|
|
|
|
"Расстояние Евклида 8.831760866327848\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 78.00000000000001\n",
|
|
|
|
|
"Расстояние Чебышева 7.0\n",
|
|
|
|
|
"Расстояние Хемминга 14.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[8 6 2] [2 4 8]\n",
|
|
|
|
|
"Расстояние Евклида 8.717797887081348\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 76.00000000000001\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"Расстояние Чебышева 6.0\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"Расстояние Хемминга 14.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[8 6 2] [6 6 1]\n",
|
|
|
|
|
"Расстояние Евклида 2.23606797749979\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 5.000000000000001\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"Расстояние Чебышева 2.0\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"Расстояние Хемминга 3.0\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[8 6 2] [7 6 1]\n",
|
|
|
|
|
"Расстояние Евклида 1.4142135623730951\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 2.0000000000000004\n",
|
|
|
|
|
"Расстояние Чебышева 1.0\n",
|
|
|
|
|
"Расстояние Хемминга 2.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[6 6 1] [2 4 8]\n",
|
|
|
|
|
"Расстояние Евклида 8.306623862918075\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 69.0\n",
|
|
|
|
|
"Расстояние Чебышева 7.0\n",
|
|
|
|
|
"Расстояние Хемминга 13.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[6 6 1] [8 6 2]\n",
|
|
|
|
|
"Расстояние Евклида 2.23606797749979\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 5.000000000000001\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"Расстояние Чебышева 2.0\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"Расстояние Хемминга 3.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[6 6 1] [7 6 1]\n",
|
|
|
|
|
"Расстояние Евклида 1.0\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 1.0\n",
|
|
|
|
|
"Расстояние Чебышева 1.0\n",
|
|
|
|
|
"Расстояние Хемминга 1.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[7 6 1] [2 4 8]\n",
|
|
|
|
|
"Расстояние Евклида 8.831760866327848\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 78.00000000000001\n",
|
|
|
|
|
"Расстояние Чебышева 7.0\n",
|
|
|
|
|
"Расстояние Хемминга 14.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"[7 6 1] [8 6 2]\n",
|
|
|
|
|
"Расстояние Евклида 1.4142135623730951\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 2.0000000000000004\n",
|
|
|
|
|
"Расстояние Чебышева 1.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"Расстояние Хемминга 2.0\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"\n",
|
|
|
|
|
"[7 6 1] [6 6 1]\n",
|
|
|
|
|
"Расстояние Евклида 1.0\n",
|
|
|
|
|
"Квадрат Евклидова расстояния 1.0\n",
|
|
|
|
|
"Расстояние Чебышева 1.0\n",
|
|
|
|
|
"Расстояние Хемминга 1.0\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"for first in dots:\n",
|
|
|
|
|
" for second in dots:\n",
|
|
|
|
|
" if (first == second).all():\n",
|
|
|
|
|
" continue\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" print(first, second)\n",
|
|
|
|
|
" print('Расстояние Евклида', np.linalg.norm(first-second))\n",
|
|
|
|
|
" print('Квадрат Евклидова расстояния', np.linalg.norm(first-second) ** 2)\n",
|
|
|
|
|
" print('Расстояние Чебышева', np.linalg.norm(first-second, ord=np.inf))\n",
|
|
|
|
|
" print('Расстояние Хемминга', np.linalg.norm(first-second, ord=1))\n",
|
|
|
|
|
" print()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 6,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAGOCAYAAABSVO4kAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC6XElEQVR4nOy9d3xcd5k1fmY0GvXebFnFlmTLtmxZxbItOw1Ic5oNP0qyWRJC2M0uEEoW3hA2IQkLhMC+wLILoSwk7AuhJHYSEkhPnEKabWnUu6wuTddI08u9vz+83+s7V1Num5kr557Ph89uHGfu1Wjme+7zPOc5R0PTNA0VKlSoUKFCArSpvgEVKlSoULH+oZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEOX6htQ8f4CTdMIhULw+XxIS0tj/qfVqs81KlSsZ6hkoiJpoGkagUAAwWAQPp+P+XOtVgudTgedTqeSiwoV6xQamqbpVN+EivMfoVAIgUAAFEVBo9HA7/dDq9WCpmnQNA2KokDTNDQaDTQajUouKlSsM6hkoiKhoGkawWAQwWAQAKDRaJgKRaPRRPz7hFgIyN/T6/VIT0+HTqeL+N+qUKEidVDbXCoSBoqimGoEAFN1EKIglQgbGo0GaWlpzD8TcnnnnXfQ2NiIwsJCaLVapKWlhVUvKrmoUJFaqGSiQnYQAggEAmGtK+7f4UMAhFzI/01LS2Ne2+/3Q6PRMOSSnp7O/B2VXFSoSC5UMlEhK0gLq7+/H+Xl5SgtLZXlYCevEa1y4ZILd+aikosKFYmFSiYqZAM51EOhEFZXV1FUVCTbIc5uj3H/nJAL+fcURcHv98Pn86nkokJFkqCSiQrJILsjwWAQFEVBq9VGPfwTCUIQKrmoUJF8qGSiQhJIWysUCgEAQyRE9isXxJBTJHIh//P5fPD7/cw9q+SiQoU0qGSiQjTIEz+7GmEj0uFP0zQmJiYwPT2NvLw8FBUVoaioCHl5eQnfJWELAdLS0taQC7tyIRJksuOikosKFbGhkokKwSBtLaLWinTYRqokvF4venp64Pf7sXPnTni9XtjtdszOzoKiKBQWFqKoqAiFhYXIy8sLe81EtM1ikYvVasXk5CSam5tVclGhggdUMlEhCBRFIRgMrmlrccE9/M1mM3p7e1FWVoa2tjZm96Sqqgo0TcPlcsFut8Nut2NqagoAGHIpKipKyvyFTS5arRZut5tp13m9XubvcHdcVHJRoUIlExU8wWd3hA1CJhRFYXR0FLOzs9i5cyc2bdoEmqaZeQX5u7m5ucjNzUV1dTVomsbq6irsdjtTIVAUhenpafh8PhQWFiI7OzspBzhpvbErF4qiGHLRarVrZi4quah4P0IlExVxEckSJd5hqdFo4PP58O6774KiKBw8eBA5OTm8rqfRaJCfn4/8/HzU1taCoii89957yMzMhMlkwtjYGHQ6HVO1FBUVISsrS/LPyee+IrXFKIqCz+eD1+tVyUXF+xYqmaiICfbuCBlO84HX64XZbEZVVRUaGxvDFg2FghzOZWVlKC8vRygUwsrKCux2OxYXFzEyMoKMjIwwcsnIyB
B9Pb7gkiohl1AoxNjsswf6pD3Gh4xVqFhvUMlERURE2x2Jh1AohKGhIaysrGDDhg3YuXOnrPcEnK0KCGkAQDAYhMPhYIb5g4ODyM7ODpu56PV62e4jGghJEMJlk0swGGT+fSRfMZVcVKx3qGSiYg2i7Y7Ew+rqKnp6epgqgm9biw9iXV+n06GkpAQlJSUAzpLL8vIy7HY7pqenMTAwgJycHIZYCgsLkZ6eLtu9xbrnSOQSDAYZ12RCLmxfMdVuX8V6hEomKsIQb3ckEmiaxtzcHIaHh1FbW4uGhgYMDAzIrsDi+3o6nQ6lpaUoLS0FAPj9foZcJiYm4Ha7mR2XwsJCFBYWQqdL/FchHrmEQiGYTCbU1NSo5KJi3UElExUA+O2OREIwGER/fz9sNhtaW1uZA1zuvRApbSC9Xo/y8nKUl5cDAHw+H+x2O5aXlzE2Ngav1xu2QFlQUCDXbccEl1wCgQDGx8dRWVmJQCAAQE2hVLF+oJKJCtFtLYfDAYPBgOzsbBw6dChs6J2IJUO5Xi8jIwMbNmzAhg0bAIBZnrTb7RgaGoLf70dOTg5CoRDsdjsKCgqScoCzUyaBc5UL2xFZTaFUoVSoZPI+BztOV0hba3p6GmNjY6ivr8eWLVt4bcBLQSIH1JmZmdi4cSM2btwImqbh8XiwuLiI2dlZDAwMIBgMoqCgIOHWL9z3K1JbjKjrSOXCJRc1hVJFqqCSyfsU3N0RvkTi9/vR19eH1dVV7N27l1FUcRGPTMSQTbK24LOzs1FWVobFxUUcOnQIbrebqVxmZmZA03SYUiw3N1eWAzxeYBifLBc2uagplCqSCZVM3ofgxunyfcq22Wzo6elBYWEhDh06FFMRpdFomNeXA6k6EDUaDXJycpCTk8NYvzidToZczpw5A41GE0YuOTk5ou9XyH/HNyhMTaFUkQyoZPI+AvuwEdrWmpiYwJkzZ9DY2Ijq6mpeG/ByIhX5KNHuIy8vD3l5eaipqQFFUQy5WK1WTExMhO3BkO18vu+z1HtTUyhVpAoqmbxPQMwKh4eHsW3bNt6HiNfrRW9vL7xeL/bv34/8/Hxe15O7MlEqtFrtGusXsp1vNBoxNjaG9PT0sMollvWLnAe7mkKpIplQyeR9APbQdmZmBlu3buV1YHCdfoXsYpyvlUk8aLVaZndly5YtCIVCcDgcWF5ejmv9ksifL1YK5eDgIPR6PWpqalRyUSEaKpmcx+BaonAPkmiI5PQrFO+XyiQe0tLSUFxcjOLiYgDRrV+KioqQmZmZtPtikwtFUUwVQ0wr1RRKFUKhksl5iki7IwSxDnm3242enh5QFIXOzk7k5uaKun68A0dMBO96qEzigWv9EggEmO38hYUFBINBvPvuu0m1fmFHChDCiJdCSWTIqiOyCgKVTM5DxNsdiXYoLy4uYmBgAJWVlZKdfmNVJouLixgcHERGRgaKi4uZQzMZliZKQ3p6OsrKylBWVoaVlRX09PRgy5YtEa1fyHa+3O8TcTxgI1YKJTsoTE2hVEHw/vv2nsdg747EitPlHvLE6XdpaQm7d+9GRUWF5HuJdKCwr9PY2AgAsNvtYZYmhFy4W+fnS2USC6RCiGT9YrfbMTIyAp/Pt4ZcpJA++7qxwJdc1BTK9y9UMjlPEGl3JNKXmMTQErCdfg8dOiRbyBT38He5XDAYDNBqtTh48CB0Oh0oimKIy+PxMIcmafeQrfPi4mLm4Drfwf2dca1fPB4P0xYj1i/s7fz8/HzB2/l8yCTSfUYLClPJ5f0JlUzWOYTG6Wq1WlAUFdHpV06LEDaZLCwsYGBgANXV1di2bRs0Gg1jB0KQlZWFrKwsVFZWgqZpZuvcZrNhZmYGoVAILpcLWq0WxcXFSYvtTSb4kCV5n9jWL4SE5+bmEAqFwkg4Nzc37u9VDJlwoaZQqlDJZB2DO2TnE7JEDvKenp41Tr9ygrTTBgYGsLS0hD179jCtm3iHZqSt856eHgCAxWLBxMRESmJ7kw
GhG/DZ2dnIzs7Gpk2bQNM0XC4XU7nwtX4hai65fw72a0ZKoVxZWYFer0dhYaGaQnkeQCWTdQp2nK7Qp7ve3l7k5ua
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 640x480 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"fig = plt.figure()\n",
|
|
|
|
|
"ax = fig.add_subplot(111, projection='3d')\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"for dot in dots:\n",
|
|
|
|
|
" ax.scatter(*dot)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 1.3.2 Задание\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Создать 5x5 матрицу со значениями в строках от 0 до 4. Для создания \n",
|
|
|
|
|
"необходимо использовать функцию arange."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 7,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"array([[0, 1, 2, 3, 4],\n",
|
|
|
|
|
" [0, 1, 2, 3, 4],\n",
|
|
|
|
|
" [0, 1, 2, 3, 4],\n",
|
|
|
|
|
" [0, 1, 2, 3, 4],\n",
|
|
|
|
|
" [0, 1, 2, 3, 4]])"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 7,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"np.array([np.arange(0, 5) for _ in range(5)])"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 2.3.1 Задание\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Для предыдущего примера поэкспериментируйте с параметрами классификатора: \n",
|
|
|
|
|
"1. Установите другое количество ближайших соседей (k = 1, 5, 10). \n",
|
|
|
|
|
"2. Установите размер тестовой выборки 15% от всего датасета. \n",
|
|
|
|
|
"3. Постройте графики и оцените качество моделей, проанализируйте результаты. "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"#### 2.2.1\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"В примере показано создание 2d-массива со значениями x и y. Список \n",
|
|
|
|
|
"target содержит возможные выходные классы (часто называемые \n",
|
|
|
|
|
"метками). Далее происходит обучение классификатора k-ближайших \n",
|
|
|
|
|
"соседей по исходным данным. Далее производится прогноз \n",
|
|
|
|
|
"принадлежности к классам для двух точек данных."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 25,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"KNeighborsClassifier(n_neighbors=3)\n",
|
|
|
|
|
"(-2,-2) is class [0]\n",
|
|
|
|
|
"(1,3) is class [1]\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# данные\n",
|
|
|
|
|
"X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n",
|
|
|
|
|
"target = [0, 0, 0, 1, 1, 1]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# обучаем модель k-ближайших соседей к данным\n",
|
|
|
|
|
"K = 3\n",
|
|
|
|
|
"model = KNeighborsClassifier(n_neighbors=K)\n",
|
|
|
|
|
"model.fit(X, target)\n",
|
|
|
|
|
"print(model)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# делаем прогноз\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"print('(-2,-2) is class', model.predict([[-2, -2]]))\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"print('(1,3) is class', model.predict([[1, 3]]))"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"#### 2.2.2\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Далее приведем более наглядный пример. Будет построена граница \n",
|
|
|
|
|
"решения для каждого класса. В качестве данных будем использовать уже \n",
|
|
|
|
|
"знакомый нам и встроенный в библиотеку sklearn набор данных ирисов \n",
|
|
|
|
|
"Фишера. Этот набор данных стал уже классическим, и часто используется \n",
|
|
|
|
|
"в литературе для иллюстрации работы различных статистических \n",
|
|
|
|
|
"алгоритмов. Датасет содержит наблюдения за 150 разными цветками \n",
|
|
|
|
|
"ирисов, данные по каждому цветку расположены в строках. В столбцах \n",
|
|
|
|
|
"записаны длина и ширина чашелистика, длина и ширина лепестка, вид \n",
|
|
|
|
|
"ириса. "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 9,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>sepal_length_cm</th>\n",
|
|
|
|
|
" <th>sepal_width_cm</th>\n",
|
|
|
|
|
" <th>petal_length_cm</th>\n",
|
|
|
|
|
" <th>petal_width_cm</th>\n",
|
|
|
|
|
" <th>species</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>5.1</td>\n",
|
|
|
|
|
" <td>3.5</td>\n",
|
|
|
|
|
" <td>1.4</td>\n",
|
|
|
|
|
" <td>0.2</td>\n",
|
|
|
|
|
" <td>setosa</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>4.9</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>1.4</td>\n",
|
|
|
|
|
" <td>0.2</td>\n",
|
|
|
|
|
" <td>setosa</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>4.7</td>\n",
|
|
|
|
|
" <td>3.2</td>\n",
|
|
|
|
|
" <td>1.3</td>\n",
|
|
|
|
|
" <td>0.2</td>\n",
|
|
|
|
|
" <td>setosa</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>4.6</td>\n",
|
|
|
|
|
" <td>3.1</td>\n",
|
|
|
|
|
" <td>1.5</td>\n",
|
|
|
|
|
" <td>0.2</td>\n",
|
|
|
|
|
" <td>setosa</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
|
" <td>3.6</td>\n",
|
|
|
|
|
" <td>1.4</td>\n",
|
|
|
|
|
" <td>0.2</td>\n",
|
|
|
|
|
" <td>setosa</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>145</th>\n",
|
|
|
|
|
" <td>6.7</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>5.2</td>\n",
|
|
|
|
|
" <td>2.3</td>\n",
|
|
|
|
|
" <td>virginica</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>146</th>\n",
|
|
|
|
|
" <td>6.3</td>\n",
|
|
|
|
|
" <td>2.5</td>\n",
|
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
|
" <td>1.9</td>\n",
|
|
|
|
|
" <td>virginica</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>147</th>\n",
|
|
|
|
|
" <td>6.5</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>5.2</td>\n",
|
|
|
|
|
" <td>2.0</td>\n",
|
|
|
|
|
" <td>virginica</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>148</th>\n",
|
|
|
|
|
" <td>6.2</td>\n",
|
|
|
|
|
" <td>3.4</td>\n",
|
|
|
|
|
" <td>5.4</td>\n",
|
|
|
|
|
" <td>2.3</td>\n",
|
|
|
|
|
" <td>virginica</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>149</th>\n",
|
|
|
|
|
" <td>5.9</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>5.1</td>\n",
|
|
|
|
|
" <td>1.8</td>\n",
|
|
|
|
|
" <td>virginica</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>150 rows × 5 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm \\\n",
|
|
|
|
|
"0 5.1 3.5 1.4 0.2 \n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"1 4.9 3.0 1.4 0.2 \n",
|
|
|
|
|
"2 4.7 3.2 1.3 0.2 \n",
|
|
|
|
|
"3 4.6 3.1 1.5 0.2 \n",
|
|
|
|
|
"4 5.0 3.6 1.4 0.2 \n",
|
|
|
|
|
".. ... ... ... ... \n",
|
|
|
|
|
"145 6.7 3.0 5.2 2.3 \n",
|
|
|
|
|
"146 6.3 2.5 5.0 1.9 \n",
|
|
|
|
|
"147 6.5 3.0 5.2 2.0 \n",
|
|
|
|
|
"148 6.2 3.4 5.4 2.3 \n",
|
|
|
|
|
"149 5.9 3.0 5.1 1.8 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" species \n",
|
|
|
|
|
"0 setosa \n",
|
|
|
|
|
"1 setosa \n",
|
|
|
|
|
"2 setosa \n",
|
|
|
|
|
"3 setosa \n",
|
|
|
|
|
"4 setosa \n",
|
|
|
|
|
".. ... \n",
|
|
|
|
|
"145 virginica \n",
|
|
|
|
|
"146 virginica \n",
|
|
|
|
|
"147 virginica \n",
|
|
|
|
|
"148 virginica \n",
|
|
|
|
|
"149 virginica \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[150 rows x 5 columns]"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 9,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"url = 'https://raw.githubusercontent.com/akmand/datasets/master/iris.csv'\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"iris = pd.read_csv(url)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"iris"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 10,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABRgAAAJaCAYAAABELyv0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dbH8e/0mUwaPfQOAoKAgAIKKFIFURRFUSmKvWLlXkVBfbl6QUDFAgoCghUFlSIgigqCIEWK0iH0DumZ+v6RSySmTUKSmQm/z314rjln5cxKdiaZWWfvtQ1+v9+PiIiIiIiIiIiISCEYg52AiIiIiIiIiIiIhC8VGEVERERERERERKTQVGAUERERERERERGRQlOBUURERERERERERApNBUYREREREREREREpNBUYRUREREREREREpNBUYBQREREREREREZFCU4FRRERERERERERECs0c7ATOh8/n4+DBg0RFRWEwGIKdjoiIiEiB+f1+EhMTqVKlCkaj7v2GI70mFRERkXBWFK9Hw7rAePDgQapXrx7sNERERETO2759+6hWrVqw05BC0GtSERERKQ3O5/VoWBcYo6KigIxvQHR0NG63m0WLFtG1a1csFkuQs5O8aKzCh8YqfGiswoPGKXyU1FglJCRQvXr1zNc1En7++Zq0OOh3R/jQWIUPjVV40DiFD41V+PjnWBXF69GwLjCeXYISHR2dWWCMiIggOjpaP8whTmMVPjRW4UNjFR40TuGjpMdKS2vD1z9fkxYH/e4IHxqr8KGxCg8ap/ChsQofuY3V+bweVaMfERERERERERERKTQVGEVERERERERERKTQVGAUERERERERERGRQgvrHoyB8Pv9eDwevF5vsFORc7jdbsxmM2lpaQGPjclkwmw2q0eViIiI5Mjr9fLiiy/y0UcfcfjwYapUqcKgQYN47rnn8nz98OOPPzJs2DA2b95M9erVee655xg0aFDJJS4iIhKCClpPKcz7fCkZJVFPKdUFRpfLxaFDh0hJSQl2KvIPfr+fuLg49u3bV6Af8IiICCpXrozVai3G7ERERCQcvfrqq7zzzjtMmzaNJk2asGbNGgYPHkxMTAyPPPJIjp+ze/durr32Wu677z5mzpzJ999/z913303lypXp1q1bCX8FIiIioaEw9ZTCvs+XklHc9ZRSW2D0+Xzs3r0bk8lElSpVsFqt+gEPIT6fj6SkJCIjIzEa81+p7/f7cblcHDt2jN27d1O/fv2APk9EREQuHCtWrKBPnz5ce+21ANSqVYuPP/6Y3377LdfPeffdd6lduzZjx44FoFGjRvzyyy+MGzdOBUYREbkgFbaeUtD3+VIycqqnFIdSW2B0uVz4fD6qV69OREREsNORf/D5fLhcLux2e8C/eBwOBxaLhb1792Z+roiIiMhZ7dq1Y9KkSWzbto0GDRqwYcMGfvnlF15//fVcP+fXX3/lmmuuyXKsW7duPPbYY8WcrYiISGgqbD2lMO/zpWT8s55iMpmK/DFKbYHxLP1Qly4aTxEREcnNs88+S0JCAhdddBEmkwmv18srr7zCgAEDcv2cw4cPU6lSpSzHKlWqREJCAqmpqTgcjmyfk56eTnp6eubHCQkJQEbvKbfbXURfTVZnr1tc15eio7EKHxqr8KBxKnlutxu/3w9kFA0DdfZz/H5/gT5PSo7f78ftdmeOT1E+v0p9gVFERERELgyfffYZM2fOZNasWTRp0oT169fz2GOPUaVKFQYOHFhkjzN69GhGjhyZ7fiiRYuKfeXM4sWLi/X6UnQ0VuFDYxUeNE4lx2w2ExcXR1JSEi6Xq8Cfn5iYWAxZyflyuVykpqby008/4fF4gL+fV0Wxd0lQC4y1atVi79692Y4/8MADTJw4MQgZ5czn85OQ9nc1N9puwWhUP0cRERGRUPLUU0/x7LPP0r9/fwCaNm3K3r17GT16dK4Fxri4OI4cOZLl2JEjR4iOjs
5x9iLA8OHDGTZsWObHCQkJVK9ena5duxIdHV1EX01WbrebxYsX06VLFywWS7E8hhQNjVX40FiFB41TyUtLS2Pfvn1ERkYWqDWZ3+8nMTGRqKiozJ6Nfj8kpnvwAwYgymZG22MER1paGg6Hgw4dOmAymbI8r86uxjgfQS0wrl69OsvW5Zs2baJLly7069cviFn9zevzk+b28uO2Y8xcuZfjSemUj7Qx4PKadGpQAYfFpEKjiIiISIhISUnJ1k7FZDLluUyrbdu2zJ8/P8uxxYsX07Zt21w/x2azYbPZsh23WCzF/ua3JB5DiobGKnxorMKDxqnkeL1eDAYDRqOxQG3Kzv69NRgM+DGonhJijEYjBoMBi8WS2YPx7POqKJ5bQS0wVqhQIcvH//nPf6hbty4dO3YMUkZ/8/r8HE9K56Z3V7DvZGrm8W1Hklix8wTVyzr44r52VIi0hcyTYs+ePdSuXZt169bRvHnzYKcjIiIiUqJ69+7NK6+8Qo0aNWjSpAnr1q3j9ddfZ8iQIZkxw4cP58CBA0yfPh2A++67j7feeounn36aIUOGsHTpUj777DPmzZsXrC9DREQkrPn8fk4ku8KqngKqqZyvkOnB6HK5+Oijjxg2bFiu25/n11D73OaUXq83s7FoYZqLprl92Z4M59p3MpWb3l3Bwkc74LCExsYjZ7/Own7NJamwzV99Pl9mU9Li2PVIslNT5fChsQoPGqfwUVJjpZ+FovPmm2/y/PPP88ADD3D06FGqVKnCvffey4gRIzJjDh06RHx8fObHtWvXZt68eTz++ONMmDCBatWq8f7779OtW7dgfAkiIiJhL93jD7ie4rSFTFlKzlPIjOScOXM4ffo0gwYNyjUm0IbaixcvPq+mpGaLhWU7z+T6ZDhr38lUlm09Soe6MXiK8M3B3LlzefXVV9m9ezcOh4NmzZoxc+ZMnE4n06dPZ+LEiezdu5caNWpwzz33cPfddwNQt25dAC699FIA2rdvz7fffovP52PMmDFMmzaN48eP06BBA1544QWuueYaIKO4++9//5tvvvmG06dPU6FCBQYPHpzZW2jixInMnDmTvXv3EhsbS/fu3Rk5ciSRkZHn/bUWtPlrTk1JpWSoqXL40FiFB41T+CjusSqKptqSISoqivHjxzN+/PhcYz788MNsxzp16sS6deuKLzEREZELhNli4cdtxwKqp/y07RjdmsQV+SzGL774gpEjR7Jjxw4iIiJo0aIFc+fOxel08v777zN27Fh2795NrVq1eOSRR3jggQeAjJuOAC1atACgY8eO/Pjjj/h8Pl5++WUmTZrEsWPHaNSoEf/5z3/o3r07kFGnGDZsGLNnz+bUqVNUqlSJ++67j+HDhwPw+uuvM3XqVHbt2kXZsmXp3bs3r732WpHUVEJJyBQYP/jgA3r06EGVKlVyjcmvofa5zV+9Xm+hmpICJKR5mLkqPv9A4KNV8bSr15Lo6JybgBfUoUOHuPvuu3n11Ve5/vrrSUxM5JdffiEqKoq5c+fyn//8hzfeeIMWLVqwbt067r33XsqVK8fAgQNZuXIll19+OYsWLaJJkyZYrVaio6MZP348EydO5J133qFFixZMnTqV2267jY0bN1K/fn3Gjh3Ld999x6effkqNGjXYt28f+/bty2xSHhERwZtvvknt2rXZtWsXDz30EK+88sp5bcSTU/PXQJzblLSg4yqFo6bK4UNjFR40TuGjpMaqKJpqi4iIiISCdJ+RmSuzb+abkxkr99KuXjliHNYie/xDhw5x66238tprr3HDDTeQmJjIzz//jN/vZ+bMmYwYMYK33nors6YydOhQnE4nAwcO5LfffqNNmzYsWbIks6YCMGHCBMaOHct7771HixYtmDJlCtdddx2bN2+mfv36vPHGG3z99dd89tlnWWoqZxmNRt54443MmsoDDzzA008/zdtvv11kX3coCIkC4969e1myZAlffvllnnGBNtS2WCyZzSsL2pT0rO
NJ6fkHASeSMmZHFuYxcnLkyBE8Hg833ngjNWvWBOCSSy4BYOTIkYwdO5abbroJyJix+NdffzF58mQGDx5MpUqVgIz
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1600x700 with 2 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Объявляем фигуру из двух графиков и ее размер \n",
|
|
|
|
|
"plt.figure(figsize=(16, 7))\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Левый график\n",
|
|
|
|
|
"plt.subplot(121)\n",
|
|
|
|
|
"sns.scatterplot(\n",
|
|
|
|
|
" data=iris, # из этой таблицы нарисовать точки\n",
|
|
|
|
|
" x='petal_width_cm', y='petal_length_cm', # с этими координатами,\n",
|
|
|
|
|
" hue='species', # для которых цвет определить согласно этому столбцу \n",
|
|
|
|
|
" s=70 # размер точек\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"plt.xlabel('Ширина лепестка, см') \n",
|
|
|
|
|
"plt.ylabel('Длина лепестка, см') \n",
|
|
|
|
|
"plt.legend() # добавить легенду \n",
|
|
|
|
|
"plt.grid() # добавить сетку\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Правый график аналогично \n",
|
|
|
|
|
"plt.subplot(122)\n",
|
|
|
|
|
"sns.scatterplot(data=iris,\n",
|
|
|
|
|
"x='sepal_width_cm', y='sepal_length_cm', hue='species', s=70)\n",
|
|
|
|
|
"plt.xlabel('Ширина чашелистика, см') \n",
|
|
|
|
|
"plt.ylabel('Длина чашелистика, см') \n",
|
|
|
|
|
"plt.legend()\n",
|
|
|
|
|
"plt.grid();"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 11,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"((127, 4), (23, 4), (127,), (23,))"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 11,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
|
|
|
|
" # поскольку iris это pandas-таблица, для нее нужно указывать iloc\n",
|
|
|
|
|
" iris.iloc[:, :-1], # берем все колонки кроме последней в признаки\n",
|
|
|
|
|
" iris.iloc[:, -1], # последнюю в целевую переменную (класс)\n",
|
|
|
|
|
" test_size=0.15 # размер тестовой выборки 15%\n",
|
|
|
|
|
")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"X_train.shape, X_test.shape, y_train.shape, y_test.shape\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 12,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>sepal_length_cm</th>\n",
|
|
|
|
|
" <th>sepal_width_cm</th>\n",
|
|
|
|
|
" <th>petal_length_cm</th>\n",
|
|
|
|
|
" <th>petal_width_cm</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" <th>52</th>\n",
|
|
|
|
|
" <td>6.9</td>\n",
|
|
|
|
|
" <td>3.1</td>\n",
|
|
|
|
|
" <td>4.9</td>\n",
|
|
|
|
|
" <td>1.5</td>\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" <th>42</th>\n",
|
|
|
|
|
" <td>4.4</td>\n",
|
|
|
|
|
" <td>3.2</td>\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" <td>1.3</td>\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" <td>0.2</td>\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" <th>75</th>\n",
|
|
|
|
|
" <td>6.6</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>4.4</td>\n",
|
|
|
|
|
" <td>1.4</td>\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" <th>135</th>\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" <td>7.7</td>\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" <td>3.0</td>\n",
|
|
|
|
|
" <td>6.1</td>\n",
|
|
|
|
|
" <td>2.3</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>10</th>\n",
|
|
|
|
|
" <td>5.4</td>\n",
|
|
|
|
|
" <td>3.7</td>\n",
|
|
|
|
|
" <td>1.5</td>\n",
|
|
|
|
|
" <td>0.2</td>\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"52 6.9 3.1 4.9 1.5\n",
|
|
|
|
|
"42 4.4 3.2 1.3 0.2\n",
|
|
|
|
|
"75 6.6 3.0 4.4 1.4\n",
|
|
|
|
|
"135 7.7 3.0 6.1 2.3\n",
|
|
|
|
|
"10 5.4 3.7 1.5 0.2"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 12,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"X_train.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 13,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"52 versicolor\n",
|
|
|
|
|
"42 setosa\n",
|
|
|
|
|
"75 versicolor\n",
|
|
|
|
|
"135 virginica\n",
|
|
|
|
|
"10 setosa\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"Name: species, dtype: object"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 13,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"y_train.head()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Обучим модель методом одного ближайшего соседа (k = 1)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 14,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"array(['versicolor', 'setosa', 'virginica', 'setosa', 'versicolor',\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" 'versicolor', 'setosa', 'virginica', 'setosa', 'virginica',\n",
|
|
|
|
|
" 'setosa', 'versicolor', 'versicolor', 'virginica', 'setosa',\n",
|
|
|
|
|
" 'versicolor', 'setosa', 'setosa'], dtype=object)"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 14,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"model = KNeighborsClassifier(n_neighbors=1)\n",
|
|
|
|
|
"model.fit(X_train, y_train)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Получим предсказания модели\n",
|
|
|
|
|
"y_pred = model.predict(X_test)\n",
|
|
|
|
|
"y_pred"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 15,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyiElEQVR4nOzdd3hU1dbH8e/09ITepYP0qoIoKEgRERFFFFQExC4qYntVFNTrtYBYsICIDUQFRRQQ6VJEQaUX6b2X9Ex//5hLIKZN4Ezq7/M8eUjOWcxemRzCrNlnr23y+/1+RERERERESghzQScgIiIiIiKSn1QEiYiIiIhIiaIiSEREREREShQVQSIiIiIiUqKoCBIRERERkRJFRZCIiIiIiJQoKoJERERERKREUREkIiIiIiIlirWgE7gQPp+PgwcPEh0djclkKuh0RERERESkgPj9fhITE6lcuTJmc85zPUW6CDp48CDVqlUr6DRERERERKSQ2LdvH1WrVs0xpkgXQdHR0UDgG42JiUk/7na7+eWXX+jSpQs2m62g0pNiQNeSGEXXkhhB15EYRdeSGKUwXUsJCQlUq1YtvUbISZEugs7cAhcTE5OpCIqIiCAmJqbAfxhStOlaEqPoWhIj6DoSo+haEqMUxmspmGUyaowgIiIiIiIlioogEREREREpUVQEiYiIiIhIiVKk1wQFw+/34/F48Hq9BZ2KXCCLxYLValU7dBERERG5IMW6CHK73Rw8eJCUlJSCTkUMEhERQaVKlbDb7QWdioiIiIgUUcW6CNq7dy9Wq5XKlStjt9s1g1CE+f1+XC4Xx44dY9euXdStWzfXTbBERERERLJSbIsgq9WKz+ejcuXKREREFHQ6YoDw8HBsNht79uzB5XIRFhZW0CmJiIiISBFU7N9K12xB8aKfp4iIiIhcKL2iFBERERGREqVAi6AaNWpgMpkyfTz44IMFmVYmPp+f0ymu9A+fz1/QKYmIiIiIyHkq0DVBq1atytC6esOGDXTu3Jk+ffoUYFZneX1+0txeFv9zjMkr93A8yUnZKAf921TnqnrlCLdZMJvVbEFEREREpCgp0CKoXLlyGb7+73//S+3atenQoUMBZXSW1+fneJKTmz9cwb6TqenH/zmSxIodJ6hWOpxp911OuShHoSmEdu/eTc2aNfn7779p3rx5QacjIiIiIlIoFZrucC6Xiy+//JJhw4Zl28ra6XTidDrTv05ISAAC+wG53e7042c+9/v9+Hw+fD5fnvNJc/syFUDn2ncylZs/XMHPj7Qn3FY4llad+T7P93suCnw+H36/H7fbjcViCfl4Z66lc68vkfOha0mMoOtIjKJrSYxSmK6lvORg8vv9hWKByzfffEO/fv3Yu3cvlStXzjLmxRdfZOTIkZmOT5kyJVMbbKvVSsWKFalWrVqeN9a02mws2RHPg1P+zjX2/X4taF87Fo+BP/gffviB1157jV27dhEeHk7Tpk2ZPHkykZGRfP7554wbN449e/Zw0UUXcc8993D33XcDUKpUqQyP065dO3766Sd8Ph9vvvkmn332GcePH6devXq88MILXHPNNUCgAH322Wf58ccfOX36NOXKlWPgwIEMGzYMgHHjxjF58mT27NlDXFwc3bp1Y+TIkURFRRn2PQfL5XKxb98+Dh8+jMfjyffxRURERKRwSklJoV+/fsTHxxMTE5NjbKGZCZo4cSLXXntttgUQwDPPPJP+whwCM0HVqlWjS5cuGb5Rt9vNokWLCAsLIyoqKs/7ySSkeZj8+96gYr/8fS+X12lJTEx4nsbIzqFDh7j77rt57bXX6NWrF4mJiSxbtozo6Gh++OEH/vvf//LOO+/QokUL/v77b+69917KlCnDgAEDWLlyJW3atOGXX36hUaNG2O12YmJiGDt2LOPGjeODDz6gRYsWTJo0iX79+rF+/Xrq1q3L6NGjmTt3Ll9//TUXXXQR+/btY9++fenPaUREBO
+++y41a9Zk586dPPTQQ7zyyiuMGzfOkO85L9LS0ggPD6d9+/b5sk+Q2+1m3rx5dO7cGZvNFvLxpPjStSRG0HUkRtG1JEYpTNfSmbvEglEoiqA9e/Ywf/58vvvuuxzjHA4HDocj03GbzZblk24ymTCbzee1t8zxJGfuQcCJJBdg3P41R44cwePxcNNNN1G9enUAmjVrBsDIkSMZPXo0N998MwC1a9dmy5YtTJgwgYEDB1KhQgUgsNbq3GJy9OjRPPXUU/Tr1w+A119/ncWLF/POO+8wbtw49u3bR926dWnfvj0mk4maNWtmyOmxxx5L/7xWrVq8/PLL3HfffXzwwQeGfM95YTabMZlM2f7MQyW/x5PiS9eSGEHXkRhF15Kx/H4/Se4krGYrXp8Xi9mCCRNh1uK/wXthuJbyMn6hKIImTZpE+fLlue666wo6lXRloxz8cyQp17gyUXayWcJ0Xpo1a0anTp1o0qQJXbt2pUuXLtx8883Y7XZ27NjB4MGDGTJkSHq8x+MhNjY228dLSEjg4MGDtGvXLsPxdu3asXbtWgDuuusuOnfuTP369enWrRs9evSgS5cu6bHz58/n1VdfZcuWLSQkJODxeEhLSyMlJSXTbYgiIiIiJVGyO5nVh1czYf0E1h4LvMYq5ShFn3p9GNBoAJG2SCzm0K9nluAU+Ip+n8/HpEmTGDBgAFZroajJiAmz0b9N9aBi72hTnWiHcVWvxWJh3rx5zJkzh4YNG/Luu+9Sv359NmzYAMCECRNYs2ZN+seGDRtYuXLlBY3ZsmVLdu3axUsvvURqaiq33HJL+mzT7t276dGjB02bNmX69On8+eef6bfBuVyuC/tmRURERIqBZHcyb//1Ng8tfCi9AAI45TzF+PXjuenHmzjlPEUhWYovFIIiaP78+ezdu5dBgwYVdCrpzGYTV9UrR7XSOa/zqVY6nPb1yhneIttkMtGuXTtGjhzJ33//jd1uZ/ny5VSuXJmdO3dSp06dDB9nbl870wDi3L2XYmJiqFy5MsuXL88wxvLly2nYsGGGuL59+zJhwgS+/vprpk+fzsmTJ/nzzz/x+XyMHj2aNm3aUK9ePQ4ePGjo9ysiIiJSVPn9fv449Adfbfkq25jDyYd5dNGjJLlzv8tI8keBT7106dKlUFbFYTYL0+67PNs22Wf2CQq3GTut+fvvv7NgwQK6dOlC+fLl+f333zl27BgNGjRg5MiRDB06lNjYWLp164bT6WT16tWcOnWKYcOGUb58ecLDw/n555+pWrUqYWFhxMbG8sQTT/DCCy9Qu3ZtmjdvzqRJk1izZg2TJ08GYMyYMVSqVIkWLVpgNpv59ttvqVixInFxcdSpUwe32827777L9ddfz/Lly/nwww8N/Z5FREREiqpEdyIfr/8417i1x9ZyMu0k0fbofMhKclPgRVBhZTGbKBfl4OdH2rPkn2N8uXIPJ5JclImyc0eb6rSvV45wm8XwWaCYmBh+/fVXxo4dS0JCAtWrV2f06NFce+21QKBT2xtvvMETTzxBZGQkTZo04dFHHwUCbcHfeecdRo0axYgRI7jyyitZvHgxQ4cOJT4+nscff5yjR4/SsGFDZs6cSd26dQGIjo7m9ddfZ9u2bVgsFi655BJmz56N2WymWbNmjBkzhtdee41nnnmG9u3b8+qrr3LnnXca+n2LiIiIFEVWk5V1x9cFFfvzrp+5t9m9Ic5IgqEiKAdms4lIh5VujSrSrk4Z/H4wmSDaYTO8+DmjQYMG/Pzzz9me79evX3qXt6zcfffd6fsGnWE2m3nhhRd44YUXsvw7Q4YMydBs4d8ee+yxDB3iAO64445s40VERERKCo8/+H0LUzwp+P1+TEZ21ZLzoiIoCGazidjwvG24KiIiIiLFn81kI9YRS7wzPtfYxmUaqwAqJAq8MYKIiIiISFFlMpm4qc5NucbF2GNoV6VdrnGSP1QEiYiIiIicpzBrGIOaDKJCRIUc44a3Hq5ZoEJERZ
CIiIiIyAWItEUy5bopNC7bONO5GHsMoy4fRZcaXQi35rz9iuQfrQkSEREREbkAVrOVcuHlGN95PMdTjzNn5xxSvak
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1000x700 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Покажем на графике, что отражает полученное число.\n",
|
|
|
|
|
"# Красным цветом обозначим точки, для которых классификация сработала неправильно.\n",
|
|
|
|
|
"plt.figure(figsize=(10, 7))\n",
|
|
|
|
|
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
|
|
|
|
|
"plt.xlabel('Ширина лепестка, см')\n",
|
|
|
|
|
"plt.ylabel('Длина лепестка, см')\n",
|
|
|
|
|
"plt.legend(loc=2)\n",
|
|
|
|
|
"plt.grid()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Перебираем все объекты из теста\n",
|
|
|
|
|
"for i in range(len(y_test)):\n",
|
|
|
|
|
" # Если предсказание неправильное\n",
|
|
|
|
|
" if np.array(y_test)[i] != y_pred[i]:\n",
|
|
|
|
|
" # то подсвечиваем точку красным\n",
|
|
|
|
|
" plt.scatter(X_test.iloc[i, 3], X_test.iloc[i, 2], color='red', s=150)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 16,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"accuracy: 1.000\n"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# качество модели (доля правильно классифицированных точек)\n",
|
|
|
|
|
"print(f'accuracy: {accuracy_score(y_test, y_pred):.3f}')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Обучим метод 5 ближайших соседей"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 17,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"array(['versicolor', 'setosa', 'virginica', 'setosa', 'versicolor',\n",
|
|
|
|
|
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
|
|
|
|
|
" 'versicolor', 'setosa', 'virginica', 'setosa', 'virginica',\n",
|
|
|
|
|
" 'setosa', 'versicolor', 'versicolor', 'virginica', 'setosa',\n",
|
|
|
|
|
" 'versicolor', 'setosa', 'setosa'], dtype=object)"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 17,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"model = KNeighborsClassifier(n_neighbors=5)\n",
|
|
|
|
|
"model.fit(X_train, y_train)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Получим предсказания модели\n",
|
|
|
|
|
"y_pred = model.predict(X_test)\n",
|
|
|
|
|
"y_pred"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 18,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyiElEQVR4nOzdd3hU1dbH8e/09ITepYP0qoIoKEgRERFFFFQExC4qYntVFNTrtYBYsICIDUQFRRQQ6VJEQaUX6b2X9Ex//5hLIKZN4Ezq7/M8eUjOWcxemRzCrNlnr23y+/1+RERERERESghzQScgIiIiIiKSn1QEiYiIiIhIiaIiSEREREREShQVQSIiIiIiUqKoCBIRERERkRJFRZCIiIiIiJQoKoJERERERKREUREkIiIiIiIlirWgE7gQPp+PgwcPEh0djclkKuh0RERERESkgPj9fhITE6lcuTJmc85zPUW6CDp48CDVqlUr6DRERERERKSQ2LdvH1WrVs0xpkgXQdHR0UDgG42JiUk/7na7+eWXX+jSpQs2m62g0pNiQNeSGEXXkhhB15EYRdeSGKUwXUsJCQlUq1YtvUbISZEugs7cAhcTE5OpCIqIiCAmJqbAfxhStOlaEqPoWhIj6DoSo+haEqMUxmspmGUyaowgIiIiIiIlioogEREREREpUVQEiYiIiIhIiVKk1wQFw+/34/F48Hq9BZ2KXCCLxYLValU7dBERERG5IMW6CHK73Rw8eJCUlJSCTkUMEhERQaVKlbDb7QWdioiIiIgUUcW6CNq7dy9Wq5XKlStjt9s1g1CE+f1+XC4Xx44dY9euXdStWzfXTbBERERERLJSbIsgq9WKz+ejcuXKREREFHQ6YoDw8HBsNht79uzB5XIRFhZW0CmJiIiISBFU7N9K12xB8aKfp4iIiIhcKL2iFBERERGREqVAi6AaNWpgMpkyfTz44IMFmVYmPp+f0ymu9A+fz1/QKYmIiIiIyHkq0DVBq1atytC6esOGDXTu3Jk+ffoUYFZneX1+0txeFv9zjMkr93A8yUnZKAf921TnqnrlCLdZMJvVbEFEREREpCgp0CKoXLlyGb7+73//S+3atenQoUMBZXSW1+fneJKTmz9cwb6TqenH/zmSxIodJ6hWOpxp911OuShHoSmEdu/eTc2aNfn7779p3rx5QacjIiIiIlIoFZrucC6Xiy+//JJhw4Zl28ra6XTidDrTv05ISAAC+wG53e7042c+9/v9+Hw+fD5fnvNJc/syFUDn2ncylZs/XMHPj7Qn3FY4llad+T7P93suCnw+H36/H7fbjcViCfl4Z66lc68vkfOha0mMoOtIjKJrSYxSmK6lvORg8vv9hWKByzfffEO/fv3Yu3cvlStXzjLmxRdfZOTIkZmOT5kyJVMbbKvVSsWKFalWrVqeN9a02mws2RHPg1P+zjX2/X4taF87Fo+BP/gffviB1157jV27dhEeHk7Tpk2ZPHkykZGRfP7554wbN449e/Zw0UUXcc8993D33XcDUKpUqQyP065dO3766Sd8Ph9vvvkmn332GcePH6devXq88MILXHPNNUCgAH322Wf58ccfOX36NOXKlWPgwIEMGzYMgHHjxjF58mT27NlDXFwc3bp1Y+TIkURFRRn2PQfL5XKxb98+Dh8+jMfjyffxRURERKRwSklJoV+/fsTHxxMTE5NjbKGZCZo4cSLXXntttgUQwDPPPJP+whwCM0HVqlWjS5cuGb5Rt9vNokWLCAsLIyoqKs/7ySSkeZj8+96gYr/8fS+X12lJTEx4nsbIzqFDh7j77rt57bXX6NWrF4mJiSxbtozo6Gh++OEH/vvf//LOO+/QokUL/v77b+69917KlCnDgAEDWLlyJW3atOGXX36hUaNG2O12YmJiGDt2LOPGjeODDz6gRYsWTJo0iX79+rF+/Xrq1q3L6NGjmTt3Ll9//TUXXXQR+/btY9++fenPaUREBO
+++y41a9Zk586dPPTQQ7zyyiuMGzfOkO85L9LS0ggPD6d9+/b5sk+Q2+1m3rx5dO7cGZvNFvLxpPjStSRG0HUkRtG1JEYpTNfSmbvEglEoiqA9e/Ywf/58vvvuuxzjHA4HDocj03GbzZblk24ymTCbzee1t8zxJGfuQcCJJBdg3P41R44cwePxcNNNN1G9enUAmjVrBsDIkSMZPXo0N998MwC1a9dmy5YtTJgwgYEDB1KhQgUgsNbq3GJy9OjRPPXUU/Tr1w+A119/ncWLF/POO+8wbtw49u3bR926dWnfvj0mk4maNWtmyOmxxx5L/7xWrVq8/PLL3HfffXzwwQeGfM95YTabMZlM2f7MQyW/x5PiS9eSGEHXkRhF15Kx/H4/Se4krGYrXp8Xi9mCCRNh1uK/wXthuJbyMn6hKIImTZpE+fLlue666wo6lXRloxz8cyQp17gyUXayWcJ0Xpo1a0anTp1o0qQJXbt2pUuXLtx8883Y7XZ27NjB4MGDGTJkSHq8x+MhNjY228dLSEjg4MGDtGvXLsPxdu3asXbtWgDuuusuOnfuTP369enWrRs9evSgS5cu6bHz58/n1VdfZcuWLSQkJODxeEhLSyMlJSXTbYgiIiIiJVGyO5nVh1czYf0E1h4LvMYq5ShFn3p9GNBoAJG2SCzm0K9nluAU+Ip+n8/HpEmTGDBgAFZroajJiAmz0b9N9aBi72hTnWiHcVWvxWJh3rx5zJkzh4YNG/Luu+9Sv359NmzYAMCECRNYs2ZN+seGDRtYuXLlBY3ZsmVLdu3axUsvvURqaiq33HJL+mzT7t276dGjB02bNmX69On8+eef6bfBuVyuC/tmRURERIqBZHcyb//1Ng8tfCi9AAI45TzF+PXjuenHmzjlPEUhWYovFIIiaP78+ezdu5dBgwYVdCrpzGYTV9UrR7XSOa/zqVY6nPb1yhneIttkMtGuXTtGjhzJ33//jd1uZ/ny5VSuXJmdO3dSp06dDB9nbl870wDi3L2XYmJiqFy5MsuXL88wxvLly2nYsGGGuL59+zJhwgS+/vprpk+fzsmTJ/nzzz/x+XyMHj2aNm3aUK9ePQ4ePGjo9ysiIiJSVPn9fv449Adfbfkq25jDyYd5dNGjJLlzv8tI8keBT7106dKlUFbFYTYL0+67PNs22Wf2CQq3GTut+fvvv7NgwQK6dOlC+fLl+f333zl27BgNGjRg5MiRDB06lNjYWLp164bT6WT16tWcOnWKYcOGUb58ecLDw/n555+pWrUqYWFhxMbG8sQTT/DCCy9Qu3ZtmjdvzqRJk1izZg2TJ08GYMyYMVSqVIkWLVpgNpv59ttvqVixInFxcdSpUwe32827777L9ddfz/Lly/nwww8N/Z5FREREiqpEdyIfr/8417i1x9ZyMu0k0fbofMhKclPgRVBhZTGbKBfl4OdH2rPkn2N8uXIPJ5JclImyc0eb6rSvV45wm8XwWaCYmBh+/fVXxo4dS0JCAtWrV2f06NFce+21QKBT2xtvvMETTzxBZGQkTZo04dFHHwUCbcHfeecdRo0axYgRI7jyyitZvHgxQ4cOJT4+nscff5yjR4/SsGFDZs6cSd26dQGIjo7m9ddfZ9u2bVgsFi655BJmz56N2WymWbNmjBkzhtdee41nnnmG9u3b8+qrr3LnnXca+n2LiIiIFEVWk5V1x9cFFfvzrp+5t9m9Ic5IgqEiKAdms4lIh5VujSrSrk4Z/H4wmSDaYTO8+DmjQYMG/Pzzz9me79evX3qXt6zcfffd6fsGnWE2m3nhhRd44YUXsvw7Q4YMydBs4d8ee+yxDB3iAO64445s40VERERKCo8/+H0LUzwp+P1+TEZ21ZLzoiIoCGazidjwvG24KiIiIiLFn81kI9YRS7wzPtfYxmUaqwAqJAq8MYKIiIiISFFlMpm4qc5NucbF2GNoV6VdrnGSP1QEiYiIiIicpzBrGIOaDKJCRIUc44a3Hq5ZoEJERZ
CIiIiIyAWItEUy5bopNC7bONO5GHsMoy4fRZcaXQi35rz9iuQfrQkSEREREbkAVrOVcuHlGN95PMdTjzNn5xxSvak
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1000x700 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Покажем на графике, что отражает полученное число.\n",
|
|
|
|
|
"# Красным цветом обозначим точки, для которых классификация сработала неправильно.\n",
|
|
|
|
|
"plt.figure(figsize=(10, 7))\n",
|
|
|
|
|
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
|
|
|
|
|
"plt.xlabel('Ширина лепестка, см')\n",
|
|
|
|
|
"plt.ylabel('Длина лепестка, см')\n",
|
|
|
|
|
"plt.legend(loc=2)\n",
|
|
|
|
|
"plt.grid()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Перебираем все объекты из теста\n",
|
|
|
|
|
"for i in range(len(y_test)):\n",
|
|
|
|
|
" # Если предсказание неправильное\n",
|
|
|
|
|
" if np.array(y_test)[i] != y_pred[i]:\n",
|
|
|
|
|
" # то подсвечиваем точку красным\n",
|
|
|
|
|
" plt.scatter(X_test.iloc[i, 3], X_test.iloc[i, 2], color='red', s=150)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 19,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"accuracy: 1.000\n"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# качество модели (доля правильно классифицированных точек)\n",
|
|
|
|
|
"print(f'accuracy: {accuracy_score(y_test, y_pred):.3f}')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Обучим метод 10 ближайших соседей"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 20,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"array(['versicolor', 'setosa', 'virginica', 'setosa', 'versicolor',\n",
|
2024-09-23 23:22:33 +00:00
|
|
|
|
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
" 'versicolor', 'setosa', 'virginica', 'setosa', 'virginica',\n",
|
|
|
|
|
" 'setosa', 'versicolor', 'versicolor', 'virginica', 'setosa',\n",
|
|
|
|
|
" 'versicolor', 'setosa', 'setosa'], dtype=object)"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 20,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"model = KNeighborsClassifier(n_neighbors=10)\n",
|
|
|
|
|
"model.fit(X_train, y_train)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Получим предсказания модели\n",
|
|
|
|
|
"y_pred = model.predict(X_test)\n",
|
|
|
|
|
"y_pred"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 21,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyiElEQVR4nOzdd3hU1dbH8e/09ITepYP0qoIoKEgRERFFFFQExC4qYntVFNTrtYBYsICIDUQFRRQQ6VJEQaUX6b2X9Ex//5hLIKZN4Ezq7/M8eUjOWcxemRzCrNlnr23y+/1+RERERERESghzQScgIiIiIiKSn1QEiYiIiIhIiaIiSEREREREShQVQSIiIiIiUqKoCBIRERERkRJFRZCIiIiIiJQoKoJERERERKREUREkIiIiIiIlirWgE7gQPp+PgwcPEh0djclkKuh0RERERESkgPj9fhITE6lcuTJmc85zPUW6CDp48CDVqlUr6DRERERERKSQ2LdvH1WrVs0xpkgXQdHR0UDgG42JiUk/7na7+eWXX+jSpQs2m62g0pNiQNeSGEXXkhhB15EYRdeSGKUwXUsJCQlUq1YtvUbISZEugs7cAhcTE5OpCIqIiCAmJqbAfxhStOlaEqPoWhIj6DoSo+haEqMUxmspmGUyaowgIiIiIiIlioogEREREREpUVQEiYiIiIhIiVKk1wQFw+/34/F48Hq9BZ2KXCCLxYLValU7dBERERG5IMW6CHK73Rw8eJCUlJSCTkUMEhERQaVKlbDb7QWdioiIiIgUUcW6CNq7dy9Wq5XKlStjt9s1g1CE+f1+XC4Xx44dY9euXdStWzfXTbBERERERLJSbIsgq9WKz+ejcuXKREREFHQ6YoDw8HBsNht79uzB5XIRFhZW0CmJiIiISBFU7N9K12xB8aKfp4iIiIhcKL2iFBERERGREqVAi6AaNWpgMpkyfTz44IMFmVYmPp+f0ymu9A+fz1/QKYmIiIiIyHkq0DVBq1atytC6esOGDXTu3Jk+ffoUYFZneX1+0txeFv9zjMkr93A8yUnZKAf921TnqnrlCLdZMJvVbEFEREREpCgp0CKoXLlyGb7+73//S+3atenQoUMBZXSW1+fneJKTmz9cwb6TqenH/zmSxIodJ6hWOpxp911OuShHoSmEdu/eTc2aNfn7779p3rx5QacjIiIiIlIoFZrucC6Xiy+//JJhw4Zl28ra6XTidDrTv05ISAAC+wG53e7042c+9/v9+Hw+fD5fnvNJc/syFUDn2ncylZs/XMHPj7Qn3FY4llad+T7P93suCnw+H36/H7fbjcViCfl4Z66lc68vkfOha0mMoOtIjKJrSYxSmK6lvORg8vv9hWKByzfffEO/fv3Yu3cvlStXzjLmxRdfZOTIkZmOT5kyJVMbbKvVSsWKFalWrVqeN9a02mws2RHPg1P+zjX2/X4taF87Fo+BP/gffviB1157jV27dhEeHk7Tpk2ZPHkykZGRfP7554wbN449e/Zw0UUXcc8993D33XcDUKpUqQyP065dO3766Sd8Ph9vvvkmn332GcePH6devXq88MILXHPNNUCgAH322Wf58ccfOX36NOXKlWPgwIEMGzYMgHHjxjF58mT27NlDXFwc3bp1Y+TIkURFRRn2PQfL5XKxb98+Dh8+jMfjyffxRURERKRwSklJoV+/fsTHxxMTE5NjbKGZCZo4cSLXXntttgUQwDPPPJP+whwCM0HVqlWjS5cuGb5Rt9vNokWLCAsLIyoqKs/7ySSkeZj8+96gYr/8fS+X12lJTEx4nsbIzqFDh7j77rt57bXX6NWrF4mJiSxbtozo6Gh++OEH/vvf//LOO+/QokUL/v77b+69917KlCnDgAEDWLlyJW3atOGXX36hUaNG2O12YmJiGDt2LOPGjeODDz6gRYsWTJo0iX79+rF+/Xrq1q3L6NGjmTt3Ll9//TUXXXQR+/btY9++fenPaUREBO
+++y41a9Zk586dPPTQQ7zyyiuMGzfOkO85L9LS0ggPD6d9+/b5sk+Q2+1m3rx5dO7cGZvNFvLxpPjStSRG0HUkRtG1JEYpTNfSmbvEglEoiqA9e/Ywf/58vvvuuxzjHA4HDocj03GbzZblk24ymTCbzee1t8zxJGfuQcCJJBdg3P41R44cwePxcNNNN1G9enUAmjVrBsDIkSMZPXo0N998MwC1a9dmy5YtTJgwgYEDB1KhQgUgsNbq3GJy9OjRPPXUU/Tr1w+A119/ncWLF/POO+8wbtw49u3bR926dWnfvj0mk4maNWtmyOmxxx5L/7xWrVq8/PLL3HfffXzwwQeGfM95YTabMZlM2f7MQyW/x5PiS9eSGEHXkRhF15Kx/H4/Se4krGYrXp8Xi9mCCRNh1uK/wXthuJbyMn6hKIImTZpE+fLlue666wo6lXRloxz8cyQp17gyUXayWcJ0Xpo1a0anTp1o0qQJXbt2pUuXLtx8883Y7XZ27NjB4MGDGTJkSHq8x+MhNjY228dLSEjg4MGDtGvXLsPxdu3asXbtWgDuuusuOnfuTP369enWrRs9evSgS5cu6bHz58/n1VdfZcuWLSQkJODxeEhLSyMlJSXTbYgiIiIiJVGyO5nVh1czYf0E1h4LvMYq5ShFn3p9GNBoAJG2SCzm0K9nluAU+Ip+n8/HpEmTGDBgAFZroajJiAmz0b9N9aBi72hTnWiHcVWvxWJh3rx5zJkzh4YNG/Luu+9Sv359NmzYAMCECRNYs2ZN+seGDRtYuXLlBY3ZsmVLdu3axUsvvURqaiq33HJL+mzT7t276dGjB02bNmX69On8+eef6bfBuVyuC/tmRURERIqBZHcyb//1Ng8tfCi9AAI45TzF+PXjuenHmzjlPEUhWYovFIIiaP78+ezdu5dBgwYVdCrpzGYTV9UrR7XSOa/zqVY6nPb1yhneIttkMtGuXTtGjhzJ33//jd1uZ/ny5VSuXJmdO3dSp06dDB9nbl870wDi3L2XYmJiqFy5MsuXL88wxvLly2nYsGGGuL59+zJhwgS+/vprpk+fzsmTJ/nzzz/x+XyMHj2aNm3aUK9ePQ4ePGjo9ysiIiJSVPn9fv449Adfbfkq25jDyYd5dNGjJLlzv8tI8keBT7106dKlUFbFYTYL0+67PNs22Wf2CQq3GTut+fvvv7NgwQK6dOlC+fLl+f333zl27BgNGjRg5MiRDB06lNjYWLp164bT6WT16tWcOnWKYcOGUb58ecLDw/n555+pWrUqYWFhxMbG8sQTT/DCCy9Qu3ZtmjdvzqRJk1izZg2TJ08GYMyYMVSqVIkWLVpgNpv59ttvqVixInFxcdSpUwe32827777L9ddfz/Lly/nwww8N/Z5FREREiqpEdyIfr/8417i1x9ZyMu0k0fbofMhKclPgRVBhZTGbKBfl4OdH2rPkn2N8uXIPJ5JclImyc0eb6rSvV45wm8XwWaCYmBh+/fVXxo4dS0JCAtWrV2f06NFce+21QKBT2xtvvMETTzxBZGQkTZo04dFHHwUCbcHfeecdRo0axYgRI7jyyitZvHgxQ4cOJT4+nscff5yjR4/SsGFDZs6cSd26dQGIjo7m9ddfZ9u2bVgsFi655BJmz56N2WymWbNmjBkzhtdee41nnnmG9u3b8+qrr3LnnXca+n2LiIiIFEVWk5V1x9cFFfvzrp+5t9m9Ic5IgqEiKAdms4lIh5VujSrSrk4Z/H4wmSDaYTO8+DmjQYMG/Pzzz9me79evX3qXt6zcfffd6fsGnWE2m3nhhRd44YUXsvw7Q4YMydBs4d8ee+yxDB3iAO64445s40VERERKCo8/+H0LUzwp+P1+TEZ21ZLzoiIoCGazidjwvG24KiIiIiLFn81kI9YRS7wzPtfYxmUaqwAqJAq8MYKIiIiISFFlMpm4qc5NucbF2GNoV6VdrnGSP1QEiYiIiIicpzBrGIOaDKJCRIUc44a3Hq5ZoEJERZ
CIiIiIyAWItEUy5bopNC7bONO5GHsMoy4fRZcaXQi35rz9iuQfrQkSEREREbkAVrOVcuHlGN95PMdTjzNn5xxSvak
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"text/plain": [
|
|
|
|
|
"<Figure size 1000x700 with 1 Axes>"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "display_data"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# Покажем на графике, что отражает полученное число.\n",
|
|
|
|
|
"# Красным цветом обозначим точки, для которых классификация сработала неправильно.\n",
|
|
|
|
|
"plt.figure(figsize=(10, 7))\n",
|
|
|
|
|
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
|
|
|
|
|
"plt.xlabel('Ширина лепестка, см')\n",
|
|
|
|
|
"plt.ylabel('Длина лепестка, см')\n",
|
|
|
|
|
"plt.legend(loc=2)\n",
|
|
|
|
|
"plt.grid()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Перебираем все объекты из теста\n",
|
|
|
|
|
"for i in range(len(y_test)):\n",
|
|
|
|
|
" # Если предсказание неправильное\n",
|
|
|
|
|
" if np.array(y_test)[i] != y_pred[i]:\n",
|
|
|
|
|
" # то подсвечиваем точку красным\n",
|
|
|
|
|
" plt.scatter(X_test.iloc[i, 3], X_test.iloc[i, 2], color='red', s=150)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 22,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"accuracy: 1.000\n"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# качество модели (доля правильно классифицированных точек)\n",
|
|
|
|
|
"print(f'accuracy: {accuracy_score(y_test, y_pred):.3f}')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### 3.3.2 Задание\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"Определите набор признаков человека, по аналогии из РТ 1, – например, \n",
|
|
|
|
|
"цвет глаз и конвертируйте его в матрицу признаков. "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 23,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"array([[2., 0., 3., 1.],\n",
|
|
|
|
|
" [0., 1., 1., 1.],\n",
|
|
|
|
|
" [5., 3., 0., 5.],\n",
|
|
|
|
|
" [3., 4., 2., 0.]])"
|
|
|
|
|
]
|
|
|
|
|
},
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"execution_count": 23,
|
2024-09-23 23:22:33 +00:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"data_dict = [{\"карий\": 3, \"голубой\":2, \"серый\":1}, \n",
|
|
|
|
|
" {\"карий\": 1, \"зеленый\":1, \"серый\":1},\n",
|
|
|
|
|
" {\"зеленый\": 3, \"голубой\":5, \"серый\":5},\n",
|
|
|
|
|
" {\"карий\": 2, \"голубой\":3, \"зеленый\":4}]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"dictvectoriser = DictVectorizer(sparse=False)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"features = dictvectoriser.fit_transform(data_dict)\n",
|
|
|
|
|
"features"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
2024-09-27 05:31:03 +00:00
|
|
|
|
"version": "3.12.5"
|
2024-09-23 23:22:33 +00:00
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|