mirea-projects/Third term/Artificial intelligence systems and big data/3.ipynb

965 lines
394 KiB
Plaintext
Raw Permalink Normal View History

2024-09-23 23:22:33 +00:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Рабочая тетрадь No 3"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 3,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_extraction import DictVectorizer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.3.1 Задание\n",
"\n",
"Задайте 4 точки в трехмерном пространстве, рассчитайте между ними \n",
"расстояния по описанным в примере выше метрикам. Отобразите точки \n",
"в трехмерном пространстве."
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 4,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [],
"source": [
"first_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
"second_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
"third_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
"fourth_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
"\n",
"dots = [first_dot, second_dot, third_dot, fourth_dot]"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 5,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-09-27 05:31:03 +00:00
"[2 4 8] [8 6 2]\n",
"Расстояние Евклида 8.717797887081348\n",
"Квадрат Евклидова расстояния 76.00000000000001\n",
2024-09-23 23:22:33 +00:00
"Расстояние Чебышева 6.0\n",
2024-09-27 05:31:03 +00:00
"Расстояние Хемминга 14.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[2 4 8] [6 6 1]\n",
"Расстояние Евклида 8.306623862918075\n",
"Квадрат Евклидова расстояния 69.0\n",
"Расстояние Чебышева 7.0\n",
"Расстояние Хемминга 13.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[2 4 8] [7 6 1]\n",
"Расстояние Евклида 8.831760866327848\n",
"Квадрат Евклидова расстояния 78.00000000000001\n",
"Расстояние Чебышева 7.0\n",
"Расстояние Хемминга 14.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[8 6 2] [2 4 8]\n",
"Расстояние Евклида 8.717797887081348\n",
"Квадрат Евклидова расстояния 76.00000000000001\n",
2024-09-23 23:22:33 +00:00
"Расстояние Чебышева 6.0\n",
2024-09-27 05:31:03 +00:00
"Расстояние Хемминга 14.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[8 6 2] [6 6 1]\n",
"Расстояние Евклида 2.23606797749979\n",
"Квадрат Евклидова расстояния 5.000000000000001\n",
2024-09-23 23:22:33 +00:00
"Расстояние Чебышева 2.0\n",
2024-09-27 05:31:03 +00:00
"Расстояние Хемминга 3.0\n",
"\n",
"[8 6 2] [7 6 1]\n",
"Расстояние Евклида 1.4142135623730951\n",
"Квадрат Евклидова расстояния 2.0000000000000004\n",
"Расстояние Чебышева 1.0\n",
"Расстояние Хемминга 2.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[6 6 1] [2 4 8]\n",
"Расстояние Евклида 8.306623862918075\n",
"Квадрат Евклидова расстояния 69.0\n",
"Расстояние Чебышева 7.0\n",
"Расстояние Хемминга 13.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[6 6 1] [8 6 2]\n",
"Расстояние Евклида 2.23606797749979\n",
"Квадрат Евклидова расстояния 5.000000000000001\n",
2024-09-23 23:22:33 +00:00
"Расстояние Чебышева 2.0\n",
2024-09-27 05:31:03 +00:00
"Расстояние Хемминга 3.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[6 6 1] [7 6 1]\n",
"Расстояние Евклида 1.0\n",
"Квадрат Евклидова расстояния 1.0\n",
"Расстояние Чебышева 1.0\n",
"Расстояние Хемминга 1.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[7 6 1] [2 4 8]\n",
"Расстояние Евклида 8.831760866327848\n",
"Квадрат Евклидова расстояния 78.00000000000001\n",
"Расстояние Чебышева 7.0\n",
"Расстояние Хемминга 14.0\n",
2024-09-23 23:22:33 +00:00
"\n",
2024-09-27 05:31:03 +00:00
"[7 6 1] [8 6 2]\n",
"Расстояние Евклида 1.4142135623730951\n",
"Квадрат Евклидова расстояния 2.0000000000000004\n",
"Расстояние Чебышева 1.0\n",
2024-09-23 23:22:33 +00:00
"Расстояние Хемминга 2.0\n",
2024-09-27 05:31:03 +00:00
"\n",
"[7 6 1] [6 6 1]\n",
"Расстояние Евклида 1.0\n",
"Квадрат Евклидова расстояния 1.0\n",
"Расстояние Чебышева 1.0\n",
"Расстояние Хемминга 1.0\n",
2024-09-23 23:22:33 +00:00
"\n"
]
}
],
"source": [
"for first in dots:\n",
" for second in dots:\n",
" if (first == second).all():\n",
" continue\n",
"\n",
" print(first, second)\n",
" print('Расстояние Евклида', np.linalg.norm(first-second))\n",
" print('Квадрат Евклидова расстояния', np.linalg.norm(first-second) ** 2)\n",
" print('Расстояние Чебышева', np.linalg.norm(first-second, ord=np.inf))\n",
" print('Расстояние Хемминга', np.linalg.norm(first-second, ord=1))\n",
" print()"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 6,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
2024-09-27 05:31:03 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAGOCAYAAABSVO4kAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC6XElEQVR4nOy9d3xcd5k1fmY0GvXebFnFlmTLtmxZxbItOw1Ic5oNP0qyWRJC2M0uEEoW3hA2IQkLhMC+wLILoSwk7AuhJHYSEkhPnEKabWnUu6wuTddI08u9vz+83+s7V1Num5kr557Ph89uHGfu1Wjme+7zPOc5R0PTNA0VKlSoUKFCArSpvgEVKlSoULH+oZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEMlExUqVKhQIRkqmahQoUKFCslQyUSFChUqVEiGSiYqVKhQoUIyVDJRoUKFChWSoZKJChUqVKiQDJVMVKhQoUKFZKhkokKFChUqJEOX6htQ8f4CTdMIhULw+XxIS0tj/qfVqs81KlSsZ6hkoiJpoGkagUAAwWAQPp+P+XOtVgudTgedTqeSiwoV6xQamqbpVN+EivMfoVAIgUAAFEVBo9HA7/dDq9WCpmnQNA2KokDTNDQaDTQajUouKlSsM6hkoiKhoGkawWAQwWAQAKDRaJgKRaPRRPz7hFgIyN/T6/VIT0+HTqeL+N+qUKEidVDbXCoSBoqimGoEAFN1EKIglQgbGo0GaWlpzD8TcnnnnXfQ2NiIwsJCaLVapKWlhVUvKrmoUJFaqGSiQnYQAggEAmGtK+7f4UMAhFzI/01LS2Ne2+/3Q6PRMOSSnp7O/B2VXFSoSC5UMlEhK0gLq7+/H+Xl5SgtLZXlYCevEa1y4ZILd+aikosKFYmFSiYqZAM51EOhEFZXV1FUVCTbIc5uj3H/nJAL+fcURcHv98Pn86nkokJFkqCSiQrJILsjwWAQFEVBq9VGPfwTCUIQKrmoUJF8qGSiQhJIWysUCgEAQyRE9isXxJBTJHIh//P5fPD7/cw9q+SiQoU0qGSiQjTIEz+7GmEj0uFP0zQmJiYwPT2NvLw8FBUVoaioCHl5eQnfJWELAdLS0taQC7tyIRJksuOikosKFbGhkokKwSBtLaLWinTYRqokvF4venp64Pf7sXPnTni9XtjtdszOzoKiKBQWFqKoqAiFhYXIy8sLe81EtM1ikYvVasXk5CSam5tVclGhggdUMlEhCBRFIRgMrmlrccE9/M1mM3p7e1FWVoa2tjZm96Sqqgo0TcPlcsFut8Nut2NqagoAGHIpKipKyvyFTS5arRZut5tp13m9XubvcHdcVHJRoUIlExU8wWd3hA1CJhRFYXR0FLOzs9i5cyc2bdoEmqaZeQX5u7m5ucjNzUV1dTVomsbq6irsdjtTIVAUhenpafh8PhQWFiI7OzspBzhpvbErF4qiGHLRarVrZi4quah4P0IlExVxEckSJd5hqdFo4PP58O6774KiKBw8eBA5OTm8rqfRaJCfn4/8/HzU1taCoii89957yMzMhMlkwtjYGHQ6HVO1FBUVISsrS/LPyee+IrXFKIqCz+eD1+tVyUXF+xYqmaiICfbuCBlO84HX64XZbEZVVRUaGxvDFg2FghzOZWVlKC8vRygUwsrKCux2OxYXFzEyMoKMjIwwcsnIyB
B9Pb7gkiohl1AoxNjsswf6pD3Gh4xVqFhvUMlERURE2x2Jh1AohKGhIaysrGDDhg3YuXOnrPcEnK0KCGkAQDAYhMPhYIb5g4ODyM7ODpu56PV62e4jGghJEMJlk0swGGT+fSRfMZVcVKx3qGSiYg2i7Y7Ew+rqKnp6epgqgm9biw9iXV+n06GkpAQlJSUAzpLL8vIy7HY7pqenMTAwgJycHIZYCgsLkZ6eLtu9xbrnSOQSDAYZ12RCLmxfMdVuX8V6hEomKsIQb3ckEmiaxtzcHIaHh1FbW4uGhgYMDAzIrsDi+3o6nQ6lpaUoLS0FAPj9foZcJiYm4Ha7mR2XwsJCFBYWQqdL/FchHrmEQiGYTCbU1NSo5KJi3UElExUA+O2OREIwGER/fz9sNhtaW1uZA1zuvRApbSC9Xo/y8nKUl5cDAHw+H+x2O5aXlzE2Ngav1xu2QFlQUCDXbccEl1wCgQDGx8dRWVmJQCAAQE2hVLF+oJKJCtFtLYfDAYPBgOzsbBw6dChs6J2IJUO5Xi8jIwMbNmzAhg0bAIBZnrTb7RgaGoLf70dOTg5CoRDsdjsKCgqScoCzUyaBc5UL2xFZTaFUoVSoZPI+BztOV0hba3p6GmNjY6ivr8eWLVt4bcBLQSIH1JmZmdi4cSM2btwImqbh8XiwuLiI2dlZDAwMIBgMoqCgIOHWL9z3K1JbjKjrSOXCJRc1hVJFqqCSyfsU3N0RvkTi9/vR19eH1dVV7N27l1FUcRGPTMSQTbK24LOzs1FWVobFxUUcOnQIbrebqVxmZmZA03SYUiw3N1eWAzxeYBifLBc2uagplCqSCZVM3ofgxunyfcq22Wzo6elBYWEhDh06FFMRpdFomNeXA6k6EDUaDXJycpCTk8NYvzidToZczpw5A41GE0YuOTk5ou9XyH/HNyhMTaFUkQyoZPI+AvuwEdrWmpiYwJkzZ9DY2Ijq6mpeG/ByIhX5KNHuIy8vD3l5eaipqQFFUQy5WK1WTExMhO3BkO18vu+z1HtTUyhVpAoqmbxPQMwKh4eHsW3bNt6HiNfrRW9vL7xeL/bv34/8/Hxe15O7MlEqtFrtGusXsp1vNBoxNjaG9PT0sMollvWLnAe7mkKpIplQyeR9APbQdmZmBlu3buV1YHCdfoXsYpyvlUk8aLVaZndly5YtCIVCcDgcWF5ejmv9ksifL1YK5eDgIPR6PWpqalRyUSEaKpmcx+BaonAPkmiI5PQrFO+XyiQe0tLSUFxcjOLiYgDRrV+KioqQmZmZtPtikwtFUUwVQ0wr1RRKFUKhksl5iki7IwSxDnm3242enh5QFIXOzk7k5uaKun68A0dMBO96qEzigWv9EggEmO38hYUFBINBvPvuu0m1fmFHChDCiJdCSWTIqiOyCgKVTM5DxNsdiXYoLy4uYmBgAJWVlZKdfmNVJouLixgcHERGRgaKi4uZQzMZliZKQ3p6OsrKylBWVoaVlRX09PRgy5YtEa1fyHa+3O8TcTxgI1YKJTsoTE2hVEHw/vv2nsdg747EitPlHvLE6XdpaQm7d+9GRUWF5HuJdKCwr9PY2AgAsNvtYZYmhFy4W+fnS2USC6RCiGT9YrfbMTIyAp/Pt4ZcpJA++7qxwJdc1BTK9y9UMjlPEGl3JNKXmMTQErCdfg8dOiRbyBT38He5XDAYDNBqtTh48CB0Oh0oimKIy+PxMIcmafeQrfPi4mLm4Drfwf2dca1fPB4P0xYj1i/s7fz8/HzB2/l8yCTSfUYLClPJ5f0JlUzWOYTG6Wq1WlAUFdHpV06LEDaZLCwsYGBgANXV1di2bRs0Gg1jB0KQlZWFrKwsVFZWgqZpZuvcZrNhZmYGoVAILpcLWq0WxcXFSYvtTSb4kCV5n9jWL4SE5+bmEAqFwkg4Nzc37u9VDJlwoaZQqlDJZB2DO2TnE7JEDvKenp41Tr9ygrTTBgYGsLS0hD179jCtm3iHZqSt856eHgCAxWLBxMRESmJ7kw
GhG/DZ2dnIzs7Gpk2bQNM0XC4XU7nwtX4hai65fw72a0ZKoVxZWYFer0dhYaGaQnkeQCWTdQp2nK7Qp7ve3l7k5ua
2024-09-23 23:22:33 +00:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure()\n",
"ax = fig.add_subplot(111, projection='3d')\n",
"\n",
"for dot in dots:\n",
" ax.scatter(*dot)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.3.2 Задание\n",
"\n",
"Создать 5x5 матрицу со значениями в строках от 0 до 4. Для создания \n",
"необходимо использовать функцию arange."
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 7,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1, 2, 3, 4],\n",
" [0, 1, 2, 3, 4],\n",
" [0, 1, 2, 3, 4],\n",
" [0, 1, 2, 3, 4],\n",
" [0, 1, 2, 3, 4]])"
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 7,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array([np.arange(0, 5) for _ in range(5)])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3.1 Задание\n",
"\n",
"Для предыдущего примера поэкспериментируйте с параметрами классификатора: \n",
"1. Установите другое количество ближайших соседей (k = 1, 5, 10). \n",
"2. Установите размер тестовой выборки 15% от всего датасета. \n",
"3. Постройте графики и оцените качество моделей, проанализируйте результаты. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 2.2.1\n",
"\n",
"В примере показано создание 2d-массива со значениями x и y. Список \n",
"target содержит возможные выходные классы (часто называемые \n",
"метками). Далее происходит обучене классификатора k-ближайших \n",
"соседей по исходным данным. Далее производится прогноз \n",
"принадлежности к классам для двух точек данных."
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 25,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KNeighborsClassifier(n_neighbors=3)\n",
"(-2,-2) is class [0]\n",
"(1,3) is class [1]\n"
]
}
],
"source": [
"# данные\n",
"X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n",
"target = [0, 0, 0, 1, 1, 1]\n",
"\n",
"# обучаем модель k-ближайших соседей к данным\n",
"K = 3\n",
"model = KNeighborsClassifier(n_neighbors=K)\n",
"model.fit(X, target)\n",
"print(model)\n",
"\n",
"# делаем прогноз\n",
2024-09-27 05:31:03 +00:00
"print('(-2,-2) is class', model.predict([[-9, 9]]))\n",
2024-09-23 23:22:33 +00:00
"print('(1,3) is class', model.predict([[1, 3]]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 2.2.2\n",
"\n",
"Далее приведем более наглядный пример. Будет построена граница \n",
"решения для каждого класса. В качестве данных будем использовать уже \n",
"знакомый нам и встроенный в библиотеку sklearn набор данных ирисов \n",
"Фишера. Этот набор данных стал уже классическим, и часто используется \n",
"в литературе для иллюстрации работы различных статистических \n",
"алгоритмов. Датасет содержит наблюдения за 150 разными цветками \n",
"ирисов, данные по каждому цветку расположены в строках. В стобцах \n",
"записаны длина и ширина чашелистика, длина и ширина лепестка, вид \n",
"ириса. "
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 9,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length_cm</th>\n",
" <th>sepal_width_cm</th>\n",
" <th>petal_length_cm</th>\n",
" <th>petal_width_cm</th>\n",
" <th>species</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.3</td>\n",
" <td>0.2</td>\n",
" <td>setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.6</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" <td>setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>3.6</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>setosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>6.7</td>\n",
" <td>3.0</td>\n",
" <td>5.2</td>\n",
" <td>2.3</td>\n",
" <td>virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>6.3</td>\n",
" <td>2.5</td>\n",
" <td>5.0</td>\n",
" <td>1.9</td>\n",
" <td>virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>6.5</td>\n",
" <td>3.0</td>\n",
" <td>5.2</td>\n",
" <td>2.0</td>\n",
" <td>virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>6.2</td>\n",
" <td>3.4</td>\n",
" <td>5.4</td>\n",
" <td>2.3</td>\n",
" <td>virginica</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>5.9</td>\n",
" <td>3.0</td>\n",
" <td>5.1</td>\n",
" <td>1.8</td>\n",
" <td>virginica</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>150 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
2024-09-27 05:31:03 +00:00
" sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm \\\n",
"0 5.1 3.5 1.4 0.2 \n",
2024-09-23 23:22:33 +00:00
"1 4.9 3.0 1.4 0.2 \n",
"2 4.7 3.2 1.3 0.2 \n",
"3 4.6 3.1 1.5 0.2 \n",
"4 5.0 3.6 1.4 0.2 \n",
".. ... ... ... ... \n",
"145 6.7 3.0 5.2 2.3 \n",
"146 6.3 2.5 5.0 1.9 \n",
"147 6.5 3.0 5.2 2.0 \n",
"148 6.2 3.4 5.4 2.3 \n",
"149 5.9 3.0 5.1 1.8 \n",
"\n",
" species \n",
"0 setosa \n",
"1 setosa \n",
"2 setosa \n",
"3 setosa \n",
"4 setosa \n",
".. ... \n",
"145 virginica \n",
"146 virginica \n",
"147 virginica \n",
"148 virginica \n",
"149 virginica \n",
"\n",
"[150 rows x 5 columns]"
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 9,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url = 'https://raw.githubusercontent.com/akmand/datasets/master/iris.csv'\n",
"\n",
"iris = pd.read_csv(url)\n",
"\n",
"iris"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 10,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
2024-09-27 05:31:03 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABRgAAAJaCAYAAABELyv0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dbH8e/0mUwaPfQOAoKAgAIKKFIFURRFUSmKvWLlXkVBfbl6QUDFAgoCghUFlSIgigqCIEWK0iH0DumZ+v6RSySmTUKSmQm/z314rjln5cxKdiaZWWfvtQ1+v9+PiIiIiIiIiIiISCEYg52AiIiIiIiIiIiIhC8VGEVERERERERERKTQVGAUERERERERERGRQlOBUURERERERERERApNBUYREREREREREREpNBUYRUREREREREREpNBUYBQREREREREREZFCU4FRRERERERERERECs0c7ATOh8/n4+DBg0RFRWEwGIKdjoiIiEiB+f1+EhMTqVKlCkaj7v2GI70mFRERkXBWFK9Hw7rAePDgQapXrx7sNERERETO2759+6hWrVqw05BC0GtSERERKQ3O5/VoWBcYo6KigIxvQHR0NG63m0WLFtG1a1csFkuQs5O8aKzCh8YqfGiswoPGKXyU1FglJCRQvXr1zNc1En7++Zq0OOh3R/jQWIUPjVV40DiFD41V+PjnWBXF69GwLjCeXYISHR2dWWCMiIggOjpaP8whTmMVPjRW4UNjFR40TuGjpMdKS2vD1z9fkxYH/e4IHxqr8KGxCg8ap/ChsQofuY3V+bweVaMfERERERERERERKTQVGEVERERERERERKTQVGAUERERERERERGRQgvrHoyB8Pv9eDwevF5vsFORc7jdbsxmM2lpaQGPjclkwmw2q0eViIiI5Mjr9fLiiy/y0UcfcfjwYapUqcKgQYN47rnn8nz98OOPPzJs2DA2b95M9erVee655xg0aFDJJS4iIhKCClpPKcz7fCkZJVFPKdUFRpfLxaFDh0hJSQl2KvIPfr+fuLg49u3bV6Af8IiICCpXrozVai3G7ERERCQcvfrqq7zzzjtMmzaNJk2asGbNGgYPHkxMTAyPPPJIjp+ze/durr32Wu677z5mzpzJ999/z913303lypXp1q1bCX8FIiIioaEw9ZTCvs+XklHc9ZRSW2D0+Xzs3r0bk8lElSpVsFqt+gEPIT6fj6SkJCIjIzEa81+p7/f7cblcHDt2jN27d1O/fv2APk9EREQuHCtWrKBPnz5ce+21ANSqVYuPP/6Y3377LdfPeffdd6lduzZjx44FoFGjRvzyyy+MGzdOBUYREbkgFbaeUtD3+VIycqqnFIdSW2B0uVz4fD6qV69OREREsNORf/D5fLhcLux2e8C/eBwOBxaLhb1792Z+roiIiMhZ7dq1Y9KkSWzbto0GDRqwYcMGfvnlF15//fVcP+fXX3/lmmuuyXKsW7duPPbYY8WcrYiISGgqbD2lMO/zpWT8s55iMpmK/DFKbYHxLP1Qly4aTxEREcnNs88+S0JCAhdddBEmkwmv18srr7zCgAEDcv2cw4cPU6lSpSzHKlWqREJCAqmpqTgcjmyfk56eTnp6eubHCQkJQEbvKbfbXURfTVZnr1tc15eio7EKHxqr8KBxKnlutxu/3w9kFA0DdfZz/H5/gT5PSo7f78ftdmeOT1E+v0p9gVFERERELgyfffYZM2fOZNasWTRp0oT169fz2GOPUaVKFQYOHFhkjzN69GhGjhyZ7fiiRYuKfeXM4sWLi/X6UnQ0VuFDYxUeNE4lx2w2ExcXR1JSEi6Xq8Cfn5iYWAxZyflyuVykpqby008/4fF4gL+fV0Wxd0lQC4y1atVi79692Y4/8MADTJw4MQgZ5czn85OQ9nc1N9puwWhUP0cRERGRUPLUU0/x7LPP0r9/fwCaNm3K3r17GT16dK4Fxri4OI4cOZLl2JEjR4iOjs
5x9iLA8OHDGTZsWObHCQkJVK9ena5duxIdHV1EX01WbrebxYsX06VLFywWS7E8hhQNjVX40FiFB41TyUtLS2Pfvn1ERkYWqDWZ3+8nMTGRqKiozJ6Nfj8kpnvwAwYgymZG22MER1paGg6Hgw4dOmAymbI8r86uxjgfQS0wrl69OsvW5Zs2baJLly7069cviFn9zevzk+b28uO2Y8xcuZfjSemUj7Qx4PKadGpQAYfFpEKjiIiISIhISUnJ1k7FZDLluUyrbdu2zJ8/P8uxxYsX07Zt21w/x2azYbPZsh23WCzF/ua3JB5DiobGKnxorMKDxqnkeL1eDAYDRqOxQG3Kzv69NRgM+DGonhJijEYjBoMBi8WS2YPx7POqKJ5bQS0wVqhQIcvH//nPf6hbty4dO3YMUkZ/8/r8HE9K56Z3V7DvZGrm8W1Hklix8wTVyzr44r52VIi0hcyTYs+ePdSuXZt169bRvHnzYKcjIiIiUqJ69+7NK6+8Qo0aNWjSpAnr1q3j9ddfZ8iQIZkxw4cP58CBA0yfPh2A++67j7feeounn36aIUOGsHTpUj777DPmzZsXrC9DREQkrPn8fk4ku8KqngKqqZyvkOnB6HK5+Oijjxg2bFiu25/n11D73OaUXq83s7FoYZqLprl92Z4M59p3MpWb3l3Bwkc74LCExsYjZ7/Own7NJamwzV99Pl9mU9Li2PVIslNT5fChsQoPGqfwUVJjpZ+FovPmm2/y/PPP88ADD3D06FGqVKnCvffey4gRIzJjDh06RHx8fObHtWvXZt68eTz++ONMmDCBatWq8f7779OtW7dgfAkiIiJhL93jD7ie4rSFTFlKzlPIjOScOXM4ffo0gwYNyjUm0IbaixcvPq+mpGaLhWU7z+T6ZDhr38lUlm09Soe6MXiK8M3B3LlzefXVV9m9ezcOh4NmzZoxc+ZMnE4n06dPZ+LEiezdu5caNWpwzz33cPfddwNQt25dAC699FIA2rdvz7fffovP52PMmDFMmzaN48eP06BBA1544QWuueYaIKO4++9//5tvvvmG06dPU6FCBQYPHpzZW2jixInMnDmTvXv3EhsbS/fu3Rk5ciSRkZHn/bUWtPlrTk1JpWSoqXL40FiFB41T+CjusSqKptqSISoqivHjxzN+/PhcYz788MNsxzp16sS6deuKLzEREZELhNli4cdtxwKqp/y07RjdmsQV+SzGL774gpEjR7Jjxw4iIiJo0aIFc+fOxel08v777zN27Fh2795NrVq1eOSRR3jggQeAjJuOAC1atACgY8eO/Pjjj/h8Pl5++WUmTZrEsWPHaNSoEf/5z3/o3r07kFGnGDZsGLNnz+bUqVNUqlSJ++67j+HDhwPw+uuvM3XqVHbt2kXZsmXp3bs3r732WpHUVEJJyBQYP/jgA3r06EGVKlVyjcmvofa5zV+9Xm+hmpICJKR5mLkqPv9A4KNV8bSr15Lo6JybgBfUoUOHuPvuu3n11Ve5/vrrSUxM5JdffiEqKoq5c+fyn//8hzfeeIMWLVqwbt067r33XsqVK8fAgQNZuXIll19+OYsWLaJJkyZYrVaio6MZP348EydO5J133qFFixZMnTqV2267jY0bN1K/fn3Gjh3Ld999x6effkqNGjXYt28f+/bty2xSHhERwZtvvknt2rXZtWsXDz30EK+88sp5bcSTU/PXQJzblLSg4yqFo6bK4UNjFR40TuGjpMaqKJpqi4iIiISCdJ+RmSuzb+abkxkr99KuXjliHNYie/xDhw5x66238tprr3HDDTeQmJjIzz//jN/vZ+bMmYwYMYK33nors6YydOhQnE4nAwcO5LfffqNNmzYsWbIks6YCMGHCBMaOHct7771HixYtmDJlCtdddx2bN2+mfv36vPHGG3z99dd89tlnWWoqZxmNRt54443MmsoDDzzA008/zdtvv11kX3coCIkC4969e1myZAlffvllnnGBNtS2WCyZzSsL2pT0rO
NJ6fkHASeSMmZHFuYxcnLkyBE8Hg833ngjNWvWBOCSSy4BYOTIkYwdO5abbroJyJix+NdffzF58mQGDx5MpUqVgIz
2024-09-23 23:22:33 +00:00
"text/plain": [
"<Figure size 1600x700 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Объявляем фигуру из двух графиков и ее размер \n",
"plt.figure(figsize=(16, 7))\n",
"\n",
"# Левый график\n",
"plt.subplot(121)\n",
"sns.scatterplot(\n",
" data=iris, # из этой таблицы нарисовать точки\n",
" x='petal_width_cm', y='petal_length_cm', # с этими координатами,\n",
" hue='species', # для которых цвет определить согласно этому столбцу \n",
" s=70 # размер точек\n",
")\n",
"plt.xlabel('Длина лепестка, см') \n",
"plt.ylabel('Ширина лепестка, см') \n",
"plt.legend() # добавить легенду \n",
"plt.grid() # добавить сетку\n",
"\n",
"# Правый график аналогично \n",
"plt.subplot(122)\n",
"sns.scatterplot(data=iris,\n",
"x='sepal_width_cm', y='sepal_length_cm', hue='species', s=70)\n",
"plt.xlabel('Длина чашелистика, см') \n",
"plt.ylabel('Ширина чашелистика, см') \n",
"plt.legend()\n",
"plt.grid();"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 11,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((127, 4), (23, 4), (127,), (23,))"
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 11,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(\n",
" # поскольку iris это pandas-таблица, для нее нужно указывать iloc\n",
" iris.iloc[:, :-1], # берем все колонки кроме последней в признаки\n",
" iris.iloc[:, -1], # последнюю в целевую переменную (класс)\n",
" test_size=0.15 # размер тестовой выборки 15%\n",
")\n",
"\n",
"X_train.shape, X_test.shape, y_train.shape, y_test.shape\n"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 12,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sepal_length_cm</th>\n",
" <th>sepal_width_cm</th>\n",
" <th>petal_length_cm</th>\n",
" <th>petal_width_cm</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2024-09-27 05:31:03 +00:00
" <th>52</th>\n",
" <td>6.9</td>\n",
" <td>3.1</td>\n",
" <td>4.9</td>\n",
" <td>1.5</td>\n",
2024-09-23 23:22:33 +00:00
" </tr>\n",
" <tr>\n",
2024-09-27 05:31:03 +00:00
" <th>42</th>\n",
" <td>4.4</td>\n",
" <td>3.2</td>\n",
2024-09-23 23:22:33 +00:00
" <td>1.3</td>\n",
2024-09-27 05:31:03 +00:00
" <td>0.2</td>\n",
2024-09-23 23:22:33 +00:00
" </tr>\n",
" <tr>\n",
2024-09-27 05:31:03 +00:00
" <th>75</th>\n",
" <td>6.6</td>\n",
" <td>3.0</td>\n",
" <td>4.4</td>\n",
" <td>1.4</td>\n",
2024-09-23 23:22:33 +00:00
" </tr>\n",
" <tr>\n",
2024-09-27 05:31:03 +00:00
" <th>135</th>\n",
2024-09-23 23:22:33 +00:00
" <td>7.7</td>\n",
2024-09-27 05:31:03 +00:00
" <td>3.0</td>\n",
" <td>6.1</td>\n",
" <td>2.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>5.4</td>\n",
" <td>3.7</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
2024-09-23 23:22:33 +00:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm\n",
2024-09-27 05:31:03 +00:00
"52 6.9 3.1 4.9 1.5\n",
"42 4.4 3.2 1.3 0.2\n",
"75 6.6 3.0 4.4 1.4\n",
"135 7.7 3.0 6.1 2.3\n",
"10 5.4 3.7 1.5 0.2"
2024-09-23 23:22:33 +00:00
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 12,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train.head()"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 13,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-09-27 05:31:03 +00:00
"52 versicolor\n",
"42 setosa\n",
"75 versicolor\n",
"135 virginica\n",
"10 setosa\n",
2024-09-23 23:22:33 +00:00
"Name: species, dtype: object"
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 13,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_train.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Обучим метод 1 ближайшем соседе"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 14,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-09-27 05:31:03 +00:00
"array(['versicolor', 'setosa', 'virginica', 'setosa', 'versicolor',\n",
2024-09-23 23:22:33 +00:00
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
2024-09-27 05:31:03 +00:00
" 'versicolor', 'setosa', 'virginica', 'setosa', 'virginica',\n",
" 'setosa', 'versicolor', 'versicolor', 'virginica', 'setosa',\n",
" 'versicolor', 'setosa', 'setosa'], dtype=object)"
2024-09-23 23:22:33 +00:00
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 14,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = KNeighborsClassifier(n_neighbors=1)\n",
"model.fit(X_train, y_train)\n",
"\n",
"# Получим предсказания модели\n",
"y_pred = model.predict(X_test)\n",
"y_pred"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 15,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
2024-09-27 05:31:03 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyiElEQVR4nOzdd3hU1dbH8e/09ITepYP0qoIoKEgRERFFFFQExC4qYntVFNTrtYBYsICIDUQFRRQQ6VJEQaUX6b2X9Ex//5hLIKZN4Ezq7/M8eUjOWcxemRzCrNlnr23y+/1+RERERERESghzQScgIiIiIiKSn1QEiYiIiIhIiaIiSEREREREShQVQSIiIiIiUqKoCBIRERERkRJFRZCIiIiIiJQoKoJERERERKREUREkIiIiIiIlirWgE7gQPp+PgwcPEh0djclkKuh0RERERESkgPj9fhITE6lcuTJmc85zPUW6CDp48CDVqlUr6DRERERERKSQ2LdvH1WrVs0xpkgXQdHR0UDgG42JiUk/7na7+eWXX+jSpQs2m62g0pNiQNeSGEXXkhhB15EYRdeSGKUwXUsJCQlUq1YtvUbISZEugs7cAhcTE5OpCIqIiCAmJqbAfxhStOlaEqPoWhIj6DoSo+haEqMUxmspmGUyaowgIiIiIiIlioogEREREREpUVQEiYiIiIhIiVKk1wQFw+/34/F48Hq9BZ2KXCCLxYLValU7dBERERG5IMW6CHK73Rw8eJCUlJSCTkUMEhERQaVKlbDb7QWdioiIiIgUUcW6CNq7dy9Wq5XKlStjt9s1g1CE+f1+XC4Xx44dY9euXdStWzfXTbBERERERLJSbIsgq9WKz+ejcuXKREREFHQ6YoDw8HBsNht79uzB5XIRFhZW0CmJiIiISBFU7N9K12xB8aKfp4iIiIhcKL2iFBERERGREqVAi6AaNWpgMpkyfTz44IMFmVYmPp+f0ymu9A+fz1/QKYmIiIiIyHkq0DVBq1atytC6esOGDXTu3Jk+ffoUYFZneX1+0txeFv9zjMkr93A8yUnZKAf921TnqnrlCLdZMJvVbEFEREREpCgp0CKoXLlyGb7+73//S+3atenQoUMBZXSW1+fneJKTmz9cwb6TqenH/zmSxIodJ6hWOpxp911OuShHoSmEdu/eTc2aNfn7779p3rx5QacjIiIiIlIoFZrucC6Xiy+//JJhw4Zl28ra6XTidDrTv05ISAAC+wG53e7042c+9/v9+Hw+fD5fnvNJc/syFUDn2ncylZs/XMHPj7Qn3FY4llad+T7P93suCnw+H36/H7fbjcViCfl4Z66lc68vkfOha0mMoOtIjKJrSYxSmK6lvORg8vv9hWKByzfffEO/fv3Yu3cvlStXzjLmxRdfZOTIkZmOT5kyJVMbbKvVSsWKFalWrVqeN9a02mws2RHPg1P+zjX2/X4taF87Fo+BP/gffviB1157jV27dhEeHk7Tpk2ZPHkykZGRfP7554wbN449e/Zw0UUXcc8993D33XcDUKpUqQyP065dO3766Sd8Ph9vvvkmn332GcePH6devXq88MILXHPNNUCgAH322Wf58ccfOX36NOXKlWPgwIEMGzYMgHHjxjF58mT27NlDXFwc3bp1Y+TIkURFRRn2PQfL5XKxb98+Dh8+jMfjyffxRURERKRwSklJoV+/fsTHxxMTE5NjbKGZCZo4cSLXXntttgUQwDPPPJP+whwCM0HVqlWjS5cuGb5Rt9vNokWLCAsLIyoqKs/7ySSkeZj8+96gYr/8fS+X12lJTEx4nsbIzqFDh7j77rt57bXX6NWrF4mJiSxbtozo6Gh++OEH/vvf//LOO+/QokUL/v77b+69917KlCnDgAEDWLlyJW3atOGXX36hUaNG2O12YmJiGDt2LOPGjeODDz6gRYsWTJo0iX79+rF+/Xrq1q3L6NGjmTt3Ll9//TUXXXQR+/btY9++fenPaUREBO
+++y41a9Zk586dPPTQQ7zyyiuMGzfOkO85L9LS0ggPD6d9+/b5sk+Q2+1m3rx5dO7cGZvNFvLxpPjStSRG0HUkRtG1JEYpTNfSmbvEglEoiqA9e/Ywf/58vvvuuxzjHA4HDocj03GbzZblk24ymTCbzee1t8zxJGfuQcCJJBdg3P41R44cwePxcNNNN1G9enUAmjVrBsDIkSMZPXo0N998MwC1a9dmy5YtTJgwgYEDB1KhQgUgsNbq3GJy9OjRPPXUU/Tr1w+A119/ncWLF/POO+8wbtw49u3bR926dWnfvj0mk4maNWtmyOmxxx5L/7xWrVq8/PLL3HfffXzwwQeGfM95YTabMZlM2f7MQyW/x5PiS9eSGEHXkRhF15Kx/H4/Se4krGYrXp8Xi9mCCRNh1uK/wXthuJbyMn6hKIImTZpE+fLlue666wo6lXRloxz8cyQp17gyUXayWcJ0Xpo1a0anTp1o0qQJXbt2pUuXLtx8883Y7XZ27NjB4MGDGTJkSHq8x+MhNjY228dLSEjg4MGDtGvXLsPxdu3asXbtWgDuuusuOnfuTP369enWrRs9evSgS5cu6bHz58/n1VdfZcuWLSQkJODxeEhLSyMlJSXTbYgiIiIiJVGyO5nVh1czYf0E1h4LvMYq5ShFn3p9GNBoAJG2SCzm0K9nluAU+Ip+n8/HpEmTGDBgAFZroajJiAmz0b9N9aBi72hTnWiHcVWvxWJh3rx5zJkzh4YNG/Luu+9Sv359NmzYAMCECRNYs2ZN+seGDRtYuXLlBY3ZsmVLdu3axUsvvURqaiq33HJL+mzT7t276dGjB02bNmX69On8+eef6bfBuVyuC/tmRURERIqBZHcyb//1Ng8tfCi9AAI45TzF+PXjuenHmzjlPEUhWYovFIIiaP78+ezdu5dBgwYVdCrpzGYTV9UrR7XSOa/zqVY6nPb1yhneIttkMtGuXTtGjhzJ33//jd1uZ/ny5VSuXJmdO3dSp06dDB9nbl870wDi3L2XYmJiqFy5MsuXL88wxvLly2nYsGGGuL59+zJhwgS+/vprpk+fzsmTJ/nzzz/x+XyMHj2aNm3aUK9ePQ4ePGjo9ysiIiJSVPn9fv449Adfbfkq25jDyYd5dNGjJLlzv8tI8keBT7106dKlUFbFYTYL0+67PNs22Wf2CQq3GTut+fvvv7NgwQK6dOlC+fLl+f333zl27BgNGjRg5MiRDB06lNjYWLp164bT6WT16tWcOnWKYcOGUb58ecLDw/n555+pWrUqYWFhxMbG8sQTT/DCCy9Qu3ZtmjdvzqRJk1izZg2TJ08GYMyYMVSqVIkWLVpgNpv59ttvqVixInFxcdSpUwe32827777L9ddfz/Lly/nwww8N/Z5FREREiqpEdyIfr/8417i1x9ZyMu0k0fbofMhKclPgRVBhZTGbKBfl4OdH2rPkn2N8uXIPJ5JclImyc0eb6rSvV45wm8XwWaCYmBh+/fVXxo4dS0JCAtWrV2f06NFce+21QKBT2xtvvMETTzxBZGQkTZo04dFHHwUCbcHfeecdRo0axYgRI7jyyitZvHgxQ4cOJT4+nscff5yjR4/SsGFDZs6cSd26dQGIjo7m9ddfZ9u2bVgsFi655BJmz56N2WymWbNmjBkzhtdee41nnnmG9u3b8+qrr3LnnXca+n2LiIiIFEVWk5V1x9cFFfvzrp+5t9m9Ic5IgqEiKAdms4lIh5VujSrSrk4Z/H4wmSDaYTO8+DmjQYMG/Pzzz9me79evX3qXt6zcfffd6fsGnWE2m3nhhRd44YUXsvw7Q4YMydBs4d8ee+yxDB3iAO64445s40VERERKCo8/+H0LUzwp+P1+TEZ21ZLzoiIoCGazidjwvG24KiIiIiLFn81kI9YRS7wzPtfYxmUaqwAqJAq8MYKIiIiISFFlMpm4qc5NucbF2GNoV6VdrnGSP1QEiYiIiIicpzBrGIOaDKJCRIUc44a3Hq5ZoEJERZ
CIiIiIyAWItEUy5bopNC7bONO5GHsMoy4fRZcaXQi35rz9iuQfrQkSEREREbkAVrOVcuHlGN95PMdTjzNn5xxSvak
2024-09-23 23:22:33 +00:00
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Покажем на графике, что отражаем полученное число.\n",
"# Красным цветом обозначим точки, для которых классификация сработала неправильно.\n",
"plt.figure(figsize=(10, 7))\n",
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
"plt.xlabel('Длина лепестка, см')\n",
"plt.ylabel('Ширина лепестка, см')\n",
"plt.legend(loc=2)\n",
"plt.grid()\n",
"\n",
"# Перебираем все объекты из теста\n",
"for i in range(len(y_test)):\n",
" # Если предсказание неправильное\n",
" if np.array(y_test)[i] != y_pred[i]:\n",
" # то подсвечиваем точку красным\n",
" plt.scatter(X_test.iloc[i, 3], X_test.iloc[i, 2], color='red', s=150)"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 16,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-09-27 05:31:03 +00:00
"accuracy: 1.000\n"
2024-09-23 23:22:33 +00:00
]
}
],
"source": [
"# качество модели (доля правильно классифицированных точек)\n",
"print(f'accuracy: {accuracy_score(y_test, y_pred):.3f}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Обучим метод 5 ближайших соседях"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 17,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-09-27 05:31:03 +00:00
"array(['versicolor', 'setosa', 'virginica', 'setosa', 'versicolor',\n",
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
" 'versicolor', 'setosa', 'virginica', 'setosa', 'virginica',\n",
" 'setosa', 'versicolor', 'versicolor', 'virginica', 'setosa',\n",
" 'versicolor', 'setosa', 'setosa'], dtype=object)"
2024-09-23 23:22:33 +00:00
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 17,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = KNeighborsClassifier(n_neighbors=5)\n",
"model.fit(X_train, y_train)\n",
"\n",
"# Получим предсказания модели\n",
"y_pred = model.predict(X_test)\n",
"y_pred"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 18,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
2024-09-27 05:31:03 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyiElEQVR4nOzdd3hU1dbH8e/09ITepYP0qoIoKEgRERFFFFQExC4qYntVFNTrtYBYsICIDUQFRRQQ6VJEQaUX6b2X9Ex//5hLIKZN4Ezq7/M8eUjOWcxemRzCrNlnr23y+/1+RERERERESghzQScgIiIiIiKSn1QEiYiIiIhIiaIiSEREREREShQVQSIiIiIiUqKoCBIRERERkRJFRZCIiIiIiJQoKoJERERERKREUREkIiIiIiIlirWgE7gQPp+PgwcPEh0djclkKuh0RERERESkgPj9fhITE6lcuTJmc85zPUW6CDp48CDVqlUr6DRERERERKSQ2LdvH1WrVs0xpkgXQdHR0UDgG42JiUk/7na7+eWXX+jSpQs2m62g0pNiQNeSGEXXkhhB15EYRdeSGKUwXUsJCQlUq1YtvUbISZEugs7cAhcTE5OpCIqIiCAmJqbAfxhStOlaEqPoWhIj6DoSo+haEqMUxmspmGUyaowgIiIiIiIlioogEREREREpUVQEiYiIiIhIiVKk1wQFw+/34/F48Hq9BZ2KXCCLxYLValU7dBERERG5IMW6CHK73Rw8eJCUlJSCTkUMEhERQaVKlbDb7QWdioiIiIgUUcW6CNq7dy9Wq5XKlStjt9s1g1CE+f1+XC4Xx44dY9euXdStWzfXTbBERERERLJSbIsgq9WKz+ejcuXKREREFHQ6YoDw8HBsNht79uzB5XIRFhZW0CmJiIiISBFU7N9K12xB8aKfp4iIiIhcKL2iFBERERGREqVAi6AaNWpgMpkyfTz44IMFmVYmPp+f0ymu9A+fz1/QKYmIiIiIyHkq0DVBq1atytC6esOGDXTu3Jk+ffoUYFZneX1+0txeFv9zjMkr93A8yUnZKAf921TnqnrlCLdZMJvVbEFEREREpCgp0CKoXLlyGb7+73//S+3atenQoUMBZXSW1+fneJKTmz9cwb6TqenH/zmSxIodJ6hWOpxp911OuShHoSmEdu/eTc2aNfn7779p3rx5QacjIiIiIlIoFZrucC6Xiy+//JJhw4Zl28ra6XTidDrTv05ISAAC+wG53e7042c+9/v9+Hw+fD5fnvNJc/syFUDn2ncylZs/XMHPj7Qn3FY4llad+T7P93suCnw+H36/H7fbjcViCfl4Z66lc68vkfOha0mMoOtIjKJrSYxSmK6lvORg8vv9hWKByzfffEO/fv3Yu3cvlStXzjLmxRdfZOTIkZmOT5kyJVMbbKvVSsWKFalWrVqeN9a02mws2RHPg1P+zjX2/X4taF87Fo+BP/gffviB1157jV27dhEeHk7Tpk2ZPHkykZGRfP7554wbN449e/Zw0UUXcc8993D33XcDUKpUqQyP065dO3766Sd8Ph9vvvkmn332GcePH6devXq88MILXHPNNUCgAH322Wf58ccfOX36NOXKlWPgwIEMGzYMgHHjxjF58mT27NlDXFwc3bp1Y+TIkURFRRn2PQfL5XKxb98+Dh8+jMfjyffxRURERKRwSklJoV+/fsTHxxMTE5NjbKGZCZo4cSLXXntttgUQwDPPPJP+whwCM0HVqlWjS5cuGb5Rt9vNokWLCAsLIyoqKs/7ySSkeZj8+96gYr/8fS+X12lJTEx4nsbIzqFDh7j77rt57bXX6NWrF4mJiSxbtozo6Gh++OEH/vvf//LOO+/QokUL/v77b+69917KlCnDgAEDWLlyJW3atOGXX36hUaNG2O12YmJiGDt2LOPGjeODDz6gRYsWTJo0iX79+rF+/Xrq1q3L6NGjmTt3Ll9//TUXXXQR+/btY9++fenPaUREBO
+++y41a9Zk586dPPTQQ7zyyiuMGzfOkO85L9LS0ggPD6d9+/b5sk+Q2+1m3rx5dO7cGZvNFvLxpPjStSRG0HUkRtG1JEYpTNfSmbvEglEoiqA9e/Ywf/58vvvuuxzjHA4HDocj03GbzZblk24ymTCbzee1t8zxJGfuQcCJJBdg3P41R44cwePxcNNNN1G9enUAmjVrBsDIkSMZPXo0N998MwC1a9dmy5YtTJgwgYEDB1KhQgUgsNbq3GJy9OjRPPXUU/Tr1w+A119/ncWLF/POO+8wbtw49u3bR926dWnfvj0mk4maNWtmyOmxxx5L/7xWrVq8/PLL3HfffXzwwQeGfM95YTabMZlM2f7MQyW/x5PiS9eSGEHXkRhF15Kx/H4/Se4krGYrXp8Xi9mCCRNh1uK/wXthuJbyMn6hKIImTZpE+fLlue666wo6lXRloxz8cyQp17gyUXayWcJ0Xpo1a0anTp1o0qQJXbt2pUuXLtx8883Y7XZ27NjB4MGDGTJkSHq8x+MhNjY228dLSEjg4MGDtGvXLsPxdu3asXbtWgDuuusuOnfuTP369enWrRs9evSgS5cu6bHz58/n1VdfZcuWLSQkJODxeEhLSyMlJSXTbYgiIiIiJVGyO5nVh1czYf0E1h4LvMYq5ShFn3p9GNBoAJG2SCzm0K9nluAU+Ip+n8/HpEmTGDBgAFZroajJiAmz0b9N9aBi72hTnWiHcVWvxWJh3rx5zJkzh4YNG/Luu+9Sv359NmzYAMCECRNYs2ZN+seGDRtYuXLlBY3ZsmVLdu3axUsvvURqaiq33HJL+mzT7t276dGjB02bNmX69On8+eef6bfBuVyuC/tmRURERIqBZHcyb//1Ng8tfCi9AAI45TzF+PXjuenHmzjlPEUhWYovFIIiaP78+ezdu5dBgwYVdCrpzGYTV9UrR7XSOa/zqVY6nPb1yhneIttkMtGuXTtGjhzJ33//jd1uZ/ny5VSuXJmdO3dSp06dDB9nbl870wDi3L2XYmJiqFy5MsuXL88wxvLly2nYsGGGuL59+zJhwgS+/vprpk+fzsmTJ/nzzz/x+XyMHj2aNm3aUK9ePQ4ePGjo9ysiIiJSVPn9fv449Adfbfkq25jDyYd5dNGjJLlzv8tI8keBT7106dKlUFbFYTYL0+67PNs22Wf2CQq3GTut+fvvv7NgwQK6dOlC+fLl+f333zl27BgNGjRg5MiRDB06lNjYWLp164bT6WT16tWcOnWKYcOGUb58ecLDw/n555+pWrUqYWFhxMbG8sQTT/DCCy9Qu3ZtmjdvzqRJk1izZg2TJ08GYMyYMVSqVIkWLVpgNpv59ttvqVixInFxcdSpUwe32827777L9ddfz/Lly/nwww8N/Z5FREREiqpEdyIfr/8417i1x9ZyMu0k0fbofMhKclPgRVBhZTGbKBfl4OdH2rPkn2N8uXIPJ5JclImyc0eb6rSvV45wm8XwWaCYmBh+/fVXxo4dS0JCAtWrV2f06NFce+21QKBT2xtvvMETTzxBZGQkTZo04dFHHwUCbcHfeecdRo0axYgRI7jyyitZvHgxQ4cOJT4+nscff5yjR4/SsGFDZs6cSd26dQGIjo7m9ddfZ9u2bVgsFi655BJmz56N2WymWbNmjBkzhtdee41nnnmG9u3b8+qrr3LnnXca+n2LiIiIFEVWk5V1x9cFFfvzrp+5t9m9Ic5IgqEiKAdms4lIh5VujSrSrk4Z/H4wmSDaYTO8+DmjQYMG/Pzzz9me79evX3qXt6zcfffd6fsGnWE2m3nhhRd44YUXsvw7Q4YMydBs4d8ee+yxDB3iAO64445s40VERERKCo8/+H0LUzwp+P1+TEZ21ZLzoiIoCGazidjwvG24KiIiIiLFn81kI9YRS7wzPtfYxmUaqwAqJAq8MYKIiIiISFFlMpm4qc5NucbF2GNoV6VdrnGSP1QEiYiIiIicpzBrGIOaDKJCRIUc44a3Hq5ZoEJERZ
CIiIiIyAWItEUy5bopNC7bONO5GHsMoy4fRZcaXQi35rz9iuQfrQkSEREREbkAVrOVcuHlGN95PMdTjzNn5xxSvak
2024-09-23 23:22:33 +00:00
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Visualize the classification result on the petal scatter plot:\n",
"# every iris sample is colored by species, misclassified test points are overdrawn in red.\n",
"plt.figure(figsize=(10, 7))\n",
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
"# The x axis shows petal width and the y axis shows petal length, so the labels must say the same.\n",
"plt.xlabel('Ширина лепестка, см')\n",
"plt.ylabel('Длина лепестка, см')\n",
"plt.legend(loc=2)\n",
"plt.grid()\n",
"\n",
"# Boolean mask of the test objects whose predicted label differs from the true one.\n",
"misclassified = np.asarray(y_test) != y_pred\n",
"# Columns 3 and 2 of X_test go to x and y; presumably petal width and petal length -- TODO confirm.\n",
"# The trailing ';' keeps the PathCollection repr out of the cell output.\n",
"plt.scatter(X_test.iloc[misclassified, 3], X_test.iloc[misclassified, 2], color='red', s=150);"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 19,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-09-27 05:31:03 +00:00
"accuracy: 1.000\n"
2024-09-23 23:22:33 +00:00
]
}
],
"source": [
"# Model quality: fraction of correctly classified test points.\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"print(f'accuracy: {accuracy:.3f}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Обучим модель методом 10 ближайших соседей"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 20,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2024-09-27 05:31:03 +00:00
"array(['versicolor', 'setosa', 'virginica', 'setosa', 'versicolor',\n",
2024-09-23 23:22:33 +00:00
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
2024-09-27 05:31:03 +00:00
" 'versicolor', 'setosa', 'virginica', 'setosa', 'virginica',\n",
" 'setosa', 'versicolor', 'versicolor', 'virginica', 'setosa',\n",
" 'versicolor', 'setosa', 'setosa'], dtype=object)"
2024-09-23 23:22:33 +00:00
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 20,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fit a 10-nearest-neighbours classifier on the training split (fit returns the estimator itself).\n",
"model = KNeighborsClassifier(n_neighbors=10).fit(X_train, y_train)\n",
"\n",
"# Predict labels for the test split and show them as the cell output.\n",
"y_pred = model.predict(X_test)\n",
"y_pred"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 21,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
2024-09-27 05:31:03 +00:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACyiElEQVR4nOzdd3hU1dbH8e/09ITepYP0qoIoKEgRERFFFFQExC4qYntVFNTrtYBYsICIDUQFRRQQ6VJEQaUX6b2X9Ex//5hLIKZN4Ezq7/M8eUjOWcxemRzCrNlnr23y+/1+RERERERESghzQScgIiIiIiKSn1QEiYiIiIhIiaIiSEREREREShQVQSIiIiIiUqKoCBIRERERkRJFRZCIiIiIiJQoKoJERERERKREUREkIiIiIiIlirWgE7gQPp+PgwcPEh0djclkKuh0RERERESkgPj9fhITE6lcuTJmc85zPUW6CDp48CDVqlUr6DRERERERKSQ2LdvH1WrVs0xpkgXQdHR0UDgG42JiUk/7na7+eWXX+jSpQs2m62g0pNiQNeSGEXXkhhB15EYRdeSGKUwXUsJCQlUq1YtvUbISZEugs7cAhcTE5OpCIqIiCAmJqbAfxhStOlaEqPoWhIj6DoSo+haEqMUxmspmGUyaowgIiIiIiIlioogEREREREpUVQEiYiIiIhIiVKk1wQFw+/34/F48Hq9BZ2KXCCLxYLValU7dBERERG5IMW6CHK73Rw8eJCUlJSCTkUMEhERQaVKlbDb7QWdioiIiIgUUcW6CNq7dy9Wq5XKlStjt9s1g1CE+f1+XC4Xx44dY9euXdStWzfXTbBERERERLJSbIsgq9WKz+ejcuXKREREFHQ6YoDw8HBsNht79uzB5XIRFhZW0CmJiIiISBFU7N9K12xB8aKfp4iIiIhcKL2iFBERERGREqVAi6AaNWpgMpkyfTz44IMFmVYmPp+f0ymu9A+fz1/QKYmIiIiIyHkq0DVBq1atytC6esOGDXTu3Jk+ffoUYFZneX1+0txeFv9zjMkr93A8yUnZKAf921TnqnrlCLdZMJvVbEFEREREpCgp0CKoXLlyGb7+73//S+3atenQoUMBZXSW1+fneJKTmz9cwb6TqenH/zmSxIodJ6hWOpxp911OuShHoSmEdu/eTc2aNfn7779p3rx5QacjIiIiIlIoFZrucC6Xiy+//JJhw4Zl28ra6XTidDrTv05ISAAC+wG53e7042c+9/v9+Hw+fD5fnvNJc/syFUDn2ncylZs/XMHPj7Qn3FY4llad+T7P93suCnw+H36/H7fbjcViCfl4Z66lc68vkfOha0mMoOtIjKJrSYxSmK6lvORg8vv9hWKByzfffEO/fv3Yu3cvlStXzjLmxRdfZOTIkZmOT5kyJVMbbKvVSsWKFalWrVqeN9a02mws2RHPg1P+zjX2/X4taF87Fo+BP/gffviB1157jV27dhEeHk7Tpk2ZPHkykZGRfP7554wbN449e/Zw0UUXcc8993D33XcDUKpUqQyP065dO3766Sd8Ph9vvvkmn332GcePH6devXq88MILXHPNNUCgAH322Wf58ccfOX36NOXKlWPgwIEMGzYMgHHjxjF58mT27NlDXFwc3bp1Y+TIkURFRRn2PQfL5XKxb98+Dh8+jMfjyffxRURERKRwSklJoV+/fsTHxxMTE5NjbKGZCZo4cSLXXntttgUQwDPPPJP+whwCM0HVqlWjS5cuGb5Rt9vNokWLCAsLIyoqKs/7ySSkeZj8+96gYr/8fS+X12lJTEx4nsbIzqFDh7j77rt57bXX6NWrF4mJiSxbtozo6Gh++OEH/vvf//LOO+/QokUL/v77b+69917KlCnDgAEDWLlyJW3atOGXX36hUaNG2O12YmJiGDt2LOPGjeODDz6gRYsWTJo0iX79+rF+/Xrq1q3L6NGjmTt3Ll9//TUXXXQR+/btY9++fenPaUREBO
+++y41a9Zk586dPPTQQ7zyyiuMGzfOkO85L9LS0ggPD6d9+/b5sk+Q2+1m3rx5dO7cGZvNFvLxpPjStSRG0HUkRtG1JEYpTNfSmbvEglEoiqA9e/Ywf/58vvvuuxzjHA4HDocj03GbzZblk24ymTCbzee1t8zxJGfuQcCJJBdg3P41R44cwePxcNNNN1G9enUAmjVrBsDIkSMZPXo0N998MwC1a9dmy5YtTJgwgYEDB1KhQgUgsNbq3GJy9OjRPPXUU/Tr1w+A119/ncWLF/POO+8wbtw49u3bR926dWnfvj0mk4maNWtmyOmxxx5L/7xWrVq8/PLL3HfffXzwwQeGfM95YTabMZlM2f7MQyW/x5PiS9eSGEHXkRhF15Kx/H4/Se4krGYrXp8Xi9mCCRNh1uK/wXthuJbyMn6hKIImTZpE+fLlue666wo6lXRloxz8cyQp17gyUXayWcJ0Xpo1a0anTp1o0qQJXbt2pUuXLtx8883Y7XZ27NjB4MGDGTJkSHq8x+MhNjY228dLSEjg4MGDtGvXLsPxdu3asXbtWgDuuusuOnfuTP369enWrRs9evSgS5cu6bHz58/n1VdfZcuWLSQkJODxeEhLSyMlJSXTbYgiIiIiJVGyO5nVh1czYf0E1h4LvMYq5ShFn3p9GNBoAJG2SCzm0K9nluAU+Ip+n8/HpEmTGDBgAFZroajJiAmz0b9N9aBi72hTnWiHcVWvxWJh3rx5zJkzh4YNG/Luu+9Sv359NmzYAMCECRNYs2ZN+seGDRtYuXLlBY3ZsmVLdu3axUsvvURqaiq33HJL+mzT7t276dGjB02bNmX69On8+eef6bfBuVyuC/tmRURERIqBZHcyb//1Ng8tfCi9AAI45TzF+PXjuenHmzjlPEUhWYovFIIiaP78+ezdu5dBgwYVdCrpzGYTV9UrR7XSOa/zqVY6nPb1yhneIttkMtGuXTtGjhzJ33//jd1uZ/ny5VSuXJmdO3dSp06dDB9nbl870wDi3L2XYmJiqFy5MsuXL88wxvLly2nYsGGGuL59+zJhwgS+/vprpk+fzsmTJ/nzzz/x+XyMHj2aNm3aUK9ePQ4ePGjo9ysiIiJSVPn9fv449Adfbfkq25jDyYd5dNGjJLlzv8tI8keBT7106dKlUFbFYTYL0+67PNs22Wf2CQq3GTut+fvvv7NgwQK6dOlC+fLl+f333zl27BgNGjRg5MiRDB06lNjYWLp164bT6WT16tWcOnWKYcOGUb58ecLDw/n555+pWrUqYWFhxMbG8sQTT/DCCy9Qu3ZtmjdvzqRJk1izZg2TJ08GYMyYMVSqVIkWLVpgNpv59ttvqVixInFxcdSpUwe32827777L9ddfz/Lly/nwww8N/Z5FREREiqpEdyIfr/8417i1x9ZyMu0k0fbofMhKclPgRVBhZTGbKBfl4OdH2rPkn2N8uXIPJ5JclImyc0eb6rSvV45wm8XwWaCYmBh+/fVXxo4dS0JCAtWrV2f06NFce+21QKBT2xtvvMETTzxBZGQkTZo04dFHHwUCbcHfeecdRo0axYgRI7jyyitZvHgxQ4cOJT4+nscff5yjR4/SsGFDZs6cSd26dQGIjo7m9ddfZ9u2bVgsFi655BJmz56N2WymWbNmjBkzhtdee41nnnmG9u3b8+qrr3LnnXca+n2LiIiIFEVWk5V1x9cFFfvzrp+5t9m9Ic5IgqEiKAdms4lIh5VujSrSrk4Z/H4wmSDaYTO8+DmjQYMG/Pzzz9me79evX3qXt6zcfffd6fsGnWE2m3nhhRd44YUXsvw7Q4YMydBs4d8ee+yxDB3iAO64445s40VERERKCo8/+H0LUzwp+P1+TEZ21ZLzoiIoCGazidjwvG24KiIiIiLFn81kI9YRS7wzPtfYxmUaqwAqJAq8MYKIiIiISFFlMpm4qc5NucbF2GNoV6VdrnGSP1QEiYiIiIicpzBrGIOaDKJCRIUc44a3Hq5ZoEJERZ
CIiIiIyAWItEUy5bopNC7bONO5GHsMoy4fRZcaXQi35rz9iuQfrQkSEREREbkAVrOVcuHlGN95PMdTjzNn5xxSvak
2024-09-23 23:22:33 +00:00
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Visualize the classification result on the petal scatter plot:\n",
"# every iris sample is colored by species, misclassified test points are overdrawn in red.\n",
"plt.figure(figsize=(10, 7))\n",
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
"# The x axis shows petal width and the y axis shows petal length, so the labels must say the same.\n",
"plt.xlabel('Ширина лепестка, см')\n",
"plt.ylabel('Длина лепестка, см')\n",
"plt.legend(loc=2)\n",
"plt.grid()\n",
"\n",
"# Boolean mask of the test objects whose predicted label differs from the true one.\n",
"misclassified = np.asarray(y_test) != y_pred\n",
"# Columns 3 and 2 of X_test go to x and y; presumably petal width and petal length -- TODO confirm.\n",
"# The trailing ';' keeps the PathCollection repr out of the cell output.\n",
"plt.scatter(X_test.iloc[misclassified, 3], X_test.iloc[misclassified, 2], color='red', s=150);"
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 22,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-09-27 05:31:03 +00:00
"accuracy: 1.000\n"
2024-09-23 23:22:33 +00:00
]
}
],
"source": [
"# Model quality: fraction of correctly classified test points.\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"print(f'accuracy: {accuracy:.3f}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.3.2 Задание\n",
"\n",
"Определите набор признаков человека по аналогии с РТ 1 (например, \n",
"цвет глаз) и конвертируйте его в матрицу признаков."
]
},
{
"cell_type": "code",
2024-09-27 05:31:03 +00:00
"execution_count": 23,
2024-09-23 23:22:33 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[2., 0., 3., 1.],\n",
" [0., 1., 1., 1.],\n",
" [5., 3., 0., 5.],\n",
" [3., 4., 2., 0.]])"
]
},
2024-09-27 05:31:03 +00:00
"execution_count": 23,
2024-09-23 23:22:33 +00:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One dict per person: eye-colour feature name -> numeric value.\n",
"data_dict = [\n",
"    {\"карий\": 3, \"голубой\": 2, \"серый\": 1},\n",
"    {\"карий\": 1, \"зеленый\": 1, \"серый\": 1},\n",
"    {\"зеленый\": 3, \"голубой\": 5, \"серый\": 5},\n",
"    {\"карий\": 2, \"голубой\": 3, \"зеленый\": 4},\n",
"]\n",
"\n",
"# sparse=False makes the vectorizer return a dense NumPy array instead of a SciPy sparse matrix.\n",
"dictvectoriser = DictVectorizer(sparse=False)\n",
"\n",
"# Learn the feature names and build the feature matrix; the matrix is shown as the cell output.\n",
"features = dictvectoriser.fit_transform(data_dict)\n",
"features"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-09-27 05:31:03 +00:00
"version": "3.12.5"
2024-09-23 23:22:33 +00:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}