965 lines
388 KiB
Plaintext
965 lines
388 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Рабочая тетрадь № 3"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 100,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"from sklearn.neighbors import KNeighborsClassifier\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.feature_extraction import DictVectorizer"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### 1.3.1 Задание\n",
|
|||
|
"\n",
|
|||
|
"Задайте 4 точки в трехмерном пространстве, рассчитайте между ними \n",
|
|||
|
"расстояния по описанным в примере выше метрикам. Отобразите точки \n",
|
|||
|
"в трехмерном пространстве."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 81,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"first_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|||
|
"second_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|||
|
"third_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|||
|
"fourth_dot = np.array([np.random.randint(10) for _ in range(3)])\n",
|
|||
|
"\n",
|
|||
|
"dots = [first_dot, second_dot, third_dot, fourth_dot]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 82,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[6 9 3] [2 3 5]\n",
|
|||
|
"Расстояние Евклида 7.483314773547883\n",
|
|||
|
"Квадрат Евклидова расстояния 56.0\n",
|
|||
|
"Расстояние Чебышева 6.0\n",
|
|||
|
"Расстояние Хемминга 12.0\n",
|
|||
|
"\n",
|
|||
|
"[6 9 3] [5 7 5]\n",
|
|||
|
"Расстояние Евклида 3.0\n",
|
|||
|
"Квадрат Евклидова расстояния 9.0\n",
|
|||
|
"Расстояние Чебышева 2.0\n",
|
|||
|
"Расстояние Хемминга 5.0\n",
|
|||
|
"\n",
|
|||
|
"[6 9 3] [3 7 5]\n",
|
|||
|
"Расстояние Евклида 4.123105625617661\n",
|
|||
|
"Квадрат Евклидова расстояния 17.0\n",
|
|||
|
"Расстояние Чебышева 3.0\n",
|
|||
|
"Расстояние Хемминга 7.0\n",
|
|||
|
"\n",
|
|||
|
"[2 3 5] [6 9 3]\n",
|
|||
|
"Расстояние Евклида 7.483314773547883\n",
|
|||
|
"Квадрат Евклидова расстояния 56.0\n",
|
|||
|
"Расстояние Чебышева 6.0\n",
|
|||
|
"Расстояние Хемминга 12.0\n",
|
|||
|
"\n",
|
|||
|
"[2 3 5] [5 7 5]\n",
|
|||
|
"Расстояние Евклида 5.0\n",
|
|||
|
"Квадрат Евклидова расстояния 25.0\n",
|
|||
|
"Расстояние Чебышева 4.0\n",
|
|||
|
"Расстояние Хемминга 7.0\n",
|
|||
|
"\n",
|
|||
|
"[2 3 5] [3 7 5]\n",
|
|||
|
"Расстояние Евклида 4.123105625617661\n",
|
|||
|
"Квадрат Евклидова расстояния 17.0\n",
|
|||
|
"Расстояние Чебышева 4.0\n",
|
|||
|
"Расстояние Хемминга 5.0\n",
|
|||
|
"\n",
|
|||
|
"[5 7 5] [6 9 3]\n",
|
|||
|
"Расстояние Евклида 3.0\n",
|
|||
|
"Квадрат Евклидова расстояния 9.0\n",
|
|||
|
"Расстояние Чебышева 2.0\n",
|
|||
|
"Расстояние Хемминга 5.0\n",
|
|||
|
"\n",
|
|||
|
"[5 7 5] [2 3 5]\n",
|
|||
|
"Расстояние Евклида 5.0\n",
|
|||
|
"Квадрат Евклидова расстояния 25.0\n",
|
|||
|
"Расстояние Чебышева 4.0\n",
|
|||
|
"Расстояние Хемминга 7.0\n",
|
|||
|
"\n",
|
|||
|
"[5 7 5] [3 7 5]\n",
|
|||
|
"Расстояние Евклида 2.0\n",
|
|||
|
"Квадрат Евклидова расстояния 4.0\n",
|
|||
|
"Расстояние Чебышева 2.0\n",
|
|||
|
"Расстояние Хемминга 2.0\n",
|
|||
|
"\n",
|
|||
|
"[3 7 5] [6 9 3]\n",
|
|||
|
"Расстояние Евклида 4.123105625617661\n",
|
|||
|
"Квадрат Евклидова расстояния 17.0\n",
|
|||
|
"Расстояние Чебышева 3.0\n",
|
|||
|
"Расстояние Хемминга 7.0\n",
|
|||
|
"\n",
|
|||
|
"[3 7 5] [2 3 5]\n",
|
|||
|
"Расстояние Евклида 4.123105625617661\n",
|
|||
|
"Квадрат Евклидова расстояния 17.0\n",
|
|||
|
"Расстояние Чебышева 4.0\n",
|
|||
|
"Расстояние Хемминга 5.0\n",
|
|||
|
"\n",
|
|||
|
"[3 7 5] [5 7 5]\n",
|
|||
|
"Расстояние Евклида 2.0\n",
|
|||
|
"Квадрат Евклидова расстояния 4.0\n",
|
|||
|
"Расстояние Чебышева 2.0\n",
|
|||
|
"Расстояние Хемминга 2.0\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"for first in dots:\n",
|
|||
|
" for second in dots:\n",
|
|||
|
" if (first == second).all():\n",
|
|||
|
" continue\n",
|
|||
|
"\n",
|
|||
|
" print(first, second)\n",
|
|||
|
" print('Расстояние Евклида', np.linalg.norm(first-second))\n",
|
|||
|
" print('Квадрат Евклидова расстояния', np.linalg.norm(first-second) ** 2)\n",
|
|||
|
" print('Расстояние Чебышева', np.linalg.norm(first-second, ord=np.inf))\n",
|
|||
|
" print('Манхэттенское расстояние', np.linalg.norm(first-second, ord=1))\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 83,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZkAAAGOCAYAAABFdn7tAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAACrUUlEQVR4nOy9d3hc5Z32f09Rb6NuS7K6LFlyUXGTgRACwdjG2JCQhGUDYUknCWw2vATeQCBATEkgybIvgWw24f0RLwlgDIElBowNBDDY1oyq1XsbTZM0vZ3z+8Pvc3JmNPXMmZkz8vlcF1dilTPPjGae+zzfcn8lNE3TEBERERERiQLSeC9ARERERGT1IoqMiIiIiEjUEEVGRERERCRqiCIjIiIiIhI1RJEREREREYkaosiIiIiIiEQNUWRERERERKKGKDIiIiIiIlFDFBkRERERkaghioyIiIiISNQQRUZEREREJGqIIiMiIiIiEjVEkRERERERiRqiyIiIiIiIRA1RZEREREREooYoMiIiIiIiUUMUGRERERGRqCGKjIiIiIhI1BBFRkREREQkaogiIyIiIiISNUSRERERERGJGqLIiIiIiIhEDVFkRERERESihigyIiIiIiJRQxQZEREREZGoIYqMiIiIiEjUEEVGRERERCRqiCIjIiIiIhI1RJEREREREYkaosiIiIiIiEQNUWRERERERKKGKDIiIiIiIlFDFBkRERERkaghioyIiIiISNQQRUZEREREJGqIIiMiIiIiEjXk8V6AyIUFTdNwu92w2+2QyWTMf1KpeL8jIrIaEUVGJGbQNA2n0wmXywW73c58XSqVQi6XQy6Xi6IjIrLKkNA0Tcd7ESKrH7fbDafTCYqiIJFI4HA4IJVKQdM0aJoGRVGgaRoSiQQSiUQUHRGRVYIoMiJRhaZpuFwuuFwuAIBEImFONBKJxOfPE8EhkJ9LTk5GUlIS5HK5z98VERERHmK4TCRqUBTFnF4AMKcUIiDk5MJGIpFAJpMx/yaic+rUKdTX10OhUEAqlUImk3mcdkTRERERJqLIiPAOEQan0+kRAvP+mVCEgYgO+V+ZTMZc2+FwQCKRMKKTlJTE/IwoOiIiwkAUGRFeIaGwnp4eFBUVoaCggJcNn1zD30nHW3S8czqi6IiIxAdRZER4g2z2brcbRqMRubm5vG3u7DCb99eJ6JDvUxQFh8MBu90uio6ISJwRRUYkYkjvi8vlAkVRkEqlfkUhmhDhEEVHREQ4iCIjEhEkPOZ2uwGAERhSnswXXETLl+iQ/+x2OxwOB7NmUXRERKKDKDIinCEnBPbphY0vUaBpGiMjI5iYmEBWVhZyc3ORm5uLrKysqPfCsAsQZDLZCtFhn3RIqTTp0RFFR0SEG6LIiIQNCY+R6jFfm7Cvk4fNZkNnZyccDgcaGxths9lgMBgwNTUFiqKgUCiQm5sLhUKBrKwsj2tGI/wWSHRsNhvzM6LoiIhwRxQZkbCgKAoul2tFeMwbb1HQaDTo6upCYWEhWltbmd6ZsrIy0DQNs9kMg8EAg8GA8fFxAGBEJzc3Nyb5nVBFx7tHRxQdERH/iCIjEhKh9L6wISJDURQGBwcxNTWFxsZGlJaWgqZpJh9CfjYzMxOZmZlYt24daJqG0WiEwWCATqfD6OgoKIrCxMQE7HY7FAoF0tPTo76x+xMdiqIY0ZFKpStyOqLoiIj8A9FWRiQovqxhgm2iSqUS6enp0Ov1oCgKzc3NyMjIYK5HRCaUzZiiKHz66afIysqCw+HA0tIS5HI5c8rJzc1FWlpahM8yfNgnHfKfVCqFy+VCcnIyUlNTRdERueARTzIiAWH3vpD8RCjYbDZoNBqUlZWhvr7eo4EyXMhJobCwEEVFRXC73VheXobBYMDc3BwGBgaQkpLiITopKSmcHy9UvMWWCE1fXx8KCgqwdu1aj5wOCbOFItIiIqsFUWREfOKv9yUYbrcb586dw/LyMtasWY
PGxkZe1wScD10RMQEAl8uFpaUlpoigr68P6enpHjmd5ORk3tbhDyIebAsc9uvI/p6375ooOiKrFVFkRFbgr/clGEajEZ2dncypg4TH+CDQ48vlcuTn5yM/Px/AedFZXFyEwWDAxMQEent7kZGRwQiOQqFAUlISb2vzhoghEQ9y+iMnHZfLxbhQE9Fh+66JYw1EVhOiyIh44Ha7YbPZmM0xFHGhaRrT09Po7+9HRUUFamtr0dvby3tFWKjXk8vlKCgoQEFBAQDA4XAwojMyMgKLxcL06CgUCigUCsjl0f8oiKIjciEiiowIgH+Ex3Q6Hc6ePYvPfe5zIQmMy+VCT08P9Ho9WlpamI2d776WSMJJycnJKCoqQlFREQDAbrfDYDBgcXERQ0NDsNlsHo2hOTk5EeWQQiWY6ADi1FCRxEcUGRGP8BgRh1A29aWlJahUKqSnp+Oiiy7ySLZHo3mSr+ulpKRgzZo1WLNmDQAwTaEGgwHnzp2Dw+FATk4Oc9LJyckJe2PnIor+RIftMC1ODRVJNESRucBhj0Umc1lIo6Q/aJrGxMQEhoaGUFNTg6qqqpA6/iMhmonx1NRUrF27FmvXrgVN07BarcxJZ2ZmBi6XixGdUCxw+HrevkSHVPuRk4636IhTQ0WEhigyFyjevS+hGls6HA50d3fDaDRi69atTIWXN8FEhosIxarrPz09Henp6UzjqMViYU46k5OToGnao3ItMzMzJht7KLN02KIjTg0VEQKiyFyAeI9FZt+Vk83fV8hMr9ejs7MTCoUCF110UcAKLYlEEvREFA7x2iglEgkyMjKQkZHBWOCYTCZGdMbGxiCRSGJugUPWFsoAN3FqqEg8EUXmAoK9CfnrfSH/ZosMcU4eGxtDfX091q1bF3Sj4nsji8d8Gn/ryMrKQlZWFsrLy0FRFCM6Op0OIyMjHvYzxI1AKCcdcayBSKwRReYCIdTeF3b8HzifFO/q6oLNZsOOHTuQnZ0d0uPxfZIRKlKpFNnZ2cjOzkZFRQUoisLp06eRkpICtVqNoaEhJCUleZx0YmWBI04NFRECoshcALCtYYL1vrBPL97OyeH0kqzWk0ww2BY4a9asgdvtxtLSEhYXF+NugQP4nhra19eH5ORklJeXi6IjwjuiyKxiuFjDkO8PDAxgdnaWcU4OlwvlJBMMmUyGvLw85OXlAfBvgcN2I4iVBQ5ZH0VRzKmHoihxaqgIr4gis0rhag1DLOwNBgPa29uRmZnJ6fEv1JMMwd/z97bAcTqdjBvB+Pg4TCZTTC1wgH/k37w91wJNDSXl0qLDtEgwRJFZhXj3voS6CczNzaG3txcA0NLSEpH32IV8kglHDJOSklBYWIjCwkIAgS1wiBsB3xY4ZEQBG3FqqAhfiCKzimD3vvgbi+wL4pw8Pz+PTZs2QalURrxRXOgnGa74s8AxGAwYGBiA3W7n3QInFIcHcWqoCFdEkVklkKohEl/n4px80UUXMeW2kW7ooVwjVPsa9s8nCnxtrN4WOFarlTnpeFvg5ObmIjs7O2ybmXD/DkBoU0NF0REBRJFJeMgH22q14vjx4/jc5z4XUuLYl3My2ZyCdf2HQiLZyiQSaWlpSEtLW2GBYzAYMD09DbfbzYhOXl4eMjMzg4oOF5HxJpDo2O122Gw2cVT1BYooMgmMd3I/VPw5JxP4yKcI2SAz2sSy49/bAsdsNjMnnVAtcMjpl++1sa9JRMftdsPtdvstJBAHuK0+RJFJULx7X0gyOJg4LC0tobOzE2lpaSuckwniSSYxkUgkyMzMRGZmZkALHHaPTnp6Oi8nmVDW5sthWpwauvoRRSbBCNT7EugEEopzMiFWOZlwSZSTDCAMUfRlgWM0GmEwGKDRaDA8PAy5XA6aprG4uIicnJyYWuD4Eh1fA9y8w2siiYUoMglEsN4XqVTqU2RCdU4mCDFcJoRNO9GRSqXIyclBTk4OKisrQVEUlpeX0dvbi6WlJXzyySdITk726NGJpQ
VOqKIjTg1NLESRSRDY1WP+kqW+NvZwnJMJQgyXAYlzkkmUdUqlUsZhoKKiAvn5+YwbwczMDPr7++NigQMEFx2DwQC
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plt.figure()\n",
|
|||
|
"ax = fig.add_subplot(111, projection='3d')\n",
|
|||
|
"\n",
|
|||
|
"for dot in dots:\n",
|
|||
|
" ax.scatter(*dot)\n",
|
|||
|
"\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### 1.3.2 Задание\n",
|
|||
|
"\n",
|
|||
|
"Создать 5x5 матрицу со значениями в строках от 0 до 4. Для создания \n",
|
|||
|
"необходимо использовать функцию arange."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 84,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array([[0, 1, 2, 3, 4],\n",
|
|||
|
" [0, 1, 2, 3, 4],\n",
|
|||
|
" [0, 1, 2, 3, 4],\n",
|
|||
|
" [0, 1, 2, 3, 4],\n",
|
|||
|
" [0, 1, 2, 3, 4]])"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 84,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"np.array([np.arange(0, 5) for _ in range(5)])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### 2.3.1 Задание\n",
|
|||
|
"\n",
|
|||
|
"Для предыдущего примера поэкспериментируйте с параметрами классификатора: \n",
|
|||
|
"1. Установите другое количество ближайших соседей (k = 1, 5, 10). \n",
|
|||
|
"2. Установите размер тестовой выборки 15% от всего датасета. \n",
|
|||
|
"3. Постройте графики и оцените качество моделей, проанализируйте результаты. "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### 2.2.1\n",
|
|||
|
"\n",
|
|||
|
"В примере показано создание 2d-массива со значениями x и y. Список \n",
|
|||
|
"target содержит возможные выходные классы (часто называемые \n",
|
|||
|
"метками). Далее происходит обучение классификатора k-ближайших \n",
|
|||
|
"соседей по исходным данным. Далее производится прогноз \n",
|
|||
|
"принадлежности к классам для двух точек данных."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 85,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"KNeighborsClassifier(n_neighbors=3)\n",
|
|||
|
"(-2,-2) is class [0]\n",
|
|||
|
"(1,3) is class [1]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# данные\n",
|
|||
|
"X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n",
|
|||
|
"target = [0, 0, 0, 1, 1, 1]\n",
|
|||
|
"\n",
|
|||
|
"# обучаем модель k-ближайших соседей к данным\n",
|
|||
|
"K = 3\n",
|
|||
|
"model = KNeighborsClassifier(n_neighbors=K)\n",
|
|||
|
"model.fit(X, target)\n",
|
|||
|
"print(model)\n",
|
|||
|
"\n",
|
|||
|
"# делаем прогноз\n",
|
|||
|
"print('(-2,-2) is class', model.predict([[-2, -2]]))\n",
|
|||
|
"print('(1,3) is class', model.predict([[1, 3]]))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### 2.2.2\n",
|
|||
|
"\n",
|
|||
|
"Далее приведем более наглядный пример. Будет построена граница \n",
|
|||
|
"решения для каждого класса. В качестве данных будем использовать уже \n",
|
|||
|
"знакомый нам и встроенный в библиотеку sklearn набор данных ирисов \n",
|
|||
|
"Фишера. Этот набор данных стал уже классическим, и часто используется \n",
|
|||
|
"в литературе для иллюстрации работы различных статистических \n",
|
|||
|
"алгоритмов. Датасет содержит наблюдения за 150 разными цветками \n",
|
|||
|
"ирисов, данные по каждому цветку расположены в строках. В столбцах \n",
|
|||
|
"записаны длина и ширина чашелистика, длина и ширина лепестка, вид \n",
|
|||
|
"ириса. "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 86,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>sepal_length_cm</th>\n",
|
|||
|
" <th>sepal_width_cm</th>\n",
|
|||
|
" <th>petal_length_cm</th>\n",
|
|||
|
" <th>petal_width_cm</th>\n",
|
|||
|
" <th>species</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>5.1</td>\n",
|
|||
|
" <td>3.5</td>\n",
|
|||
|
" <td>1.4</td>\n",
|
|||
|
" <td>0.2</td>\n",
|
|||
|
" <td>setosa</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>4.9</td>\n",
|
|||
|
" <td>3.0</td>\n",
|
|||
|
" <td>1.4</td>\n",
|
|||
|
" <td>0.2</td>\n",
|
|||
|
" <td>setosa</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>4.7</td>\n",
|
|||
|
" <td>3.2</td>\n",
|
|||
|
" <td>1.3</td>\n",
|
|||
|
" <td>0.2</td>\n",
|
|||
|
" <td>setosa</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4.6</td>\n",
|
|||
|
" <td>3.1</td>\n",
|
|||
|
" <td>1.5</td>\n",
|
|||
|
" <td>0.2</td>\n",
|
|||
|
" <td>setosa</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>3.6</td>\n",
|
|||
|
" <td>1.4</td>\n",
|
|||
|
" <td>0.2</td>\n",
|
|||
|
" <td>setosa</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>145</th>\n",
|
|||
|
" <td>6.7</td>\n",
|
|||
|
" <td>3.0</td>\n",
|
|||
|
" <td>5.2</td>\n",
|
|||
|
" <td>2.3</td>\n",
|
|||
|
" <td>virginica</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>146</th>\n",
|
|||
|
" <td>6.3</td>\n",
|
|||
|
" <td>2.5</td>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>1.9</td>\n",
|
|||
|
" <td>virginica</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>147</th>\n",
|
|||
|
" <td>6.5</td>\n",
|
|||
|
" <td>3.0</td>\n",
|
|||
|
" <td>5.2</td>\n",
|
|||
|
" <td>2.0</td>\n",
|
|||
|
" <td>virginica</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>148</th>\n",
|
|||
|
" <td>6.2</td>\n",
|
|||
|
" <td>3.4</td>\n",
|
|||
|
" <td>5.4</td>\n",
|
|||
|
" <td>2.3</td>\n",
|
|||
|
" <td>virginica</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>149</th>\n",
|
|||
|
" <td>5.9</td>\n",
|
|||
|
" <td>3.0</td>\n",
|
|||
|
" <td>5.1</td>\n",
|
|||
|
" <td>1.8</td>\n",
|
|||
|
" <td>virginica</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>150 rows × 5 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm \n",
|
|||
|
"0 5.1 3.5 1.4 0.2 \\\n",
|
|||
|
"1 4.9 3.0 1.4 0.2 \n",
|
|||
|
"2 4.7 3.2 1.3 0.2 \n",
|
|||
|
"3 4.6 3.1 1.5 0.2 \n",
|
|||
|
"4 5.0 3.6 1.4 0.2 \n",
|
|||
|
".. ... ... ... ... \n",
|
|||
|
"145 6.7 3.0 5.2 2.3 \n",
|
|||
|
"146 6.3 2.5 5.0 1.9 \n",
|
|||
|
"147 6.5 3.0 5.2 2.0 \n",
|
|||
|
"148 6.2 3.4 5.4 2.3 \n",
|
|||
|
"149 5.9 3.0 5.1 1.8 \n",
|
|||
|
"\n",
|
|||
|
" species \n",
|
|||
|
"0 setosa \n",
|
|||
|
"1 setosa \n",
|
|||
|
"2 setosa \n",
|
|||
|
"3 setosa \n",
|
|||
|
"4 setosa \n",
|
|||
|
".. ... \n",
|
|||
|
"145 virginica \n",
|
|||
|
"146 virginica \n",
|
|||
|
"147 virginica \n",
|
|||
|
"148 virginica \n",
|
|||
|
"149 virginica \n",
|
|||
|
"\n",
|
|||
|
"[150 rows x 5 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 86,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"url = 'https://raw.githubusercontent.com/akmand/datasets/master/iris.csv'\n",
|
|||
|
"\n",
|
|||
|
"iris = pd.read_csv(url)\n",
|
|||
|
"\n",
|
|||
|
"iris"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 87,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABRgAAAJaCAYAAABELyv0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hU1dbH8e/0mUwaPfQOAoKAgAIKKFIFURRFUSmKvWLlXkVBfbl6QUDFAgoCghUFlSIgigqCIEWK0iH0DumZ+v6RSySmTUKSmQm/z314rjln5cxKdiaZWWfvtQ1+v9+PiIiIiIiIiIiISCEYg52AiIiIiIiIiIiIhC8VGEVERERERERERKTQVGAUERERERERERGRQlOBUURERERERERERApNBUYREREREREREREpNBUYRUREREREREREpNBUYBQREREREREREZFCU4FRRERERERERERECs0c7ATOh8/n4+DBg0RFRWEwGIKdjoiIiEiB+f1+EhMTqVKlCkaj7v2GI70mFRERkXBWFK9Hw7rAePDgQapXrx7sNERERETO2759+6hWrVqw05BC0GtSERERKQ3O5/VoWBcYo6KigIxvQHR0NG63m0WLFtG1a1csFkuQs5O8aKzCh8YqfGiswoPGKXyU1FglJCRQvXr1zNc1En7++Zq0OOh3R/jQWIUPjVV40DiFD41V+PjnWBXF69GwLjCeXYISHR2dWWCMiIggOjpaP8whTmMVPjRW4UNjFR40TuGjpMdKS2vD1z9fkxYH/e4IHxqr8KGxCg8ap/ChsQofuY3V+bweVaMfERERERERERERKTQVGEVERERERERERKTQVGAUERERERERERGRQgvrHoyB8Pv9eDwevF5vsFORc7jdbsxmM2lpaQGPjclkwmw2q0eViIiI5Mjr9fLiiy/y0UcfcfjwYapUqcKgQYN47rnn8nz98OOPPzJs2DA2b95M9erVee655xg0aFDJJS4iIhKCClpPKcz7fCkZJVFPKdUFRpfLxaFDh0hJSQl2KvIPfr+fuLg49u3bV6Af8IiICCpXrozVai3G7ERERCQcvfrqq7zzzjtMmzaNJk2asGbNGgYPHkxMTAyPPPJIjp+ze/durr32Wu677z5mzpzJ999/z913303lypXp1q1bCX8FIiIioaEw9ZTCvs+XklHc9ZRSW2D0+Xzs3r0bk8lElSpVsFqt+gEPIT6fj6SkJCIjIzEa81+p7/f7cblcHDt2jN27d1O/fv2APk9EREQuHCtWrKBPnz5ce+21ANSqVYuPP/6Y3377LdfPeffdd6lduzZjx44FoFGjRvzyyy+MGzdOBUYREbkgFbaeUtD3+VIycqqnFIdSW2B0uVz4fD6qV69OREREsNORf/D5fLhcLux2e8C/eBwOBxaLhb1792Z+roiIiMhZ7dq1Y9KkSWzbto0GDRqwYcMGfvnlF15//fVcP+fXX3/lmmuuyXKsW7duPPbYY8WcrYiISGgqbD2lMO/zpWT8s55iMpmK/DFKbYHxLP1Qly4aTxEREcnNs88+S0JCAhdddBEmkwmv18srr7zCgAEDcv2cw4cPU6lSpSzHKlWqREJCAqmpqTgcjmyfk56eTnp6eubHCQkJQEbvKbfbXURfTVZnr1tc15eio7EKHxqr8KBxKnlutxu/3w9kFA0DdfZz/H5/gT5PSo7f78ftdmeOT1E+v0p9gVFERERELgyfffYZM2fOZNasWTRp0oT169fz2GOPUaVKFQYOHFhkjzN69GhGjhyZ7fiiRYuKfeXM4sWLi/X6UnQ0VuFDYxUeNE4lx2w2ExcXR1JSEi6Xq8Cfn5iYWAxZyflyuVykpqby008/4fF4gL+fV0Wxd0lQC4y1atVi79692Y4/8MADTJw4MQgZ5czn85OQ9nc1N9puwWhUP0cRERGRUPLUU0/x7LPP0r9/fwCaNm3K3r17GT16dK4Fxri4OI4cOZLl2JEjR4iOjs
5x9iLA8OHDGTZsWObHCQkJVK9ena5duxIdHV1EX01WbrebxYsX06VLFywWS7E8hhQNjVX40FiFB41TyUtLS2Pfvn1ERkYWqDWZ3+8nMTGRqKiozJ6Nfj8kpnvwAwYgymZG22MER1paGg6Hgw4dOmAymbI8r86uxjgfQS0wrl69OsvW5Zs2baJLly7069cviFn9zevzk+b28uO2Y8xcuZfjSemUj7Qx4PKadGpQAYfFpEKjiIiISIhISUnJ1k7FZDLluUyrbdu2zJ8/P8uxxYsX07Zt21w/x2azYbPZsh23WCzF/ua3JB5DiobGKnxorMKDxqnkeL1eDAYDRqOxQG3Kzv69NRgM+DGonhJijEYjBoMBi8WS2YPx7POqKJ5bQS0wVqhQIcvH//nPf6hbty4dO3YMUkZ/8/r8HE9K56Z3V7DvZGrm8W1Hklix8wTVyzr44r52VIi0hcyTYs+ePdSuXZt169bRvHnzYKcjIiIiUqJ69+7NK6+8Qo0aNWjSpAnr1q3j9ddfZ8iQIZkxw4cP58CBA0yfPh2A++67j7feeounn36aIUOGsHTpUj777DPmzZsXrC9DREQkrPn8fk4ku8KqngKqqZyvkOnB6HK5+Oijjxg2bFiu25/n11D73OaUXq83s7FoYZqLprl92Z4M59p3MpWb3l3Bwkc74LCExsYjZ7/Own7NJamwzV99Pl9mU9Li2PVIslNT5fChsQoPGqfwUVJjpZ+FovPmm2/y/PPP88ADD3D06FGqVKnCvffey4gRIzJjDh06RHx8fObHtWvXZt68eTz++ONMmDCBatWq8f7779OtW7dgfAkiIiJhL93jD7ie4rSFTFlKzlPIjOScOXM4ffo0gwYNyjUm0IbaixcvPq+mpGaLhWU7z+T6ZDhr38lUlm09Soe6MXiK8M3B3LlzefXVV9m9ezcOh4NmzZoxc+ZMnE4n06dPZ+LEiezdu5caNWpwzz33cPfddwNQt25dAC699FIA2rdvz7fffovP52PMmDFMmzaN48eP06BBA1544QWuueYaIKO4++9//5tvvvmG06dPU6FCBQYPHpzZW2jixInMnDmTvXv3EhsbS/fu3Rk5ciSRkZHn/bUWtPlrTk1JpWSoqXL40FiFB41T+CjusSqKptqSISoqivHjxzN+/PhcYz788MNsxzp16sS6deuKLzEREZELhNli4cdtxwKqp/y07RjdmsQV+SzGL774gpEjR7Jjxw4iIiJo0aIFc+fOxel08v777zN27Fh2795NrVq1eOSRR3jggQeAjJuOAC1atACgY8eO/Pjjj/h8Pl5++WUmTZrEsWPHaNSoEf/5z3/o3r07kFGnGDZsGLNnz+bUqVNUqlSJ++67j+HDhwPw+uuvM3XqVHbt2kXZsmXp3bs3r732WpHUVEJJyBQYP/jgA3r06EGVKlVyjcmvofa5zV+9Xm+hmpICJKR5mLkqPv9A4KNV8bSr15Lo6JybgBfUoUOHuPvuu3n11Ve5/vrrSUxM5JdffiEqKoq5c+fyn//8hzfeeIMWLVqwbt067r33XsqVK8fAgQNZuXIll19+OYsWLaJJkyZYrVaio6MZP348EydO5J133qFFixZMnTqV2267jY0bN1K/fn3Gjh3Ld999x6effkqNGjXYt28f+/bty2xSHhERwZtvvknt2rXZtWsXDz30EK+88sp5bcSTU/PXQJzblLSg4yqFo6bK4UNjFR40TuGjpMaqKJpqi4iIiISCdJ+RmSuzb+abkxkr99KuXjliHNYie/xDhw5x66238tprr3HDDTeQmJjIzz//jN/vZ+bMmYwYMYK33nors6YydOhQnE4nAwcO5LfffqNNmzYsWbIks6YCMGHCBMaOHct7771HixYtmDJlCtdddx2bN2+mfv36vPHGG3z99dd89tlnWWoqZxmNRt54443MmsoDDzzA008/zdtvv11kX3coCIkC4969e1myZAlffvllnnGBNtS2WCyZzSsL2pT0rO
NJ6fkHASeSMmZHFuYxcnLkyBE8Hg833ngjNWvWBOCSSy4BYOTIkYwdO5abbroJyJix+NdffzF58mQGDx5MpUqVgIz
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x700 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Объявляем фигуру из двух графиков и ее размер \n",
|
|||
|
"plt.figure(figsize=(16, 7))\n",
|
|||
|
"\n",
|
|||
|
"# Левый график\n",
|
|||
|
"plt.subplot(121)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" data=iris, # из этой таблицы нарисовать точки\n",
|
|||
|
" x='petal_width_cm', y='petal_length_cm', # с этими координатами,\n",
|
|||
|
" hue='species', # для которых цвет определить согласно этому столбцу \n",
|
|||
|
" s=70 # размер точек\n",
|
|||
|
")\n",
|
|||
|
"plt.xlabel('Ширина лепестка, см') \n",
|
|||
|
"plt.ylabel('Длина лепестка, см') \n",
|
|||
|
"plt.legend() # добавить легенду \n",
|
|||
|
"plt.grid() # добавить сетку\n",
|
|||
|
"\n",
|
|||
|
"# Правый график аналогично \n",
|
|||
|
"plt.subplot(122)\n",
|
|||
|
"sns.scatterplot(data=iris,\n",
|
|||
|
"x='sepal_width_cm', y='sepal_length_cm', hue='species', s=70)\n",
|
|||
|
"plt.xlabel('Ширина чашелистика, см') \n",
|
|||
|
"plt.ylabel('Длина чашелистика, см') \n",
|
|||
|
"plt.legend()\n",
|
|||
|
"plt.grid();"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 88,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"((127, 4), (23, 4), (127,), (23,))"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 88,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(\n",
|
|||
|
" # поскольку iris это pandas-таблица, для нее нужно указывать iloc\n",
|
|||
|
" iris.iloc[:, :-1], # берем все колонки кроме последней в признаки\n",
|
|||
|
" iris.iloc[:, -1], # последнюю в целевую переменную (класс)\n",
|
|||
|
" test_size=0.15 # размер тестовой выборки 15%\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"X_train.shape, X_test.shape, y_train.shape, y_test.shape\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 89,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>sepal_length_cm</th>\n",
|
|||
|
" <th>sepal_width_cm</th>\n",
|
|||
|
" <th>petal_length_cm</th>\n",
|
|||
|
" <th>petal_width_cm</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>14</th>\n",
|
|||
|
" <td>5.8</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>1.2</td>\n",
|
|||
|
" <td>0.2</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>5.4</td>\n",
|
|||
|
" <td>3.9</td>\n",
|
|||
|
" <td>1.7</td>\n",
|
|||
|
" <td>0.4</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>16</th>\n",
|
|||
|
" <td>5.4</td>\n",
|
|||
|
" <td>3.9</td>\n",
|
|||
|
" <td>1.3</td>\n",
|
|||
|
" <td>0.4</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>94</th>\n",
|
|||
|
" <td>5.6</td>\n",
|
|||
|
" <td>2.7</td>\n",
|
|||
|
" <td>4.2</td>\n",
|
|||
|
" <td>1.3</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>122</th>\n",
|
|||
|
" <td>7.7</td>\n",
|
|||
|
" <td>2.8</td>\n",
|
|||
|
" <td>6.7</td>\n",
|
|||
|
" <td>2.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm\n",
|
|||
|
"14 5.8 4.0 1.2 0.2\n",
|
|||
|
"5 5.4 3.9 1.7 0.4\n",
|
|||
|
"16 5.4 3.9 1.3 0.4\n",
|
|||
|
"94 5.6 2.7 4.2 1.3\n",
|
|||
|
"122 7.7 2.8 6.7 2.0"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 89,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"X_train.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 90,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"14 setosa\n",
|
|||
|
"5 setosa\n",
|
|||
|
"16 setosa\n",
|
|||
|
"94 versicolor\n",
|
|||
|
"122 virginica\n",
|
|||
|
"Name: species, dtype: object"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 90,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"y_train.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Обучим метод 1 ближайшего соседа"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 91,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(['versicolor', 'versicolor', 'virginica', 'versicolor',\n",
|
|||
|
" 'versicolor', 'versicolor', 'setosa', 'versicolor', 'versicolor',\n",
|
|||
|
" 'virginica', 'virginica', 'virginica', 'setosa', 'setosa',\n",
|
|||
|
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
|
|||
|
" 'virginica', 'setosa', 'virginica', 'setosa'], dtype=object)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 91,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"model = KNeighborsClassifier(n_neighbors=1)\n",
|
|||
|
"model.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Получим предсказания модели\n",
|
|||
|
"y_pred = model.predict(X_test)\n",
|
|||
|
"y_pred"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 92,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAACyW0lEQVR4nOzdd3RU1dfG8e/09ITeOwSp0lQQBQUpIiqiiIKKiNhFRWyvioL6s4IVC6hgARs2FBEBAQVERaUj0nsv6ZmZzMz7x5VATJskd1Kfz1pZJPfuzN2ZXGD2nHP2sQQCgQAiIiIiIiIVhLWkExARERERESlOKoJERERERKRCUREkIiIiIiIVioogERERERGpUFQEiYiIiIhIhaIiSEREREREKhQVQSIiIiIiUqGoCBIRERERkQrFXtIJFIXf72fv3r1ER0djsVhKOh0RERERESkhgUCApKQkateujdWa91hPmS6C9u7dS7169Uo6DRERERERKSV27dpF3bp184wp00VQdHQ0YPygMTExmce9Xi8//PADvXv3xuFwlFR6Ug7oXhKz6F4SM+g+ErPoXhKzlKZ7KTExkXr16mXWCHkp00XQiSlwMTEx2YqgiIgIYmJiSvyXIWWb7iUxi+4lMYPuIzGL7iUxS2m8l4JZJqPGCCIiIiIiUqGoCBIRERERkQpFRZCIiIiIiFQoZXpNUDACgQAZGRn4fL6STkWKyGazYbfb1Q5dRERERIqkXBdBXq+XvXv3kpqaWtKpiEkiIiKoVasWTqezpFMRERERkTKqXBdBO3fuxG63U7t2bZxOp0YQyrBAIIDH4+HQoUNs27aNZs2a5bsJloiIiIhITsptEWS32/H7/dSuXZuIiIiSTkdMEB4ejsPhYMeOHXg8HsLCwko6JREREREpg8r9W+kaLShf9PsUERERkaLSK0oREREREalQSrQIatiwIRaLJdvH7bffXpJpZeP3Bzie6sn88PsDJZ2SiIiIiIgUUomuCfr999+ztK5eu3YtvXr1YtCgQSWY1Uk+f4B0r49F/xxi+vIdHE52UzXKxdDODTgvvhrhDhtWq5otiIiIiIiUJSVaBFWrVi3L18888wxNmjShe/fuJZTRST5/gMPJbq54cxm7jqZlHv/nQDLLthyhXuVwZt5yNtWiXKWmENq+fTuNGjXir7/+ol27diWdjoiIiIhIqVRqusN5PB4+/PBDRo8enWsra7fbjdvtzvw6MTERMPYD8nq9mcdPfB4IBPD7/fj9/gLnk+71ZyuATrXraBpXvLmM7+/qRrijdCytOvFzFvZnLgv8fj+BQACv14vNZgv59U7cS6feXyKFoXtJzKD7SMyie0nMUprupYLkYAkEAqVigcunn37KkCFD2LlzJ7Vr184x5vHHH2fcuHHZjs+YMSNbG2y73U7NmjWpV69egTfWtDscLN6SwO0z/so39vUh7enWJJYME3/xX3/9Nc8++yzbtm0jPDyctm3bMn36dCIjI3n//feZNGkSO3bsoH79+tx0003ceOONAFSqVCnL43Tt2pVvv/0Wv9/PCy+8wHvvvcfhw4eJj4/nscce44ILLgCMAvThhx/mm2++4fjx41SrVo3hw4czevRoACZNmsT06dPZsWMHcXFx9O3bl3HjxhEVFWXazxwsj8fDrl272L9/PxkZGcV+fREREREpnVJTUxkyZAgJCQnExMTkGVtqRoLeeecdLrzwwlwLIICHHnoo84U5GCNB9erVo3fv3ll+UK/Xy8KFCwkLCyMqKqrA+8kkpmcw/dedQcV++OtOzm7agZiY8AJdIzf79u3jxhtv5Nlnn2XAgAEkJSWxZMkSoqOj+frrr3nmmWd45ZVXaN++PX/99Rc333wzVapUYdiwYSxfvpzOnTvzww8/0KpVK5xOJzExMbz00ktMmjSJN954g/bt2zN16lSGDBnCmjVraNasGRMmTGDu3Ll88skn1K9fn127drFr167M5zQiIo
JXX32VRo0asXXrVu644w6eeuopJk2aZMrPXBDp6emEh4fTrVu3YtknyOv1Mm/ePHr16oXD4Qj59aT80r0kZtB9JGbRvSRmKU330olZYsEoFUXQjh07mD9/Pl988UWecS6XC5fLle24w+HI8Um3WCxYrdZC7S1zONmdfxBwJNkDmLd/zYEDB8jIyODyyy+nQYMGAJx++ukAjBs3jgkTJnDFFVcA0KRJE/7++2+mTJnC8OHDqVGjBmCstTq1mJwwYQIPPPAAQ4YMAeC5555j0aJFvPLKK0yaNIldu3bRrFkzunXrhsVioVGjRllyuueeezI/b9y4MU8++SS33HILb7zxhik/c0FYrVYsFkuuv/NQKe7rSfmle0nMoPtIzKJ7yVyBQIBkbzJ2qx2f34fNasOChTB7+d/gvTTcSwW5fqkogqZOnUr16tW56KKLSjqVTFWjXPxzIDnfuCpRTnJZwlQop59+Oj179qRNmzb06dOH3r17c8UVV+B0OtmyZQsjRoxg5MiRmfEZGRnExsbm+niJiYns3buXrl27ZjnetWtXVq1aBcD1119Pr169aN68OX379qV///707t07M3b+/Pk8/fTT/P333yQmJpKRkUF6ejqpqanZpiGKiIiIVEQp3hRW7F/BlDVTWHXIeI1VyVWJQfGDGNZqGJGOSGzW0K9nluCU+Ip+v9/P1KlTGTZsGHZ7qajJiAlzMLRzg6Bir+3cgGiXeVWvzWZj3rx5zJkzh5YtW/Lqq6/SvHlz1q5dC8CUKVNYuXJl5sfatWtZvnx5ka7ZoUMHtm3bxhNPPEFaWhpXXnll5mjT9u3b6d+/P23btuXzzz/njz/+yJwG5/F4ivbDioiIiJQDKd4UXv7zZe748Y7MAgjgmPsYk9dM5vJvLueY+xilZCm+UAqKoPnz57Nz505uuOGGkk4lk9Vq4bz4atSrnPc6n3qVw+kWX830FtkWi4WuXbsybtw4/vrrL5xOJ0uXLqV27dps3bqVpk2bZvk4MX3tRAOIU/deiomJoXbt2ixdujTLNZYuXUrLli2zxA0ePJgpU6bwySef8Pnnn3P06FH++OMP/H4/EyZMoHPnzsTHx7N3715Tf14RERGRsioQCPDbvt/46O+Pco3Zn7KfuxfeTbI3/1lGUjxKfOild+/epbIqDnPYmHnL2bm2yT6xT1C4w9xhzV9//ZUFCxbQu3dvqlevzq+//sqhQ4do0aIF48aNY9SoUcTGxtK3b1/cbjcrVqzg2LFjjB49murVqxMeHs73339P3bp1CQsLIzY2lvvuu4/HHnuMJk2a0K5dO6ZOncrKlSuZPn06ABMnTqRWrVq0b98eq9XKZ599Rs2aNYmLi6Np06Z4vV5effVVLr74YpYuXcqbb75p6s8sIiIiUlYleZN4e83b+catOrSKo+lHiXZGF0NWkp8SL4JKK5vVQrUoF9/f1Y3F/xziw+U7OJLsoUqUk2s7N6BbfDXCHTbTR4FiYmL46aefeOmll0hMTKRBgwZMmDCBCy+8EDA6tT3//PPcd999REZG0qZNG+6++27AaAv+yiuvMH78eMaOHcu5557LokWLGDVqFAkJCdx7770cPHiQli1bMmvWLJo1awZAdHQ0zz33HJs2bcJms3HGGWfw3XffYbVaOf3005k4cSLPPvssDz30EN26dePpp5/muuuuM/XnFhERESmL7BY7qw+vDir2+23fc/PpN4c4IwmGiqA8WK0WIl12+raqSdemVQgEwGKBaJfD9OLnhBYtWvD999/nen7IkCGZXd5ycuONN2buG3SC1Wrlscce47HHHsvxe0aOHJml2cJ/3XPPPVk6xAFce+21ucaLiIiIVBQZgeD3LUzNSCUQCGAxs6uWFIqKoCBYrRZiwwu24aqIiIiIlH8Oi4NYVywJ7oR8Y1tXaa0CqJQo8cYIIiIiIiJllcVi4fKml+cbF+OMoWudrvnGSfFQESQiIiIiUkhh9jBuaHMDNSJq5Bk3ptMYjQ
KVIiqCRERERESKINIRyYyLZtC6auts52KcMYw/ezy9G/Ym3J739itSfLQmSERERESkCOxWO9XCqzG512QOpx1mztY
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Show on the petal scatter plot what the accuracy figure stands for:\n",
|
|||
|
"# test points that the classifier got wrong are highlighted in red.\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
|
|||
|
"plt.xlabel('Ширина лепестка, см')\n",
|
|||
|
"plt.ylabel('Длина лепестка, см')\n",
|
|||
|
"plt.legend(loc=2)\n",
|
|||
|
"plt.grid()\n",
|
|||
|
"\n",
|
|||
|
"# Walk over every object in the test split\n",
|
|||
|
"for i in range(len(y_test)):\n",
|
|||
|
"    # A prediction that differs from the true label is a miss\n",
|
|||
|
"    if np.array(y_test)[i] != y_pred[i]:\n",
|
|||
|
"        # Mark the miss in red; assumes X_test columns 3 and 2 are petal width and length - TODO confirm\n",
|
|||
|
"        plt.scatter(X_test.iloc[i, 3], X_test.iloc[i, 2], color='red', s=150)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 93,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"accuracy: 0.913\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Model quality: share of correctly classified test points\n",
|
|||
|
"print(f'accuracy: {accuracy_score(y_test, y_pred):.3f}')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Обучим метод 5 ближайших соседей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(['versicolor', 'versicolor', 'virginica', 'versicolor',\n",
|
|||
|
" 'versicolor', 'versicolor', 'setosa', 'versicolor', 'versicolor',\n",
|
|||
|
" 'virginica', 'virginica', 'virginica', 'setosa', 'setosa',\n",
|
|||
|
" 'versicolor', 'setosa', 'versicolor', 'virginica', 'virginica',\n",
|
|||
|
" 'virginica', 'setosa', 'virginica', 'setosa'], dtype=object)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Build a 5-nearest-neighbours classifier and fit it on the training split\n",
|
|||
|
"model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Predict labels for the test split; the last line displays them as the cell output\n",
|
|||
|
"y_pred = model.predict(X_test)\n",
|
|||
|
"y_pred"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 95,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAACyhklEQVR4nOzdd3RU1dfG8e/09ITepQep0lQQBQUpIiqiiIKKgNhFRWyvioL6s4IVC6jYwIYKKCLSFRAVlY5I772kZ2YyM+8fVwIxbULupD6ftbJI7t3M3Zlcwuw55+xjCQQCAURERERERMoJa3EnICIiIiIiUpRUBImIiIiISLmiIkhERERERMoVFUEiIiIiIlKuqAgSEREREZFyRUWQiIiIiIiUKyqCRERERESkXFERJCIiIiIi5Yq9uBMoDL/fz969e4mOjsZisRR3OiIiIiIiUkwCgQBJSUnUrFkTqzXvsZ5SXQTt3buXOnXqFHcaIiIiIiJSQuzatYvatWvnGVOqi6Do6GjA+EZjYmIyj3u9Xn788Ud69OiBw+EorvSkDNC9JGbRvSRm0H0kZtG9JGYpSfdSYmIiderUyawR8lKqi6ATU+BiYmKyFUERERHExMQU+w9DSjfdS2IW3UtiBt1HYhbdS2KWkngvBbNMRo0RRERERESkXFERJCIiIiIi5YqKIBERERERKVdK9ZqgYAQCATIyMvD5fMWdihSSzWbDbrerHbqIiIiIFEqZLoK8Xi979+4lNTW1uFMRk0RERFCjRg2cTmdxpyIiIiIipVSZLoJ27tyJ3W6nZs2aOJ1OjSCUYoFAAI/Hw6FDh9i2bRuNGzfOdxMsEREREZGclNkiyG634/f7qVmzJhEREcWdjpggPDwch8PBjh078Hg8hIWFFXdKIiIiIlIKlfm30jVaULbo5ykiIiIihaVXlCIiIiIiUq4UaxFUr149LBZLto8777yzONPKxu8PcDzVk/nh9weKOyURERERETlNxbom6Pfff8/Sunrt2rV0796d/v37F2NWJ/n8AdK9Phb9c4gpy3dwONlN5SgXgzrU5cL4KoQ7bFitarYgIiIiIlKaFGsRVKVKlSxfP/fcczRs2JAuXboUU0Yn+fwBDie7ufrtZew6mpZ5/J8DySzbcoQ6FcOZdtt5VIlylZhCaPv27dSvX5+//vqL1q1bF3c6IiIiIiIlUonpDufxePjkk08YOXJkrq2s3W43brc78+vExETA2A/I6/VmHj/xeSAQwO/34/f7C5xPutefrQA61a6jaVz99jJ+uKcz4Y6SsbTqxPd5ut9zaeD3+wkEAni9Xmw2W8ivd+JeOvX+EjkdupfEDLqPxCy6l8QsJeleKkgOlkAgUCIWuHzxxRcMHDiQnTt3UrNmzRxjnnzyScaMGZPt+NSpU7O1wbbb7VSvXp06deoUeGNNu8PB4i0J3Dn1r3xj3xzYhs4NY8kw8Qc/Y8YMnn/+ebZt20Z4eDitWrViypQpREZG8tFHHzFhwgR27NjBGWecwS233MLNN98MQIUKFbI8TqdOnfjuu+/w+/289NJLfPjhhxw+fJj4+HieeOIJLr74YsAoQB999FG+/fZbjh8/TpUqVRgyZAgjR44EYMKECUyZMoUdO3YQFxdHr169GDNmDFFRUaZ9z8HyeDzs2rWL/fv3k5GRUeTXFxEREZGSKTU1lYEDB5KQkEBMTEyesSVmJOi9997jkksuybUAAnjkkUcyX5iDMRJUp04devTokeUb9Xq9LFy4kLCwMKKiogq8n0xiegZTft0ZVOwnv+7kvEZtiYkJL9A1crNv3z5uvvlmnn/+efr27UtSUhJLliwhOjqaGTNm8Nxzz/Haa6/Rpk0b/vrrL2699VYqVarE4MGDWb58OR06dODHH3+kefPmOJ1OYmJieOWVV5gwYQJvvfUWbdq0YfLkyQwcOJA1a9bQuHFjxo0bx5w5c/j8888544wz2LVrF7t27cp8TiMiIn
j99depX78+W7du5a677uKZZ55hwoQJpnzPBZGenk54eDidO3cukn2CvF4vc+fOpXv37jgcjpBfT8ou3UtiBt1HYhbdS2KWknQvnZglFowSUQTt2LGDefPm8fXXX+cZ53K5cLlc2Y47HI4cn3SLxYLVaj2tvWUOJ7vzDwKOJHsA8/avOXDgABkZGVx11VXUrVsXgLPOOguAMWPGMG7cOK6++moAGjZsyN9//82kSZMYMmQI1apVA4y1VqcWk+PGjeOhhx5i4MCBALzwwgssWrSI1157jQkTJrBr1y4aN25M586dsVgs1K9fP0tO9913X+bnDRo04Omnn+a2227jrbfeMuV7Lgir1YrFYsn1Zx4qRX09Kbt0L4kZdB+JWXQvmSsQCJDsTcZutePz+7BZbViwEGYv+xu8l4R7qSDXLxFF0OTJk6latSqXXnppcaeSqXKUi38OJOcbVynKSS5LmE7LWWedRbdu3WjZsiU9e/akR48eXH311TidTrZs2cKwYcMYPnx4ZnxGRgaxsbG5Pl5iYiJ79+6lU6dOWY536tSJVatWAXDTTTfRvXt3mjRpQq9evejTpw89evTIjJ03bx7PPvssf//9N4mJiWRkZJCenk5qamq2aYgiIiIi5VGKN4UV+1cwac0kVh0yXmNVcFWgf3x/BjcfTKQjEps19OuZJTjFvqLf7/czefJkBg8ejN1eImoyYsIcDOpQN6jYGzrUJdplXtVrs9mYO3cus2fPplmzZrz++us0adKEtWvXAjBp0iRWrlyZ+bF27VqWL19eqGu2bduWbdu28dRTT5GWlsY111yTOdq0fft2+vTpQ6tWrfjqq6/4448/MqfBeTyewn2zIiIiImVAijeFV/98lbsW3JVZAAEccx9j4pqJXPXtVRxzH6OELMUXSkARNG/ePHbu3MnQoUOLO5VMVquFC+OrUKdi3ut86lQMp3N8FdNbZFssFjp16sSYMWP466+/cDqdLF26lJo1a7J161YaNWqU5ePE9LUTDSBO3XspJiaGmjVrsnTp0izXWLp0Kc2aNcsSN2DAACZNmsTnn3/OV199xdGjR/njjz/w+/2MGzeODh06EB8fz969e039fkVERERKq0AgwG/7fuPTvz/NNWZ/yn7uXXgvyd78ZxlJ0Sj2oZcePXqUyKo4zGFj2m3n5dom+8Q+QeEOc4c1f/31V+bPn0+PHj2oWrUqv/76K4cOHaJp06aMGTOGESNGEBsbS69evXC73axYsYJjx44xcuRIqlatSnh4OD/88AO1a9cmLCyM2NhYHnjgAZ544gkaNmxI69atmTx5MitXrmTKlCkAjB8/nho1atCmTRusVitffvkl1atXJy4ujkaNGuH1enn99de57LLLWLp0KW+//bap37OIiIhIaZXkTeLdNe/mG7fq0CqOph8l2hldBFlJfoq9CCqpbFYLVaJc/HBPZxb/c4hPlu/gSLKHSlFObuhQl87xVQh32EwfBYqJieGnn37ilVdeITExkbp16zJu3DguueQSwOjU9uKLL/LAAw8QGRlJy5YtuffeewGjLfhrr73G2LFjGT16NBdccAGLFi1ixIgRJCQkcP/993Pw4EGaNWvGzJkzady4MQDR0dG88MILbNq0CZvNxtlnn83333+P1WrlrLPOYvz48Tz//PM88sgjdO7cmWeffZYbb7zR1O9bREREpDSyW+ysPrw6qNgftv3ArWfdGuKMJBgqgvJgtVqIdNnp1bw6nRpVIhAAiwWiXQ7Ti58TmjZtyg8//JDr+YEDB2Z2ecvJzTffnLlv0AlWq5UnnniCJ554Ise/M3z48CzNFv7rvvvuy9IhDuCGG27INV5ERESkvMgIBL9vYWpGKoFAAIuZXbXktKgICoLVaiE2vGAbroqIiIhI2eewOIh1xZLgTsg3tkWlFiqASohib4wgIiIiIlJaWSwWrmp0Vb5xMc4YOtXqlG+cFA0VQSIiIiIipynMHsbQlkOpFlEtz7hR7UdpFK
gEUREkIiIiIlIIkY5Ipl46lRaVW2Q7F+OMYex5Y+lRrwfh9ry3X5GiozVBIiIiIiKFYLfaqRJehYndJ3I47TCzt84
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Show on the petal scatter plot what the accuracy figure stands for:\n",
|
|||
|
"# test points that the classifier got wrong are highlighted in red.\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
|
|||
|
"plt.xlabel('Ширина лепестка, см')\n",
|
|||
|
"plt.ylabel('Длина лепестка, см')\n",
|
|||
|
"plt.legend(loc=2)\n",
|
|||
|
"plt.grid()\n",
|
|||
|
"\n",
|
|||
|
"# Walk over every object in the test split\n",
|
|||
|
"for i in range(len(y_test)):\n",
|
|||
|
"    # A prediction that differs from the true label is a miss\n",
|
|||
|
"    if np.array(y_test)[i] != y_pred[i]:\n",
|
|||
|
"        # Mark the miss in red; assumes X_test columns 3 and 2 are petal width and length - TODO confirm\n",
|
|||
|
"        plt.scatter(X_test.iloc[i, 3], X_test.iloc[i, 2], color='red', s=150)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 96,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"accuracy: 0.957\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Model quality: share of correctly classified test points\n",
|
|||
|
"print(f'accuracy: {accuracy_score(y_test, y_pred):.3f}')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Обучим метод 10 ближайших соседей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 97,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(['versicolor', 'versicolor', 'virginica', 'versicolor',\n",
|
|||
|
" 'versicolor', 'versicolor', 'setosa', 'versicolor', 'versicolor',\n",
|
|||
|
" 'virginica', 'virginica', 'virginica', 'setosa', 'setosa',\n",
|
|||
|
" 'versicolor', 'setosa', 'versicolor', 'versicolor', 'virginica',\n",
|
|||
|
" 'virginica', 'setosa', 'virginica', 'setosa'], dtype=object)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 97,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Build a 10-nearest-neighbours classifier and fit it on the training split\n",
|
|||
|
"model = KNeighborsClassifier(n_neighbors=10).fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Predict labels for the test split; the last line displays them as the cell output\n",
|
|||
|
"y_pred = model.predict(X_test)\n",
|
|||
|
"y_pred"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 98,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJaCAYAAADpm0w1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAACyW0lEQVR4nOzdd3RU1dfG8e/09ITeOwSp0lQQBQUpIiqiiIKKiNhFRWyvioL6s4IVC6hgARs2FBEBAQVERaUj0nsv6ZmZzMz7x5VATJskd1Kfz1pZJPfuzN2ZXGD2nHP2sQQCgQAiIiIiIiIVhLWkExARERERESlOKoJERERERKRCUREkIiIiIiIVioogERERERGpUFQEiYiIiIhIhaIiSEREREREKhQVQSIiIiIiUqGoCBIRERERkQrFXtIJFIXf72fv3r1ER0djsVhKOh0RERERESkhgUCApKQkateujdWa91hPmS6C9u7dS7169Uo6DRERERERKSV27dpF3bp184wp00VQdHQ0YPygMTExmce9Xi8//PADvXv3xuFwlFR6Ug7oXhKz6F4SM+g+ErPoXhKzlKZ7KTExkXr16mXWCHkp00XQiSlwMTEx2YqgiIgIYmJiSvyXIWWb7iUxi+4lMYPuIzGL7iUxS2m8l4JZJqPGCCIiIiIiUqGoCBIRERERkQpFRZCIiIiIiFQoZXpNUDACgQAZGRn4fL6STkWKyGazYbfb1Q5dRERERIqkXBdBXq+XvXv3kpqaWtKpiEkiIiKoVasWTqezpFMRERERkTKqXBdBO3fuxG63U7t2bZxOp0YQyrBAIIDH4+HQoUNs27aNZs2a5bsJloiIiIhITsptEWS32/H7/dSuXZuIiIiSTkdMEB4ejsPhYMeOHXg8HsLCwko6JREREREpg8r9W+kaLShf9PsUERERkaLSK0oREREREalQSrQIatiwIRaLJdvH7bffXpJpZeP3Bzie6sn88PsDJZ2SiIiIiIgUUomuCfr999+ztK5eu3YtvXr1YtCgQSWY1Uk+f4B0r49F/xxi+vIdHE52UzXKxdDODTgvvhrhDhtWq5otiIiIiIiUJSVaBFWrVi3L18888wxNmjShe/fuJZTRST5/gMPJbq54cxm7jqZlHv/nQDLLthyhXuVwZt5yNtWiXKWmENq+fTuNGjXir7/+ol27diWdjoiIiIhIqVRqusN5PB4+/PBDRo8enWsra7fbjdvtzvw6MTERMPYD8nq9mcdPfB4IBPD7/fj9/gLnk+71ZyuATrXraBpXvLmM7+/qRrijdCytOvFzFvZnLgv8fj+BQACv14vNZgv59U7cS6feXyKFoXtJzKD7SMyie0nMUprupYLkYAkEAqVigcunn37KkCFD2LlzJ7Vr184x5vHHH2fcuHHZjs+YMSNbG2y73U7NmjWpV69egTfWtDscLN6SwO0z/so39vUh7enWJJYME3/xX3/9Nc8++yzbtm0jPDyctm3bMn36dCIjI3n//feZNGkSO3bsoH79+tx0003ceOONAFSqVCnL43Tt2pVvv/0Wv9/PCy+8wHvvvcfhw4eJj4/nscce44ILLgCMAvThhx/mm2++4fjx41SrVo3hw4czevRoACZNmsT06dPZsWMHcXFx9O3bl3HjxhEVFWXazxwsj8fDrl272L9/PxkZGcV+fREREREpnVJTUxkyZAgJCQnExMTkGVtqRoLeeecdLrzwwlwLIICHHnoo84U5GCNB9erVo3fv3ll+UK/Xy8KFCwkLCyMqKqrA+8kkpmcw/dedQcV++OtOzm7agZiY8AJdIzf79u3jxhtv5Nlnn2XAgAEkJSWxZMkSoqOj+frrr3nmmWd45ZVXaN++PX/99Rc333wzVapUYdiwYSxfvpzOnTvzww8/0KpVK5xOJzExMbz00ktMmjSJN954g/bt2zN16lSGDBnCmjVraNasGRMmTGDu3Ll88skn1K9fn127drFr167M5zQiIo
JXX32VRo0asXXrVu644w6eeuopJk2aZMrPXBDp6emEh4fTrVu3YtknyOv1Mm/ePHr16oXD4Qj59aT80r0kZtB9JGbRvSRmKU330olZYsEoFUXQjh07mD9/Pl988UWecS6XC5fLle24w+HI8Um3WCxYrdZC7S1zONmdfxBwJNkDmLd/zYEDB8jIyODyyy+nQYMGAJx++ukAjBs3jgkTJnDFFVcA0KRJE/7++2+mTJnC8OHDqVGjBmCstTq1mJwwYQIPPPAAQ4YMAeC5555j0aJFvPLKK0yaNIldu3bRrFkzunXrhsVioVGjRllyuueeezI/b9y4MU8++SS33HILb7zxhik/c0FYrVYsFkuuv/NQKe7rSfmle0nMoPtIzKJ7yVyBQIBkbzJ2qx2f34fNasOChTB7+d/gvTTcSwW5fqkogqZOnUr16tW56KKLSjqVTFWjXPxzIDnfuCpRTnJZwlQop59+Oj179qRNmzb06dOH3r17c8UVV+B0OtmyZQsjRoxg5MiRmfEZGRnExsbm+niJiYns3buXrl27ZjnetWtXVq1aBcD1119Pr169aN68OX379qV///707t07M3b+/Pk8/fTT/P333yQmJpKRkUF6ejqpqanZpiGKiIiIVEQp3hRW7F/BlDVTWHXIeI1VyVWJQfGDGNZqGJGOSGzW0K9nluCU+Ip+v9/P1KlTGTZsGHZ7qajJiAlzMLRzg6Bir+3cgGiXeVWvzWZj3rx5zJkzh5YtW/Lqq6/SvHlz1q5dC8CUKVNYuXJl5sfatWtZvnx5ka7ZoUMHtm3bxhNPPEFaWhpXXnll5mjT9u3b6d+/P23btuXzzz/njz/+yJwG5/F4ivbDioiIiJQDKd4UXv7zZe748Y7MAgjgmPsYk9dM5vJvLueY+xilZCm+UAqKoPnz57Nz505uuOGGkk4lk9Vq4bz4atSrnPc6n3qVw+kWX830FtkWi4WuXbsybtw4/vrrL5xOJ0uXLqV27dps3bqVpk2bZvk4MX3tRAOIU/deiomJoXbt2ixdujTLNZYuXUrLli2zxA0ePJgpU6bwySef8Pnnn3P06FH++OMP/H4/EyZMoHPnzsTHx7N3715Tf14RERGRsioQCPDbvt/46O+Pco3Zn7KfuxfeTbI3/1lGUjxKfOild+/epbIqDnPYmHnL2bm2yT6xT1C4w9xhzV9//ZUFCxbQu3dvqlevzq+//sqhQ4do0aIF48aNY9SoUcTGxtK3b1/cbjcrVqzg2LFjjB49murVqxMeHs73339P3bp1CQsLIzY2lvvuu4/HHnuMJk2a0K5dO6ZOncrKlSuZPn06ABMnTqRWrVq0b98eq9XKZ599Rs2aNYmLi6Np06Z4vV5effVVLr74YpYuXcqbb75p6s8sIiIiUlYleZN4e83b+catOrSKo+lHiXZGF0NWkp8SL4JKK5vVQrUoF9/f1Y3F/xziw+U7OJLsoUqUk2s7N6BbfDXCHTbTR4FiYmL46aefeOmll0hMTKRBgwZMmDCBCy+8EDA6tT3//PPcd999REZG0qZNG+6++27AaAv+yiuvMH78eMaOHcu5557LokWLGDVqFAkJCdx7770cPHiQli1bMmvWLJo1awZAdHQ0zz33HJs2bcJms3HGGWfw3XffYbVaOf3005k4cSLPPvssDz30EN26dePpp5/muuuuM/XnFhERESmL7BY7qw+vDir2+23fc/PpN4c4IwmGiqA8WK0WIl12+raqSdemVQgEwGKBaJfD9OLnhBYtWvD999/nen7IkCGZXd5ycuONN2buG3SC1Wrlscce47HHHsvxe0aOHJml2cJ/3XPPPVk6xAFce+21ucaLiIiIVBQZgeD3LUzNSCUQCGAxs6uWFIqKoCBYrRZiwwu24aqIiIiIlH8Oi4NYVywJ7oR8Y1tXaa0CqJQo8cYIIiIiIiJllcVi4fKml+cbF+OMoWudrvnGSfFQESQiIiIiUkhh9jBuaHMDNSJq5Bk3ptMYjQ
KVIiqCRERERESKINIRyYyLZtC6auts52KcMYw/ezy9G/Ym3J739itSfLQmSERERESkCOxWO9XCqzG512QOpx1mztY
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Show on the petal scatter plot what the accuracy figure stands for:\n",
|
|||
|
"# test points that the classifier got wrong are highlighted in red.\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x='petal_width_cm', y='petal_length_cm', data=iris, hue='species', s=70)\n",
|
|||
|
"plt.xlabel('Ширина лепестка, см')\n",
|
|||
|
"plt.ylabel('Длина лепестка, см')\n",
|
|||
|
"plt.legend(loc=2)\n",
|
|||
|
"plt.grid()\n",
|
|||
|
"\n",
|
|||
|
"# Walk over every object in the test split\n",
|
|||
|
"for i in range(len(y_test)):\n",
|
|||
|
"    # A prediction that differs from the true label is a miss\n",
|
|||
|
"    if np.array(y_test)[i] != y_pred[i]:\n",
|
|||
|
"        # Mark the miss in red; assumes X_test columns 3 and 2 are petal width and length - TODO confirm\n",
|
|||
|
"        plt.scatter(X_test.iloc[i, 3], X_test.iloc[i, 2], color='red', s=150)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 99,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"accuracy: 0.913\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Model quality: share of correctly classified test points\n",
|
|||
|
"print(f'accuracy: {accuracy_score(y_test, y_pred):.3f}')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### 3.3.2 Задание\n",
|
|||
|
"\n",
|
|||
|
"Определите набор признаков человека по аналогии с РТ 1 (например, \n",
|
|||
|
"цвет глаз) и конвертируйте его в матрицу признаков."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 103,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array([[2., 0., 3., 1.],\n",
|
|||
|
" [0., 1., 1., 1.],\n",
|
|||
|
" [5., 3., 0., 5.],\n",
|
|||
|
" [3., 4., 2., 0.]])"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 103,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# One dict per sample: eye colour name mapped to a numeric value\n",
|
|||
|
"data_dict = [\n",
|
|||
|
"    {\"карий\": 3, \"голубой\": 2, \"серый\": 1},\n",
|
|||
|
"    {\"карий\": 1, \"зеленый\": 1, \"серый\": 1},\n",
|
|||
|
"    {\"зеленый\": 3, \"голубой\": 5, \"серый\": 5},\n",
|
|||
|
"    {\"карий\": 2, \"голубой\": 3, \"зеленый\": 4},\n",
|
|||
|
"]\n",
|
|||
|
"dictvectoriser = DictVectorizer(sparse=False)  # dense ndarray output instead of a sparse matrix\n",
|
|||
|
"features = dictvectoriser.fit_transform(data_dict); features"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.0"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|