{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Рабочая тетрадь № 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd \n", "from sklearn.preprocessing import MinMaxScaler, StandardScaler" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3.1 Задание \n", "\n", "Создать 8x8 матрицу и заполнить её в шахматном порядке нулями и \n", "единицами." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 1, 0, 1, 0, 1, 0, 1],\n", " [1, 0, 1, 0, 1, 0, 1, 0],\n", " [0, 1, 0, 1, 0, 1, 0, 1],\n", " [1, 0, 1, 0, 1, 0, 1, 0],\n", " [0, 1, 0, 1, 0, 1, 0, 1],\n", " [1, 0, 1, 0, 1, 0, 1, 0],\n", " [0, 1, 0, 1, 0, 1, 0, 1],\n", " [1, 0, 1, 0, 1, 0, 1, 0]])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.array([[(x + y) % 2 for x in range(8)] for y in range(8)])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3.2 Задание\n", "\n", "Создать 5x5 матрицу со значениями в строках от 0 до 4. Для создания \n", "необходимо использовать функцию arange. " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 1, 2, 3, 4],\n", " [0, 1, 2, 3, 4],\n", " [0, 1, 2, 3, 4],\n", " [0, 1, 2, 3, 4],\n", " [0, 1, 2, 3, 4]])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.array([np.arange(0, 5) for _ in range(5)])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3.3 Задание\n", "\n", "Создать массив 3x3x3 со случайными значениями. 
" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[[0.34392799, 0.718808 , 0.49594499],\n", " [0.12167775, 0.56056024, 0.59003049],\n", " [0.12481231, 0.79707319, 0.66605017]],\n", "\n", " [[0.11550937, 0.29438156, 0.69728858],\n", " [0.3432886 , 0.35701781, 0.72659151],\n", " [0.73779222, 0.09585279, 0.40705831]],\n", "\n", " [[0.23874481, 0.80360945, 0.53127737],\n", " [0.85959837, 0.16119215, 0.78824553],\n", " [0.53977056, 0.71800074, 0.93729907]]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.random.random((3, 3, 3))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3.4 Задание\n", "\n", "Создать матрицу с 0 внутри, и 1 на границах." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 1, 1, 1, 1, 1, 1, 1],\n", " [1, 0, 0, 0, 0, 0, 0, 1],\n", " [1, 0, 0, 0, 0, 0, 0, 1],\n", " [1, 0, 0, 0, 0, 0, 0, 1],\n", " [1, 0, 0, 0, 0, 0, 0, 1],\n", " [1, 0, 0, 0, 0, 0, 0, 1],\n", " [1, 0, 0, 0, 0, 0, 0, 1],\n", " [1, 1, 1, 1, 1, 1, 1, 1]])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.array([[int((x in [0, 7]) or (y in [0, 7])) for x in range(8)] for y in range(8)])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3.5 Задание\n", "\n", "Создайте массив и отсортируйте его по убыванию. " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr = np.arange(0, 10)\n", "np.sort(arr)[::-1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3.6 Задание\n", "\n", "Создайте матрицу, выведите ее форму, размер и размерность." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(8, 10) 2 80\n" ] } ], "source": [ "arr = np.array([np.arange(0, 10) for _ in range(8)])\n", "\n", "print(arr.shape, arr.ndim, arr.size)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.3.1 Задание\n", "\n", "Найдите евклидово расстояние между двумя Series (точками) a и b, не \n", "используя встроенную формулу." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.1622776601683795\n" ] } ], "source": [ "from math import sqrt\n", "\n", "first_dot = pd.Series([1, 3])\n", "second_dot = pd.Series([4, 2])\n", "\n", "s = 0\n", "for dim in range(first_dot.size):\n", " s += (first_dot.array[dim] - second_dot.array[dim]) ** 2\n", "\n", "print(sqrt(s))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.3.2 Задание \n", "\n", "Найдите в Интернете ссылку на любой csv файл и сформируйте из него \n", "фрейм данных (например, коллекцию фреймов данных можно найти \n", "здесь: https://github.com/akmand/datasets)." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AirlineFlightAirportFromAirportToDayOfWeekTimeLengthDelay
0CO269SFOIAH3152051
1US1558PHXCLT3152221
2AA2400LAXDFW3201651
3AA2466SFODFW3201951
4AS108ANCSEA3302020
...........................
539378CO178OGGSNA514393260
539379FL398SEAATL514393050
539380FL609SFOMKE514392550
539381UA78HNLSFO514393131
539382US1442LAXPHL514393011
\n", "

539383 rows × 8 columns

\n", "
" ], "text/plain": [ " Airline Flight AirportFrom AirportTo DayOfWeek Time Length Delay\n", "0 CO 269 SFO IAH 3 15 205 1\n", "1 US 1558 PHX CLT 3 15 222 1\n", "2 AA 2400 LAX DFW 3 20 165 1\n", "3 AA 2466 SFO DFW 3 20 195 1\n", "4 AS 108 ANC SEA 3 30 202 0\n", "... ... ... ... ... ... ... ... ...\n", "539378 CO 178 OGG SNA 5 1439 326 0\n", "539379 FL 398 SEA ATL 5 1439 305 0\n", "539380 FL 609 SFO MKE 5 1439 255 0\n", "539381 UA 78 HNL SFO 5 1439 313 1\n", "539382 US 1442 LAX PHL 5 1439 301 1\n", "\n", "[539383 rows x 8 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "url = 'https://raw.githubusercontent.com/akmand/datasets/refs/heads/main/airlines.csv'\n", "\n", "df = pd.read_csv(url)\n", "\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.3.3 Задание\n", "\n", "Проделайте с получившимся из предыдущего задания фреймом данных \n", "те же действия, что и в примерах 2.2.5-2.2.7. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2.2.5\n", "\n", "Проанализировать характеристики фрейма данных." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AirlineFlightAirportFromAirportToDayOfWeekTimeLengthDelay
0CO269SFOIAH3152051
1US1558PHXCLT3152221
\n", "
" ], "text/plain": [ " Airline Flight AirportFrom AirportTo DayOfWeek Time Length Delay\n", "0 CO 269 SFO IAH 3 15 205 1\n", "1 US 1558 PHX CLT 3 15 222 1" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(2)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AirlineFlightAirportFromAirportToDayOfWeekTimeLengthDelay
539380FL609SFOMKE514392550
539381UA78HNLSFO514393131
539382US1442LAXPHL514393011
\n", "
" ], "text/plain": [ " Airline Flight AirportFrom AirportTo DayOfWeek Time Length Delay\n", "539380 FL 609 SFO MKE 5 1439 255 0\n", "539381 UA 78 HNL SFO 5 1439 313 1\n", "539382 US 1442 LAX PHL 5 1439 301 1" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail(3)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(539383, 8)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FlightDayOfWeekTimeLengthDelay
count539383.000000539383.000000539383.000000539383.000000539383.000000
mean2427.9286303.929668802.728963132.2020070.445442
std2067.4298371.914664278.04591170.1170160.497015
min1.0000001.00000010.0000000.0000000.000000
25%712.0000002.000000565.00000081.0000000.000000
50%1809.0000004.000000795.000000115.0000000.000000
75%3745.0000005.0000001035.000000162.0000001.000000
max7814.0000007.0000001439.000000655.0000001.000000
\n", "
" ], "text/plain": [ " Flight DayOfWeek Time Length \n", "count 539383.000000 539383.000000 539383.000000 539383.000000 \\\n", "mean 2427.928630 3.929668 802.728963 132.202007 \n", "std 2067.429837 1.914664 278.045911 70.117016 \n", "min 1.000000 1.000000 10.000000 0.000000 \n", "25% 712.000000 2.000000 565.000000 81.000000 \n", "50% 1809.000000 4.000000 795.000000 115.000000 \n", "75% 3745.000000 5.000000 1035.000000 162.000000 \n", "max 7814.000000 7.000000 1439.000000 655.000000 \n", "\n", " Delay \n", "count 539383.000000 \n", "mean 0.445442 \n", "std 0.497015 \n", "min 0.000000 \n", "25% 0.000000 \n", "50% 0.000000 \n", "75% 1.000000 \n", "max 1.000000 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2.2.6\n", "\n", "Выберите индивидуальные данные или срезы фрейма данных." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AirlineFlightAirportFromAirportToDayOfWeekTimeLengthDelay
1US1558PHXCLT3152221
2AA2400LAXDFW3201651
3AA2466SFODFW3201951
\n", "
" ], "text/plain": [ " Airline Flight AirportFrom AirportTo DayOfWeek Time Length Delay\n", "1 US 1558 PHX CLT 3 15 222 1\n", "2 AA 2400 LAX DFW 3 20 165 1\n", "3 AA 2466 SFO DFW 3 20 195 1" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[1:4]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2.2.7\n", "\n", "Требуется отобрать строки фрейма данных на основе некоторого \n", "условия." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AirlineFlightAirportFromAirportToDayOfWeekTimeLengthDelay
1US1558PHXCLT3152221
15US498DENCLT3551790
\n", "
" ], "text/plain": [ " Airline Flight AirportFrom AirportTo DayOfWeek Time Length Delay\n", "1 US 1558 PHX CLT 3 15 222 1\n", "15 US 498 DEN CLT 3 55 179 0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['Airline'] == 'US'].head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.3.2 Задание\n", "\n", "Загрузить фрейм данных по ссылке: \n", "https://raw.githubusercontent.com/akmand/datasets/master/iris.csv. \n", "Необходимо выполнить нормализацию первого числового признака \n", "(sepal_length_cm) с использованием минимаксного преобразования, а \n", "второго (sepal_width_cm) с задействованием z-масштабирования. " ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
..................
1456.73.05.22.3virginica
1466.32.55.01.9virginica
1476.53.05.22.0virginica
1486.23.45.42.3virginica
1495.93.05.11.8virginica
\n", "

150 rows × 5 columns

\n", "
" ], "text/plain": [ " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm \n", "0 5.1 3.5 1.4 0.2 \\\n", "1 4.9 3.0 1.4 0.2 \n", "2 4.7 3.2 1.3 0.2 \n", "3 4.6 3.1 1.5 0.2 \n", "4 5.0 3.6 1.4 0.2 \n", ".. ... ... ... ... \n", "145 6.7 3.0 5.2 2.3 \n", "146 6.3 2.5 5.0 1.9 \n", "147 6.5 3.0 5.2 2.0 \n", "148 6.2 3.4 5.4 2.3 \n", "149 5.9 3.0 5.1 1.8 \n", "\n", " species \n", "0 setosa \n", "1 setosa \n", "2 setosa \n", "3 setosa \n", "4 setosa \n", ".. ... \n", "145 virginica \n", "146 virginica \n", "147 virginica \n", "148 virginica \n", "149 virginica \n", "\n", "[150 rows x 5 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "url = 'https://raw.githubusercontent.com/akmand/datasets/master/iris.csv'\n", "\n", "iris_df = pd.read_csv(url)\n", "\n", "iris_df" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "length_feature = np.array(iris_df['sepal_length_cm']).reshape(-1,1)\n", "\n", "minmax_scale = MinMaxScaler(feature_range = (0, 1))\n", "scaled_sepal_length = minmax_scale.fit_transform(length_feature)\n", "iris_df['sepal_length_cm'] = scaled_sepal_length;" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "width_feature = np.array(iris_df['sepal_width_cm']).reshape(-1,1)\n", "\n", "z_scale = StandardScaler()\n", "scaled_sepal_width = z_scale.fit_transform(width_feature)\n", "iris_df['sepal_width_cm'] = scaled_sepal_width" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal_length_cmsepal_width_cmpetal_length_cmpetal_width_cmspecies
00.2222221.0320571.40.2setosa
10.166667-0.1249581.40.2setosa
20.1111110.3378481.30.2setosa
30.0833330.1064451.50.2setosa
40.1944441.2634601.40.2setosa
..................
1450.666667-0.1249585.22.3virginica
1460.555556-1.2819725.01.9virginica
1470.611111-0.1249585.22.0virginica
1480.5277780.8006545.42.3virginica
1490.444444-0.1249585.11.8virginica
\n", "

150 rows × 5 columns

\n", "
" ], "text/plain": [ " sepal_length_cm sepal_width_cm petal_length_cm petal_width_cm \n", "0 0.222222 1.032057 1.4 0.2 \\\n", "1 0.166667 -0.124958 1.4 0.2 \n", "2 0.111111 0.337848 1.3 0.2 \n", "3 0.083333 0.106445 1.5 0.2 \n", "4 0.194444 1.263460 1.4 0.2 \n", ".. ... ... ... ... \n", "145 0.666667 -0.124958 5.2 2.3 \n", "146 0.555556 -1.281972 5.0 1.9 \n", "147 0.611111 -0.124958 5.2 2.0 \n", "148 0.527778 0.800654 5.4 2.3 \n", "149 0.444444 -0.124958 5.1 1.8 \n", "\n", " species \n", "0 setosa \n", "1 setosa \n", "2 setosa \n", "3 setosa \n", "4 setosa \n", ".. ... \n", "145 virginica \n", "146 virginica \n", "147 virginica \n", "148 virginica \n", "149 virginica \n", "\n", "[150 rows x 5 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris_df" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 2 }