Return Home


Airline Ticket Price¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import sys
In [3]:
bom_blr_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_BOM_BLR.csv")
bom_blr_data
Out[3]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
0 Air India AI 621 Mumbai 03:55 Bengaluru 05:50 01 h 55 m 2,307
1 AirAsia I5 670 Mumbai 19:55 Bengaluru 21:45 01 h 50 m 2,773
2 AirAsia I5 2992 Mumbai 23:55 Bengaluru 01:45\r\n+ 1 DAY 01 h 50 m 2,773
3 IndiGo 6E 5388 Mumbai 21:30 Bengaluru 23:15 01 h 45 m 2,839
4 Akasa Air QP 1103 Mumbai 00:45 Bengaluru 02:20 01 h 35 m 3,005
... ... ... ... ... ... ... ... ...
85 Vistara UK 840 Bengaluru 21:35 Mumbai 23:20 01 h 45 m 11,038
86 Vistara UK 850 Bengaluru 19:55 Mumbai 21:35 01 h 40 m 11,934
87 Vistara UK 864 Bengaluru 19:00 Mumbai 20:35 01 h 35 m 14,596
88 Vistara UK 864 Bengaluru 19:00 Mumbai 20:35 01 h 35 m 14,596
89 Air India AI 642 Bengaluru 21:25 Mumbai 23:20 01 h 55 m 20,581

90 rows × 8 columns

In [4]:
bom_blr_data.shape
Out[4]:
(90, 8)
In [5]:
bom_blr_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   FlightName     90 non-null     object
 1   FlightCode     90 non-null     object
 2   DepartingCity  90 non-null     object
 3   DepartingTime  90 non-null     object
 4   ArrivingCity   90 non-null     object
 5   ArrivingTime   90 non-null     object
 6   Duration       90 non-null     object
 7   Price          90 non-null     object
dtypes: object(8)
memory usage: 5.8+ KB
In [6]:
bom_blr_data.describe()
Out[6]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
count 90 90 90 90 90 90 90 90
unique 5 70 2 58 2 61 9 53
top IndiGo 6E 5124 Bengaluru 09:20 Mumbai 11:35 01 h 50 m 3,005
freq 39 3 46 3 46 3 21 5
In [7]:
del_blr_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_BLR.csv")
del_blr_data
Out[7]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
0 Akasa Air QP 1409 New Delhi 23:00 Bengaluru 01:45\n+ 1 DAY 02 h 45 m 5,230
1 AirAsia I5 740 New Delhi 07:50 Bengaluru 10:40 02 h 50 m 5,235
2 AirAsia I5 1534 New Delhi 22:55 Bengaluru 01:35\n+ 1 DAY 02 h 40 m 5,235
3 IndiGo 6E 2067 New Delhi 03:55 Bengaluru 06:45 02 h 50 m 5,236
4 IndiGo 6E 6612 New Delhi 05:50 Bengaluru 08:35 02 h 45 m 5,236
... ... ... ... ... ... ... ... ...
90 Air India AI 808 Bengaluru 21:00 New Delhi 23:55 02 h 55 m 10,711
91 Vistara UK 816 Bengaluru 11:30 New Delhi 14:10 02 h 40 m 10,774
92 Vistara UK 816 Bengaluru 11:30 New Delhi 14:10 02 h 40 m 10,774
93 IndiGo 6E 2339 Bengaluru 00:30 New Delhi 03:20 02 h 50 m 12,445
94 Vistara UK 808 Bengaluru 08:00 New Delhi 10:35 02 h 35 m 13,399

95 rows × 8 columns

In [8]:
del_blr_data.shape
Out[8]:
(95, 8)
In [9]:
del_blr_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   FlightName     95 non-null     object
 1   FlightCode     95 non-null     object
 2   DepartingCity  95 non-null     object
 3   DepartingTime  95 non-null     object
 4   ArrivingCity   95 non-null     object
 5   ArrivingTime   95 non-null     object
 6   Duration       95 non-null     object
 7   Price          95 non-null     object
dtypes: object(8)
memory usage: 6.1+ KB
In [10]:
del_blr_data.describe()
Out[10]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
count 95 95 95 95 95 95 95 95
unique 6 75 2 63 2 69 10 40
top IndiGo 6E 6612 New Delhi 08:00 Bengaluru 20:45 02 h 45 m 5,354
freq 36 3 48 6 48 3 26 11
In [11]:
del_bom_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_BOM.csv")
del_bom_data
Out[11]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
0 Akasa Air QP 1719 New Delhi 09:00 Mumbai 10:55 01 h 55 m 4,758
1 Akasa Air QP 1411 New Delhi 10:55 Mumbai 13:05 02 h 10 m 4,758
2 Akasa Air QP 1128 New Delhi 16:00 Mumbai 18:10 02 h 10 m 4,758
3 IndiGo 6E 2009 New Delhi 01:00 Mumbai 03:00 02 h 4,780
4 IndiGo 6E 2112 New Delhi 05:30 Mumbai 07:35 02 h 05 m 4,780
... ... ... ... ... ... ... ... ...
95 Vistara UK 902 Mumbai 15:45 New Delhi 18:10 02 h 25 m 10,539
96 Vistara UK 910 Mumbai 17:25 New Delhi 19:35 02 h 10 m 10,539
97 Vistara UK 996 Mumbai 18:30 New Delhi 20:40 02 h 10 m 10,539
98 Vistara UK 950 Mumbai 21:55 New Delhi 00:10\n+ 1 DAY 02 h 15 m 10,539
99 Vistara UK 986 Mumbai 22:50 New Delhi 01:00\n+ 1 DAY 02 h 10 m 10,539

100 rows × 8 columns

In [12]:
del_bom_data.shape
Out[12]:
(100, 8)
In [14]:
del_bom_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   FlightName     100 non-null    object
 1   FlightCode     100 non-null    object
 2   DepartingCity  100 non-null    object
 3   DepartingTime  100 non-null    object
 4   ArrivingCity   100 non-null    object
 5   ArrivingTime   100 non-null    object
 6   Duration       100 non-null    object
 7   Price          100 non-null    object
dtypes: object(8)
memory usage: 6.4+ KB
In [15]:
del_bom_data.describe()
Out[15]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
count 100 100 100 100 100 100 100 100
unique 5 80 2 61 2 72 9 15
top IndiGo AI 864 New Delhi 09:00 Mumbai 01:00\n+ 1 DAY 02 h 10 m 4,780
freq 52 3 50 4 50 3 30 37
In [16]:
del_ccu_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_CCU.csv")
del_ccu_data
Out[16]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
0 IndiGo 6E 2284 New Delhi 05:25 Kolkata 07:55 02 h 30 m 5,408
1 Vistara UK 747 New Delhi 06:15 Kolkata 08:25 02 h 10 m 5,408
2 IndiGo 6E 5219 New Delhi 06:25 Kolkata 08:35 02 h 10 m 5,408
3 IndiGo 6E 6182 New Delhi 15:55 Kolkata 18:10 02 h 15 m 5,408
4 IndiGo 6E 2415 New Delhi 17:45 Kolkata 20:00 02 h 15 m 5,408
5 IndiGo 6E 2415 New Delhi 17:45 Kolkata 20:00 02 h 15 m 5,408
6 IndiGo 6E 2415 New Delhi 17:45 Kolkata 20:00 02 h 15 m 5,408
7 IndiGo 6E 2057 New Delhi 19:00 Kolkata 21:05 02 h 05 m 5,408
8 IndiGo 6E 2057 New Delhi 19:00 Kolkata 21:05 02 h 05 m 5,408
9 SpiceJet SG 8251 New Delhi 19:30 Kolkata 21:40 02 h 10 m 5,408
10 IndiGo 6E 2517 New Delhi 20:30 Kolkata 22:40 02 h 10 m 5,408
11 IndiGo 6E 6005 New Delhi 22:50 Kolkata 01:00\n+ 1 DAY 02 h 10 m 5,408
12 IndiGo 6E 2603 New Delhi 23:50 Kolkata 02:05\n+ 1 DAY 02 h 15 m 5,408
13 Vistara UK 737 New Delhi 15:45 Kolkata 18:05 02 h 20 m 5,440
14 Vistara UK 737 New Delhi 15:45 Kolkata 18:05 02 h 20 m 5,440
15 Air India AI 767 New Delhi 12:20 Kolkata 14:30 02 h 10 m 5,492
16 Air India AI 764 New Delhi 17:05 Kolkata 19:15 02 h 10 m 5,492
17 Air India AI 764 New Delhi 17:05 Kolkata 19:15 02 h 10 m 5,492
18 Air India AI 762 New Delhi 21:00 Kolkata 23:30 02 h 30 m 5,492
19 IndiGo 6E 2788 New Delhi 02:55 Kolkata 05:10 02 h 15 m 5,849
20 IndiGo 6E 2788 New Delhi 02:55 Kolkata 05:10 02 h 15 m 5,849
21 IndiGo 6E 282 New Delhi 10:10 Kolkata 12:10 02 h 5,849
22 IndiGo 6E 6557 New Delhi 13:00 Kolkata 15:00 02 h 5,849
23 Vistara UK 707 New Delhi 17:35 Kolkata 19:35 02 h 5,881
24 SpiceJet SG 8263 New Delhi 07:10 Kolkata 09:30 02 h 20 m 5,986
25 Air India AI 401 New Delhi 06:55 Kolkata 09:00 02 h 05 m 6,374
26 Air India AI 401 New Delhi 06:55 Kolkata 09:00 02 h 05 m 6,374
27 Vistara UK 705 New Delhi 07:40 Kolkata 09:45 02 h 05 m 8,474
28 Vistara UK 727 New Delhi 20:30 Kolkata 22:20 01 h 50 m 10,469
29 IndiGo 6E 5213 New Delhi 09:00 Kolkata 11:05 02 h 05 m 15,298
30 IndiGo 6E 5214 Kolkata 05:25 New Delhi 07:50 02 h 25 m 6,020
31 IndiGo 6E 375 Kolkata 07:00 New Delhi 09:15 02 h 15 m 6,020
32 IndiGo 6E 6183 Kolkata 18:55 New Delhi 21:10 02 h 15 m 6,020
33 IndiGo 6E 898 Kolkata 19:35 New Delhi 21:55 02 h 20 m 6,020
34 IndiGo 6E 2224 Kolkata 20:55 New Delhi 23:10 02 h 15 m 6,020
35 IndiGo 6E 2224 Kolkata 20:55 New Delhi 23:10 02 h 15 m 6,020
36 IndiGo 6E 2224 Kolkata 20:55 New Delhi 23:10 02 h 15 m 6,020
37 IndiGo 6E 2716 Kolkata 21:45 New Delhi 00:05\n+ 1 DAY 02 h 20 m 6,020
38 IndiGo 6E 2716 Kolkata 21:45 New Delhi 00:05\n+ 1 DAY 02 h 20 m 6,020
39 SpiceJet SG 8265 Kolkata 22:20 New Delhi 00:30\n+ 1 DAY 02 h 10 m 6,020
40 IndiGo 6E 2746 Kolkata 23:45 New Delhi 02:00\n+ 1 DAY 02 h 15 m 6,020
41 Air India AI 763 Kolkata 06:55 New Delhi 09:15 02 h 20 m 6,063
42 Vistara UK 778 Kolkata 15:15 New Delhi 17:50 02 h 35 m 6,063
43 Air India AI 768 Kolkata 15:25 New Delhi 18:10 02 h 45 m 6,063
44 Air India AI 768 Kolkata 15:25 New Delhi 18:10 02 h 45 m 6,063
45 Air India AI 770 Kolkata 20:00 New Delhi 22:35 02 h 35 m 6,063
46 SpiceJet SG 8373 Kolkata 10:50 New Delhi 13:10 02 h 20 m 6,064
47 SpiceJet SG 8373 Kolkata 10:50 New Delhi 13:10 02 h 20 m 6,064
48 Vistara UK 720 Kolkata 07:10 New Delhi 09:35 02 h 25 m 6,095
49 Vistara UK 706 Kolkata 10:25 New Delhi 12:40 02 h 15 m 6,095
50 Vistara UK 706 Kolkata 10:25 New Delhi 12:40 02 h 15 m 6,095
51 Vistara UK 738 Kolkata 18:45 New Delhi 21:20 02 h 35 m 6,095
52 Vistara UK 708 Kolkata 20:30 New Delhi 23:00 02 h 30 m 6,095
53 IndiGo 6E 2513 Kolkata 08:30 New Delhi 10:55 02 h 25 m 6,212
54 IndiGo 6E 2759 Kolkata 15:40 New Delhi 18:00 02 h 20 m 6,212
55 IndiGo 6E 2345 Kolkata 10:10 New Delhi 12:30 02 h 20 m 6,420
56 IndiGo 6E 2345 Kolkata 10:10 New Delhi 12:30 02 h 20 m 6,420
57 IndiGo 6E 2358 Kolkata 11:40 New Delhi 14:00 02 h 20 m 6,420
58 Air India AI 769 Kolkata 10:00 New Delhi 12:20 02 h 20 m 8,100
59 IndiGo 6E 2516 Kolkata 16:45 New Delhi 19:05 02 h 20 m 11,820
In [17]:
del_ccu_data.shape
Out[17]:
(60, 8)
In [18]:
del_ccu_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   FlightName     60 non-null     object
 1   FlightCode     60 non-null     object
 2   DepartingCity  60 non-null     object
 3   DepartingTime  60 non-null     object
 4   ArrivingCity   60 non-null     object
 5   ArrivingTime   60 non-null     object
 6   Duration       60 non-null     object
 7   Price          60 non-null     object
dtypes: object(8)
memory usage: 3.9+ KB
In [19]:
del_ccu_data.describe()
Out[19]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
count 60 60 60 60 60 60 60 60
unique 4 46 2 40 2 44 10 18
top IndiGo 6E 2224 New Delhi 06:55 Kolkata 23:10 02 h 15 m 5,408
freq 32 3 30 3 30 3 15 13
In [20]:
del_hyd_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_HYD.csv")
del_hyd_data
Out[20]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
0 Akasa Air QP 1406 New Delhi 12:45 Hyderabad 15:05 02 h 20 m 4,548
1 IndiGo 6E 2461 New Delhi 01:55 Hyderabad 04:10 02 h 15 m 4,750
2 IndiGo 6E 2337 New Delhi 05:40 Hyderabad 07:55 02 h 15 m 4,750
3 IndiGo 6E 6203 New Delhi 06:30 Hyderabad 08:40 02 h 10 m 4,750
4 Air India AI 560 New Delhi 07:10 Hyderabad 09:20 02 h 10 m 4,750
5 Air India AI 560 New Delhi 07:10 Hyderabad 09:20 02 h 10 m 4,750
6 Air India AI 560 New Delhi 07:10 Hyderabad 09:20 02 h 10 m 4,750
7 IndiGo 6E 774 New Delhi 08:10 Hyderabad 10:20 02 h 10 m 4,750
8 IndiGo 6E 774 New Delhi 08:10 Hyderabad 10:20 02 h 10 m 4,750
9 Air India AI 542 New Delhi 09:30 Hyderabad 12:00 02 h 30 m 4,750
10 IndiGo 6E 2005 New Delhi 10:10 Hyderabad 12:15 02 h 05 m 4,750
11 Air India AI 522 New Delhi 11:05 Hyderabad 13:25 02 h 20 m 4,750
12 IndiGo 6E 837 New Delhi 13:30 Hyderabad 15:45 02 h 15 m 4,750
13 IndiGo 6E 2187 New Delhi 15:05 Hyderabad 17:20 02 h 15 m 4,750
14 IndiGo 6E 2187 New Delhi 15:05 Hyderabad 17:20 02 h 15 m 4,750
15 Air India AI 544 New Delhi 17:00 Hyderabad 19:10 02 h 10 m 4,750
16 IndiGo 6E 5312 New Delhi 17:15 Hyderabad 19:25 02 h 10 m 4,750
17 IndiGo 6E 5312 New Delhi 17:15 Hyderabad 19:25 02 h 10 m 4,750
18 IndiGo 6E 2341 New Delhi 18:45 Hyderabad 21:00 02 h 15 m 4,750
19 IndiGo 6E 605 New Delhi 19:45 Hyderabad 22:00 02 h 15 m 4,750
20 IndiGo 6E 605 New Delhi 19:45 Hyderabad 22:00 02 h 15 m 4,750
21 IndiGo 6E 2371 New Delhi 21:25 Hyderabad 23:40 02 h 15 m 4,750
22 Air India AI 839 New Delhi 21:30 Hyderabad 23:45 02 h 15 m 4,750
23 Vistara UK 879 New Delhi 17:35 Hyderabad 19:50 02 h 15 m 4,873
24 Vistara UK 859 New Delhi 10:25 Hyderabad 12:35 02 h 10 m 4,904
25 Vistara UK 899 New Delhi 14:45 Hyderabad 17:00 02 h 15 m 4,925
26 Vistara UK 899 New Delhi 14:45 Hyderabad 17:00 02 h 15 m 4,925
27 SpiceJet SG 8164 New Delhi 23:00 Hyderabad 01:00\n+ 1 DAY 02 h 4,981
28 Vistara UK 829 New Delhi 07:20 Hyderabad 09:30 02 h 10 m 5,692
29 Vistara UK 871 New Delhi 20:35 Hyderabad 22:55 02 h 20 m 6,343
30 Akasa Air QP 1407 Hyderabad 19:25 New Delhi 21:45 02 h 20 m 4,979
31 IndiGo 6E 379 Hyderabad 05:05 New Delhi 07:25 02 h 20 m 4,986
32 IndiGo 6E 491 Hyderabad 09:30 New Delhi 11:50 02 h 20 m 4,986
33 IndiGo 6E 2171 Hyderabad 11:55 New Delhi 14:05 02 h 10 m 4,986
34 IndiGo 6E 2063 Hyderabad 14:25 New Delhi 16:45 02 h 20 m 4,986
35 IndiGo 6E 2063 Hyderabad 14:25 New Delhi 16:45 02 h 20 m 4,986
36 IndiGo 6E 2063 Hyderabad 14:25 New Delhi 16:45 02 h 20 m 4,986
37 IndiGo 6E 2003 Hyderabad 18:15 New Delhi 20:30 02 h 15 m 4,986
38 IndiGo 6E 2003 Hyderabad 18:15 New Delhi 20:30 02 h 15 m 4,986
39 IndiGo 6E 5605 Hyderabad 21:55 New Delhi 00:05\n+ 1 DAY 02 h 10 m 4,986
40 IndiGo 6E 6146 Hyderabad 22:45 New Delhi 01:00\n+ 1 DAY 02 h 15 m 4,986
41 Air India AI 543 Hyderabad 10:05 New Delhi 12:30 02 h 25 m 5,052
42 IndiGo 6E 2011 Hyderabad 06:45 New Delhi 09:00 02 h 15 m 5,210
43 IndiGo 6E 2342 Hyderabad 08:40 New Delhi 10:45 02 h 05 m 5,210
44 IndiGo 6E 2342 Hyderabad 08:40 New Delhi 10:45 02 h 05 m 5,210
45 IndiGo 6E 6606 Hyderabad 16:30 New Delhi 18:50 02 h 20 m 5,210
46 Vistara UK 880 Hyderabad 13:15 New Delhi 15:35 02 h 20 m 5,401
47 Vistara UK 880 Hyderabad 13:15 New Delhi 15:35 02 h 20 m 5,401
48 Vistara UK 890 Hyderabad 17:40 New Delhi 19:55 02 h 15 m 5,453
49 Air India AI 559 Hyderabad 06:15 New Delhi 08:45 02 h 30 m 5,472
50 Air India AI 559 Hyderabad 06:15 New Delhi 08:45 02 h 30 m 5,472
51 Air India AI 523 Hyderabad 14:10 New Delhi 16:35 02 h 25 m 5,472
52 Air India AI 541 Hyderabad 16:25 New Delhi 18:50 02 h 25 m 5,472
53 Air India AI 840 Hyderabad 20:50 New Delhi 23:20 02 h 30 m 5,472
54 SpiceJet SG 160 Hyderabad 06:10 New Delhi 08:25 02 h 15 m 5,629
55 Vistara UK 830 Hyderabad 10:10 New Delhi 12:30 02 h 20 m 5,905
56 Vistara UK 830 Hyderabad 10:10 New Delhi 12:30 02 h 20 m 5,905
57 Vistara UK 860 Hyderabad 07:00 New Delhi 08:55 01 h 55 m 7,585
58 Vistara UK 870 Hyderabad 20:35 New Delhi 22:45 02 h 10 m 7,606
In [21]:
del_hyd_data.shape
Out[21]:
(59, 8)
In [22]:
del_hyd_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   FlightName     59 non-null     object
 1   FlightCode     59 non-null     object
 2   DepartingCity  59 non-null     object
 3   DepartingTime  59 non-null     object
 4   ArrivingCity   59 non-null     object
 5   ArrivingTime   59 non-null     object
 6   Duration       59 non-null     object
 7   Price          59 non-null     object
dtypes: object(8)
memory usage: 3.8+ KB
In [23]:
del_hyd_data.describe()
Out[23]:
FlightName FlightCode DepartingCity DepartingTime ArrivingCity ArrivingTime Duration Price
count 59 59 59 59 59 59 59 59
unique 5 45 2 42 2 42 8 19
top IndiGo 6E 2063 New Delhi 07:10 Hyderabad 16:45 02 h 15 m 4,750
freq 29 3 30 3 30 3 19 22
In [24]:
bom_blr_data["Price"] = bom_blr_data["Price"].str.replace(",", "")
del_blr_data["Price"] = del_blr_data["Price"].str.replace(",", "")
del_bom_data["Price"] = del_bom_data["Price"].str.replace(",", "")
del_ccu_data["Price"] = del_ccu_data["Price"].str.replace(",", "")
del_hyd_data["Price"] = del_hyd_data["Price"].str.replace(",", "")
In [25]:
bom_blr_data = bom_blr_data.astype({"Price":int})
del_blr_data = del_blr_data.astype({"Price":int})
del_bom_data = del_bom_data.astype({"Price":int})
del_ccu_data = del_ccu_data.astype({"Price":int})
del_hyd_data = del_hyd_data.astype({"Price":int})
In [26]:
bom_blr_data["DepartingHour"] = bom_blr_data["DepartingTime"].str.split(":").str[0]
del_blr_data["DepartingHour"] = del_blr_data["DepartingTime"].str.split(":").str[0]
del_bom_data["DepartingHour"] = del_bom_data["DepartingTime"].str.split(":").str[0]
del_ccu_data["DepartingHour"] = del_ccu_data["DepartingTime"].str.split(":").str[0]
del_hyd_data["DepartingHour"] = del_hyd_data["DepartingTime"].str.split(":").str[0]
In [27]:
bom_blr_data = bom_blr_data.astype({"DepartingHour" : int})
del_blr_data = del_blr_data.astype({"DepartingHour" : int})
del_bom_data = del_bom_data.astype({"DepartingHour" : int})
del_ccu_data = del_ccu_data.astype({"DepartingHour" : int})
del_hyd_data = del_hyd_data.astype({"DepartingHour" : int})

Pairplot method¶

In [28]:
sns.pairplot(bom_blr_data, kind='scatter', height=3.5)
Out[28]:
<seaborn.axisgrid.PairGrid at 0x1c29bba1510>
In [29]:
sns.pairplot(del_blr_data, kind='scatter', height=3.5)
Out[29]:
<seaborn.axisgrid.PairGrid at 0x1c29d070a10>
In [30]:
sns.pairplot(del_bom_data, kind='scatter', height=3.5)
Out[30]:
<seaborn.axisgrid.PairGrid at 0x1c29bcb2790>
In [31]:
sns.pairplot(del_ccu_data, kind='scatter', height=3.5)
Out[31]:
<seaborn.axisgrid.PairGrid at 0x1c29d117250>
In [33]:
sns.pairplot(del_hyd_data, kind='scatter', height=3.5)
Out[33]:
<seaborn.axisgrid.PairGrid at 0x1c29d798dd0>

lmplot method between DepartingHour vs Price¶

In [34]:
sns.lmplot(bom_blr_data, x='DepartingHour', y='Price', line_kws = {'color':'black'}, scatter_kws = {'color':'darkgrey'})
plt.title('DepartingHour vs Price')
Out[34]:
Text(0.5, 1.0, 'DepartingHour vs Price')
In [35]:
sns.lmplot(del_blr_data, x='DepartingHour', y='Price', line_kws = {'color':'black'}, scatter_kws = {'color':'darkgrey'})
plt.title('DepartingHour vs Price')
Out[35]:
Text(0.5, 1.0, 'DepartingHour vs Price')
In [36]:
sns.lmplot(del_bom_data, x='DepartingHour', y='Price', line_kws = {'color':'black'}, scatter_kws = {'color':'darkgrey'})
plt.title('DepartingHour vs Price')
Out[36]:
Text(0.5, 1.0, 'DepartingHour vs Price')
In [37]:
sns.lmplot(del_ccu_data, x='DepartingHour', y='Price', line_kws = {'color':'black'}, scatter_kws = {'color':'darkgrey'})
plt.title('DepartingHour vs Price')
Out[37]:
Text(0.5, 1.0, 'DepartingHour vs Price')
In [38]:
sns.lmplot(del_hyd_data, x='DepartingHour', y='Price', line_kws = {'color':'black'}, scatter_kws = {'color':'darkgrey'})
plt.title('DepartingHour vs Price')
Out[38]:
Text(0.5, 1.0, 'DepartingHour vs Price')

Barplot method between FlightName vs Price¶

In [39]:
bom_blr_data_avg_price = bom_blr_data.groupby('FlightName')['Price'].mean().reset_index().sort_values(by='Price', ascending=False)
sns.barplot(x='FlightName', y='Price', data=bom_blr_data_avg_price)
plt.title('Barplot FlightName vs Avg. Price')
Out[39]:
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
In [40]:
del_blr_data_avg_price = del_blr_data.groupby('FlightName')['Price'].mean().reset_index().sort_values(by='Price', ascending=False)
sns.barplot(x='FlightName', y='Price', data=del_blr_data_avg_price)
plt.title('Barplot FlightName vs Avg. Price')
Out[40]:
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
In [41]:
del_bom_data_avg_price = del_bom_data.groupby('FlightName')['Price'].mean().reset_index().sort_values(by='Price', ascending=False)
sns.barplot(x='FlightName', y='Price', data=del_bom_data_avg_price)
plt.title('Barplot FlightName vs Avg. Price')
Out[41]:
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
In [42]:
del_ccu_data_avg_price = del_ccu_data.groupby('FlightName')['Price'].mean().reset_index().sort_values(by='Price', ascending=False)
sns.barplot(x='FlightName', y='Price', data=del_ccu_data_avg_price)
plt.title('Barplot FlightName vs Avg. Price')
Out[42]:
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
In [43]:
del_hyd_data_avg_price = del_hyd_data.groupby('FlightName')['Price'].mean().reset_index().sort_values(by='Price', ascending=False)
sns.barplot(x='FlightName', y='Price', data=del_hyd_data_avg_price)
plt.title('Barplot FlightName vs Avg. Price')
Out[43]:
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')

Pie Chart method using for FlightName¶

In [44]:
bom_blr_data.groupby("FlightName").size().plot(kind='pie', autopct='%1.0f%%', subplots=True)
plt.title('Pie chart Bombay --> Bengaluru Flight')
plt.show()
In [45]:
del_blr_data.groupby("FlightName").size().plot(kind='pie', autopct='%1.0f%%', subplots=True)
plt.title('Pie chart Delhi --> Bengaluru Flight')
plt.show()
In [46]:
del_bom_data.groupby("FlightName").size().plot(kind='pie', autopct='%1.0f%%', subplots=True)
plt.title('Pie chart Delhi --> Bombay Flight')
plt.show()
In [47]:
del_ccu_data.groupby("FlightName").size().plot(kind='pie', autopct='%1.0f%%', subplots=True)
plt.title('Pie chart Delhi --> Kolkata Flight')
plt.show()
In [48]:
del_hyd_data.groupby("FlightName").size().plot(kind='pie', autopct='%1.0f%%', subplots=True)
plt.title('Pie chart Delhi --> Hydrabad Flight')
plt.show()

Lineplot method using DepartingHour, Price and ArrivingCity¶

In [49]:
sns.lineplot(bom_blr_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
Out[49]:
<Axes: xlabel='DepartingHour', ylabel='Price'>
In [50]:
sns.lineplot(del_blr_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
Out[50]:
<Axes: xlabel='DepartingHour', ylabel='Price'>
In [51]:
sns.lineplot(del_bom_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
Out[51]:
<Axes: xlabel='DepartingHour', ylabel='Price'>
In [52]:
sns.lineplot(del_ccu_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
Out[52]:
<Axes: xlabel='DepartingHour', ylabel='Price'>
In [53]:
sns.lineplot(del_hyd_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
Out[53]:
<Axes: xlabel='DepartingHour', ylabel='Price'>

Polynomial Regression¶

In [54]:
bom_blr_x = bom_blr_data['DepartingHour']
bom_blr_y = bom_blr_data['Price']

model = np.poly1d(np.polyfit(bom_blr_x, bom_blr_y, 3))
line = np.linspace(1, 30, 90)
In [55]:
plt.scatter(bom_blr_x, bom_blr_y)
plt.plot(line, model(line))
plt.title('Polynomial Regression [Bombay --> Bengaluru]')
plt.show()
In [56]:
del_blr_x = del_blr_data['DepartingHour']
del_blr_y = del_blr_data['Price']

model = np.poly1d(np.polyfit(del_blr_x, del_blr_y, 3))
line = np.linspace(1, 30, 95)
In [57]:
plt.scatter(del_blr_x, del_blr_y)
plt.plot(line, model(line))
plt.title('Polynomial Regression [Delhi --> Bengaluru]')
plt.show()
In [58]:
del_bom_x = del_bom_data['DepartingHour']
del_bom_y = del_bom_data['Price']

model = np.poly1d(np.polyfit(del_bom_x, del_bom_y, 3))
line = np.linspace(1, 30, 100)
In [59]:
plt.scatter(del_bom_x, del_bom_y)
plt.plot(line, model(line))
plt.title('Polynomial Regression [Delhi --> Bombay]')
plt.show()
In [60]:
del_ccu_x = del_ccu_data['DepartingHour']
del_ccu_y = del_ccu_data['Price']

model = np.poly1d(np.polyfit(del_ccu_x, del_ccu_y, 3))
line = np.linspace(1, 30, 60)
In [61]:
plt.scatter(del_ccu_x, del_ccu_y)
plt.plot(line, model(line))
plt.title('Polynomial Regression [Delhi --> Kolkata]')
plt.show()
In [62]:
del_hyd_x = del_hyd_data['DepartingHour']
del_hyd_y = del_hyd_data['Price']

model = np.poly1d(np.polyfit(del_hyd_x, del_hyd_y, 3))
line = np.linspace(1, 30, 59)
In [63]:
plt.scatter(del_hyd_x, del_hyd_y)
plt.plot(line, model(line))
plt.title('Polynomial Regression [Delhi --> Hydrabad]')
plt.show()

Decision Tree method¶

In [64]:
bom_blr_d = {'Air India':0, 'AirAsia':1, 'IndiGo':2, 'Akasa Air':3, 'Vistara':4}
bom_blr_data['FlightName'] = bom_blr_data['FlightName'].map(bom_blr_d)
In [65]:
bom_blr_d = {'Bengaluru':0, 'Mumbai':1}
bom_blr_data['ArrivingCity'] = bom_blr_data['ArrivingCity'].map(bom_blr_d)
In [66]:
features = ['DepartingHour', 'Price', 'FlightName', 'ArrivingCity']
bom_blr_x = bom_blr_data[features]
bom_blr_y = bom_blr_data['ArrivingCity']
In [67]:
bom_blr_dtree = DecisionTreeClassifier()
bom_blr_dtree = bom_blr_dtree.fit(bom_blr_x, bom_blr_y)
In [68]:
tree.plot_tree(bom_blr_dtree, feature_names=features)
Out[68]:
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 90\nvalue = [44, 46]'),
 Text(0.25, 0.25, 'gini = 0.0\nsamples = 44\nvalue = [44, 0]'),
 Text(0.75, 0.25, 'gini = 0.0\nsamples = 46\nvalue = [0, 46]')]
In [69]:
del_blr_d = {'Akasa Air':0, 'AirAsia':1, 'IndiGo':2, 'Air India':3, 'SpiceJet':4, 'Vistara':5}
del_blr_data['FlightName'] = del_blr_data['FlightName'].map(del_blr_d)
In [70]:
del_blr_d = {'Bengaluru':0, 'New Delhi':1}
del_blr_data['ArrivingCity'] = del_blr_data['ArrivingCity'].map(del_blr_d)
In [71]:
features = ['DepartingHour', 'Price', 'FlightName', 'ArrivingCity']
del_blr_x = del_blr_data[features]
del_blr_y = del_blr_data['ArrivingCity']
In [72]:
del_blr_dtree = DecisionTreeClassifier()
del_blr_dtree = del_blr_dtree.fit(del_blr_x, del_blr_y)
In [73]:
tree.plot_tree(del_blr_dtree, feature_names=features)
Out[73]:
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 95\nvalue = [48, 47]'),
 Text(0.25, 0.25, 'gini = 0.0\nsamples = 48\nvalue = [48, 0]'),
 Text(0.75, 0.25, 'gini = 0.0\nsamples = 47\nvalue = [0, 47]')]
In [74]:
del_bom_d = {'Akasa Air':0, 'IndiGo':1, 'Air India':2, 'SpiceJet':3, 'Vistara':4}
del_bom_data['FlightName'] = del_bom_data['FlightName'].map(del_bom_d)
In [75]:
del_bom_d = {'Mumbai':0, 'New Delhi':1}
del_bom_data['ArrivingCity'] = del_bom_data['ArrivingCity'].map(del_bom_d)
In [76]:
features = ['DepartingHour', 'Price', 'FlightName', 'ArrivingCity']
del_bom_x = del_bom_data[features]
del_bom_y = del_bom_data['ArrivingCity']
In [77]:
del_bom_dtree = DecisionTreeClassifier()
del_bom_dtree = del_bom_dtree.fit(del_bom_x, del_bom_y)
In [78]:
tree.plot_tree(del_bom_dtree, feature_names=features)
Out[78]:
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 100\nvalue = [50, 50]'),
 Text(0.25, 0.25, 'gini = 0.0\nsamples = 50\nvalue = [50, 0]'),
 Text(0.75, 0.25, 'gini = 0.0\nsamples = 50\nvalue = [0, 50]')]
In [79]:
del_ccu_d = {'IndiGo':0, 'Vistara':1, 'SpiceJet':2, 'Air India':3}
del_ccu_data['FlightName'] = del_ccu_data['FlightName'].map(del_ccu_d)
In [80]:
del_ccu_d = {'Kolkata':0, 'New Delhi':1}
del_ccu_data['ArrivingCity'] = del_ccu_data['ArrivingCity'].map(del_ccu_d)
In [81]:
features = ['DepartingHour', 'Price', 'FlightName', 'ArrivingCity']
del_ccu_x = del_ccu_data[features]
del_ccu_y = del_ccu_data['ArrivingCity']
In [82]:
del_ccu_dtree = DecisionTreeClassifier()
del_ccu_dtree = del_ccu_dtree.fit(del_ccu_x, del_ccu_y)
In [83]:
tree.plot_tree(del_ccu_dtree, feature_names=features)
Out[83]:
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 60\nvalue = [30, 30]'),
 Text(0.25, 0.25, 'gini = 0.0\nsamples = 30\nvalue = [30, 0]'),
 Text(0.75, 0.25, 'gini = 0.0\nsamples = 30\nvalue = [0, 30]')]
In [84]:
del_hyd_d = {'Akasa Air':0, 'IndiGo':1, 'Air India':2, 'Vistara':3, 'SpiceJet':4}
del_hyd_data['FlightName'] = del_hyd_data['FlightName'].map(del_hyd_d)
In [85]:
del_hyd_d = {'Hyderabad':0, 'New Delhi':1}
del_hyd_data['ArrivingCity'] = del_hyd_data['ArrivingCity'].map(del_hyd_d)
In [86]:
features = ['DepartingHour', 'Price', 'FlightName', 'ArrivingCity']
del_hyd_x = del_hyd_data[features]
del_hyd_y = del_hyd_data['ArrivingCity']
In [87]:
del_hyd_dtree = DecisionTreeClassifier()
del_hyd_dtree = del_hyd_dtree.fit(del_hyd_x, del_hyd_y)
In [88]:
tree.plot_tree(del_hyd_dtree, feature_names=features)
Out[88]:
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 59\nvalue = [30, 29]'),
 Text(0.25, 0.25, 'gini = 0.0\nsamples = 30\nvalue = [30, 0]'),
 Text(0.75, 0.25, 'gini = 0.0\nsamples = 29\nvalue = [0, 29]')]

Catplot Method using DepartingHour, Price and DepartingCity¶¶

In [89]:
sns.catplot(bom_blr_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
Out[89]:
<seaborn.axisgrid.FacetGrid at 0x1c2a137d590>
In [90]:
sns.catplot(del_blr_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
Out[90]:
<seaborn.axisgrid.FacetGrid at 0x1c2a10aa2d0>
In [91]:
sns.catplot(del_bom_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
Out[91]:
<seaborn.axisgrid.FacetGrid at 0x1c2a1363a50>
In [92]:
sns.catplot(del_ccu_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
Out[92]:
<seaborn.axisgrid.FacetGrid at 0x1c2a2974c10>
In [93]:
sns.catplot(del_hyd_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
Out[93]:
<seaborn.axisgrid.FacetGrid at 0x1c2a2ee8e90>

Violinplot method using ArrivingCity, Price and DepartingCity¶¶

In [94]:
sns.violinplot(bom_blr_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
Out[94]:
<Axes: xlabel='ArrivingCity', ylabel='Price'>
In [95]:
sns.violinplot(del_blr_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
Out[95]:
<Axes: xlabel='ArrivingCity', ylabel='Price'>
In [96]:
sns.violinplot(del_bom_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
Out[96]:
<Axes: xlabel='ArrivingCity', ylabel='Price'>
In [97]:
sns.violinplot(del_ccu_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
Out[97]:
<Axes: xlabel='ArrivingCity', ylabel='Price'>
In [98]:
sns.violinplot(del_hyd_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
Out[98]:
<Axes: xlabel='ArrivingCity', ylabel='Price'>
In [ ]: