# Load in some packages
import calendar
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session: no category or module
# filter is passed, so this applies to all libraries used below.
warnings.filterwarnings("ignore")

# Read the Gapminder CSV export into a DataFrame and echo it.
gapminder_csv = r"C:\Users\jki\Downloads\gapminder_full.csv"
population_df = pd.read_csv(filepath_or_buffer=gapminder_csv)
population_df


# Summary statistics (count, mean, std, quartiles, ...) of the numeric columns.
population_df.describe()


# Count the missing entries in each column.
missing_values = population_df.isnull().sum()
print(missing_values)

country       0
year          0
population    0
continent     0
life_exp      0
gdp_cap       0
dtype: int64


# Inspect each column's dtype and non-null count, plus the frame's memory usage.
population_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     1704 non-null   object 
 1   year        1704 non-null   int64  
 2   population  1704 non-null   int64  
 3   continent   1704 non-null   object 
 4   life_exp    1704 non-null   float64
 5   gdp_cap     1704 non-null   float64
dtypes: float64(2), int64(2), object(2)
memory usage: 80.0+ KB


# Preview the first three rows.
population_df.head(3)


# Top ten countries by population.
#
# NOTE: the value ranked here is each country's population summed over every
# survey year in the data set, i.e. a cumulative total, not the head-count of
# any single year.
#
# The column is selected *before* aggregating so the group-by only sums
# `population` instead of summing every column (including the text column
# `continent`) and then throwing the rest away.
population_size = population_df.groupby('country')['population'].sum()

# Sort the totals in descending order and keep the ten largest.
top_ten_population_size = population_size.sort_values(ascending=False).head(10)

# Display the result
print(top_ten_population_size)

# Plot the top ten countries as a bar chart
top_ten_population_size.plot(kind='bar', color='green', figsize=(10, 6))
plt.title('Top Ten Countries based on population')
plt.xlabel('Country')
plt.ylabel('population')
plt.show()

country
China            11497920623
India             8413568878
United States     2738534790
Indonesia         1779874000
Brazil            1467745520
Japan             1341105696
Pakistan          1124200629
Bangladesh        1089064744
Germany            930564520
Nigeria            884496214
Name: population, dtype: int64


# Convert life expectancy to whole numbers, overwriting the column in place.
# astype(int) drops the fractional part instead of rounding (28.801 -> 28,
# 46.809 -> 46), and every later cell works with these truncated values.
# NOTE(review): confirm that this loss of precision is intended.
population_df['life_exp'] = population_df['life_exp'].astype(int)


# Top ten countries by life expectancy.
#
# NOTE: the value ranked here is each country's life expectancy summed over
# every survey year in the data set, so it is a cumulative score rather than
# an age in years.
#
# The column is selected *before* aggregating so the group-by only sums
# `life_exp` instead of summing every column (including the text column
# `continent`) and then throwing the rest away.
life_expectancy = population_df.groupby('country')['life_exp'].sum()

# Sort the totals in descending order and keep the ten largest.
top_ten_life_expectancy = life_expectancy.sort_values(ascending=False).head(10)

# Display the result
print(top_ten_life_expectancy)

# Plot the top ten countries as a bar chart
top_ten_life_expectancy.plot(kind='bar', color='blue', figsize=(10, 6))
plt.title('Top Ten Countries based on life expectancy ')
plt.xlabel('Country')
plt.ylabel('life expectancy ')
plt.show()

country
Iceland        911
Sweden         909
Norway         905
Netherlands    902
Switzerland    901
Japan          893
Canada         891
Australia      890
Denmark        886
France         885
Name: life_exp, dtype: int32


# Top ten countries by GDP per capita (income).
#
# NOTE: the value ranked here is each country's `gdp_cap` summed over every
# survey year in the data set, so it is a cumulative figure rather than the
# income level of any single year.
#
# The column is selected *before* aggregating so the group-by only sums
# `gdp_cap` instead of summing every column (including the text column
# `continent`) and then throwing the rest away.
income_amount = population_df.groupby('country')['gdp_cap'].sum()

# Sort the totals in descending order and keep the ten largest.
top_ten_income_amount = income_amount.sort_values(ascending=False).head(10)

# Display the result
print(top_ten_income_amount)

# Plot the top ten countries as a bar chart
top_ten_income_amount.plot(kind='bar', color='brown', figsize=(10, 6))
plt.title('Top Ten Countries based on Income ')
plt.xlabel('Country')
plt.ylabel('GDP Income')
plt.show()

country
Kuwait           783994.925660
Switzerland      324892.012860
Norway           320967.678650
United States    315133.816160
Canada           268928.956080
Netherlands      260986.226498
Denmark          260061.898655
Germany          246680.213193
Iceland          246377.067270
Austria          244942.995352
Name: gdp_cap, dtype: float64


population_df


# Case study: keep only the rows whose country is Kuwait.
kuwait_data = population_df.loc[population_df['country'].eq("Kuwait")]

# The two measures this case study looks at.
f1 = kuwait_data['life_exp']
f2 = kuwait_data['gdp_cap']

# Retain the rows where both measures are present.
kuwait_filtered_data = kuwait_data.loc[f1.notna() & f2.notna()]

# First five rows of the Kuwait subset.
kuwait_filtered_data.head(5)


# Summary statistics of the Kuwait subset.
kuwait_filtered_data.describe()


import plotly.express as px

# Distribution of Kuwait's life-expectancy values.
fig = px.histogram(
    data_frame=kuwait_filtered_data,
    x='life_exp',
    height=320,
    labels={'life_exp':'Life Expectancy'},
    title='Life Expectancy Histogram - Kuwait',
)
fig.show()


# Life expectancy per survey year.
fig = px.bar(
    data_frame=kuwait_filtered_data,
    x='year',
    y='life_exp',
    height=320,
    labels={'life_exp':'Life Expectancy'},
    title='Relationship between Life Expectancy and Years - Kuwait',
)
# Tilt the x-axis tick labels so they stay readable.
fig.update_layout(xaxis_tickangle=-45)
fig.show()


# Population per survey year, with the bars coloured by life expectancy.
fig = px.bar(
    data_frame=kuwait_filtered_data,
    x='year',
    y='population',
    color='life_exp',
    height=320,
    labels={'population':'Population Kuwait'},
    title='Life Expectancy with Respect to Population Growth per Year - Kuwait',
)
fig.show()


population_df


# Case study: keep only the rows whose country is China.
China_data = population_df.loc[population_df['country'].eq("China")]

# The two measures this case study looks at.
Chinaf1 = China_data['life_exp']
Chinaf2 = China_data['gdp_cap']

# Retain the rows where both measures are present.
China_filtered_data = China_data.loc[Chinaf1.notna() & Chinaf2.notna()]

# First five rows of the China subset.
China_filtered_data.head(5)


# Summary statistics of the China subset.
China_filtered_data.describe()


import plotly.express as px

# Select the China rows of population_df for the charts below.
china_data = population_df[population_df['country'] == "China"]

# The two measures of interest.
f1 = china_data['life_exp']
f2 = china_data['gdp_cap']

# Keep only the rows where both measures are present.
china_filtered_data = china_data[(f1.notnull()) & (f2.notnull())]

# Distribution of China's life-expectancy values.
fig = px.histogram(
    china_filtered_data,
    x='life_exp',
    height=320,
    labels={'life_exp': 'Life Expectancy'},
    title='Life Expectancy Histogram - China',
)
fig.show()


# Life expectancy per survey year.
fig = px.bar(
    china_filtered_data,
    x='year',
    y='life_exp',
    height=320,
    labels={'life_exp': 'Life Expectancy'},
    title='Relationship between Life Expectancy and Years - China',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
fig.show()


# Population per survey year, with the bars coloured by life expectancy.
# The population label names China; it previously read "Population Kuwait",
# left over from the Kuwait chart this call was copied from.
fig = px.bar(
    china_filtered_data,
    x='year',
    y='population',
    color='life_exp',
    height=320,
    labels={'population': 'Population China'},
    title='Life Expectancy with Respect to Population Growth per Year - China',
)
fig.show()


population_df


# Case study: keep only the rows whose country is Iceland.
Iceland_data = population_df.loc[population_df['country'].eq("Iceland")]

# The two measures this case study looks at.
Icelandf1 = Iceland_data['life_exp']
Icelandf2 = Iceland_data['gdp_cap']

# Retain the rows where both measures are present.
Iceland_filtered_data = Iceland_data.loc[Icelandf1.notna() & Icelandf2.notna()]

# First five rows of the Iceland subset.
Iceland_filtered_data.head(5)


# Summary statistics of the Iceland subset.
Iceland_filtered_data.describe()


import plotly.express as px

# Distribution of Iceland's life-expectancy values.
fig = px.histogram(
    Iceland_filtered_data,
    x='life_exp',
    height=320,
    labels={'life_exp': 'Life Expectancy'},
    title='Life Expectancy Histogram - Iceland',
)
fig.show()


# Life expectancy per survey year.
fig = px.bar(
    Iceland_filtered_data,
    x='year',
    y='life_exp',
    height=320,
    labels={'life_exp': 'Life Expectancy'},
    title='Relationship between Life Expectancy and Years - Iceland',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
fig.show()


# Population per survey year, with the bars coloured by life expectancy.
# The labels mapping must be keyed by the plotted column name. It was keyed
# by 'pop', which is not a column used in this chart, so the label was never
# applied; its text also named Kuwait instead of Iceland.
fig = px.bar(
    Iceland_filtered_data,
    x='year',
    y='population',
    color='life_exp',
    height=320,
    labels={'population': 'Population Iceland'},
    title='Life Expectancy with Respect to population Growth per Year - Iceland',
)
fig.show()

	country	year	population	continent	life_exp	gdp_cap
0	Afghanistan	1952	8425333	Asia	28.801	779.445314
1	Afghanistan	1957	9240934	Asia	30.332	820.853030
2	Afghanistan	1962	10267083	Asia	31.997	853.100710
3	Afghanistan	1967	11537966	Asia	34.020	836.197138
4	Afghanistan	1972	13079460	Asia	36.088	739.981106
...	...	...	...	...	...	...
1699	Zimbabwe	1987	9216418	Africa	62.351	706.157306
1700	Zimbabwe	1992	10704340	Africa	60.377	693.420786
1701	Zimbabwe	1997	11404948	Africa	46.809	792.449960
1702	Zimbabwe	2002	11926563	Africa	39.989	672.038623
1703	Zimbabwe	2007	12311143	Africa	43.487	469.709298

	year	population	life_exp	gdp_cap
count	1704.00000	1.704000e+03	1704.000000	1704.000000
mean	1979.50000	2.960121e+07	59.474439	7215.327081
std	17.26533	1.061579e+08	12.917107	9857.454543
min	1952.00000	6.001100e+04	23.599000	241.165876
25%	1965.75000	2.793664e+06	48.198000	1202.060309
50%	1979.50000	7.023596e+06	60.712500	3531.846988
75%	1993.25000	1.958522e+07	70.845500	9325.462346
max	2007.00000	1.318683e+09	82.603000	113523.132900

	country	year	population	continent	life_exp	gdp_cap
0	Afghanistan	1952	8425333	Asia	28.801	779.445314
1	Afghanistan	1957	9240934	Asia	30.332	820.853030
2	Afghanistan	1962	10267083	Asia	31.997	853.100710

	country	year	population	continent	life_exp	gdp_cap
0	Afghanistan	1952	8425333	Asia	28	779.445314
1	Afghanistan	1957	9240934	Asia	30	820.853030
2	Afghanistan	1962	10267083	Asia	31	853.100710
3	Afghanistan	1967	11537966	Asia	34	836.197138
4	Afghanistan	1972	13079460	Asia	36	739.981106
...	...	...	...	...	...	...
1699	Zimbabwe	1987	9216418	Africa	62	706.157306
1700	Zimbabwe	1992	10704340	Africa	60	693.420786
1701	Zimbabwe	1997	11404948	Africa	46	792.449960
1702	Zimbabwe	2002	11926563	Africa	39	672.038623
1703	Zimbabwe	2007	12311143	Africa	43	469.709298

	country	year	population	continent	life_exp	gdp_cap
852	Kuwait	1952	160000	Asia	55	108382.35290
853	Kuwait	1957	212846	Asia	58	113523.13290
854	Kuwait	1962	358266	Asia	60	95458.11176
855	Kuwait	1967	575003	Asia	64	80894.88326
856	Kuwait	1972	841934	Asia	67	109347.86700

Return Home

Return to Python Projects Page

Gapminder World Population Exploratory Data Analysis¶

Research Question: Is there any correlation between life expectancy and standard of living over the years?¶

1 Display top 10 Countries with the highest population¶

2 Display top 10 Countries with the highest life expectancy¶

3 Display top 10 Countries with the highest GDP (Incomes)¶

Case Study 1: Kuwait¶

Case Study 2 : China¶

Case Study 3 : Iceland¶

	year	population	life_exp	gdp_cap
count	12.000000	1.200000e+01	12.000000	12.000000
mean	1979.500000	1.206496e+06	68.500000	65332.910472
std	18.027756	7.836823e+05	7.692972	33882.139536
min	1952.000000	1.600000e+05	55.000000	28118.429980
25%	1965.750000	5.208188e+05	63.000000	35065.809143
50%	1979.500000	1.279226e+06	70.000000	53286.233460
75%	1993.250000	1.796880e+06	75.250000	98689.172045
max	2007.000000	2.505559e+06	77.000000	113523.132900

	country	year	population	continent	life_exp	gdp_cap
288	China	1952	556263527	Asia	44	400.448611
289	China	1957	637408000	Asia	50	575.987001
290	China	1962	665770000	Asia	44	487.674018
291	China	1967	754550000	Asia	58	612.705693
292	China	1972	862030000	Asia	63	676.900092

	country	year	population	continent	life_exp	gdp_cap
684	Iceland	1952	147962	Europe	72	7267.688428
685	Iceland	1957	165110	Europe	73	9244.001412
686	Iceland	1962	182053	Europe	73	10350.159060
687	Iceland	1967	198676	Europe	73	13319.895680
688	Iceland	1972	209275	Europe	74	15798.063620