# Load libraires
import pandas as pd
import numpy as np

# load dataset

US_Electric_Utility_df = pd.read_csv(r"C:\Users\jki\Downloads\iou_zipcodes_2020.csv")  
US_Electric_Utility_df.head(5)


# lets check for missing values
misssing_values = US_Electric_Utility_df.isna().sum()
print(misssing_values)

zip             0
eiaid           0
utility_name    0
state           0
service_type    0
ownership       0
comm_rate       0
ind_rate        0
res_rate        0
dtype: int64


# lets check for unwanted negatives values
US_Electric_Utility_df.describe()


# lets check for data types
US_Electric_Utility_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52177 entries, 0 to 52176
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   zip           52177 non-null  int64  
 1   eiaid         52177 non-null  int64  
 2   utility_name  52177 non-null  object 
 3   state         52177 non-null  object 
 4   service_type  52177 non-null  object 
 5   ownership     52177 non-null  object 
 6   comm_rate     52177 non-null  float64
 7   ind_rate      52177 non-null  float64
 8   res_rate      52177 non-null  float64
dtypes: float64(3), int64(2), object(4)
memory usage: 3.6+ MB


# lets rename the columns for better clarity
US_Electric_Utility_df.rename(columns={'comm_rate': 'commercial_rate', 'ind_rate': 'industrial_rate', 'res_rate': 'residential_rate'}, inplace=True)
US_Electric_Utility_df.head(5)


US_Electric_Utility_df['commercial_rate'].mean()

0.09120652553333844


US_Electric_Utility_df['industrial_rate'].mean()

0.06373383774584586


US_Electric_Utility_df['residential_rate'].mean()

0.11546862665001821


import matplotlib.pyplot as plt

commercial_mean = US_Electric_Utility_df['commercial_rate'].mean()
industrial_mean = US_Electric_Utility_df['industrial_rate'].mean()
residential_mean = US_Electric_Utility_df['residential_rate'].mean()

utility_types = ['Commercial', 'Industrial', 'Residential']
mean_values = [commercial_mean, industrial_mean, residential_mean]

plt.bar(utility_types, mean_values, color=['blue', 'green', 'red'])
plt.title('Mean Electric Utility Rates based on types')
plt.xlabel('Utility Types')
plt.ylabel('Mean Rates')
plt.show()


# lets create new column of the average utilty rates  for the combination of reseidetial,industrial and commercial

# Create a new column for the average of commercial, industrial, and residential rates
US_Electric_Utility_df['Total_average_rate'] =  US_Electric_Utility_df[['commercial_rate', 'industrial_rate', 'residential_rate']].mean(axis=1)
US_Electric_Utility_df.head(5)


import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt

# Assuming your DataFrame is named df
# If it's not, replace df with the actual name of your DataFrame

# Group by 'utility_name' and calculate the mean of 'Total_average_rate'
average_rates = US_Electric_Utility_df.groupby('utility_name')['Total_average_rate'].mean().reset_index()

# Rank the utility companies based on average Total_average_rate
average_rates['rank'] = average_rates['Total_average_rate'].rank(ascending=True)

# Display the resulting DataFrame
print(average_rates)

# Get the top ten utility companies
top_ten = average_rates.nlargest(10, 'Total_average_rate')

# Plotting
plt.figure(figsize=(10, 6))
plt.bar(top_ten['utility_name'], top_ten['Total_average_rate'], color='blue')
plt.title('Top Ten Utility Companies based on  Total_average_rate')
plt.xlabel('Utility Companies')
plt.ylabel('Total_average_rate')
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()

                       utility_name  Total_average_rate   rank
0                      ALLETE, Inc.            0.094970   77.0
1                Ajo Improvement Co            0.060593    9.0
2                  Alabama Power Co            0.106868  100.0
3    Alaska Electric Light&Power Co            0.114955  114.0
4     Alaska Power and Telephone Co            0.178773  137.0
..                              ...                 ...    ...
138      Westfield Electric Company            0.165738  134.0
139               Wheeling Power Co            0.094007   72.0
140     Wisconsin Electric Power Co            0.118471  117.0
141      Wisconsin Power & Light Co            0.105398   97.0
142   Wisconsin Public Service Corp            0.096512   80.0

[143 rows x 3 columns]


import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt

# Assuming your DataFrame is named df
# If it's not, replace df with the actual name of your DataFrame

# Group by 'utility_name' and calculate the mean of 'Total_average_rate'
average_rates = US_Electric_Utility_df.groupby('state')['Total_average_rate'].mean().reset_index()

# Rank the utility companies based on average Total_average_rate
average_rates['rank'] = average_rates['Total_average_rate'].rank(ascending=True)

# Display the resulting DataFrame
print(average_rates)

# Get the top ten utility companies
top_ten = average_rates.nlargest(10, 'Total_average_rate')

# Plotting
plt.figure(figsize=(10, 6))
plt.bar(top_ten['state'], top_ten['Total_average_rate'], color='blue')
plt.title('Top Ten state based on  Total_average_rate')
plt.xlabel('state')
plt.ylabel('Total_average_rate')
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()

   state  Total_average_rate  rank
0     AK            0.161755  48.0
1     AL            0.106868  40.0
2     AR            0.084439  21.0
3     AZ            0.103655  37.0
4     CA            0.154831  46.0
5     CO            0.098254  33.0
6     CT            0.137837  44.0
7     DC            0.059851   4.0
8     DE            0.064008   6.0
9     FL            0.090751  26.0
10    GA            0.091360  27.0
11    HI            0.291223  50.0
12    IA            0.103941  38.0
13    ID            0.081102  15.0
14    IL            0.060523   5.0
15    IN            0.104271  39.0
16    KS            0.102352  36.0
17    KY            0.094763  30.0
18    LA            0.082695  17.0
19    MA            0.136888  43.0
20    MD            0.075134  13.0
21    ME            0.100685  35.0
22    MI            0.067822   8.0
23    MN            0.100324  34.0
24    MO            0.089138  25.0
25    MS            0.088582  24.0
26    MT            0.069713  10.0
27    NC            0.085090  22.0
28    ND            0.084388  20.0
29    NH            0.115304  42.0
30    NJ            0.082753  18.0
31    NM            0.095760  32.0
32    NV            0.045040   1.0
33    NY            0.087803  23.0
34    OH            0.073334  12.0
35    OK            0.068374   9.0
36    OR            0.057122   3.0
37    PA            0.069820  11.0
38    RI            0.141477  45.0
39    SC            0.095562  31.0
40    SD            0.083952  19.0
41    TN            0.202728  49.0
42    TX            0.077118  14.0
43    UT            0.081864  16.0
44    VA            0.045431   2.0
45    VT            0.158850  47.0
46    WA            0.064013   7.0
47    WI            0.108218  41.0
48    WV            0.092497  29.0
49    WY            0.091617  28.0


# Group by 'service_type' and count the occurrences
service_type_counts = US_Electric_Utility_df.groupby('service_type').count()

# Plotting
plt.bar(service_type_counts.index, service_type_counts['zip'], color='green')
plt.title('Counts of Electric Utility Service Types')
plt.xlabel('Service Type')
plt.ylabel('Count')
plt.show()

	zip	eiaid	utility_name	state	service_type	ownership	comm_rate	ind_rate	res_rate
0	85321	176	Ajo Improvement Co	AZ	Bundled	Investor Owned	0.087890	0.000000	0.093887
1	36560	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057
2	36513	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057
3	36280	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057
4	35473	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057

	zip	eiaid	comm_rate	ind_rate	res_rate
count	52177.000000	52177.000000	52177.000000	52177.000000	52177.000000
mean	42503.161795	13894.579777	0.091207	0.063734	0.115469
std	29166.080511	11445.598550	0.047823	0.045584	0.052939
min	1002.000000	176.000000	0.008510	0.000000	0.000000
25%	16685.000000	5860.000000	0.065812	0.036541	0.091532
50%	40061.000000	13781.000000	0.092400	0.062603	0.115326
75%	62425.000000	15477.000000	0.113309	0.077213	0.135057
max	99950.000000	57483.000000	0.348755	0.360000	0.351657

	zip	eiaid	utility_name	state	service_type	ownership	commercial_rate	industrial_rate	residential_rate
0	85321	176	Ajo Improvement Co	AZ	Bundled	Investor Owned	0.087890	0.000000	0.093887
1	36560	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057
2	36513	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057
3	36280	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057
4	35473	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057

	zip	eiaid	utility_name	state	service_type	ownership	commercial_rate	industrial_rate	residential_rate	Total_average_rate
0	85321	176	Ajo Improvement Co	AZ	Bundled	Investor Owned	0.087890	0.000000	0.093887	0.060593
1	36560	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057	0.106868
2	36513	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057	0.106868
3	36280	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057	0.106868
4	35473	195	Alabama Power Co	AL	Bundled	Investor Owned	0.121895	0.063652	0.135057	0.106868

Return Home

U.S. Electric Utility Companies and Rates EDA¶

1.What is the Average Commerical Electricity Rate across all states¶

2.What is the Average Industrial Electricity Rate across all states¶

3.What is the Average Residential Electricity Rate across all states¶

4.Plot a graph to display average electric rates for the three categories¶

5 Rank theTotal_average_rate for Each Electricity Provider¶

6 Rank theTotal_average_rate for each State¶

7 What is the most used Service type¶