his dataset, compiled by NREL using data from ABB, the Velocity Suite and the U.S. Energy Information Administration dataset 861, provides average residential, commercial and industrial electricity rates with likely zip codes for both investor owned utilities (IOU) and non-investor owned utilities. Note: the files include average rates for each utility (not average rates per zip code), but not the detailed rate structure data found in the OpenEI U.S. Utility Rate Database.
# Load libraires
import pandas as pd
import numpy as np
# load dataset
US_Electric_Utility_df = pd.read_csv(r"C:\Users\jki\Downloads\iou_zipcodes_2020.csv")
US_Electric_Utility_df.head(5)
zip | eiaid | utility_name | state | service_type | ownership | comm_rate | ind_rate | res_rate | |
---|---|---|---|---|---|---|---|---|---|
0 | 85321 | 176 | Ajo Improvement Co | AZ | Bundled | Investor Owned | 0.087890 | 0.000000 | 0.093887 |
1 | 36560 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
2 | 36513 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
3 | 36280 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
4 | 35473 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
# lets check for missing values
misssing_values = US_Electric_Utility_df.isna().sum()
print(misssing_values)
zip 0 eiaid 0 utility_name 0 state 0 service_type 0 ownership 0 comm_rate 0 ind_rate 0 res_rate 0 dtype: int64
# lets check for unwanted negatives values
US_Electric_Utility_df.describe()
zip | eiaid | comm_rate | ind_rate | res_rate | |
---|---|---|---|---|---|
count | 52177.000000 | 52177.000000 | 52177.000000 | 52177.000000 | 52177.000000 |
mean | 42503.161795 | 13894.579777 | 0.091207 | 0.063734 | 0.115469 |
std | 29166.080511 | 11445.598550 | 0.047823 | 0.045584 | 0.052939 |
min | 1002.000000 | 176.000000 | 0.008510 | 0.000000 | 0.000000 |
25% | 16685.000000 | 5860.000000 | 0.065812 | 0.036541 | 0.091532 |
50% | 40061.000000 | 13781.000000 | 0.092400 | 0.062603 | 0.115326 |
75% | 62425.000000 | 15477.000000 | 0.113309 | 0.077213 | 0.135057 |
max | 99950.000000 | 57483.000000 | 0.348755 | 0.360000 | 0.351657 |
# lets check for data types
US_Electric_Utility_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 52177 entries, 0 to 52176 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 zip 52177 non-null int64 1 eiaid 52177 non-null int64 2 utility_name 52177 non-null object 3 state 52177 non-null object 4 service_type 52177 non-null object 5 ownership 52177 non-null object 6 comm_rate 52177 non-null float64 7 ind_rate 52177 non-null float64 8 res_rate 52177 non-null float64 dtypes: float64(3), int64(2), object(4) memory usage: 3.6+ MB
# lets rename the columns for better clarity
US_Electric_Utility_df.rename(columns={'comm_rate': 'commercial_rate', 'ind_rate': 'industrial_rate', 'res_rate': 'residential_rate'}, inplace=True)
US_Electric_Utility_df.head(5)
zip | eiaid | utility_name | state | service_type | ownership | commercial_rate | industrial_rate | residential_rate | |
---|---|---|---|---|---|---|---|---|---|
0 | 85321 | 176 | Ajo Improvement Co | AZ | Bundled | Investor Owned | 0.087890 | 0.000000 | 0.093887 |
1 | 36560 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
2 | 36513 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
3 | 36280 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
4 | 35473 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 |
US_Electric_Utility_df['commercial_rate'].mean()
0.09120652553333844
US_Electric_Utility_df['industrial_rate'].mean()
0.06373383774584586
US_Electric_Utility_df['residential_rate'].mean()
0.11546862665001821
import matplotlib.pyplot as plt
commercial_mean = US_Electric_Utility_df['commercial_rate'].mean()
industrial_mean = US_Electric_Utility_df['industrial_rate'].mean()
residential_mean = US_Electric_Utility_df['residential_rate'].mean()
utility_types = ['Commercial', 'Industrial', 'Residential']
mean_values = [commercial_mean, industrial_mean, residential_mean]
plt.bar(utility_types, mean_values, color=['blue', 'green', 'red'])
plt.title('Mean Electric Utility Rates based on types')
plt.xlabel('Utility Types')
plt.ylabel('Mean Rates')
plt.show()
Residential rates had the highets rates
# lets create new column of the average utilty rates for the combination of reseidetial,industrial and commercial
# Create a new column for the average of commercial, industrial, and residential rates
US_Electric_Utility_df['Total_average_rate'] = US_Electric_Utility_df[['commercial_rate', 'industrial_rate', 'residential_rate']].mean(axis=1)
US_Electric_Utility_df.head(5)
zip | eiaid | utility_name | state | service_type | ownership | commercial_rate | industrial_rate | residential_rate | Total_average_rate | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 85321 | 176 | Ajo Improvement Co | AZ | Bundled | Investor Owned | 0.087890 | 0.000000 | 0.093887 | 0.060593 |
1 | 36560 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 | 0.106868 |
2 | 36513 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 | 0.106868 |
3 | 36280 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 | 0.106868 |
4 | 35473 | 195 | Alabama Power Co | AL | Bundled | Investor Owned | 0.121895 | 0.063652 | 0.135057 | 0.106868 |
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
# Assuming your DataFrame is named df
# If it's not, replace df with the actual name of your DataFrame
# Group by 'utility_name' and calculate the mean of 'Total_average_rate'
average_rates = US_Electric_Utility_df.groupby('utility_name')['Total_average_rate'].mean().reset_index()
# Rank the utility companies based on average Total_average_rate
average_rates['rank'] = average_rates['Total_average_rate'].rank(ascending=True)
# Display the resulting DataFrame
print(average_rates)
# Get the top ten utility companies
top_ten = average_rates.nlargest(10, 'Total_average_rate')
# Plotting
plt.figure(figsize=(10, 6))
plt.bar(top_ten['utility_name'], top_ten['Total_average_rate'], color='blue')
plt.title('Top Ten Utility Companies based on Total_average_rate')
plt.xlabel('Utility Companies')
plt.ylabel('Total_average_rate')
plt.xticks(rotation=45, ha='right') # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()
utility_name Total_average_rate rank 0 ALLETE, Inc. 0.094970 77.0 1 Ajo Improvement Co 0.060593 9.0 2 Alabama Power Co 0.106868 100.0 3 Alaska Electric Light&Power Co 0.114955 114.0 4 Alaska Power and Telephone Co 0.178773 137.0 .. ... ... ... 138 Westfield Electric Company 0.165738 134.0 139 Wheeling Power Co 0.094007 72.0 140 Wisconsin Electric Power Co 0.118471 117.0 141 Wisconsin Power & Light Co 0.105398 97.0 142 Wisconsin Public Service Corp 0.096512 80.0 [143 rows x 3 columns]
Hawaii Electic Light Co has the most Expensive Electicity
import pandas as pd
import pandas as pd
import matplotlib.pyplot as plt
# Assuming your DataFrame is named df
# If it's not, replace df with the actual name of your DataFrame
# Group by 'utility_name' and calculate the mean of 'Total_average_rate'
average_rates = US_Electric_Utility_df.groupby('state')['Total_average_rate'].mean().reset_index()
# Rank the utility companies based on average Total_average_rate
average_rates['rank'] = average_rates['Total_average_rate'].rank(ascending=True)
# Display the resulting DataFrame
print(average_rates)
# Get the top ten utility companies
top_ten = average_rates.nlargest(10, 'Total_average_rate')
# Plotting
plt.figure(figsize=(10, 6))
plt.bar(top_ten['state'], top_ten['Total_average_rate'], color='blue')
plt.title('Top Ten state based on Total_average_rate')
plt.xlabel('state')
plt.ylabel('Total_average_rate')
plt.xticks(rotation=45, ha='right') # Rotate x-axis labels for better readability
plt.tight_layout()
plt.show()
state Total_average_rate rank 0 AK 0.161755 48.0 1 AL 0.106868 40.0 2 AR 0.084439 21.0 3 AZ 0.103655 37.0 4 CA 0.154831 46.0 5 CO 0.098254 33.0 6 CT 0.137837 44.0 7 DC 0.059851 4.0 8 DE 0.064008 6.0 9 FL 0.090751 26.0 10 GA 0.091360 27.0 11 HI 0.291223 50.0 12 IA 0.103941 38.0 13 ID 0.081102 15.0 14 IL 0.060523 5.0 15 IN 0.104271 39.0 16 KS 0.102352 36.0 17 KY 0.094763 30.0 18 LA 0.082695 17.0 19 MA 0.136888 43.0 20 MD 0.075134 13.0 21 ME 0.100685 35.0 22 MI 0.067822 8.0 23 MN 0.100324 34.0 24 MO 0.089138 25.0 25 MS 0.088582 24.0 26 MT 0.069713 10.0 27 NC 0.085090 22.0 28 ND 0.084388 20.0 29 NH 0.115304 42.0 30 NJ 0.082753 18.0 31 NM 0.095760 32.0 32 NV 0.045040 1.0 33 NY 0.087803 23.0 34 OH 0.073334 12.0 35 OK 0.068374 9.0 36 OR 0.057122 3.0 37 PA 0.069820 11.0 38 RI 0.141477 45.0 39 SC 0.095562 31.0 40 SD 0.083952 19.0 41 TN 0.202728 49.0 42 TX 0.077118 14.0 43 UT 0.081864 16.0 44 VA 0.045431 2.0 45 VT 0.158850 47.0 46 WA 0.064013 7.0 47 WI 0.108218 41.0 48 WV 0.092497 29.0 49 WY 0.091617 28.0
# Group by 'service_type' and count the occurrences
service_type_counts = US_Electric_Utility_df.groupby('service_type').count()
# Plotting
plt.bar(service_type_counts.index, service_type_counts['zip'], color='green')
plt.title('Counts of Electric Utility Service Types')
plt.xlabel('Service Type')
plt.ylabel('Count')
plt.show()