import pandas as pd
from pytrends.request import TrendReq
%matplotlib inline
pip install matplotlib-inline
Requirement already satisfied: matplotlib-inline in c:\users\jki\anaconda3\lib\site-packages (0.1.6) Requirement already satisfied: traitlets in c:\users\jki\anaconda3\lib\site-packages (from matplotlib-inline) (5.7.1) Note: you may need to restart the kernel to use updated packages.
pip install pytrends
Requirement already satisfied: pytrends in c:\users\jki\anaconda3\lib\site-packages (4.9.2) Requirement already satisfied: requests>=2.0 in c:\users\jki\anaconda3\lib\site-packages (from pytrends) (2.31.0) Requirement already satisfied: pandas>=0.25 in c:\users\jki\anaconda3\lib\site-packages (from pytrends) (1.5.3) Requirement already satisfied: lxml in c:\users\jki\anaconda3\lib\site-packages (from pytrends) (4.9.2) Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\jki\anaconda3\lib\site-packages (from pandas>=0.25->pytrends) (2.8.2) Requirement already satisfied: pytz>=2020.1 in c:\users\jki\anaconda3\lib\site-packages (from pandas>=0.25->pytrends) (2022.7) Requirement already satisfied: numpy>=1.21.0 in c:\users\jki\anaconda3\lib\site-packages (from pandas>=0.25->pytrends) (1.24.3) Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\jki\anaconda3\lib\site-packages (from requests>=2.0->pytrends) (2.0.4) Requirement already satisfied: idna<4,>=2.5 in c:\users\jki\anaconda3\lib\site-packages (from requests>=2.0->pytrends) (3.4) Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\jki\anaconda3\lib\site-packages (from requests>=2.0->pytrends) (1.26.16) Requirement already satisfied: certifi>=2017.4.17 in c:\users\jki\anaconda3\lib\site-packages (from requests>=2.0->pytrends) (2023.7.22) Requirement already satisfied: six>=1.5 in c:\users\jki\anaconda3\lib\site-packages (from python-dateutil>=2.8.1->pandas>=0.25->pytrends) (1.16.0) Note: you may need to restart the kernel to use updated packages.
# import Google Analytics Data
ga_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_keyword.csv")
ga_data.head(5)
# ---------------------------------------- | Unnamed: 1 | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | # Miratrix | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | # Channels | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | # 20180831-20190831 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | # ---------------------------------------- | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# import google analytics keyword data
ga_kw_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_keyword.csv",skiprows = 6, nrows =5)
ga_kw_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_keyword.csv",skiprows = 14, nrows =366,
parse_dates = ['Day Index'])
ga_kw_data.head(4)
Keyword | Users | New Users | Sessions | Bounce Rate | Pages / Session | Avg. Session Duration | Contact From Good Lead (Goal 1 Conversion Rate) | Contact From Good Lead (Goal 1 Completions) | Contact From Good Lead (Goal 1 Value) | |
---|---|---|---|---|---|---|---|---|---|---|
0 | (not provided) | 615 | 591 | 727 | 35.90% | 1.75 | 00:01:42 | 0.00% | 0 | £0.00 |
1 | amazon | 3 | 3 | 3 | 100.00% | 1.00 | 00:00:00 | 0.00% | 0 | £0.00 |
2 | (not set) | 1 | 1 | 1 | 100.00% | 1.00 | 00:00:00 | 0.00% | 0 | £0.00 |
3 | miratrix | 1 | 1 | 1 | 0.00% | 2.00 | 00:11:37 | 0.00% | 0 | £0.00 |
ga_kw_traffic_data.head()
Day Index | Users | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 2018-08-31 | 5 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 2018-09-01 | 4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 2018-09-02 | 5 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 2018-09-03 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 2018-09-04 | 4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# import google analytics pages data
ga_page_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_pages.csv",skiprows = 6,nrows =376)
ga_page_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_pages.csv",skiprows =386,nrows =367, parse_dates=['Day Index'])
ga_page_data.head()
Page | Pageviews | Unique Pageviews | Avg. Time on Page | Entrances | Bounce Rate | % Exit | Page Value | |
---|---|---|---|---|---|---|---|---|
0 | / | 813 | 665 | 00:02:05 | 641 | 14.35% | 55.97% | £0.00 |
1 | /www.miratrix.co.uk | 515 | 388 | 00:02:17 | 373 | 59.25% | 56.89% | £0.00 |
2 | /get-in-touch/ | 336 | 163 | 00:01:06 | 23 | 10.53% | 33.04% | £0.00 |
3 | /app-marketing-agency/ | 140 | 120 | 00:02:04 | 73 | 18.31% | 57.86% | £0.00 |
4 | /app-store-optimization-services/ | 136 | 105 | 00:01:30 | 22 | 14.29% | 47.79% | £0.00 |
ga_page_traffic_data.head()
Day Index | Pageviews | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | |
---|---|---|---|---|---|---|---|---|
0 | 2018-08-31 | 14 | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 2018-09-01 | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 2018-09-02 | 17 | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 2018-09-03 | 14 | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 2018-09-04 | 11 | NaN | NaN | NaN | NaN | NaN | NaN |
#Import google analytics social data
ga_social_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_social_data.csv", skiprows=5, nrows=7)
ga_social_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_social_data.csv", skiprows=15, nrows=366, parse_dates=['Day Index'])
ga_social_data.head()
Social Network | Users | New Users | Sessions | Bounce Rate | Pages / Session | Avg. Session Duration | Contact From Good Lead (Goal 1 Conversion Rate) | Contact From Good Lead (Goal 1 Completions) | Contact From Good Lead (Goal 1 Value) | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 29 | 29 | 29 | 37.93% | 1.00 | <00:00:01 | 0.00% | 0 | £0.00 | |
1 | 6 | 4 | 7 | 0.00% | 2.14 | 00:04:50 | 0.00% | 0 | £0.00 | |
2 | 2 | 2 | 2 | 100.00% | 1.00 | 00:00:00 | 0.00% | 0 | £0.00 | |
3 | 1 | 1 | 1 | 100.00% | 1.00 | 00:00:00 | 0.00% | 0 | £0.00 | |
4 | LiveJournal | 1 | 1 | 1 | 0.00% | 1.00 | 00:00:00 | 0.00% | 0 | £0.00 |
ga_social_traffic_data.head()
Day Index | Users | |
---|---|---|
0 | 2018-08-31 | 0 |
1 | 2018-09-01 | 0 |
2 | 2018-09-02 | 0 |
3 | 2018-09-03 | 0 |
4 | 2018-09-04 | 0 |
#Import google analytics other data
ga_other_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_other.csv", skiprows=6, nrows=5)
ga_other_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_other.csv", skiprows=13,
nrows=366, parse_dates=['Day Index'])
ga_other_data.head()
Source | Users | New Users | Sessions | Bounce Rate | Pages / Session | Avg. Session Duration | Contact From Good Lead (Goal 1 Conversion Rate) | Contact From Good Lead (Goal 1 Completions) | Contact From Good Lead (Goal 1 Value) | |
---|---|---|---|---|---|---|---|---|---|---|
0 | directory | 116 | 110 | 143 | 18.18% | 2.14 | 00:02:34 | 0.00% | 0 | £0.00 |
1 | google_business | 35 | 35 | 36 | 16.67% | 1.81 | 00:02:00 | 0.00% | 0 | £0.00 |
2 | nav | 26 | 0 | 43 | 6.98% | 2.40 | 00:04:04 | 0.00% | 0 | £0.00 |
3 | companyemail | 16 | 12 | 23 | 43.48% | 1.65 | 00:00:59 | 0.00% | 0 | £0.00 |
4 | Clickky DevTools Listing | 3 | 3 | 3 | 0.00% | 2.67 | 00:01:45 | 0.00% | 0 | £0.00 |
ga_other_traffic_data.head()
Day Index | Users | |
---|---|---|
0 | 2018-08-31 | 1 |
1 | 2018-09-01 | 0 |
2 | 2018-09-02 | 1 |
3 | 2018-09-03 | 1 |
4 | 2018-09-04 | 0 |
#Import google analytics total data
ga_total_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_total_channels.csv", skiprows=6, nrows=5)
ga_total_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_total_channels.csv", skiprows=13,
nrows=366, parse_dates=['Day Index'])
ga_total_data.head()
Channel Grouping | Users | New Users | Sessions | Bounce Rate | Pages / Session | Avg. Session Duration | Contact From Good Lead (Goal 1 Conversion Rate) | Contact From Good Lead (Goal 1 Completions) | Contact From Good Lead (Goal 1 Value) | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Organic Search | 622 | 598 | 734 | 36.10% | 1.75 | 00:01:43 | 0.00% | 0 | £0.00 |
1 | Direct | 601 | 599 | 665 | 57.74% | 1.37 | 00:00:42 | 0.00% | 0 | £0.00 |
2 | (Other) | 194 | 160 | 248 | 18.15% | 2.10 | 00:02:35 | 0.00% | 0 | £0.00 |
3 | Referral | 100 | 88 | 144 | 23.61% | 3.85 | 00:02:54 | 2.08% | 3 | £0.00 |
4 | Social | 41 | 39 | 42 | 35.71% | 1.19 | 00:00:49 | 0.00% | 0 | £0.00 |
ga_total_traffic_data.head()
Day Index | Users | |
---|---|---|
0 | 2018-08-31 | 5 |
1 | 2018-09-01 | 4 |
2 | 2018-09-02 | 5 |
3 | 2018-09-03 | 7 |
4 | 2018-09-04 | 7 |
#Importing query data from Google Search Console
gsc_query = pd.read_csv(r"C:\Users\jki\Downloads\gsc_data\gsc_query.csv")
gsc_query.head()
Query | Clicks | Impressions | CTR | Position | |
---|---|---|---|---|---|
0 | miratrix | 71 | 760 | 9.34% | 6.23 |
1 | app marketing agency uk | 23 | 398 | 5.78% | 3.36 |
2 | mobile marketing agency london | 11 | 2529 | 0.43% | 3.97 |
3 | app marketing uk | 9 | 261 | 3.45% | 2.01 |
4 | app marketing agency | 7 | 9860 | 0.07% | 39.21 |
#Importing pages data from Google Search Console
gsc_page = pd.read_csv(r"C:\Users\jki\Downloads\gsc_data\gsc_pages.csv")
gsc_page.head()
Page | Clicks | Impressions | CTR | Position | |
---|---|---|---|---|---|
0 | https://miratrix.co.uk/ | 371 | 100673 | 0.37% | 42.59 |
1 | https://miratrix.co.uk/app-marketing-agency/ | 80 | 30824 | 0.26% | 26.84 |
2 | https://miratrix.co.uk/mobile-app-competitor-a... | 16 | 1458 | 1.1% | 30.64 |
3 | https://miratrix.co.uk/blippar-the-message-isn... | 11 | 517 | 2.13% | 36.05 |
4 | https://miratrix.co.uk/author/nduddy/ | 10 | 140 | 7.14% | 5.61 |
#Importing country data from Google Search Console
gsc_country = pd.read_csv(r"C:\Users\jki\Downloads\gsc_data\gsc_country.csv")
gsc_country.head()
Country | Clicks | Impressions | CTR | Position | |
---|---|---|---|---|---|
0 | United Kingdom | 310 | 18836 | 1.65% | 48.67 |
1 | India | 97 | 25622 | 0.38% | 51.38 |
2 | United States | 29 | 14493 | 0.2% | 45.92 |
3 | Germany | 14 | 2516 | 0.56% | 55.01 |
4 | France | 12 | 2167 | 0.55% | 43.88 |
#Importing Device data from Google Search Console
#Importing query appearance data from Google Search Console
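The corresponding import cells aren't included in this notebook. As a minimal sketch, assuming the exports were saved alongside the other GSC files under the hypothetical filenames gsc_device.csv and gsc_search_appearance.csv:
gsc_device = pd.read_csv(r"C:\Users\jki\Downloads\gsc_data\gsc_device.csv") #hypothetical filename
gsc_appearance = pd.read_csv(r"C:\Users\jki\Downloads\gsc_data\gsc_search_appearance.csv") #hypothetical filename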
#Import PPC data
ppc_data = pd.read_csv(r"C:\Users\jki\Downloads\other_data\ppc_data.csv")
ppc_data.head()
Unnamed: 0 | Day | Ad group | Avg. CPC | CTR | Campaign | Clicks | Conv. rate | Conversions | Cost | Cost / conv. | Final URL | Impr. | Keyword | Keyword status | Max. CPC | Mobile final URL | Status | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2018-08-31 | miratrix_ppc_adgroup | 2.490563 | 16.894144 | miratrix_ppc | 75 | 0.225225 | 1 | 1105.809783 | 1105.809783 | Text | 7501 | keyword | Active | 2.6 | Text | Active |
1 | 1 | 2018-09-01 | miratrix_ppc_adgroup | 2.472161 | 5.976134 | miratrix_ppc | 17 | 1.193317 | 5 | 1035.835572 | 207.167114 | Text | 2504 | keyword | Active | 2.6 | Text | Active |
2 | 2 | 2018-09-02 | miratrix_ppc_adgroup | 2.508561 | 6.824919 | miratrix_ppc | 3 | 0.107411 | 1 | 2335.470042 | 2335.470042 | Text | 6354 | keyword | Active | 2.6 | Text | Active |
3 | 3 | 2018-09-03 | miratrix_ppc_adgroup | 2.548919 | 8.058366 | miratrix_ppc | 99 | 2.723735 | 7 | 655.072102 | 93.581729 | Text | 2071 | keyword | Active | 2.6 | Text | Active |
4 | 4 | 2018-09-04 | miratrix_ppc_adgroup | 2.414868 | 6.884937 | miratrix_ppc | 93 | 1.464435 | 7 | 1154.306794 | 164.900971 | Text | 3291 | keyword | Active | 2.6 | Text | Active |
#Import Facebook ad data
fb_data = pd.read_csv(r"C:\Users\jki\Downloads\other_data\facebook-data-ads.csv",
parse_dates=['Reporting starts','Reporting ends'])
fb_data.head()
Unnamed: 0 | Reporting starts | Reporting ends | Campaign name | Delivery | Mobile app installs | Mobile app actions | Cost per mobile app install (GBP) | Cost per mobile app action (GBP) | Desktop app installs | Desktop app engagement | Cost per desktop app install (GBP) | Cost per desktop app engagement (GBP) | Post reactions | Post comments | Post shares | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2014-02-21 | 2018-01-29 | Campaign1 | 0 | 11451.0 | NaN | 1.275683 | NaN | NaN | NaN | NaN | NaN | 1807.0 | 42.0 | 133.0 |
1 | 1 | 2015-12-04 | 2015-12-04 | Campaign2 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
2 | 2 | 2015-10-21 | 2015-10-21 | Campaign3 | inactive | 2.0 | NaN | 3.530000 | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
3 | 3 | 2015-10-20 | 2015-10-20 | Campaign3 | inactive | 2.0 | NaN | 16.500000 | NaN | NaN | NaN | NaN | NaN | 15.0 | NaN | NaN |
4 | 4 | 2015-10-20 | 2015-10-20 | Campaign4 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
#Import site visibility CSV
site_visibility = pd.read_csv(r"C:\Users\jki\Downloads\other_data\site_visibility.csv")
site_visibility.head()
Day Index | apptamin.com | miratrix.co.uk | |
---|---|---|---|
0 | 2018-08-31 | 35.354169 | 79.576190 |
1 | 2018-09-01 | 97.529284 | 45.990192 |
2 | 2018-09-02 | 8.643297 | 63.922349 |
3 | 2018-09-03 | 19.927875 | 54.022379 |
4 | 2018-09-04 | 47.617416 | 57.075055 |
ranking_data = pd.read_csv(r"C:\Users\jki\Downloads\other_data\ranking_data.csv")
ranking_data.head()
Day Index | app competitor analysis | app marketing agency | app marketing agency london | app marketing agency uk | app marketing uk | app store optimisation agency | aso audit | mobile advertising agency | mobile advertising agency london | mobile app marketing agency | mobile marketing agency | mobile marketing agency london | mobile marketing services uk | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2018-08-31 | 6 | 1 | 6 | 10 | 21 | 20 | 17 | 26 | 0 | 21 | 23 | 9 | 1 |
1 | 2018-09-01 | 21 | 5 | 12 | 8 | 1 | 28 | 12 | 16 | 18 | 7 | 0 | 7 | 7 |
2 | 2018-09-02 | 22 | 7 | 14 | 29 | 19 | 16 | 14 | 10 | 10 | 28 | 10 | 4 | 6 |
3 | 2018-09-03 | 28 | 28 | 29 | 23 | 0 | 10 | 11 | 8 | 27 | 21 | 3 | 11 | 27 |
4 | 2018-09-04 | 14 | 14 | 26 | 17 | 11 | 26 | 13 | 17 | 11 | 22 | 8 | 28 | 0 |
ranking_data[['Day Index','app competitor analysis']].head()
Day Index | app competitor analysis | |
---|---|---|
0 | 2018-08-31 | 6 |
1 | 2018-09-01 | 21 |
2 | 2018-09-02 | 22 |
3 | 2018-09-03 | 28 |
4 | 2018-09-04 | 14 |
In this section we're going to see how to use an open-source API to gather data that we can use in our strategic analysis. IMPORTANT: currently this API will only work with pandas 0.23.4. This will change in the future. https://github.com/GeneralMills/pytrends. Thanks to General Mills for creating this handy repository.
#Let's create a variable that stores the API settings
pytrends = TrendReq(hl='en-GB',
tz=360, #timezone offset
timeout=(10,25), #time out after a few seconds
retries=2, #retry twice on failure
backoff_factor=0.1) #back off between retry attempts
#Create a list with 5 keywords you'd like to investigate
kw_list = ["seo audit", "aso audit", "app marketing agency","seo agency", "aso agency"]
It's possible to change the categories, locations and regions. You can find your category here: https://github.com/pat310/google-trends-api/wiki/Google-Trends-Categories
#Pass your variables over to Google Trends
pytrends.build_payload(kw_list, #keywords to look up
cat=83, #category to look in
timeframe='today 5-y', #timeframe
geo='', #country ('' means worldwide)
gprop='') #Google property ('' means web search)
!pip install retrying
Collecting retrying Obtaining dependency information for retrying from https://files.pythonhosted.org/packages/8f/04/9e36f28be4c0532c0e9207ff9dc01fb13a2b0eb036476a213b0000837d0e/retrying-1.3.4-py3-none-any.whl.metadata Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB) Requirement already satisfied: six>=1.7.0 in c:\users\jki\anaconda3\lib\site-packages (from retrying) (1.16.0) Downloading retrying-1.3.4-py3-none-any.whl (11 kB) Installing collected packages: retrying Successfully installed retrying-1.3.4
import pandas as pd
from retrying import retry
@retry(stop_max_attempt_number=5) # Retry for a maximum of 5 times
def fetch_trends_data():
trends_data = pytrends.interest_over_time()
return trends_data
try:
trends_data = fetch_trends_data()
# Convert the trends data to a Pandas DataFrame
trends_df = pd.DataFrame(trends_data)
print(trends_df)
except Exception as e:
print("Error fetching trends data:", e)
seo audit aso audit app marketing agency seo agency \ date 2019-03-10 0 0 25 27 2019-03-17 0 29 23 58 2019-03-24 0 0 0 49 2019-03-31 0 0 0 73 2019-04-07 0 0 0 91 ... ... ... ... ... 2024-02-11 0 0 0 83 2024-02-18 0 0 0 61 2024-02-25 0 0 0 78 2024-03-03 0 0 0 0 2024-03-10 0 0 0 0 aso agency isPartial date 2019-03-10 0 False 2019-03-17 0 False 2019-03-24 0 False 2019-03-31 0 False 2019-04-07 0 False ... ... ... 2024-02-11 0 False 2024-02-18 0 False 2024-02-25 0 False 2024-03-03 0 False 2024-03-10 0 True [262 rows x 6 columns]
trends_data.plot()
<Axes: xlabel='date'>
In this section you'll be visualising the data we loaded previously in this notebook using Pandas' built-in .plot() function. The function helps you create visualisations of simple data quickly without the need to load other libraries.
#Visualise page data using a bar chart, sorted by pageviews
ga_page_data[:10].sort_values(by='Pageviews', ascending=False).plot.bar(figsize=(15,5))
<Axes: >
#Plot another bar chart, give it a secondary y axis for Impressions and stack the bars
gsc_query[:10].plot.bar(figsize=(25,5), secondary_y="Impressions", stacked=True)
<Axes: >
#Plot another bar chart but this time increase the font size
gsc_country[['Country','Clicks']][:10].plot.bar(figsize=(15,4), fontsize=14)
<Axes: >
ppc_data['Cost'][:10].plot(kind='line', figsize=(15,5))
<Axes: >
fb_data.head()
Unnamed: 0 | Reporting starts | Reporting ends | Campaign name | Delivery | Mobile app installs | Mobile app actions | Cost per mobile app install (GBP) | Cost per mobile app action (GBP) | Desktop app installs | Desktop app engagement | Cost per desktop app install (GBP) | Cost per desktop app engagement (GBP) | Post reactions | Post comments | Post shares | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2014-02-21 | 2018-01-29 | Campaign1 | 0 | 11451.0 | NaN | 1.275683 | NaN | NaN | NaN | NaN | NaN | 1807.0 | 42.0 | 133.0 |
1 | 1 | 2015-12-04 | 2015-12-04 | Campaign2 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
2 | 2 | 2015-10-21 | 2015-10-21 | Campaign3 | inactive | 2.0 | NaN | 3.530000 | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
3 | 3 | 2015-10-20 | 2015-10-20 | Campaign3 | inactive | 2.0 | NaN | 16.500000 | NaN | NaN | NaN | NaN | NaN | 15.0 | NaN | NaN |
4 | 4 | 2015-10-20 | 2015-10-20 | Campaign4 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
#Scatter charts are useful for spotting relationship patterns, create one between two metrics
fb_data.plot.scatter(x='Mobile app installs',
y='Cost per mobile app install (GBP)',figsize=(15,5))
<Axes: xlabel='Mobile app installs', ylabel='Cost per mobile app install (GBP)'>
#Plot two scatter graphs on top of each other and format it
ax = fb_data.plot.scatter(x='Post reactions',y='Cost per mobile app install (GBP)',
color='Magenta', label='Group 1',
alpha=.6, s=fb_data['Cost per mobile app install (GBP)']*5,
figsize=(15,5))
fb_data.plot.scatter(x='Mobile app installs',y='Cost per mobile app install (GBP)',
color='Cyan',
label='Group 2', ax=ax, alpha=.5,
figsize=(15,5))
<Axes: xlabel='Mobile app installs', ylabel='Cost per mobile app install (GBP)'>
#Plot a line graph with a secondary y of the largest value
trends_data.plot(secondary_y=('seo agency'), figsize=(15,5))
<Axes: xlabel='date'>
#Now plot a chart of the cumulative sum of all the data
trends_data.cumsum().plot(figsize=(15,5))
<Axes: xlabel='date'>
In order to use your marketing data correctly it needs to be in the right format to run calculations, draw charts and make predictions. Getting your data into the correct order is referred to as data wrangling.
In this section you will:
import pandas as pd # Load the pandas library
#Import google analytics pages data
ga_page_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_pages.csv", skiprows=6, nrows=376)
ga_page_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_pages.csv",
skiprows=386, nrows=366, parse_dates=['Day Index'])
#Importing pages data from Google Search Console
gsc_page = pd.read_csv(r"C:\Users\jki\Downloads\gsc_data\gsc_pages.csv")
#Import Facebook ad data
fb_data = pd.read_csv(r"C:\Users\jki\Downloads\other_data\facebook-data-ads.csv",
parse_dates=['Reporting starts','Reporting ends'])
We know that some of the data being gathered in Google Analytics is wrong. Google Analytics has been gathering homepage data under two pages: / and /www.miratrix.co.uk.
Also, during this process we will want to create a URL that can be paired with data gathered from Google Search Console. I'll split the data on '/' ('/' denotes a break in the URL) to break each URL into three parts, then rebuild the URL in an order that matches the GSC URLs.
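Before applying it to the real data, here's a minimal sketch of how pandas' str.rpartition behaves, splitting each string on its last '/' into the text before it, the separator itself, and the text after it:
import pandas as pd

sample = pd.Series(["/get-in-touch/", "/www.miratrix.co.uk"])
print(sample.str.rpartition("/")) #columns 0, 1, 2: before, separator, after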
#Look at Google Analytics data
ga_page_data.head()
Page | Pageviews | Unique Pageviews | Avg. Time on Page | Entrances | Bounce Rate | % Exit | Page Value | |
---|---|---|---|---|---|---|---|---|
0 | / | 813 | 665 | 00:02:05 | 641 | 14.35% | 55.97% | £0.00 |
1 | /www.miratrix.co.uk | 515 | 388 | 00:02:17 | 373 | 59.25% | 56.89% | £0.00 |
2 | /get-in-touch/ | 336 | 163 | 00:01:06 | 23 | 10.53% | 33.04% | £0.00 |
3 | /app-marketing-agency/ | 140 | 120 | 00:02:04 | 73 | 18.31% | 57.86% | £0.00 |
4 | /app-store-optimization-services/ | 136 | 105 | 00:01:30 | 22 | 14.29% | 47.79% | £0.00 |
#Split the URL on its last '/'
split_dom = ga_page_data.Page.str.rpartition("/")
split_dom.head()
0 | 1 | 2 | |
---|---|---|---|
0 | / | ||
1 | / | www.miratrix.co.uk | |
2 | /get-in-touch | / | |
3 | /app-marketing-agency | / | |
4 | /app-store-optimization-services | / |
#Fill the empty third column (homepage rows) with the domain
split_dom[2].replace("", "miratrix.co.uk", inplace=True)
#Change the URL format so that it matches the other data sources
split_dom[2].replace("www.miratrix.co.uk", "miratrix.co.uk", inplace=True)
split_dom.head()
0 | 1 | 2 | |
---|---|---|---|
0 | / | miratrix.co.uk | |
1 | / | miratrix.co.uk | |
2 | /get-in-touch | / | miratrix.co.uk |
3 | /app-marketing-agency | / | miratrix.co.uk |
4 | /app-store-optimization-services | / | miratrix.co.uk |
#Build the URL and replace the current URLs with the new URL format
ga_page_data.Page = "https://" + split_dom[2] + split_dom[0] + "/"
ga_page_data.head()
Page | Pageviews | Unique Pageviews | Avg. Time on Page | Entrances | Bounce Rate | % Exit | Page Value | |
---|---|---|---|---|---|---|---|---|
0 | https://miratrix.co.uk/ | 813 | 665 | 00:02:05 | 641 | 14.35% | 55.97% | £0.00 |
1 | https://miratrix.co.uk/ | 515 | 388 | 00:02:17 | 373 | 59.25% | 56.89% | £0.00 |
2 | https://miratrix.co.uk/get-in-touch/ | 336 | 163 | 00:01:06 | 23 | 10.53% | 33.04% | £0.00 |
3 | https://miratrix.co.uk/app-marketing-agency/ | 140 | 120 | 00:02:04 | 73 | 18.31% | 57.86% | £0.00 |
4 | https://miratrix.co.uk/app-store-optimization-... | 136 | 105 | 00:01:30 | 22 | 14.29% | 47.79% | £0.00 |
In order to fix the issue of having two homepage URLs (and possibly more) we will need to group the data by Page. This poses some issues, as not all the data is numerical and you are also dealing with a timeseries.
What can be done is to change the object fields to numerical types and to convert the time-on-page column to a timedelta.
ga_page_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 376 entries, 0 to 375 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Page 376 non-null object 1 Pageviews 376 non-null int64 2 Unique Pageviews 376 non-null int64 3 Avg. Time on Page 376 non-null object 4 Entrances 376 non-null int64 5 Bounce Rate 376 non-null object 6 % Exit 376 non-null object 7 Page Value 376 non-null object dtypes: int64(3), object(5) memory usage: 23.6+ KB
#Replace 0 with time format 00:00:00
ga_page_data['Avg. Time on Page'] = ga_page_data['Avg. Time on Page'].replace(0, '00:00:00')
#Convert column from string to time delta
ga_page_data['Avg. Time on Page'] = pd.to_timedelta(ga_page_data['Avg. Time on Page'])
#Replace % with nothing
ga_page_data['Bounce Rate'] = ga_page_data['Bounce Rate'].str.replace('%',"")
#Convert the column into a Float
ga_page_data['Bounce Rate'] = ga_page_data['Bounce Rate'].astype('float')
#Replace % with nothing
ga_page_data['% Exit'] = ga_page_data['% Exit'].str.replace('%',"")
#Convert the column into a Float
ga_page_data['% Exit'] = ga_page_data['% Exit'].astype('float')
#Replace £ with nothing
ga_page_data['Page Value'] = ga_page_data['Page Value'].str.replace('£',"")
#Convert the column into a Float
ga_page_data['Page Value'] = ga_page_data['Page Value'].astype('float')
#Set the index for the dataframe to Page
ga_page_data.set_index('Page',inplace=True)
#Check the data to make sure the data types have changed
ga_page_data.info()
<class 'pandas.core.frame.DataFrame'> Index: 376 entries, https://miratrix.co.uk/ to https://www.miratrix.co.uk#utm_source=nav&utm_medium=traffic&utm_campaign=datatools/ Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Pageviews 376 non-null int64 1 Unique Pageviews 376 non-null int64 2 Avg. Time on Page 376 non-null timedelta64[ns] 3 Entrances 376 non-null int64 4 Bounce Rate 376 non-null float64 5 % Exit 376 non-null float64 6 Page Value 376 non-null float64 dtypes: float64(3), int64(3), timedelta64[ns](1) memory usage: 23.5+ KB
#Groupby Index and get the mean of the 'Avg. Time on Page'
avg_time = ga_page_data.groupby(ga_page_data.index)['Avg. Time on Page'].mean(numeric_only=False)
#Convert avg_time to dataframe and store as ga_Time
ga_time = pd.DataFrame(avg_time)
#Groupby bounce, exit, pageviews and Unique pageviews using the appropriate math
#and store in a dataframe. Save as ga_bou_ex_type
ga_bou_ex_type = pd.DataFrame(ga_page_data.groupby(ga_page_data.index).agg({
'Bounce Rate': ['mean'], "% Exit": ['mean'],'Pageviews': sum, 'Unique Pageviews': sum}))
#Check your time groupby
ga_time['Avg. Time on Page'].head()
Page https://#utm_source=nav&utm_medium=traffic&utm_campaign=datatools/ 0 days 00:00:19 https://#utm_source=nav&utm_medium=traffic&utm_campaign=datatools/app-marketing-agency/ 0 days 00:00:04 https://+&cd=10&hl=en&ct=clnk&gl=in/search?q=cache:bI4Yk20ZHV8J:https://miratrix.co.uk/get-in-touch/ 0 days 00:00:00 https://+&cd=5&hl=en&ct=clnk&gl=uk/search?q=cache:CAP7TiQpKL4J:https://miratrix.co.uk/tag/bridgnorth-shoplifters/ 0 days 00:00:00 https://39/blog/page/ 0 days 00:00:00 Name: Avg. Time on Page, dtype: timedelta64[ns]
#Check your aggregated groupby
ga_bou_ex_type.info()
<class 'pandas.core.frame.DataFrame'> Index: 316 entries, https://#utm_source=nav&utm_medium=traffic&utm_campaign=datatools/ to https://xFRAx2jfTOmg+6EPY+09e3SpRN05tZm6PA=&b=5&f=frame&u=quh9O6fk3cp+g1Q0B84ejuJAQEZJll4HdKCQfOVJedw=&b=5/go.php?u=quh9O7m+gcpnkEcjR8wG07lIAEwDjhYPfuuLYf1GaJpICjY4ebQy5itrwJGI8mdy/xFTBhuneDOmg+6EPY+09e3SpRN05tZo5/w=&b=5&f=frame&u=quh9O7m+gcpnkEcjR8wG07lIAEwDjhYPfuuLYf1GaJpICjY4ebQy5itrwJGI8mdy/ Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 (Bounce Rate, mean) 316 non-null float64 1 (% Exit, mean) 316 non-null float64 2 (Pageviews, sum) 316 non-null int64 3 (Unique Pageviews, sum) 316 non-null int64 dtypes: float64(2), int64(2) memory usage: 12.3+ KB
Here we will rebuild the Google Analytics dataset with the fixed data for use later.
#Merge ga_time and ga_bou_ex_type on Page with an inner join
new_ga = pd.merge(ga_time, #left dataset
ga_bou_ex_type, #right dataset
how='inner', #keep only rows present in both datasets
left_on="Page", #the left dataset is joined on the column Page
right_on="Page" #the right dataset is joined on the column Page
).sort_values(by="Avg. Time on Page", ascending=False) #sort the new dataframe
C:\Users\jki\AppData\Local\Temp\ipykernel_1124\1241091563.py:2: FutureWarning: merging between different levels is deprecated and will be removed in a future version. (1 levels on the left, 2 on the right) new_ga = pd.merge(ga_time, #left dataset
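The FutureWarning appears because ga_bou_ex_type carries two column levels from the .agg() call while ga_time has one. A hedged sketch of how the warning could be avoided by flattening the column levels before merging (not run here, since the rename cell below relies on the tuple column names):
#Join each (column, aggregation) pair into a single flat string, e.g. 'Bounce Rate_mean'
ga_bou_ex_type.columns = ['_'.join(col) for col in ga_bou_ex_type.columns]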
new_ga.head()
Avg. Time on Page | (Bounce Rate, mean) | (% Exit, mean) | (Pageviews, sum) | (Unique Pageviews, sum) | |
---|---|---|---|---|---|
Page | |||||
https://channels/channel/UCAQfRNzXGD4BQICkO1KQZUA/ | 0 days 00:21:45 | 0.0 | 0.0 | 1 | 1 |
https://miratrix.co.uk/top-3-competitor-analysis-for-aso/ | 0 days 00:18:09.500000 | 50.0 | 87.5 | 5 | 5 |
https://www.miratrix.co.uk?SuperSocializerAuth=LiveJournal/blog/page/30/ | 0 days 00:14:57 | 0.0 | 0.0 | 1 | 1 |
https://about/channel/UCAQfRNzXGD4BQICkO1KQZUA/ | 0 days 00:14:45 | 0.0 | 0.0 | 2 | 2 |
https://videos?view=0&sort=p&flow=grid/channel/UCAQfRNzXGD4BQICkO1KQZUA/ | 0 days 00:14:43 | 0.0 | 0.0 | 1 | 1 |
#rename (Bounce Rate, mean), (% Exit, mean), (Pageviews, sum), (Unique Pageviews, sum)
new_ga = new_ga.rename(columns={('Bounce Rate', 'mean'): 'Bounce Rate', ('% Exit', 'mean') : 'Exit',
('Pageviews', 'sum') : 'Pageviews',
('Unique Pageviews', 'sum') : 'Unique Pageviews'})
#Check the rename worked
new_ga.head()
Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | |
---|---|---|---|---|---|
Page | |||||
https://channels/channel/UCAQfRNzXGD4BQICkO1KQZUA/ | 0 days 00:21:45 | 0.0 | 0.0 | 1 | 1 |
https://miratrix.co.uk/top-3-competitor-analysis-for-aso/ | 0 days 00:18:09.500000 | 50.0 | 87.5 | 5 | 5 |
https://www.miratrix.co.uk?SuperSocializerAuth=LiveJournal/blog/page/30/ | 0 days 00:14:57 | 0.0 | 0.0 | 1 | 1 |
https://about/channel/UCAQfRNzXGD4BQICkO1KQZUA/ | 0 days 00:14:45 | 0.0 | 0.0 | 2 | 2 |
https://videos?view=0&sort=p&flow=grid/channel/UCAQfRNzXGD4BQICkO1KQZUA/ | 0 days 00:14:43 | 0.0 | 0.0 | 1 | 1 |
Sometimes we end up with data that we don't need and want to drop out of our dataframe. In this tutorial you'll learn how to drop columns from Google Analytics and Facebook Ads data.
#Check the Google Analytics Page data
ga_page_traffic_data.head()
Day Index | Pageviews | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | |
---|---|---|---|---|---|---|---|---|
0 | 2018-08-31 | 14 | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 2018-09-01 | 6 | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 2018-09-02 | 17 | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 2018-09-03 | 14 | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 2018-09-04 | 11 | NaN | NaN | NaN | NaN | NaN | NaN |
#Drop any columns that contain NaN values
ga_page_traffic_data.dropna(axis=1, inplace=True)
#Check the dropna worked
ga_page_traffic_data.head()
Day Index | Pageviews | |
---|---|---|
0 | 2018-08-31 | 14 |
1 | 2018-09-01 | 6 |
2 | 2018-09-02 | 17 |
3 | 2018-09-03 | 14 |
4 | 2018-09-04 | 11 |
#Check the Facebook Ads data
fb_data.head()
Unnamed: 0 | Reporting starts | Reporting ends | Campaign name | Delivery | Mobile app installs | Mobile app actions | Cost per mobile app install (GBP) | Cost per mobile app action (GBP) | Desktop app installs | Desktop app engagement | Cost per desktop app install (GBP) | Cost per desktop app engagement (GBP) | Post reactions | Post comments | Post shares | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2014-02-21 | 2018-01-29 | Campaign1 | 0 | 11451.0 | NaN | 1.275683 | NaN | NaN | NaN | NaN | NaN | 1807.0 | 42.0 | 133.0 |
1 | 1 | 2015-12-04 | 2015-12-04 | Campaign2 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
2 | 2 | 2015-10-21 | 2015-10-21 | Campaign3 | inactive | 2.0 | NaN | 3.530000 | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
3 | 3 | 2015-10-20 | 2015-10-20 | Campaign3 | inactive | 2.0 | NaN | 16.500000 | NaN | NaN | NaN | NaN | NaN | 15.0 | NaN | NaN |
4 | 4 | 2015-10-20 | 2015-10-20 | Campaign4 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
#Drop the unneeded 'Unnamed: 0' index column
fb_data.drop('Unnamed: 0', axis=1, inplace=True)
#Check the drop worked
fb_data.head()
Reporting starts | Reporting ends | Campaign name | Delivery | Mobile app installs | Mobile app actions | Cost per mobile app install (GBP) | Cost per mobile app action (GBP) | Desktop app installs | Desktop app engagement | Cost per desktop app install (GBP) | Cost per desktop app engagement (GBP) | Post reactions | Post comments | Post shares | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2014-02-21 | 2018-01-29 | Campaign1 | 0 | 11451.0 | NaN | 1.275683 | NaN | NaN | NaN | NaN | NaN | 1807.0 | 42.0 | 133.0 |
1 | 2015-12-04 | 2015-12-04 | Campaign2 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
2 | 2015-10-21 | 2015-10-21 | Campaign3 | inactive | 2.0 | NaN | 3.530000 | NaN | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN |
3 | 2015-10-20 | 2015-10-20 | Campaign3 | inactive | 2.0 | NaN | 16.500000 | NaN | NaN | NaN | NaN | NaN | 15.0 | NaN | NaN |
4 | 2015-10-20 | 2015-10-20 | Campaign4 | inactive | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
We can see from the Facebook Ads data that there are a lot of NaN (Not a Number) values; these represent missing data. In order to visualise this data we will need to replace them with a number. In this case we will use 0.
fb_data.fillna(0,inplace=True)
#Check that fillna worked
fb_data.head()
Reporting starts | Reporting ends | Campaign name | Delivery | Mobile app installs | Mobile app actions | Cost per mobile app install (GBP) | Cost per mobile app action (GBP) | Desktop app installs | Desktop app engagement | Cost per desktop app install (GBP) | Cost per desktop app engagement (GBP) | Post reactions | Post comments | Post shares | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2014-02-21 | 2018-01-29 | Campaign1 | 0 | 11451.0 | 0.0 | 1.275683 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1807.0 | 42.0 | 133.0 |
1 | 2015-12-04 | 2015-12-04 | Campaign2 | inactive | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 |
2 | 2015-10-21 | 2015-10-21 | Campaign3 | inactive | 2.0 | 0.0 | 3.530000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 |
3 | 2015-10-20 | 2015-10-20 | Campaign3 | inactive | 2.0 | 0.0 | 16.500000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 15.0 | 0.0 | 0.0 |
4 | 2015-10-20 | 2015-10-20 | Campaign4 | inactive | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
In order to extract more value from your datasets you can join them together so that you can view the trends all in one place. Pandas has various methods for joining data; in this case you will use pd.merge.
#Use pandas merge to join Google Search Console and Google Analytics data
ga_gsc_data = pd.merge(gsc_page, #left data
new_ga, #right data
how='inner',
left_on="Page",
right_index=True)
#Check it worked
ga_gsc_data.head()
Page | Clicks | Impressions | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | |
---|---|---|---|---|---|---|---|---|---|---|
0 | https://miratrix.co.uk/ | 371 | 100673 | 0.37% | 42.59 | 0 days 00:01:43.666666666 | 47.296667 | 54.286667 | 1396 | 1095 |
1 | https://miratrix.co.uk/app-marketing-agency/ | 80 | 30824 | 0.26% | 26.84 | 0 days 00:01:45.333333333 | 64.436667 | 60.913333 | 210 | 181 |
2 | https://miratrix.co.uk/mobile-app-competitor-a... | 16 | 1458 | 1.1% | 30.64 | 0 days 00:04:06 | 53.335000 | 89.475000 | 30 | 26 |
3 | https://miratrix.co.uk/blippar-the-message-isn... | 11 | 517 | 2.13% | 36.05 | 0 days 00:00:22.333333333 | 58.170000 | 78.333333 | 27 | 21 |
4 | https://miratrix.co.uk/author/nduddy/ | 10 | 140 | 7.14% | 5.61 | 0 days 00:02:52 | 22.223333 | 38.890000 | 15 | 13 |
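As an aside, because new_ga is indexed by Page, a minimal sketch of the same join using DataFrame.join (assuming the frames built above) would be:
#Set Page as the index on the GSC data, then join on the shared index
ga_gsc_alt = gsc_page.set_index('Page').join(new_ga, how='inner')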
Saving data in pandas is super easy. We'll be using .to_csv() to save the augmented and new datasets for use later.
#Save the newly created Page dataset
ga_gsc_data.to_csv('ga_gsc_data.csv')
#Save the amended facebook dataset
fb_data.to_csv('fixed_fb_data.csv')
Visualisation is a huge part of what I do as a marketer. Visualising data helps me spot trends, problems and opportunities. Having strong visualisation skills will also help you to communicate your data-driven ideas and plans to your boss/team/clients/stakeholders.
By the end of this section I will have shown you how to draw, augment and stylise your visualisations using Pandas, Matplotlib and Seaborn.
import pandas as pd # importing pandas library
import matplotlib.pyplot as plt #importing matplotlib library
import seaborn as sns #importing seaborn package
%matplotlib inline
import warnings #for managing error message
warnings.filterwarnings('ignore')
ga_gsc_data = pd.read_csv('ga_gsc_data.csv',
usecols=['Page', 'Clicks', 'Impressions', 'CTR', 'Position',
'Avg. Time on Page', 'Bounce Rate', 'Exit', 'Pageviews',
'Unique Pageviews'])
#View the make up of the data using .info()
ga_gsc_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 193 entries, 0 to 192 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Page 193 non-null object 1 Clicks 193 non-null int64 2 Impressions 193 non-null int64 3 CTR 193 non-null object 4 Position 193 non-null float64 5 Avg. Time on Page 193 non-null object 6 Bounce Rate 193 non-null float64 7 Exit 193 non-null float64 8 Pageviews 193 non-null int64 9 Unique Pageviews 193 non-null int64 dtypes: float64(3), int64(4), object(3) memory usage: 15.2+ KB
#Have a look at the data
ga_gsc_data.head()
Page | Clicks | Impressions | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | |
---|---|---|---|---|---|---|---|---|---|---|
0 | https://miratrix.co.uk/ | 371 | 100673 | 0.37% | 42.59 | 0 days 00:01:43.666666666 | 47.296667 | 54.286667 | 1396 | 1095 |
1 | https://miratrix.co.uk/app-marketing-agency/ | 80 | 30824 | 0.26% | 26.84 | 0 days 00:01:45.333333333 | 64.436667 | 60.913333 | 210 | 181 |
2 | https://miratrix.co.uk/mobile-app-competitor-a... | 16 | 1458 | 1.1% | 30.64 | 0 days 00:04:06 | 53.335000 | 89.475000 | 30 | 26 |
3 | https://miratrix.co.uk/blippar-the-message-isn... | 11 | 517 | 2.13% | 36.05 | 0 days 00:00:22.333333333 | 58.170000 | 78.333333 | 27 | 21 |
4 | https://miratrix.co.uk/author/nduddy/ | 10 | 140 | 7.14% | 5.61 | 0 days 00:02:52 | 22.223333 | 38.890000 | 15 | 13 |
You can see from the info table above that we have some Series which should be numerical or time-based but are instead objects. Before visualising you must transform this data into the correct data types.
#Remove the % sign using string replace and convert the column to a float
ga_gsc_data.CTR = ga_gsc_data.CTR.str.replace('%','').astype('float')
#Replace 0 days with nothing and convert the remaining string to a timedelta
ga_gsc_data['Avg. Time on Page'] = ga_gsc_data['Avg. Time on Page'].str.replace('0 days ','').apply(pd.Timedelta)
#Set Page as index
ga_gsc_data.set_index('Page', inplace=True)
#Plot a basic bar chart showing the statistical breakdown of the dataset
ga_gsc_data.describe().plot.bar(figsize=(15,5), secondary_y='Impressions')
<Axes: >
ga_gsc_data[['Clicks', 'Pageviews', 'Position']][:20].plot.bar(figsize=(15,5),
secondary_y=('Position'))
<Axes: xlabel='Page'>
Matplotlib is the underlying plotting library for Pandas. Many of the more recent tools created for visualising data are built on top of matplotlib, including Seaborn, a popular library that we'll investigate later in this notebook. In this section you'll use functions from matplotlib to customise charts for presentations.
## Creating matplotlib subplots
Subplots are a common method for drawing two plots either side by side or as an overlay. You will see this concept depicted in many Python plotting tutorials and it's good to understand how the functions work.
#Create subplots of two Series of data from ga_gsc_data
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(121) # Create matplotlib axes (nrows, ncolumns, position)
ax2 = fig.add_subplot(122) # Create another axes
#first plot
ga_gsc_data['Clicks'][:10].plot.bar(ax=ax #set the matplotlib axes to ax
)
#second plot
ga_gsc_data['Pageviews'][:10].plot.bar(ax=ax2 #set the matplotlib axes to ax2
)
plt.show() #display the charts
On many occasions in marketing you will need to plot a secondary y axis. This is because many datasets have one column of data much larger than any other, which makes it very difficult to understand the relationships between data points as one dwarfs the rest. In this video you will learn how to plot a secondary y axis and overlay two subplots onto a single chart.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
width = 0.2 #the width of the bars
ga_gsc_data['Clicks'][:10].plot.bar(color='red', #colour of bar
ax=ax, #which matplotlib axes to use
width=width, #set the bar width
position=1) #the position of the bar relative to the others
ga_gsc_data['Pageviews'][:10].plot(kind='bar', #type of plot
color='blue', #colour of bar
ax=ax2, #which matplotlib axes to use
width=width, #set the bar width
position=0) #the position of the bar relative to the others
#display the charts
plt.show()
## Adding x and y labels to a plot
We may need to add labels to our x and y axes to ensure a report is clear and that those not familiar with the topic can follow what is going on. In this tutorial you'll learn how to do this.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
width = 0.2 #set the width of the bars
ga_gsc_data['Clicks'][:10].plot(kind='bar', #type of chart to use
color='yellow', #colour of bar
ax=ax, #which matplotlib axes to use
width=width, #set the bar width
position=1, #the position of the bar relative to the others
fontsize=10) #set font size
ga_gsc_data['Pageviews'][:10].plot(kind='bar',
color='lightgreen',
ax=ax2,
width=width,
position=0)
#Create ax customisations
ax.set_xlabel('Page') #name the x axis
ax.set_ylabel('Clicks') # name the primary y axis
#Create ax2 customisations
ax2.set_ylabel('Pageviews') #name the secondary y axis
#display the charts
plt.show()
Sometimes you may want to change the position of your ticks to make them more readable. This is particularly apparent in this example, where we're dealing with URL strings as the xtick labels. In this video you'll learn how to change the angle at which the labels are displayed.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
width = 0.2 #set the width of the bars
ga_gsc_data['Clicks'][:10].plot(kind='bar', #type of chart to use
color='yellow', #colour of bar
ax=ax, #which matplotlib axes to use
width=width, #set the bar width
position=1, #the position of the bar relative to the others
fontsize=10) #set font size
ga_gsc_data['Pageviews'][:10].plot(kind='bar',
color='lightgreen',
ax=ax2,
width=width,
position=0)
#Create ax customisations
ax.set_xlabel('Page') #name the x axis
ax.set_ylabel('Clicks') #name the primary y axis
ax.set_xticklabels(ax.get_xticklabels(), #get the strings with the label names
rotation=45) #rotate the xtick labels by 45 degrees
#Create ax2 customisations
ax2.set_ylabel('Pageviews') #name the secondary y axis
#display the charts
plt.show()
When creating plots using subplots you will need to explicitly set the legend for the figure. If you are plotting multiple data series and they are not labelled correctly, the person reading the plot won't know what you're referencing. This will make communicating your needs and plans difficult.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
width = 0.2 #set the width of the bars
ga_gsc_data['Clicks'][:10].plot(kind='bar', #type of chart to use
color='yellow', #colour of bar
ax=ax, #which matplotlib axes to use
width=width, #set the bar width
position=1, #the position of the bar relative to the others
fontsize=10) #set font size
ga_gsc_data['Pageviews'][:10].plot(kind='bar',
color='lightgreen',
ax=ax2,
width=width,
position=0)
#Create ax customisations
ax.set_xlabel('Page') #name the x axis
ax.set_ylabel('Clicks') #name the primary y axis
ax.set_xticklabels(ax.get_xticklabels(), #get the strings with the label names
rotation=45) #rotate the xtick labels by 45 degrees
#Create ax2 customisations
ax2.set_ylabel('Pageviews') #name the secondary y axis
#Global Plot settings
fig.legend() # set the legend
#display the charts
plt.show()
Properly labelling your chart so that a viewer can immediately understand what's being communicated is important for you and your organisation. You can easily set the chart name by plotting a title.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
width = 0.2 #set the width of the bars
ga_gsc_data['Clicks'][:10].plot(kind='bar', #type of chart to use
color='yellow', #colour of bar
ax=ax, #which matplotlib axes to use
width=width, #set the bar width
position=1, #the position of the bar relative to the others
fontsize=10) #set font size
ga_gsc_data['Pageviews'][:10].plot(kind='bar',
color='lightgreen',
ax=ax2,
width=width,
position=0)
#Create ax2 customisations
ax2.set_ylabel('Pageviews') #name the secondary y axis
#Global Plot settings
plt.title('Bar Chart: Clicks vs Pageview') # set chart name
fig.legend() # set the legend
#display the charts
plt.show()
This is a great way of communicating to others clearly that "PEOPLE, THE PROBLEM IS RIGHT HERE!". Using plot annotations allows you to be very specific about which parts of a chart you want viewers to pay attention to, and it's pretty easy to do.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
width = 0.2 #set the width of the bars
ga_gsc_data['Clicks'][:10].plot(kind='bar', #type of chart to use
color='yellow', #colour of bar
ax=ax, #which matplotlib axes to use
width=width, #set the bar width
position=1, #the position of the bar relative to the others
fontsize=10) #set font size
ga_gsc_data['Pageviews'][:10].plot(kind='bar',
color='lightgreen',
ax=ax2,
width=width,
position=0)
#Create ax customisations
ax.annotate('Lots of Pageviews but few clicks',
xy=(8, 50),
xytext=(2, 120),
arrowprops=dict(facecolor='cyan', #arrow face colour
shrink=0.05, #shrink the arrow from both ends
lw=1, #line width
ec='magenta', #border colour
zorder=1) #layering order of annotation
)
ax.set_xlabel('Page') #name the x axis
ax.set_ylabel('Clicks') # name the primary y axis
ax.set_xticklabels(ax.get_xticklabels(), #get the strings with the label names
rotation=45) #rotate the xtick labels by 45 degrees
#Create ax2 customisations
ax2.annotate('Lots of Pageviews and clicks',
xy=(0, 1398),
xytext=(1, 1398),
arrowprops=dict(facecolor='cyan', #arrow face colour
shrink=0.05, #shrink the arrow from both ends
lw=1, #line width
ec='magenta', #border colour
zorder=1) #layering order of annotation
)
ax2.set_ylabel('Pageviews') #name the secondary y axis
#Global Plot settings
plt.title('Bar Chart: Clicks vs Pageview') # set chart name
fig.legend() # set the legend
#display the charts
plt.show()
If you're like me and you don't want to think about colour schemes too much then matplotlib has a solution for you. They've created a bunch of prebaked chart style sheets which you can switch between. This takes the thought out of styling so that you can focus on the key points you want to communicate.
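To list the style-sheet names your matplotlib install accepts, a quick sketch (note that newer matplotlib releases renamed the seaborn styles, e.g. 'seaborn-pastel' became 'seaborn-v0_8-pastel'):
import matplotlib.pyplot as plt

print(plt.style.available) #every name accepted by plt.style.use()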
plt.style.use('seaborn-pastel') #set a style sheet
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
ax2 = ax.twinx() # Create another axes that shares the same x-axis as ax.
width = 0.2 #set the width of the bars
ga_gsc_data['Clicks'][:10].plot(kind='bar', #type of chart to use
color='yellow',
ax=ax, #which matplotlib axes to use
width=width, #set the bar width
position=1, #the position of the bar relative to the others
fontsize=10) #set font size
ga_gsc_data['Pageviews'][:10].plot(kind='bar',
color='lightgreen',
ax=ax2,
width=width,
position=0)
#Create ax customisations
ax.annotate('Lots of Pageviews but few clicks',
xy=(8, 50),
xytext=(2, 120),
arrowprops=dict(shrink=0.05, #shrink the arrow from both ends
lw=1, #line width
zorder=1) #layering order of annotation
)
ax.set_xlabel('Page') #name the x axis
ax.set_ylabel('Clicks') # name the primary y axis
ax.set_xticklabels(ax.get_xticklabels(), #get the strings with the label names
rotation=45) #rotate the xtick labels by 45 degrees
#Create ax2 customisations
ax2.annotate('Lots of Pageviews and clicks',
xy=(0, 1398),
xytext=(1, 1398),
arrowprops=dict(shrink=0.05, #shrink the arrow from both ends
lw=1, #line width
zorder=1) #layering order of annotation
)
ax2.set_ylabel('Pageviews') #name the secondary y axis
#Global Plot settings
plt.title('Bar Chart: Clicks vs Pageview') # set chart name
fig.legend() # set the legend
#display the charts
plt.show()
plt.style.use('default') # restores the notebook back to default palette
Seaborn is a plotting package built on matplotlib that allows for very pretty plots with very little code. The syntax is different from matplotlib and pandas' plot, but you'll soon get a handle on it. We imported seaborn as sns earlier in the notebook.
In this section we're going to plot a scatter graph in Seaborn and customise it with colours and sizes to make the data more readable.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
sns.scatterplot(x='Impressions', # x axis
y="Clicks", # y axis
data=ga_gsc_data) #the dataset we're using
#display the charts
plt.show()
For this plot we will add the hue argument. This will allow us to apply a different hue to the dots based on a numerical value in the column we selected.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
sns.scatterplot(x='Impressions', # x axis
y="Clicks", # y axis
hue='Position', #colouring the dots based on a numerical value
data=ga_gsc_data) #the dataset we're using
#display the charts
plt.show()
This time we will add the size argument. This will allow us to apply a different size to the dots based on a numerical value in the column we selected.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
sns.scatterplot(x='Impressions', # x axis
y="Clicks", # y axis
hue='Position', #colouring the dots based on a numerical value
size="Pageviews", #sizing the dots based on a numerical value
data=ga_gsc_data) #the dataset we're using
#display the charts
plt.show()
Finally we will pass a colour palette to the scatter chart. This will change the colours of the plotted dots. In this case we are going to pass the colour palette "Set2".
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
sns.scatterplot(x='Impressions', # x axis
y="Clicks", # y axis
hue='Position', #colouring the dots based on a numerical value
size="Pageviews", #sizing the dots based on a numerical value
palette="Set2", #selecting a colour palette
data=ga_gsc_data) #the dataset we're using
#display the charts
plt.show()
It's difficult to argue that heatmaps aren't impressive-looking visualisations. They're also incredibly useful when you feed correlation data into them: a heatmap of correlation data can help you find related features in a dataset that you should focus on. Pandas together with Seaborn makes it very easy to create correlations that you can plot in a heatmap and get answers fast.
#Load the facebook data fb_example_data.csv
fb_camp_data = pd.read_csv(r"C:\Users\jki\Downloads\other_data\fb_example_data.csv")
fb_camp_data.head()
Results | Reach | Frequency | Cost per results | Budget | Amount spent (GBP) | Clicks (all) | CTR (all) | CPC (all) (GBP) | Impressions | CPM (cost per 1,000 impressions) (GBP) | Link clicks | CPC (cost per link click) (GBP) | CTR (link click-through rate) | Website purchases | Website purchases conversion value | Website purchase ROAS (return on advertising spend) | Button clicks | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 105 | 77905 | 1.182132 | 2.871238 | 187.2 | 301.48 | 2570 | 2.790627 | 0.117307 | 92094 | 3.273612 | 2126 | 0.141806 | 2.308511 | 105 | 10927.20 | 36.245190 | 482 |
1 | 77 | 59035 | 1.204590 | 3.910130 | 187.2 | 301.08 | 1846 | 2.595869 | 0.163099 | 71113 | 4.233825 | 1580 | 0.190557 | 2.221816 | 77 | 8525.30 | 28.315730 | 319 |
2 | 49 | 48718 | 1.254116 | 6.135102 | 187.2 | 300.62 | 1485 | 2.430521 | 0.202438 | 61098 | 4.920292 | 1223 | 0.245805 | 2.001702 | 49 | 5955.55 | 19.810891 | 261 |
3 | 66 | 66413 | 1.259859 | 3.957727 | 187.2 | 261.21 | 1703 | 2.035353 | 0.153382 | 83671 | 3.121870 | 1342 | 0.194642 | 1.603901 | 66 | 9079.10 | 34.757858 | 342 |
4 | 47 | 64472 | 1.320946 | 5.499787 | 187.2 | 258.49 | 1373 | 1.612184 | 0.188267 | 85164 | 3.035203 | 1099 | 0.235205 | 1.290451 | 47 | 4602.00 | 17.803397 | 257 |
#inspect the data
fb_camp_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 19 entries, 0 to 18 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Results 19 non-null int64 1 Reach 19 non-null int64 2 Frequency 19 non-null float64 3 Cost per results 19 non-null float64 4 Budget 19 non-null float64 5 Amount spent (GBP) 19 non-null float64 6 Clicks (all) 19 non-null int64 7 CTR (all) 19 non-null float64 8 CPC (all) (GBP) 19 non-null float64 9 Impressions 19 non-null int64 10 CPM (cost per 1,000 impressions) (GBP) 19 non-null float64 11 Link clicks 19 non-null int64 12 CPC (cost per link click) (GBP) 19 non-null float64 13 CTR (link click-through rate) 19 non-null float64 14 Website purchases 19 non-null int64 15 Website purchases conversion value 19 non-null float64 16 Website purchase ROAS (return on advertising spend) 19 non-null float64 17 Button clicks 19 non-null int64 dtypes: float64(11), int64(7) memory usage: 2.8 KB
#Create a correlation matrix and store as fb_corr
fb_corr = fb_camp_data.corr()
#Use seaborn to draw a heatmap of the correlated features in our dataset
plt.figure(figsize=(20,16))
sns.heatmap(fb_corr, #load the correlation matrix
linewidth=0.5, #line width between cells
cmap='RdBu_r', #preferred colour map
vmin=-1, #minimum of the colour scale
vmax=1, #maximum of the colour scale
annot=True) #annotate each cell with its value
#display the charts
plt.show()
Timeseries data is your bread and butter. Timeseries data is anything which is plotted over time... which is basically everything! Pandas has some great tools for managing timeseries data easily so that you can get the knowledge you need from your data to build strategy effectively and quickly.
N.B. When sourcing timeseries data you will need to determine what time increments are best for you. Pandas can help you deal with data at a microsecond level, but it's not always useful to sample data in one-second increments when your business sells one high-ticket item per week. Determine the smallest time increment that makes sense for your business and start from there.
import pandas as pd # importing pandas library
import matplotlib.pyplot as plt #importing matplotlib library
%matplotlib inline
ga_page_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_pages.csv",
skiprows=386,
nrows=366,
usecols=['Day Index', 'Pageviews'],
parse_dates=['Day Index'],
index_col='Day Index')
ppc_data = pd.read_csv(r"C:\Users\jki\Downloads\other_data\ppc_data.csv",
parse_dates=['Day'],
index_col='Day')
Start every new file by exploring what the data is. Don't presume that your data is correct, as this could cost you time and stress wondering why a plot or calculation isn't working.
ga_page_traffic_data.head()
Pageviews | |
---|---|
Day Index | |
2018-08-31 | 14 |
2018-09-01 | 6 |
2018-09-02 | 17 |
2018-09-03 | 14 |
2018-09-04 | 11 |
ga_page_traffic_data.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 366 entries, 2018-08-31 to 2019-08-31 Data columns (total 1 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Pageviews 366 non-null int64 dtypes: int64(1) memory usage: 5.7 KB
ga_page_traffic_data.plot(figsize=(15,5))
<Axes: xlabel='Day Index'>
It can be seen in the previous visualisation that we have some missing values. This could cause some skew in the data when we run calculations on it. To fix this you need to take the average over the year and fill the gaps with that value.
ga_page_traffic_data.describe()
Pageviews | |
---|---|
count | 366.000000 |
mean | 9.065574 |
std | 16.412694 |
min | 0.000000 |
25% | 3.000000 |
50% | 6.000000 |
75% | 12.000000 |
max | 274.000000 |
#Create a variable for Pageview average
average_value = ga_page_traffic_data['Pageviews'].mean()
#Print the average
average_value
9.065573770491802
#Replace any values that are 0 with the average values
ga_page_traffic_data['Pageviews'].replace(0,
average_value,
inplace=True)
#Plot chart to check missing values have been replaced
ga_page_traffic_data.plot(figsize=(15,5))
<Axes: xlabel='Day Index'>
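As an aside, replacing zeros with the annual mean is quick but flattens any seasonality. A minimal alternative sketch, applied to the raw series before the mean-fill above, is to treat zeros as missing and interpolate between neighbouring days:
import numpy as np
#Mark zeros as missing, then fill them by time-weighted interpolation
pv = ga_page_traffic_data['Pageviews'].replace(0, np.nan)
pv_filled = pv.interpolate(method='time') #uses the DatetimeIndex for spacing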
You'll always want to look at timeseries data in at least daily, weekly, monthly, quarterly and yearly increments. Usually you are looking for trends or issues in the data. Pandas has a method called resample which buckets dates into specified time increments and aggregates the numerical data with a chosen calculation, e.g. the average.
#Let's look at the data by Month using the resample method
ga_page_monthly = ga_page_traffic_data.resample('M').mean()
#Plot a monthly line graph
ga_page_monthly.plot(figsize=(15,5))
<Axes: xlabel='Day Index'>
#Resample the data by quarter and take the average
ga_page_quarterly = ga_page_traffic_data.resample('Q').mean()
#Plot a quarterly line graph
ga_page_quarterly.plot(figsize=(15,5))
<Axes: xlabel='Day Index'>
There are two ways to view an average over time. The first is resampling, which takes the average over a set range of 7 days. The other is a rolling average, which calculates the average in a rolling 7-day window. The primary difference is that resampled windows do not overlap while rolling windows do, so the rolling average gives a more granular account of the average for a period than resampling does. Both are useful; it's situation dependent.
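A toy series makes the distinction concrete (illustrative data, not from the campaign):
import pandas as pd
#14 days of toy data: resample gives one value per non-overlapping 7-day
#block, rolling gives one value per day from a sliding 7-day window
s = pd.Series(range(14), index=pd.date_range('2019-01-01', periods=14, freq='D'))
print(s.resample('7D').mean())    #2 rows: days 1-7, days 8-14
print(s.rolling(window=7).mean()) #14 rows: NaN until the window fills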
In this tutorial we'll be using the weekly average to investigate the rolling cost of our PPC campaign. This is an important metric to investigate, as being able to optimise spend based on historical trends will improve the campaign's return on investment. There are two methods for viewing the weekly average; I'll cover them both.
ppc_data.head()
Unnamed: 0 | Ad group | Avg. CPC | CTR | Campaign | Clicks | Conv. rate | Conversions | Cost | Cost / conv. | Final URL | Impr. | Keyword | Keyword status | Max. CPC | Mobile final URL | Status | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Day | |||||||||||||||||
2018-08-31 | 0 | miratrix_ppc_adgroup | 2.490563 | 16.894144 | miratrix_ppc | 75 | 0.225225 | 1 | 1105.809783 | 1105.809783 | Text | 7501 | keyword | Active | 2.6 | Text | Active |
2018-09-01 | 1 | miratrix_ppc_adgroup | 2.472161 | 5.976134 | miratrix_ppc | 17 | 1.193317 | 5 | 1035.835572 | 207.167114 | Text | 2504 | keyword | Active | 2.6 | Text | Active |
2018-09-02 | 2 | miratrix_ppc_adgroup | 2.508561 | 6.824919 | miratrix_ppc | 3 | 0.107411 | 1 | 2335.470042 | 2335.470042 | Text | 6354 | keyword | Active | 2.6 | Text | Active |
2018-09-03 | 3 | miratrix_ppc_adgroup | 2.548919 | 8.058366 | miratrix_ppc | 99 | 2.723735 | 7 | 655.072102 | 93.581729 | Text | 2071 | keyword | Active | 2.6 | Text | Active |
2018-09-04 | 4 | miratrix_ppc_adgroup | 2.414868 | 6.884937 | miratrix_ppc | 93 | 1.464435 | 7 | 1154.306794 | 164.900971 | Text | 3291 | keyword | Active | 2.6 | Text | Active |
#Let's look at the cost data from a ppc dataset
ppc_data['Cost'].plot(figsize=(15,5))
<Axes: xlabel='Day'>
#Let's resample that data by week and take the average
ppc_weekly = ppc_data['Cost'].resample('W').mean()
ppc_weekly.head()
Day
2018-09-02    1492.371799
2018-09-09    1073.762622
2018-09-16    1554.423832
2018-09-23    1322.445411
2018-09-30    1041.005947
Freq: W-SUN, Name: Cost, dtype: float64
ppc_weekly.plot(figsize=(15,5))
<Axes: xlabel='Day'>
ppc_rolling_7d = ppc_data['Cost'].rolling(window=7, #set a rolling window of x days
center=True # label each window at its midpoint
).mean()
ppc_rolling_7d.head(10)
Day
2018-08-31            NaN
2018-09-01            NaN
2018-09-02            NaN
2018-09-03    1248.123595
2018-09-04    1319.591703
2018-09-05    1310.633560
2018-09-06    1073.762622
2018-09-07    1320.345163
2018-09-08    1317.992852
2018-09-09    1089.016399
Name: Cost, dtype: float64
ppc_rolling_7d.plot(figsize=(15,5))
<Axes: xlabel='Day'>
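Note the NaNs at the start of the rolling series: the centred 7-day window doesn't fully fit at the edges of the data. If you'd rather keep estimates for those dates, a minimal sketch using the rolling method's min_periods argument:
#min_periods lets the window shrink at the boundaries, so the first and
#last dates still get an estimate from however many days are available
ppc_rolling_7d_full = ppc_data['Cost'].rolling(window=7,
                                               center=True,
                                               min_periods=1).mean()
ppc_rolling_7d_full.head(3) #no leading NaNs this time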
Here we will plot the CPC data three ways: daily, resampled weekly and 7-day rolling, then overlay them to compare the output.
fig = plt.figure(figsize=(15,5)) # Create matplotlib figure
ax = fig.add_subplot(111) # Create matplotlib axes
figsize = (15,8)
ppc_data['Cost'].plot(figsize=figsize,
alpha=.5,
marker='.',
linestyle='-',
linewidth=0.5,
label='Daily'
)
ppc_weekly.plot(figsize=figsize,
marker='x',
markersize=8,
linestyle='-',
label='Weekly Mean Resample'
)
ppc_rolling_7d.plot(figsize=figsize,
marker='o',
linestyle='-',
label='7-d Rolling Mean'
)
ax.set_xlabel('Date') #name the x axis
ax.set_ylabel('Cost Per Click (CPC)') # name the primary y axis
#Global Plot settings
plt.title('COMPARE: Daily, Weekly Mean, 7-d Rolling Mean ') # set chart name
fig.legend() # set the legend
#display the charts
plt.show()
You want to make as much reusable code as possible and you want very clear reports that anyone can read. In marketing we regularly want to find the highest and lowest CPC, so we'll create an annotation that does this dynamically. Whenever we run this code on a new ppc dataset it will automatically highlight the max and min CPC.
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(111)
figsize = (15,8)
ppc_data['Cost'].plot(figsize=figsize,
alpha=.5,
marker='.',
linestyle='-',
linewidth=0.5,
label='Daily'
)
ppc_weekly.plot(figsize=figsize,
marker='x',
markersize=8,
linestyle='-',
label='Weekly Mean Resample'
)
ppc_rolling_7d.plot(figsize=figsize,
marker='o',
linestyle='-',
label='7-d Rolling Mean'
)
max_value = ppc_rolling_7d.max()
max_value_index = ppc_rolling_7d.idxmax()
min_value = ppc_rolling_7d.min()
min_value_index = ppc_rolling_7d.idxmin()
#Create dynamic annotations for max values
ax.annotate('Highest CPC: £{}'.format(round(max_value)), #built-in round() works on plain Python floats
            xy=(max_value_index,max_value),
            xytext=(max_value_index,max_value),
            arrowprops=dict(facecolor='cyan', #colour
                            shrink=0.05, #length of arrow
                            lw=1, #line width
                            ec='magenta', #border colour
                            zorder=1)) #layering order of annotation
#Create dynamic annotations for min values
ax.annotate('Lowest CPC: £{}'.format(round(min_value)), #round() again, for the same reason
            xy=(min_value_index,min_value),
            xytext=(min_value_index,min_value),
            arrowprops=dict(facecolor='cyan', #colour
                            shrink=0.0, #length of arrow
                            lw=1, #line width
                            ec='magenta', #border colour
                            zorder=1)) #layering order of annotation
#Global Plot settings
plt.title('COMPARE: Daily, Weekly Mean, 7-d Rolling Mean ') # set chart name
fig.legend() # set the legend
#display the charts
plt.show()
Calculations and creating new metrics in Python is very simple, and it can open up a whole world of benchmarks and metrics with which to govern your marketing activities. In this notebook we'll look at various ways of creating and calculating metrics.
import pandas as pd # importing pandas library
ga_gsc_data = pd.read_csv(r"C:\Users\jki\Downloads\other_data\ga_gsc_data.csv")
ga_gsc_data.head()
Unnamed: 0 | Page | Clicks | Impressions | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | https://miratrix.co.uk/ | 371 | 100673 | 0.37% | 42.59 | 0 days 00:01:43.666666666 | 47.296667 | 54.286667 | 1396 | 1095 |
1 | 1 | https://miratrix.co.uk/app-marketing-agency/ | 80 | 30824 | 0.26% | 26.84 | 0 days 00:01:45.333333333 | 64.436667 | 60.913333 | 210 | 181 |
2 | 2 | https://miratrix.co.uk/mobile-app-competitor-a... | 16 | 1458 | 1.1% | 30.64 | 0 days 00:04:06.000000000 | 53.335000 | 89.475000 | 30 | 26 |
3 | 3 | https://miratrix.co.uk/blippar-the-message-isn... | 11 | 517 | 2.13% | 36.05 | 0 days 00:00:22.333333333 | 58.170000 | 78.333333 | 27 | 21 |
4 | 4 | https://miratrix.co.uk/author/nduddy/ | 10 | 140 | 7.14% | 5.61 | 0 days 00:02:52.000000000 | 22.223333 | 38.890000 | 15 | 13 |
ga_gsc_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   Unnamed: 0         193 non-null    int64
 1   Page               193 non-null    object
 2   Clicks             193 non-null    int64
 3   Impressions        193 non-null    int64
 4   CTR                193 non-null    object
 5   Position           193 non-null    float64
 6   Avg. Time on Page  193 non-null    object
 7   Bounce Rate        193 non-null    float64
 8   Exit               193 non-null    float64
 9   Pageviews          193 non-null    int64
 10  Unique Pageviews   193 non-null    int64
dtypes: float64(3), int64(5), object(3)
memory usage: 16.7+ KB
In this section you'll recalculate the CTR field, replacing the current object column with floats. You will also create two new metrics from the data you already have.
#Calculating CTR and replace the CTR column
ga_gsc_data.CTR = (ga_gsc_data.Clicks.div(ga_gsc_data.Impressions)*100).round(2)
ga_gsc_data.CTR.head()
0    0.37
1    0.26
2    1.10
3    2.13
4    7.14
Name: CTR, dtype: float64
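For completeness, a minimal alternative sketch: on a freshly loaded copy of the file, the '0.37%'-style strings can be converted directly rather than recomputed from Clicks and Impressions (the variable name raw is illustrative):
#Strip the trailing '%' from the original object column and cast to float
raw = pd.read_csv(r"C:\Users\jki\Downloads\other_data\ga_gsc_data.csv")
raw['CTR'] = raw['CTR'].str.rstrip('%').astype(float)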
From the exit percentage, let's calculate an integer value for the number of clicks that stayed on the site rather than bouncing straight back out. We can use this to create another CTR metric which will show us how many loyal visitors are clicking through.
#Calculate the clicks that stayed: Clicks minus the Exit % share of Clicks
ga_gsc_data['num_bounces'] = (ga_gsc_data['Clicks'] - (ga_gsc_data.Exit.div(100).multiply(
ga_gsc_data['Clicks']))).astype(int)
ga_gsc_data.head()
Unnamed: 0 | Page | Clicks | Impressions | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | num_bounces | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | https://miratrix.co.uk/ | 371 | 100673 | 0.37 | 42.59 | 0 days 00:01:43.666666666 | 47.296667 | 54.286667 | 1396 | 1095 | 169 |
1 | 1 | https://miratrix.co.uk/app-marketing-agency/ | 80 | 30824 | 0.26 | 26.84 | 0 days 00:01:45.333333333 | 64.436667 | 60.913333 | 210 | 181 | 31 |
2 | 2 | https://miratrix.co.uk/mobile-app-competitor-a... | 16 | 1458 | 1.10 | 30.64 | 0 days 00:04:06.000000000 | 53.335000 | 89.475000 | 30 | 26 | 1 |
3 | 3 | https://miratrix.co.uk/blippar-the-message-isn... | 11 | 517 | 2.13 | 36.05 | 0 days 00:00:22.333333333 | 58.170000 | 78.333333 | 27 | 21 | 2 |
4 | 4 | https://miratrix.co.uk/author/nduddy/ | 10 | 140 | 7.14 | 5.61 | 0 days 00:02:52.000000000 | 22.223333 | 38.890000 | 15 | 13 | 6 |
ga_gsc_data.insert(4, #position
'CTR true visits', #name of column
(ga_gsc_data.num_bounces.div(ga_gsc_data.Impressions)*100).round(2) #value
)
ga_gsc_data.head()
Unnamed: 0 | Page | Clicks | Impressions | CTR true visits | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | num_bounces | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | https://miratrix.co.uk/ | 371 | 100673 | 0.17 | 0.37 | 42.59 | 0 days 00:01:43.666666666 | 47.296667 | 54.286667 | 1396 | 1095 | 169 |
1 | 1 | https://miratrix.co.uk/app-marketing-agency/ | 80 | 30824 | 0.10 | 0.26 | 26.84 | 0 days 00:01:45.333333333 | 64.436667 | 60.913333 | 210 | 181 | 31 |
2 | 2 | https://miratrix.co.uk/mobile-app-competitor-a... | 16 | 1458 | 0.07 | 1.10 | 30.64 | 0 days 00:04:06.000000000 | 53.335000 | 89.475000 | 30 | 26 | 1 |
3 | 3 | https://miratrix.co.uk/blippar-the-message-isn... | 11 | 517 | 0.39 | 2.13 | 36.05 | 0 days 00:00:22.333333333 | 58.170000 | 78.333333 | 27 | 21 | 2 |
4 | 4 | https://miratrix.co.uk/author/nduddy/ | 10 | 140 | 4.29 | 7.14 | 5.61 | 0 days 00:02:52.000000000 | 22.223333 | 38.890000 | 15 | 13 | 6 |
We all need to filter data regularly; it's a daily task. Filtering data in pandas is simple as long as you remember the correct comparison operator to use, e.g. >= or <=.
#Filter the data by pages that have received 100 or more clicks
ga_gsc_data[ga_gsc_data['Clicks'] >= 100]
Unnamed: 0 | Page | Clicks | Impressions | CTR true visits | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | num_bounces | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | https://miratrix.co.uk/ | 371 | 100673 | 0.17 | 0.37 | 42.59 | 0 days 00:01:43.666666666 | 47.296667 | 54.286667 | 1396 | 1095 | 169 |
#Filter by pages ranking in the top 10 that have received 100 or more impressions
filter1 = ga_gsc_data[(ga_gsc_data['Position'] <= 10) & (ga_gsc_data['Impressions'] >= 100)]
filter1
Unnamed: 0 | Page | Clicks | Impressions | CTR true visits | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | num_bounces | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4 | 4 | https://miratrix.co.uk/author/nduddy/ | 10 | 140 | 4.29 | 7.14 | 5.61 | 0 days 00:02:52.000000000 | 22.223333 | 38.89 | 15 | 13 | 6 |
14 | 14 | https://miratrix.co.uk/miratrix-23-golden-squa... | 3 | 227 | 0.00 | 1.32 | 7.59 | 0 days 00:00:00.000000000 | 0.000000 | 100.00 | 4 | 4 | 0 |
34 | 36 | https://miratrix.co.uk/droidcon-not-just-for-d... | 1 | 166 | 0.00 | 0.60 | 8.00 | 0 days 00:00:00.000000000 | 0.000000 | 100.00 | 1 | 1 | 0 |
#Filter and sort your data
ga_gsc_data[ga_gsc_data['Impressions'] >= 1000].sort_values(by="CTR true visits",
ascending=False)
Unnamed: 0 | Page | Clicks | Impressions | CTR true visits | CTR | Position | Avg. Time on Page | Bounce Rate | Exit | Pageviews | Unique Pageviews | num_bounces | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | https://miratrix.co.uk/ | 371 | 100673 | 0.17 | 0.37 | 42.59 | 0 days 00:01:43.666666666 | 47.296667 | 54.286667 | 1396 | 1095 | 169 |
1 | 1 | https://miratrix.co.uk/app-marketing-agency/ | 80 | 30824 | 0.10 | 0.26 | 26.84 | 0 days 00:01:45.333333333 | 64.436667 | 60.913333 | 210 | 181 | 31 |
2 | 2 | https://miratrix.co.uk/mobile-app-competitor-a... | 16 | 1458 | 0.07 | 1.10 | 30.64 | 0 days 00:04:06.000000000 | 53.335000 | 89.475000 | 30 | 26 | 1 |
8 | 8 | https://miratrix.co.uk/app-store-optimization-... | 5 | 4177 | 0.02 | 0.12 | 70.02 | 0 days 00:01:31.666666666 | 61.906667 | 60.256667 | 198 | 160 | 1 |
82 | 93 | https://miratrix.co.uk/seo-agency-london/ | 0 | 1496 | 0.00 | 0.00 | 215.61 | 0 days 00:00:32.500000000 | 100.000000 | 66.665000 | 4 | 4 | 0 |
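As an aside, the same filters can be written with pandas' query method, which some people find easier to read once conditions stack up; a minimal sketch (column names containing spaces need backticks):
#Equivalent to filter1 above, written as a query string
filter1_q = ga_gsc_data.query('Position <= 10 and Impressions >= 100')
#Column names with spaces are wrapped in backticks
loyal = ga_gsc_data.query('Impressions >= 1000 and `CTR true visits` > 0')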
In this section we're going to build! You've got all your data cleaned and processed. Now it's time to start making it work for you. In this notebook we'll put together a very simple alerts function that you can customise all day long.
import pandas as pd
import matplotlib.pyplot as plt #importing matplotlib library
%matplotlib inline
ga_page_traffic_data = pd.read_csv(r"C:\Users\jki\Downloads\ga_data\ga_pages.csv",
skiprows=386, nrows=366,
usecols=['Day Index', 'Pageviews'],
parse_dates=['Day Index'],
index_col='Day Index')
ga_page_traffic_data.head()
Pageviews | |
---|---|
Day Index | |
2018-08-31 | 14 |
2018-09-01 | 6 |
2018-09-02 | 17 |
2018-09-03 | 14 |
2018-09-04 | 11 |
ga_page_traffic_data.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 366 entries, 2018-08-31 to 2019-08-31
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Pageviews  366 non-null    int64
dtypes: int64(1)
memory usage: 5.7 KB
In this video we'll look at creating an alert that signals whether a number is positive, negative or unchanged.
#Create a variable called num and assign it an integer
num = -1
#Write an if statement that prints if the number is up, down or no change
if num > 0:
print("It's UP!")
elif num == 0:
print("No Change")
else:
print("It's DOWN!")
It's DOWN!
We're going to create a way of filtering our data by date range. To do this we need a start and an end date.
start = '2018-08-31'
end = '2018-09-30'
#Create a mask that will select two dates. A mask will return a boolean (True/False) array
mask = ((ga_page_traffic_data.index == start) | (ga_page_traffic_data.index == end))
mask[:30]
array([ True, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False])
#Apply the mask to the dataframe
ga_page_traffic_data[mask]
Pageviews | |
---|---|
Day Index | |
2018-08-31 | 14 |
2018-09-30 | 2 |
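As an aside, because the index is a DatetimeIndex you can also select dates by label without building a mask; a minimal sketch:
#Every row from start to end inclusive, via label slicing
ga_page_traffic_data.loc[start:end]
#Or grab just the two endpoint days directly
ga_page_traffic_data.loc[[start, end]]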
#Calculate the difference between the two rows. diff() leaves NaN in the first row,
#so max() picks out the single real value and [0] extracts it from the Series
diff = ga_page_traffic_data[mask].diff().max()[0]
diff
-12.0
Now that we can select data and check the difference between two rows, it's possible to create an alert that surfaces the data we need if something has gone wrong.
start = '2018-08-31'
end = '2018-09-30'
def alerts(df, start, end):
mask = ((df.index == start) | (df.index == end)) #Create a mask
diff = df[mask].diff().max()[0] #select the difference between the two dates
chart = df[mask] #create a df with the mask applied
if diff > 0:
print("Traffic is up by {}".format(diff)) #print if traffic is above 0
elif diff == 0:
print("No Change") #print if traffic is unchanged
else:
print("Traffic is down by {}".format(diff)) #print if traffic is below 0
#Print a bar chart to show the troublesome data
fig = plt.figure(figsize=(15,5))
ax = fig.add_subplot(111)
figsize = (15,8)
chart.plot(kind='bar',
figsize=figsize,
linewidth=0.5,
ax=ax
)
#Global Plot settings
plt.title('{} - {} : Traffic Drop in {}'.format(start,
end,
df.columns.values)) # set chart name
#display the charts
plt.show()
#Run the function
alerts(ga_page_traffic_data, start, end)
Traffic is down by -12.0
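Because the function takes the dataframe and dates as parameters, it's easy to reuse. A minimal usage sketch with hand-picked month-end pairs (any dates within the dataset's range would work):
#Check month-over-month movement across the first few months of data
ranges = [('2018-08-31', '2018-09-30'),
          ('2018-09-30', '2018-10-31'),
          ('2018-10-31', '2018-11-30')]
for s, e in ranges:
    alerts(ga_page_traffic_data, s, e)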