l_headers=['page','clicks','imp','ctr','position','tm'] # Changed the name of the columns
df=pd.read_csv(path,skiprows=11,usecols=[0,1,2,3,4,8],index_col=0,names=l_headers)
After creating the DataFrame df we are using the same to create a SQLite table to store them using to_sql(). We can use MySQL also to store the DataFrame.
import pandas as pd
#path and name of downloaded csv file.
path='D:\\testing\\analytics\\data\\y24_09.csv'
month_name='24-Sep' # should match with above csv file of the month
table_name='y_all' # Common table all months
####### No edit ######
l_headers=['page','clicks','imp','ctr','position','tm'] # Changed the name of the columns
df=pd.read_csv(path,skiprows=11,usecols=[0,1,2,3,4,8],index_col=0,names=l_headers) # remove first 6 lines
df['position']=df['position']. round(2)
df['ctr']=df['ctr']. round(2)
df['tm']=df['tm']. round(2)
df['month']=month_name
df=df[~df.index.str.contains('\?')] # remove rows having ? in url or page column
print(df.head())
## Inserted the DataFrame to SQLite database
from sqlalchemy import create_engine
my_conn = create_engine("sqlite:///C:\\xampp\\htdocs\\my_dir\\templates\\temp1\\analytics3.db")
my_conn=my_conn.connect()
#df.to_sql(con=my_conn,name=table_name,if_exists='append')
import pandas as pd
df=pd.read_csv('F:\\webmaster-console-query-data\\Queries.csv') # use your system path
list_words=['tkinter','color'] # List of keywords to check ranking
my_str = '|'.join(list_words) # tkinter | color , the OR combination
df2=df[df['queries'].str.contains(my_str,case=False)]
print(df2)
In above code we will get output based on the OR combination. So our output rows will contain presence of any one keyword from the given list of keywords. list_words=['tkinter','color'] # List of keywords to check ranking
## my_str = '|'.join(list_words) # tkinter | color , only for the OR combination
my_str=''
for w in list_words:
my_str=my_str + "(?=.*" + w + ")"
print(my_str) # check the string before applying
#df2=df[df['queries'].str.contains(r'^(?=.*tkinter)(?=.*color)',case=False)]
df2=df[df['queries'].str.contains(my_str,case=False)]
print(df2)
Using the above output we can further filter the rows based on ranking position. From the above rows ( dataframe df2 ) we will collect rows having rank ( Column name Position ) within 6 to 10.
df2=df[df['queries'].str.contains(my_str,case=False)]
df2 = df2[df2["Position"].between(6,10)] # Position between two ranks
import pandas as pd
df=pd.read_csv('F:\\webmaster-console-query-data\\Queries.csv') # use your system path
df2 = df["queries"].str.split(expand=True).stack().value_counts()
print(df2.head(15)) # printing top 15 keywords
df2.to_csv("H:\\data\\seo\\to_words.csv") # saving to csv file, change to your path
import pandas as pd
l1=['javascript_tutorial','php_tutorial','html_tutorial','sql_tutorial','python','jquery','c-tutorial']
l1=['c-tutorial'] # comment this line to update all directories
for d in l1:
#path = "C:\\xampp\\htdocs\\plus2net\\"+d+"\\" # full path to directory
f_x='E:\\testing\\sitemap\\'+d+'.xlsx' # path of excel file.
df=pd.read_excel(f_x)
#condition=(df['tag_canonical']<3) & (df['tag_head']==1)
condition=(df['tag_head']!=0) # exclude some files based on condition
df=df.loc[condition]
my_str='\n'
my_str=my_str+'\n'
def print_row(row):
global my_str,d
#print(f"Name: {row['f_name']}, Date: {row['dt']}")
my_str=my_str+f"{row['url']} {row['dt']} \n"
df.apply(print_row, axis=1)
my_str=my_str+" "
path=path+"sitemap.xml"
with open(path, 'w') as file:
file.write(my_str)
print("For ",d," Total : ",len(df))
Author
🎥 Join me live on YouTubePassionate about coding and teaching, I publish practical tutorials on PHP, Python, JavaScript, SQL, and web development. My goal is to make learning simple, engaging, and project‑oriented with real examples and source code.