Functions | Details |
---|---|
mkdir() | Create directory |
walk() | path, directories and files of sub-directories |
import os

# Platform name as reported by the os module (sample output: nt).
platform_tag = os.name
print(platform_tag)  # nt

# Current working directory of this process.
current_dir = os.getcwd()
print(current_dir)  # c:\Users\DellHP
import os

# Resolve this script's absolute path first, then keep only its directory part.
script_path = os.path.abspath(__file__)
print(os.path.dirname(script_path))
Create a student.csv file in the same directory as the running script
import csv
import os

# Place student.csv next to this script, independent of the current working
# directory. os.path.join picks the correct separator for the platform.
file_name = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'student.csv')

try:
    # Mode 'x' creates the file only when it does not exist yet, so an existing
    # student.csv is never overwritten and there is no exists()/open() race.
    with open(file_name, mode='x', newline='') as file:
        csv.writer(file).writerow(["id", "name", "class", "mark", "gender"])
except FileExistsError:
    pass  # file already present: leave its contents untouched
import os

start_dir = os.getcwd()
print(start_dir)  # c:\Users\DellHP

path = 'D:\\my_dir'
os.chdir(path)  # make `path` the new working directory
print(os.getcwd())  # D:\my_dir
Changing to parent directory
# '..' is resolved relative to the current working directory, i.e. one level up.
os.chdir('..')
import os

path = "E:\\testing\\images\\"  # Directory Path
entries = os.listdir(path)  # names of the entries found inside `path`
print(entries)
import os

# expanduser() replaces the leading '~' with the user's home directory.
expanded = os.path.expanduser('~\\test.txt')
print(expanded)
Output
C:\Users\HP\test.txt
import os

path = "E:\\testing\\images\\"  # Directory Path
is_present = os.path.exists(path)  # True when the path exists
print(is_present)  # True
For a file
import os

path = "E:\\testing\\images\\test3.png"  # file path
is_present = os.path.exists(path)  # exists() works for files as well
print(is_present)  # True
import os

file_path = "E:\\testing\\images\\test3.png"  # File Path
dir_path = "E:\\testing\\images\\"  # Directory Path

# The file is checked first, then the directory (sample output: False, True).
for path in (file_path, dir_path):
    print(os.path.isdir(path))
import os

path = "E:\\testing\\images\\test3.png"  # Path
# splitext() returns a (root, extension) pair; unpack it once and reuse the parts.
root, extension = os.path.splitext(path)
print((root, extension))  # ('E:\\testing\\images\\test3', '.png')
print(root)  # E:\testing\images\test3
print(extension)  # .png
import os
from datetime import datetime

path = "E:\\testing\\images\\test3.png"  # File path
t_stamp = os.path.getmtime(path)  # for file modification time (a timestamp)
# t_stamp = os.path.getctime(path)  # for file creation time
dt_mod = datetime.fromtimestamp(t_stamp)  # datetime object
print('File Modified on:', dt_mod)  # Printing date and time
m_date = dt_mod.strftime('%Y-%m-%d')  # Change format
print(m_date)
# The original used `path+f`, but no `f` exists in this snippet (NameError);
# `path` already points at the file itself.
size = os.path.getsize(path)
import os
from datetime import datetime

path = "E:\\testing\\"  # Directory Path (ends with a separator, names are appended to it)

# Print name, extension, modification date and size for every directory entry.
for entry in os.listdir(path):
    full_path = path + entry
    modified_ts = os.path.getmtime(full_path)  # file modification time
    _, f_extension = os.path.splitext(full_path)
    size = os.path.getsize(full_path)
    m_date = datetime.fromtimestamp(modified_ts).strftime('%Y-%m-%d')
    print(entry, f_extension, m_date, size)
Above code is integrated inside Tkinter GUI
import os

path = 'D:\\my_dir1\\my_dir2\\my_dir3\\my_dir4'
# os.makedirs(path)  # un-comment to create all directories in the path first

try:
    os.rmdir(path)  # delete directory my_dir4
except OSError as err:
    # The exception text carries the specific reason for the failure.
    print(err)
    print("Failed to delete %s " % path)
else:
    print("Successfully deleted the directory %s " % path)
Output (when my_dir4 does not exist)
[WinError 2] The system cannot find the file specified: 'D:\\my_dir1\\my_dir2\\my_dir3\\my_dir4'
Failed to delete D:\my_dir1\my_dir2\my_dir3\my_dir4
import os

path = 'D:\\testing\\my_db\\my_db.db'  # update your path

try:
    os.remove(path)  # delete the file at `path`
except OSError as err:
    # The exception text carries the specific reason for the failure.
    print(err)
    print("Failed to delete %s " % path)
else:
    print("Successfully deleted the file %s " % path)
import os

path = "E:\\testing\\images\\test.txt"  # path to file.
operation = "print"  # operation name handed to os.startfile
os.startfile(path, operation)
# Sends a PDF to a printer through the pywin32 modules.
import win32api
import win32print
# A List containing the system printers
# NOTE(review): index 2 of each EnumPrinters entry is presumably the printer
# name, and the argument 2 presumably selects local printers — confirm against
# the pywin32 documentation.
all_printers = [printer[2] for printer in win32print.EnumPrinters(2)]
# Update the default printer or ask user to select.
# NOTE(review): the hard-coded index 2 raises IndexError when fewer than three
# printers are listed; pick the index that matches your system.
win32print.SetDefaultPrinter(all_printers[2]) # Update your printers
path2 = "F:\\testing\\images\\certificate_12.pdf" # Path of PDF file
# Invoke the "print" verb on the PDF file.
win32api.ShellExecute(0, "print", path2, None, ".", 0)
import os
import re

# Lists the .php files (larger than 500 bytes) in a directory that contain an
# href pointing to `file_name_to_check`.
file_name_to_check = 'pb-check-demo.php'
path1 = 'C:/xampp/htdocs/example/dir_name/'  # dir path (keep the trailing slash)
file_list = os.listdir(path1)

# Compiled once outside the loop; captures the target of each href attribute.
href_pattern = re.compile(r'href=[\'"]?([^\'" >]+)', re.IGNORECASE)

if file_name_to_check in file_list:
    print('File is there, checking')
    for f in file_list:
        full_path = path1 + f
        if os.path.splitext(full_path)[1] != '.php':  # check this extension only
            continue
        if os.path.getsize(full_path) <= 500:  # skip very small files
            continue
        # `with` guarantees the file is closed even if reading fails.
        with open(full_path, 'r', encoding='utf8', errors='ignore') as fob:
            data = fob.read()  # collect data
        if file_name_to_check in href_pattern.findall(data):
            print(f)
else:
    print('File is not there, check the name and then submit. ')
import os
import re

# Prints every file whose name contains 'tkinter' but whose content does not
# contain 'www.youtube' (both checks are case-insensitive).
path1 = 'C:/xampp/htdocs/dir_name/python/'  # dir path (keep the trailing slash)
file_list = os.listdir(path1)  # List of files

for f in file_list:
    if 'tkinter' not in f.lower():  # check string in file name
        continue
    full_path = path1 + f
    if not os.path.isfile(full_path):  # a matching sub-directory cannot be opened
        continue
    # `with` guarantees the file is closed even if reading fails.
    with open(full_path, 'r', encoding='utf8', errors='ignore') as fob:
        data = fob.read().lower()  # collect data
    if 'www.youtube' not in data:  # not present
        print(f)
import os
import re

# Prints and counts the .php files (larger than 500 bytes) whose content does
# not contain the string 'breadcrumb' (case-insensitive).
path1 = 'C:/xampp/htdocs/dir_name/python/'  # dir path (keep the trailing slash)
file_list = os.listdir(path1)  # List of files
total = 0

for f in file_list:
    full_path = path1 + f
    if os.path.splitext(full_path)[1] != '.php':  # check this extension only
        continue
    if os.path.getsize(full_path) <= 500:  # skip very small files
        continue
    # `with` guarantees the file is closed even if reading fails.
    with open(full_path, 'r', encoding='utf8', errors='ignore') as fob:
        data = fob.read().lower()  # collect data
    if 'breadcrumb' not in data:  # not present
        print(f)
        total += 1

print("Total files : ", len(file_list), ", Not having string : ", total)
The os.listdir() function returns a list of all entries in the specified directory. The script then uses a for loop to iterate through that list and retrieve the details of each file.
import os
from datetime import datetime

import pandas as pd

# For each source directory, write an Excel file listing the name,
# modification date and size of every .php file it contains.
l1 = ['javascript_tutorial', 'php_tutorial', 'html_tutorial', 'sql_tutorial', 'python']

for d in l1:
    path = "C:\\xampp\\htdocs\\plus2net\\" + d + "\\"  # full path to directory
    f_x = 'C:\\data2\\' + d + '.xlsx'  # Excel file named after the directory

    # Collect plain dicts and build the DataFrame once: calling pd.concat for
    # every file copies the whole frame each time.
    rows = []
    for f in os.listdir(path):  # list of files looping
        full_path = path + f
        if os.path.splitext(full_path)[1] != '.php':  # only .php file extensions
            continue
        size = os.path.getsize(full_path)
        t_stamp = os.path.getmtime(full_path)  # file modification time
        m_date = datetime.fromtimestamp(t_stamp).strftime('%Y-%m-%d')
        rows.append({'f_name': f, 'dt': m_date, 'size': size})

    # Explicit columns keep the header row even when no .php file was found.
    df = pd.DataFrame(rows, columns=['f_name', 'dt', 'size'])
    df.to_excel(f_x, index=False)  # create excel file with file data
The output of the code is one Excel file per listed directory, each containing the file name, last-modification date, and size of every .php file in that directory. This information can be useful for various purposes, such as identifying large files, finding recently modified files, or organizing files by their modification dates.
This script scans specified directories for PHP files, extracts metadata, and updates both an Excel file and a database table (SQLite in the code below; a MySQL connection string is provided as a commented-out alternative) with the collected information. Additionally, it generates an updated sitemap.xml file for search engine optimization.
## Part 1: scan each selected directory for .php pages and store one row per
## page in an Excel file and in a database table named after the directory.
import os
from datetime import datetime

import pandas as pd
from sqlalchemy import create_engine, Float

#my_conn = create_engine("mysql+mysqldb://id:pw@localhost/my_tutorial") # MySQL
my_conn = create_engine("sqlite:///F:\\testing2\\sitemap\\data\\plus2net.db")
my_conn = my_conn.connect()  # add this line if error

# source directory list to get list of files inside the directory
# (the original list had no comma after 'articles', which silently merged it
# with the next entry into 'articlesasp-tutorial')
l1 = ['javascript_tutorial', 'php_tutorial', 'html_tutorial',
      'sql_tutorial', 'python', 'jquery', 'c-tutorial', 'articles',
      'asp-tutorial', 'ps-tutorial', 'java_tutorial', 'angular', 'msg-demo']
#l1=['c-tutorial']
l1 = ['python']  # comment this line to update all directories
#path1="C:\\xampp\\htdocs\\plus2net\\"
path1 = "C:\\xampp\\htdocs\\z-plus2net\\"  # full path to directory
f_x1 = 'F:\\testing2\\sitemap\\data\\'  # Excel file path to store
###### End of Edit #############

# Fixed column order, so the sheet/table layout is the same even when a
# directory contains no .php file.
columns = ['url', 'f_name', 'dt', 'size', 'tag_nav', 'Sp_Mobile', 'Sp_Desktop',
           'todo', 'tag_head', 'tag_canonical']

for d in l1:
    path = path1 + d + "\\"  # full path to directory
    f_x = f_x1 + d + '.xlsx'  # excel file to store, file name is the directory name

    # Collect plain dicts and build the DataFrame once instead of calling
    # pd.concat for every file.
    rows = []
    for f in os.listdir(path):  # list of files looping
        full_path = path + f
        if os.path.splitext(full_path)[1] != '.php':  # only .php file extensions
            continue
        size = os.path.getsize(full_path)
        t_stamp = os.path.getmtime(full_path)  # file modification time
        m_date = datetime.fromtimestamp(t_stamp).strftime('%Y-%m-%d')
        # `with` closes the file; the original never closed it.
        with open(full_path, 'r', encoding='utf8', errors='ignore') as fob:
            data = fob.read()  # collect data
        rows.append({
            'url': "https://www.plus2net.com/" + d + "/" + f,  # site name + directory + file name
            'f_name': f,
            'dt': m_date,
            'size': size,
            'todo': 1,
            # 1 when the marker string occurs in the page source, else 0
            'tag_head': int('</head>' in data),
            'tag_canonical': int('canonical' in data),
            'tag_nav': int('breadcrumb' in data),
            'Sp_Mobile': 0,
            'Sp_Desktop': 0,
        })

    df = pd.DataFrame(rows, columns=columns)
    # astype returns a new frame; the original discarded the result.
    df = df.astype({'Sp_Mobile': 'float64', 'Sp_Desktop': 'float64'})
    df.to_excel(f_x, index=False)  # create excel file with file data
    df.to_sql(con=my_conn, name=d, if_exists='replace', index=False,
              dtype={"Sp_Mobile": Float, "Sp_Desktop": Float})  # to store in database table
############### Part 2 creating site map #######
from xml.sax.saxutils import escape  # XML-escapes '&', '<' and '>' inside <loc>

# For every directory, read the Excel file written in part 1 and write a
# sitemap.xml into that directory.
for d in l1:
    # Rebuild both paths for the current directory. The original reused the
    # values left over from part 1, so every pass read the same Excel file and
    # kept appending "sitemap.xml" to the same path string.
    f_x = f_x1 + d + '.xlsx'
    sitemap_path = path1 + d + "\\" + "sitemap.xml"

    df = pd.read_excel(f_x)  # read the file and create Dataframe
    #condition=(df['tag_canonical']<3) & (df['tag_head']==1)
    df = df.loc[df['tag_head'] != 0]  # exclude pages without a </head> tag

    # The sitemap protocol namespace uses the http:// scheme.
    lines = ['<?xml version="1.0" encoding="UTF-8"?>',
             '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">']
    lines.extend(
        f"<url><loc>{escape(str(url))}</loc><lastmod>{dt}</lastmod></url>"
        for url, dt in zip(df['url'], df['dt'])
    )
    lines.append("</urlset>")

    # Write as UTF-8 so the bytes match the encoding declared in the XML header.
    with open(sitemap_path, 'w', encoding='utf-8') as file:
        file.write("\n".join(lines))
    print("For ", d, " Total : ", len(df))
Using the above data we can check the Canonical tag by using BeautifulSoup