Python Pandas sample DataFrame
abs Absolute value of elements
add Adding to elements
add_prefix Adding string to column name or row name
add_suffix Adding string (at end) to column name or row name
agg aggregating the data
align Aligning two DataFrames using join methods
all Returns True if all elements are True
any Returns True if any element is True
append Adding rows and columns
apply Apply a function to DataFrame
applymap Apply a function to each element
abs
Absolute value of elements
import pandas as pd
my_dict=([-1,-1,2,-3])
my_data = pd.DataFrame(data=my_dict)
print(my_data.abs())
output
0
0 1
1 1
2 2
3 3
Absolute value of a column
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,40,-50],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
#print(my_data['MATH'].abs())
print(abs(my_data['MATH'])) # MATH column
Output
0 30
1 40
2 50
Name: MATH, dtype: int64
Absolute value of two columns
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,40,-50],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
print(abs(my_data[['MATH','ENGLISH']]))
Output
MATH ENGLISH
0 30 20
1 40 30
2 50 40
add
Adding to elements
import pandas as pd
my_dict=([-1,-1,2,-3])
my_data = pd.DataFrame(data=my_dict)
print(my_data.add(2))
Output
0
0 1
1 1
2 4
3 -1
Adding to a column
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,40,-50],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
#print(my_data[['MATH']].add(2)) # applying to single column
print(my_data[['MATH','ENGLISH']].add(2)) # applying to two columns
Output
MATH ENGLISH
0 -28 22
1 42 -28
2 -48 42
add_prefix
A string we can add before the row labels for a series and before the column names for a DataFrame
import pandas as pd
my_dict=([1,2,3,4])
my_data = pd.Series(data=my_dict)
print(my_data.add_prefix('row_no_'))
Output for a series
row_no_0 1
row_no_1 2
row_no_2 3
row_no_3 4
dtype: int64
for a DataFrame
import pandas as pd
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.add_prefix('col_no_'))
Output
col_no_0
0 1
1 2
2 3
3 4
Using a column
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,40,-50],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_label=pd.DataFrame()
print(my_data[['NAME']].add_prefix('f_name '))
Output
f_name NAME
0 Ravi
1 Raju
2 Alex
add_suffix
A string we can add after the row labels for a series and after the column names for a DataFrame
import pandas as pd
my_dict=([1,2,3,4])
my_data = pd.Series(data=my_dict)
print(my_data.add_suffix('_no_'))
Output
0_no_ 1
1_no_ 2
2_no_ 3
3_no_ 4
dtype: int64
Using a DataFrame
import pandas as pd
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.add_suffix(' col_no_'))
Output
0 col_no_
0 1
1 2
2 3
3 4
Using column name
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,40,-50],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_label=pd.DataFrame()
print(my_data[['NAME']].add_suffix(' (f_name)'))
Output
NAME (f_name)
0 Ravi
1 Raju
2 Alex
agg
Using Aggregate operations ( agg ) like sum, min, max, mean
import pandas as pd
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.agg(['sum']))
Output
0
sum 10
Using all
import pandas as pd
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.agg(['sum','min','max','mean']))
Output
0
sum 10.0
min 1.0
max 4.0
mean 2.5
Aggregate functions across multiple columns
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[30,40,50],
'ENGLISH':[20,30,40]}
my_data = pd.DataFrame(data=my_dict)
print(my_data.agg(['sum','min','max','mean'])) # all the columns
Output is here
NAME ID MATH ENGLISH
sum RaviRajuAlex 6.0 120.0 90.0
min Alex 1.0 30.0 20.0
max Ravi 3.0 50.0 40.0
mean NaN 2.0 40.0 30.0
Using only one column (MATH)
print(my_data['MATH'].agg(['sum','min','max','mean'])) # on one columns
Output
sum 120.0
min 30.0
max 50.0
mean 40.0
Name: MATH, dtype: float64
Using two columns ( MATH & ENGLISH)
print(my_data[['MATH','ENGLISH']].agg(['sum','min','max','mean'])) # two columns
Output
MATH ENGLISH
sum 120.0 90.0
min 30.0 20.0
max 50.0 40.0
mean 40.0 30.0
my_sale=sales.groupby(['product','store'],as_index=False).agg({'qty':'sum','price':'mean'})
Aggregate functions are used with GroupBy on multiple columns→
align
It makes two DataFrames match each other's configuration (the same row and / or column labels).
import pandas as pd
my_dict1={'NAME':['Ravi','Alex'],
'MATH':[30,40]}
my_dict2={'NAME':['Ravi','Raju','Rone'],
'ENGLISH':[20,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
a1, a2 = my_data1.align(my_data2, join='outer', axis=1)
print(a1)
print(a2)
Output
ENGLISH MATH NAME
0 NaN 30 Ravi
1 NaN 40 Alex
ENGLISH MATH NAME
0 20 NaN Ravi
1 50 NaN Raju
2 60 NaN Rone
Using join='left'
import pandas as pd
my_dict1={'NAME':['Ravi','Alex'],
'ID':[1,2],
'MATH':[30,40]}
my_dict2={'NAME':['Ravi','Raju','Rone'],
'ID':[1,3,4],
'ENGLISH':[20,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
a1, a2 = my_data1.align(my_data2, join='left', axis=1)
print(a1)
print(a2)
Output
NAME ID MATH
0 Ravi 1 30
1 Alex 2 40
NAME ID MATH
0 Ravi 1 NaN
1 Raju 3 NaN
2 Rone 4 NaN
all
Returns True if all elements are True, returns False if any one element is False, zero or empty
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,False,-50],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all()
Output
NAME True
ID True
MATH False
ENGLISH True
dtype: bool
Using axis
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,False,-50],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all(axis=1)
Output
0 True
1 False
2 True
dtype: bool
Using zero or empty element
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[-30,40,-50],
'ENGLISH':[20,'',40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all(axis=1)
Output
0 True
1 False
2 True
dtype: bool
Using zero
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[0,40,-50],
'ENGLISH':[20,30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all(axis=1)
Output
0 False
1 True
2 True
dtype: bool
any
Returns True if any one of the elements is True. If all elements are False, zero or empty then False is returned.
import pandas as pd
my_dict={'NAME':['']}
my_data = pd.DataFrame(data=my_dict)
my_data.any() # False
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex','']}
my_data = pd.DataFrame(data=my_dict)
my_data.any() # True
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[0,0,0],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.any()
Output
NAME True
ID True
MATH False
ENGLISH True
dtype: bool
Using Axis
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[0,0,0],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.any(axis=1)
Output (we get True for every row: with axis=1 each row is checked horizontally, and every row has at least one element that is True)
0 True
1 True
2 True
dtype: bool
When axis=0, each column is checked vertically; all elements of MATH are 0 (or False), so MATH returns False
import pandas as pd
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[0,0,0],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.any(axis=0)
Output
NAME True
ID True
MATH False
ENGLISH True
dtype: bool
append
Adds rows at the end , if columns are not matching then new column is added.
import pandas as pd
my_dict1={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[10,20,30],
'ENGLISH':[20,30,40]}
my_dict2={'NAME':['Ravi2','Raju2','Alex2'],
'ID':[1,2,3],'MATH':[0,0,0],
'ENGLISH':[40,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
my_data1= my_data1.append(my_data2,sort=True)
print(my_data1)
Output
ENGLISH ID MATH NAME
0 20 1 10 Ravi
1 30 2 20 Raju
2 40 3 30 Alex
0 40 1 0 Ravi2
1 50 2 0 Raju2
2 60 3 0 Alex2
We can have common Index by setting ignore_index=True
my_data1= my_data1.append(my_data2,sort=True,ignore_index=True)
Output
ENGLISH ID MATH NAME
0 20 1 10 Ravi
1 30 2 20 Raju
2 40 3 30 Alex
3 40 1 0 Ravi2
4 50 2 0 Raju2
5 60 3 0 Alex2
verify_integrity = True , will raise ValueError while creating index with duplicates.
With different columns
import pandas as pd
my_dict1={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[0,0,0],
'ENGLISH':[20,-30,40]}
my_dict2={'SCIENCE':[7,8,9]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
my_data1= my_data1.append(my_data2,sort=True)
print(my_data1)
Output is here
ENGLISH ID MATH NAME SCIENCE
0 20.0 1.0 0.0 Ravi NaN
1 -30.0 2.0 0.0 Raju NaN
2 40.0 3.0 0.0 Alex NaN
0 NaN NaN NaN NaN 7.0
1 NaN NaN NaN NaN 8.0
2 NaN NaN NaN NaN 9.0
In Pandas version 2.0.0, the deprecated append() method was removed, so you will get this error
AttributeError: 'DataFrame' object has no attribute 'append'
Here use concat()
import pandas as pd
my_dict1={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[10,20,30],
'ENGLISH':[20,30,40]}
my_dict2={'NAME':['Ravi2','Raju2','Alex2'],
'ID':[1,2,3],'MATH':[0,0,0],
'ENGLISH':[40,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
#my_data1= my_data1.append(my_data2,sort=True)
my_data1= pd.concat([my_data1,my_data2])
print(my_data1)
output
NAME ID MATH ENGLISH
0 Ravi 1 10 20
1 Raju 2 20 30
2 Alex 3 30 40
0 Ravi2 1 0 40
1 Raju2 2 0 50
2 Alex2 3 0 60
apply
The given function is applied along the axis (if given) and returns a Series or a DataFrame as output
import pandas as pd
my_dict={'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
print(my_data.apply(sum)) # 30
print(my_data.apply(max)) # 40
Using Axis
import pandas as pd
my_dict={'ENGLISH':[20,-30,40],'MATH':[40,50,70]}
my_data = pd.DataFrame(data=my_dict)
print(my_data.apply(sum,axis=0))
print(my_data.apply(sum,axis=1))
Output
ENGLISH 30
MATH 160
dtype: int64
0 60
1 20
2 110
dtype: int64
applying Numpy functions
import pandas as pd
import numpy as my_np
my_dict={'NAME':['Ravi','Raju','Alex'],
'ID':[1,2,3],'MATH':[0,0,0],
'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
print(my_data['ENGLISH'].apply(my_np.sqrt))
Output
0 4.472136
1 NaN
2 6.324555
Name: ENGLISH, dtype: float64
applymap
Applying function to each element
import pandas as pd
my_dict={'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
print(my_data.applymap(lambda x:x+1))
Output
ENGLISH
0 21
1 -29
2 41
import pandas as pd
my_dict={'ENGLISH':[20,-30,40],'MATH':[40,50,70]}
my_data = pd.DataFrame(data=my_dict)
print(my_data.applymap(lambda x:x+10))
Output
ENGLISH MATH
0 30 50
1 -20 60
2 50 80
Applying to a column
import pandas as pd
my_dict={'ENGLISH':[20,-30,40],'MATH':[40,50,70]}
my_data = pd.DataFrame(data=my_dict)
print(my_data[['MATH']].applymap(lambda x:x+10))
Output
MATH
0 50
1 60
2 80
« Pandas DataFrame Pandas » Attributes »
Sample student DataFrame
← Subscribe to our YouTube Channel here