Pandas DataFrame Methods


Youtube Live session on Tkinter

abs: Absolute value of elements
add: Adding to elements
add_prefix: Adding a string at the start of column names or row labels
add_suffix: Adding a string at the end of column names or row labels
agg: Aggregating the data
align: Aligning two DataFrames using join methods
all: Returns True if all elements are True
any: Returns True if any element is True
append: Adding rows and columns
apply: Apply a function to a DataFrame
applymap: Apply a function to each element

abs

Absolute value of elements
import pandas as pd 
my_dict=([-1,-1,2,-3])
my_data = pd.DataFrame(data=my_dict)
print(my_data.abs())
output
   0
0  1
1  1
2  2
3  3
Absolute value of a column
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,40,-50],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
#print(my_data['MATH'].abs())
print(abs(my_data['MATH'])) # MATH column
Output
0    30
1    40
2    50
Name: MATH, dtype: int64
Absolute value of two columns
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,40,-50],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
print(abs(my_data[['MATH','ENGLISH']]))
Output
   MATH  ENGLISH
0    30       20
1    40       30
2    50       40

add

Adding to elements
import pandas as pd 
my_dict=([-1,-1,2,-3])
my_data = pd.DataFrame(data=my_dict)
print(my_data.add(2))
Output
   0
0  1
1  1
2  4
3 -1
Adding to a column
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,40,-50],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
#print(my_data[['MATH']].add(2)) # applying to single column
print(my_data[['MATH','ENGLISH']].add(2)) # applying to two columns
Output
   MATH  ENGLISH
0   -28       22
1    42      -28
2   -48       42

add_prefix

add_prefix() adds a string before the row labels of a Series and before the column names of a DataFrame.
import pandas as pd 
my_dict=([1,2,3,4])
my_data = pd.Series(data=my_dict)
print(my_data.add_prefix('row_no_'))
Output for a series
row_no_0    1
row_no_1    2
row_no_2    3
row_no_3    4
dtype: int64
for a DataFrame
import pandas as pd 
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.add_prefix('col_no_'))
Output
   col_no_0
0         1
1         2
2         3
3         4
Using a column
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,40,-50],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_label=pd.DataFrame()
print(my_data[['NAME']].add_prefix('f_name '))
Output
  f_name NAME
0        Ravi
1        Raju
2        Alex

add_suffix

add_suffix() adds a string after the row labels of a Series and after the column names of a DataFrame.
import pandas as pd 
my_dict=([1,2,3,4])
my_data = pd.Series(data=my_dict)
print(my_data.add_suffix('_no_'))
Output
0_no_    1
1_no_    2
2_no_    3
3_no_    4
dtype: int64
Using a DataFrame
import pandas as pd 
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.add_suffix(' col_no_'))
Output
   0 col_no_
0         1
1         2
2         3
3         4
Using column name
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,40,-50],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_label=pd.DataFrame()
print(my_data[['NAME']].add_suffix(' (f_name)'))
Output
  NAME (f_name)
0          Ravi
1          Raju
2          Alex

agg

Using aggregate operations ( agg ) such as sum, min, max and mean
import pandas as pd 
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.agg(['sum']))
Output
      0
sum  10
Using all
import pandas as pd 
my_dict=([1,2,3,4])
my_data = pd.DataFrame(data=my_dict)
print(my_data.agg(['sum','min','max','mean']))
Output
         0
sum   10.0
min    1.0
max    4.0
mean   2.5
Aggregate functions across multiple columns
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[30,40,50],
		 'ENGLISH':[20,30,40]}
my_data = pd.DataFrame(data=my_dict)
print(my_data.agg(['sum','min','max','mean'])) # all the columns 
Output is here
              NAME   ID   MATH  ENGLISH
sum   RaviRajuAlex  6.0  120.0     90.0
min           Alex  1.0   30.0     20.0
max           Ravi  3.0   50.0     40.0
mean           NaN  2.0   40.0     30.0
Using only one column (MATH)
print(my_data['MATH'].agg(['sum','min','max','mean'])) # on one  columns 
sum     120.0
min      30.0
max      50.0
mean     40.0
Name: MATH, dtype: float64
Using two columns ( MATH & ENGLISH)
print(my_data[['MATH','ENGLISH']].agg(['sum','min','max','mean'])) # two columns
Output
       MATH  ENGLISH
sum   120.0     90.0
min    30.0     20.0
max    50.0     40.0
mean   40.0     30.0
my_sale=sales.groupby(['product','store'],as_index=False).agg({'qty':'sum','price':'mean'})
Aggregate functions can also be used with groupby() across multiple columns, as in the line above.

align

align() makes two DataFrames match each other's structure ( row or column labels ) using the chosen join method.
import pandas as pd 
my_dict1={'NAME':['Ravi','Alex'],
         'MATH':[30,40]}
my_dict2={'NAME':['Ravi','Raju','Rone'],
         'ENGLISH':[20,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
a1, a2 = my_data1.align(my_data2, join='outer', axis=1)
print(a1)
print(a2)
Output
   ENGLISH  MATH  NAME
0      NaN    30  Ravi
1      NaN    40  Alex
   ENGLISH  MATH  NAME
0       20   NaN  Ravi
1       50   NaN  Raju
2       60   NaN  Rone
Using join='left'
import pandas as pd 
my_dict1={'NAME':['Ravi','Alex'],
          'ID':[1,2],
         'MATH':[30,40]}
my_dict2={'NAME':['Ravi','Raju','Rone'],
          'ID':[1,3,4],
         'ENGLISH':[20,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
a1, a2 = my_data1.align(my_data2, join='left', axis=1)
print(a1)
print(a2)
Output
   NAME  ID  MATH
0  Ravi   1    30
1  Alex   2    40
   NAME  ID  MATH
0  Ravi   1   NaN
1  Raju   3   NaN
2  Rone   4   NaN

all

Returns True if all elements are True; returns False if any element is False, zero or empty.
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,False,-50],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all()
Output
NAME        True
ID          True
MATH       False
ENGLISH     True
dtype: bool
Using axis
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,False,-50],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all(axis=1)
Output
0     True
1    False
2     True
dtype: bool
Using an empty element ( empty string )
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[-30,40,-50],
		 'ENGLISH':[20,'',40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all(axis=1)
Output
0     True
1    False
2     True
dtype: bool
Using zero
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[0,40,-50],
		 'ENGLISH':[20,30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.all(axis=1)
Output
0    False
1     True
2     True
dtype: bool

any

Returns True if any one of the elements is True. If all elements are False, zero or empty, then False is returned.
import pandas as pd 
my_dict={'NAME':['']}
my_data = pd.DataFrame(data=my_dict)
my_data.any() # False
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex','']}
my_data = pd.DataFrame(data=my_dict)
my_data.any() # True
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[0,0,0],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.any()
Output
NAME        True
ID          True
MATH       False
ENGLISH     True
dtype: bool
Using Axis
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[0,0,0],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.any(axis=1)
Output ( every row is True because with axis=1 the check runs horizontally, and at least one element in each row is True )
0    True
1    True
2    True
dtype: bool
With axis=0 the check runs down each column; all elements of MATH are 0 ( or False ), so MATH returns False
import pandas as pd 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[0,0,0],
		 'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
my_data.any(axis=0)
Output
NAME        True
ID          True
MATH       False
ENGLISH     True
dtype: bool

append

Adds rows at the end; if the columns do not match, new columns are added.
import pandas as pd 
my_dict1={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[10,20,30],
		 'ENGLISH':[20,30,40]}
my_dict2={'NAME':['Ravi2','Raju2','Alex2'],
         'ID':[1,2,3],'MATH':[0,0,0],
		 'ENGLISH':[40,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
my_data1= my_data1.append(my_data2,sort=True) 
print(my_data1)
Output
   ENGLISH  ID  MATH   NAME
0       20   1    10   Ravi
1       30   2    20   Raju
2       40   3    30   Alex
0       40   1     0  Ravi2
1       50   2     0  Raju2
2       60   3     0  Alex2
We can get a single continuous index by setting ignore_index=True
my_data1= my_data1.append(my_data2,sort=True,ignore_index=True) 
Output
   ENGLISH  ID  MATH   NAME
0       20   1    10   Ravi
1       30   2    20   Raju
2       40   3    30   Alex
3       40   1     0  Ravi2
4       50   2     0  Raju2
5       60   3     0  Alex2
Setting verify_integrity=True will raise a ValueError if the resulting index contains duplicates.
With different columns
import pandas as pd 
my_dict1={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[0,0,0],
		 'ENGLISH':[20,-30,40]}
my_dict2={'SCIENCE':[7,8,9]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
my_data1= my_data1.append(my_data2,sort=True) 
print(my_data1)
Output is here
   ENGLISH   ID  MATH  NAME  SCIENCE
0     20.0  1.0   0.0  Ravi      NaN
1    -30.0  2.0   0.0  Raju      NaN
2     40.0  3.0   0.0  Alex      NaN
0      NaN  NaN   NaN   NaN      7.0
1      NaN  NaN   NaN   NaN      8.0
2      NaN  NaN   NaN   NaN      9.0
In Pandas version 2.0.0, the previously deprecated append() has been removed, so you will get this error
AttributeError: 'DataFrame' object has no attribute 'append'
Use concat() instead
import pandas as pd 
my_dict1={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[10,20,30],
		 'ENGLISH':[20,30,40]}
my_dict2={'NAME':['Ravi2','Raju2','Alex2'],
         'ID':[1,2,3],'MATH':[0,0,0],
		 'ENGLISH':[40,50,60]}
my_data1 = pd.DataFrame(data=my_dict1)
my_data2 = pd.DataFrame(data=my_dict2)
#my_data1= my_data1.append(my_data2,sort=True) 
my_data1= pd.concat([my_data1,my_data2]) 
print(my_data1)
output
    NAME  ID  MATH  ENGLISH
0   Ravi   1    10       20
1   Raju   2    20       30
2   Alex   3    30       40
0  Ravi2   1     0       40
1  Raju2   2     0       50
2  Alex2   3     0       60

apply

The given function is applied along the axis ( if given ) and the result is returned as a Series or DataFrame
import pandas as pd 
my_dict={'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)

print(my_data.apply(sum)) # 30 
print(my_data.apply(max)) # 40 
Using Axis
import pandas as pd 
my_dict={'ENGLISH':[20,-30,40],'MATH':[40,50,70]}
my_data = pd.DataFrame(data=my_dict)

print(my_data.apply(sum,axis=0))  
print(my_data.apply(sum,axis=1))  
Output
ENGLISH     30
MATH       160
dtype: int64
0     60
1     20
2    110
dtype: int64
Applying NumPy functions
import pandas as pd 
import numpy as my_np 
my_dict={'NAME':['Ravi','Raju','Alex'],
         'ID':[1,2,3],'MATH':[0,0,0],
		 'ENGLISH':[20,-30,40]}

my_data = pd.DataFrame(data=my_dict)
print(my_data['ENGLISH'].apply(my_np.sqrt))
Output
0    4.472136
1         NaN
2    6.324555
Name: ENGLISH, dtype: float64

applymap

Applying a function to each element ( from Pandas 2.1.0, applymap() is deprecated; use DataFrame.map() instead )
import pandas as pd 
my_dict={'ENGLISH':[20,-30,40]}
my_data = pd.DataFrame(data=my_dict)
print(my_data.applymap(lambda x:x+1))
Output
   ENGLISH
0       21
1      -29
2       41
import pandas as pd 
my_dict={'ENGLISH':[20,-30,40],'MATH':[40,50,70]}
my_data = pd.DataFrame(data=my_dict)

print(my_data.applymap(lambda x:x+10))
Output
   ENGLISH  MATH
0       30    50
1      -20    60
2       50    80
Applying to a column
import pandas as pd 
my_dict={'ENGLISH':[20,-30,40],'MATH':[40,50,70]}
my_data = pd.DataFrame(data=my_dict)

print(my_data[['MATH']].applymap(lambda x:x+10))

   MATH
0    50
1    60
2    80
Pandas DataFrame Pandas Attributes Sample student DataFrame
Subscribe to our YouTube Channel here


Subscribe

* indicates required
Subscribe to plus2net

    plus2net.com



    Post your comments , suggestion , error , requirements etc here





    Python Video Tutorials
    Python SQLite Video Tutorials
    Python MySQL Video Tutorials
    Python Tkinter Video Tutorials
    We use cookies to improve your browsing experience. . Learn more
    HTML MySQL PHP JavaScript ASP Photoshop Articles FORUM . Contact us
    ©2000-2024 plus2net.com All rights reserved worldwide Privacy Policy Disclaimer