100+ Python Pandas commands for EDA | Tips & Tricks
4 min readFeb 9, 2022
These commands help you save a lot of time while working on any kind of analysis using Pandas
# importsimport random
import time
import os
import pandas as pd
import seaborn as sns
import numpy as np
from scipy import stats as sts
import matplotlib.pyplot as plt
a4_dims = (11.7, 8.27)
plt.rcParams['figure.figsize'] = a4_dimsimport warnings
warnings.filterwarnings('ignore')#to display all rows columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)# to remove scientific notation
pd.set_option('display.float_format', lambda x: '%.3f' % x)#timing your program?
import time
start = time.time()
# your code here
end = time.time()
print(end - start)
# working with date time
# convert a col to datetime pandas
df['date'] = pd.to_datetime(df['date'])#Change working directory
import os
os.getcwd()
os.chdir("directory")%pwd
%cd folder# get df value
df['col'][1].item()# create empty df with n cols & m rows#read excel
pd.read_excel('.xlsx', sheet_name = 'Sheet1')# remove index while exporting
df.to_csv('csv', index = False)#importing multiple files in a directory
l = [pd.read_csv(filename) for filename in glob.glob("path\*.csv")]
df = pd.concat(l, axis = 0)df = pd.concat(l, axis = 1)
df = df1.append(df2)axis - 0 row - 1 coldf = pd.merge(df1, df2, on = 'com_col', how = 'outer')#index reset
df.reset_index(drop = True, inplace = True)#change dtype
df.Weight = df.Weight.astype('int64')#replace blanks with NaN
df.replace(r'^\s*$', np.nan, regex=True)#accepts only 1D , get all unique elements in a column
pd.unique(df['col1'])
df['col1'].unique()#to flat 2D into 1D
df[['col1', 'col2']].values.ravel()#to flat 2D into 1D & get only unique values
pd.unique(df[['col1', 'col2']].values.ravel())#number of unique elements in one column
df['col1'].value_counts()#number of unique elements in all columns
df.nunique()#missing values
sns.heatmap(df.isnull())# NaNs by col
df.isnull().sum(axis = 0)#drop a column in df
df.drop(['colname'], axis = 1)#percentile
df['col'].quantile(0.1) #top 10 percentile#filter columns based on names
col_list = list(df.filter(like = 'Avg_').columns)#create a sample dataframe
df = pd.DataFrame({'col1': [1,2,3], 'col2': [11,22,33]})
df = pd.DataFrame({'x': [1,2,3], 'y':[11,22,33]}, columns = ['x1', 'y1'])
#with n cols & rows
pd.DataFrame(index=np.arange(1), columns=np.arange(8))#sorting values by 1 col
df.sort_values(by = ['col1'], ascending = True)#sorting values by more columns
df.sort_values(by = ['col1', 'col2', 'col3'], ascending = True)#renaming the columns
df.rename(columns = {'col1':'rnm1', 'col2':'rnm2'}, inplace = True)#column slicing
all = df.columns
except last one = df.columns[:-1]
mirror columns = df.columns[::-1]#filter function
df.filter(['col1', 'col2', 'col3'])
df.filter(regex = '/d')#upto 2 place decimal
"{:.2f}".format(x)#row slicing
#top 4 rows
df[:4]#col slicing
df[(cond1) | (cond2) & (cond3)] #where cond1 = df['col1'] > 2#iloc & loc
df.iloc[<index>, <index>]
df.loc[(cond1), ['col1', 'col2']] #where cond1 = df['col1'] > 2#groupby
df.groupby(by = ['col1'])['reqcols'].mean()#replacing nan with space
df['col1'] = df['col1'].replace('whattoreplace', 'replacewith')
df = df.replace('','')
df = df.fillna('')#drop rows with nan
df.dropna()#converting string to datetime
df['col1'] = pd.to_datetime(df['col1'])#summary & transpose
df.describe().transpose()#check for null values in a column
df.isnull().any()
df.isnull().all()
df['col1'].isnull()
df['col2'].notnull()#null values in each col
df.isna().sum()#check for non-null values
pd.notnull()#isin in andas
df['col1'].isin('somelist')#dropping duplicates
#drops duplicates excluding first occurence
df.drop_duplicates()
#drops duplicates excluding last occurence
df.drop_duplicates(keep = 'last')
#drops duplicates by col
df.drop_duplicates(['col1'])
df.drop_duplicates(['col1'], keep = 'last')df.drop_duplicates(subset = 'Col1')#joining dataframes#Creating a pivot
df.pivot('A', 'B', 'C') - [A - vertical, B - Horizontal, C - values]
pd.pivot_table(df, values = '', index = ['',''], columns = [''], aggfunc = np.sum)
#Unpivot
pd.DataFrame(pivoted.to_records())#replace infinity with nan
df.replace([np.inf, -np.inf], np.nan)
#check for infinite values
np.isfinite(df).any()#data types of all columns
df.dtypes
#data type of a single column
df.colname.dtypes
#convert dtypes
df['col'].astype(str).astype(int)#lambda function
lambda x : x + 10#applying functions to a dataframe
df.apply(lambda x: x + 3)
#apply function referencing multiple columns
df['Value'] = df.apply(lambda row: my_test(row['a'], row['c']), axis=1)
#if else loop in a lambda function
df.apply(lambda x: 1 if x == 'W' else 0)#List Comprehensions
ls = [i for i in ls1 if i not in ls2]#numpy array methods
np.zeros((shape))
np.ones((shape))
np.full(5, -1)
np.full((2,5), -1)#list methods
list.sort()
list.sort(reverse = true)
ls = ['a','b','c']
"".join(ls) = abc
"-".join(ls) = a-b-c#reverse a list
list.reverse()
#remove list items
list.clear()
#remove - removes the element from the list
ls.remove(element)
list.pop[0]#pop - last element
ls.pop()
#pop at index
ls.pop(0)
del list[0]
#get list index
list_name.index('element')#append - adds an element to the list
ls.append(element)#extend - adds ls2 to the end of ls1
ls1.extend(ls2)#insert
ls.insert(position, element)#delete - deletes the element at that index
del ls[0]
del ls[:] #deletes all elements from the list#enumerate
enumerate(iterable, start)
li = ['a','b','c']
ob = enumerate(li)#String methods
str.endswith('pattern')# EDA notes
https://pandas-profiling.github.io/pandas-profiling/docs/master/rtd/
Bonus: Jupyter markdown commands
#JUPYTER MARKDOWNHEADERS
#h1
##h2
######h6LISTS
*item1
*item2
*item3
*item4IMAGES
![alt text](url)EMPHASIS
*italic*
_italic_
**bold**
__bold__
*italic **bold** combine*ORDERED LIST
1. item
2. item
*item
*itemLINKS
[text](url)BLOCKQUOTES
>line 1
>line 2BACKSLASH ESCAPES
\* literal asterisks\*MATHEMATICAL SYMBOLS
$symbol$
Thank you! follow for more such articles :)