Pandas DataFrame

I hope this post will be helpful for all Python beginners, especially pandas users. I use WinPython as the environment.

sample_data.csv

COL1 COL2
DATA_1 1
DATA_2 2
DATA_3 3
DATA_4 4
DATA_5 5
DATA_6 6
DATA_7 7
DATA_8 8
DATA_9 9
DATA_10 10

sample_data_odd.csv

COL2 IS_ODD
1 2
2 1
3 2
4 1
5 2
6 1
7 2
8 1
9 2
10 1

source code

from pathlib import Path

import pandas as pd
 
# =================================================================
# Load the two CSV files used throughout this walkthrough.
# Both live in the same folder, so the folder is named once here.
DATA_DIR = Path(r"D:\nitip\tulisan\2018\Python\Panda_Data_Frame")
sample_data_odd = pd.read_csv(DATA_DIR / "sample_data_odd.csv")
sample_data = pd.read_csv(DATA_DIR / "sample_data.csv")
# Keep an untouched copy for the join demo near the end; sample_data itself is
# reshaped step by step below, so copying now avoids reading the file twice.
sample_data_ori = sample_data.copy()

# =================================================================
print("============================ basic dataframe")
print(sample_data.head(3))

# Derive a new column from COL2 with an element-wise lambda.
# NOTE(review): the lambda keeps even values and replaces odd ones with 0,
# which does not obviously match the "check odd number" label or the IS_ODD
# name the column gets next. Left unchanged because this derived column is
# discarded by the slice step below, before the merge brings in IS_ODD from
# sample_data_odd.csv -- confirm the intended rule.
sample_data['COL3'] = sample_data['COL2'].apply(lambda x: x if x % 2 == 0 else 0)
print("============================ after applying lambda (check odd number)")
print(sample_data.head(3))

sample_data = sample_data.rename(columns={'COL1': 'DATA_ID', 'COL3': 'IS_ODD'})
print("============================ rename column")
print(sample_data.head(3))

# Keep only DATA_ID and COL2; this drops the IS_ODD column derived above.
sample_data = sample_data.loc[:, ['DATA_ID', 'COL2']]
print("============================ replace / slice IS_ODD column")
print(sample_data.head(3))

# Left merge on COL2: every row of sample_data is kept and gains the IS_ODD
# value from the matching row of sample_data_odd.
sample_data = pd.merge(left=sample_data, right=sample_data_odd, how='left', on=['COL2'])
print("============================ sample merge")
print(sample_data.head(3))

sample_data = sample_data.drop(['COL2'], axis=1)
print("============================ drop column")
print(sample_data.head(3))

# axis=0 stacks the frames vertically (rows are appended); the original index
# labels are kept, so each label appears twice in the result.
sample_data_concat_nodrop = pd.concat([sample_data, sample_data], axis=0)
print("============================ concat column with no drop index")
print(sample_data_concat_nodrop)

# Same stacked rows, renumbered with a fresh 0..n-1 index. Reuses the frame
# built above instead of concatenating a second time.
sample_data_concat_drop = sample_data_concat_nodrop.reset_index(drop=True)
print("============================ concat column with drop / reset index")
print(sample_data_concat_drop)

# "DATA_" is five characters long, so x[5:] is whatever follows that prefix.
# Assumes every DATA_ID looks like DATA_<number>, as in the sample file.
sample_data["COL_NEW"] = sample_data['DATA_ID'].apply(lambda x: int(x[5:]))
print("============================ sample group by & sum,mean,median")
# Build the grouping once and run all three aggregations on it.
col_new_by_is_odd = sample_data.groupby(['IS_ODD'])['COL_NEW']
print(col_new_by_is_odd.sum())
print(col_new_by_is_odd.mean())
print(col_new_by_is_odd.median())

# Keep only the rows whose IS_ODD value is in the given list, then renumber.
sample_data = sample_data.loc[sample_data['IS_ODD'].isin([1]), :].reset_index(drop=True)
print("============================ sample isin syntax (filtering IS_ODD equal to 1)")
print(sample_data)

print("============================ sample join sample_data_ori with sample_data")
# join() aligns the two frames on their index. sample_data_ori is the calling
# (left) frame, so all of its rows are kept and rows with no matching index in
# the filtered sample_data get NaN in the joined columns.
print(sample_data_ori.join(sample_data))

print("============================ sample max")
print(sample_data.max())

print("============================ sample min")
print(sample_data.min())

Correct me if I'm wrong (CMIIW) :)

Leave a Reply

Your email address will not be published. Required fields are marked *

Afiseaza emoticoanele Locco.Ro