I hope this post will be helpful for all Python beginners, especially pandas users. I use WinPython as the environment.
sample_data.csv
COL1 | COL2 |
DATA_1 | 1 |
DATA_2 | 2 |
DATA_3 | 3 |
DATA_4 | 4 |
DATA_5 | 5 |
DATA_6 | 6 |
DATA_7 | 7 |
DATA_8 | 8 |
DATA_9 | 9 |
DATA_10 | 10 |
sample_data_odd.csv
COL2 | IS_ODD |
1 | 2 |
2 | 1 |
3 | 2 |
4 | 1 |
5 | 2 |
6 | 1 |
7 | 2 |
8 | 1 |
9 | 2 |
10 | 1 |
source code
from pathlib import Path

import pandas as pd

# Folder that holds the two sample CSV files shown above.
DATA_DIR = Path(r"D:\nitip\tulisan\2018\Python\Panda_Data_Frame")


def run_demo(sample_data, sample_data_odd):
    """Walk through basic pandas DataFrame operations, printing each step.

    The steps are: apply a lambda, rename columns, select columns, merge,
    drop a column, concat, group by, filter with ``isin``, join, max and min.

    Args:
        sample_data: DataFrame with columns ``COL1`` (strings shaped like
            ``DATA_<number>``) and ``COL2`` (integers).
        sample_data_odd: DataFrame with columns ``COL2`` and ``IS_ODD``;
            it is merged onto ``sample_data`` using ``COL2`` as the key.

    Returns:
        A tuple ``(filtered, joined)``: ``filtered`` holds the rows whose
        ``IS_ODD`` equals 1 (columns ``DATA_ID``, ``IS_ODD``, ``COL_NEW``),
        and ``joined`` is the original ``sample_data`` index-joined with
        ``filtered``.
    """
    # Keep an untouched copy for the join step, and work on a second copy
    # so the caller's DataFrame is never modified.
    sample_data_ori = sample_data.copy()
    sample_data = sample_data.copy()

    print("============================ basic dataframe")
    print(sample_data.head(3))

    # NOTE: despite the "check odd number" label, this lambda keeps the
    # value when it is even and writes 0 when it is odd.
    sample_data['COL3'] = sample_data['COL2'].apply(
        lambda x: x if x % 2 == 0 else 0)
    print("============================ after applying lambda (check odd number)")
    print(sample_data.head(3))

    sample_data = sample_data.rename(
        columns={'COL1': 'DATA_ID', 'COL3': 'IS_ODD'})
    print("============================ rename column")
    print(sample_data.head(3))

    # Selecting only DATA_ID and COL2 discards the IS_ODD column built above;
    # the merge below brings in IS_ODD from sample_data_odd instead.
    sample_data = sample_data.loc[:, ['DATA_ID', 'COL2']]
    print("============================ replace / slice IS_ODD column")
    print(sample_data.head(3))

    sample_data = pd.merge(
        left=sample_data, right=sample_data_odd, how='left', on=['COL2'])
    print("============================ sample merge")
    print(sample_data.head(3))

    sample_data = sample_data.drop(columns=['COL2'])
    print("============================ drop column")
    print(sample_data.head(3))

    # axis=0 stacks the rows; without reset_index the original index labels
    # are repeated in the result.
    sample_data_concat_nodrop = pd.concat([sample_data, sample_data], axis=0)
    print("============================ concat column with no drop index")
    print(sample_data_concat_nodrop)

    sample_data_concat_drop = pd.concat(
        [sample_data, sample_data], axis=0).reset_index(drop=True)
    print("============================ concat column with drop / reset index")
    print(sample_data_concat_drop)

    # Skip the first five characters of DATA_ID ("DATA_" in the sample file)
    # and convert the remainder to an integer.
    sample_data["COL_NEW"] = sample_data['DATA_ID'].apply(lambda x: int(x[5:]))
    print("============================ sample group by & sum,mean,median")
    grouped = sample_data.groupby(['IS_ODD'])['COL_NEW']
    print(grouped.sum())
    print(grouped.mean())
    print(grouped.median())

    sample_data = sample_data.loc[
        sample_data['IS_ODD'].isin([1]), :].reset_index(drop=True)
    print("============================ sample isin syntax (filtering IS_ODD equal to 1)")
    print(sample_data)

    print("============================ sample join sample_data_ori with sample_data")
    # join() aligns on the index with the original frame on the left, so
    # every original row is kept and unmatched rows get NaN.
    joined = sample_data_ori.join(sample_data)
    print(joined)

    print("============================ sample max")
    print(sample_data.max())
    print("============================ sample min")
    print(sample_data.min())

    return sample_data, joined


def main():
    """Load the two sample CSV files from DATA_DIR and run the walkthrough."""
    sample_data_odd = pd.read_csv(DATA_DIR / "sample_data_odd.csv")
    sample_data = pd.read_csv(DATA_DIR / "sample_data.csv")
    run_demo(sample_data, sample_data_odd)


if __name__ == "__main__":
    main()
CMIIW (correct me if I'm wrong).
Leave a Reply