In [1]:
import pandas as pd
In [2]:
def compareDF(df1,df2,ColumnName, diff=''):
leftList=[]
rightList=[]
for i in list(df1):
leftList.append(str(i)+'_l')
for i in list(df2):
rightList.append(str(i)+'_r')
df1.columns=leftList
df1=df1.rename(columns={str(ColumnName)+"_l": ColumnName})
df2.columns=rightList
df2=df2.rename(columns={str(ColumnName)+"_r": ColumnName})
df = pd.merge(df1,df2, on=ColumnName, suffixes=('', '') ,how='outer')
df=df.fillna(0)
df.sort_values(ColumnName, inplace=True)
dropList=[]
for index, row in df.iterrows():
total=0
for i in list(df):
if i==ColumnName:
continue
else:
total=row[i]+total
if total==0:
dropList.append(index)
df=df.drop(dropList)
df=df.reset_index(drop=True)
#if diff=='x':
return df
In [3]:
def formatX(c): # Optional
c=str(c)
c=c.replace(',', '')
c=c.replace('$','')
return float(str(c))
In [4]:
def cleanDF(df, columnName):
df=df.fillna(0)
for i in list(df):
try:
if i != columnName:
df[i] = df.apply(lambda x: formatX(x[i]),axis=1)
except Exception as e:
continue
return df
In [5]:
def getDiff(df, columnName):
leftList=[]
rightList=[]
for i in list(df):
if str(i)[-2:]=='_l':
leftList.append(str(i)[:-2])
elif str(i)[-2:]=='_r':
rightList.append(str(i)[:-2])
for i in leftList:
if i not in rightList:
df[str(i+'_r')]=0
for i in rightList:
if i not in leftList:
df[str(i+'_l')]=0
for i in list(df):
try:
if i != columnName and str(i)[-2:]!='_r':
if str(i)[:-2]+'_r' in list(df):
df[str(i)[:-2]]=df[str(i)[:-2]+'_r']-df[i]
else:
print("not there",str(i)[:-2]+'_r', list(df))
elif '_r' in str(i):
pass
#df=df.drop([i], axis=1)
except Exception as e:
#print("Error:", str(e))
continue
for i in list(df):
if str(i)[-2:]=='_l' or str(i)[-2:]=='_r':
df=df.drop([i], axis=1)
return df
In [6]:
df1 = pd.DataFrame(columns=["id","A", "B"], data=[[1,5,3], [2,3,5]])
df2 = pd.DataFrame(columns=["id","A", "B"], data=[[1,2,3], [2,3,6]])
In [7]:
df1=cleanDF(df1, 'id')
df2=cleanDF(df2, 'id')
result=compareDF(df1,df2,'id')
In [8]:
result.head()
Out[8]:
In [10]:
# Right - Left
result2=getDiff(result, 'id')
In [11]:
result2.head()
Out[11]:
In [18]:
df1 = pd.DataFrame(columns=["id","A", "B"], data=[[1,5,3], [2,3,5]])
df2 = pd.DataFrame(columns=["id","A", "B", "C"], data=[[1,2,3,4], [2,3,6,2], [12,13,16,1]])
In [19]:
df1=cleanDF(df1, 'id')
df2=cleanDF(df2, 'id')
result=compareDF(df2,df1,'id')
In [20]:
result.head()
Out[20]:
In [21]:
result2=getDiff(result, 'id')
In [22]:
result2.head()
Out[22]:
In [ ]:
In [ ]: