OiO.lk Blog python Find the dataframe row where a column's datetime value matches another column's datetime value and return a value from the matching row
python

Find the dataframe row where a column's datetime value matches another column's datetime value and return a value from the matching row


I am trying to find the row where a column ‘c2’ datetime value matches another column ‘c1’ datetime value and return a value from the matching row ‘rtn’ as a ‘new’ value. It works with the following example

In the actual problem, the second date column contains the date of an earlier predicted value for the date. There are a number of these predictions, made at date-1d, date-2d, etc. What I eventually want to produce is a graph comparing the final value with all the future predicted values.

    # Integer-valued demo: for each row, find the row whose 'date2' equals this
    # row's 'date1' and copy that row's 'rtn' value into 'new'.
    from datetime import  datetime#, timedelta #, date # 

    import pandas as pd
    import numpy as np

    # Plain integers stand in for the dates; 'date2' holds the 'date1' values in
    # reverse order and 'new' starts out as the placeholder 99.
    df = pd.DataFrame({'date1': [10, 11, 12], 'date2': [12, 11, 10],  'rtn':[21,22,23],'new':[99,99,99]})

    # Diagnostics: is each date1 value present anywhere in date2, and are the
    # two columns equal row by row?
    print(df['date1'].isin(df['date2']) )
    print(f'date1 \n{df.date1 == df.date2}')

    # State before the lookup loop ('datatypes' is not used again in this snippet).
    datatypes = df.dtypes
    print(df.dtypes)
    print(df)

    for index, row in df.iterrows():
        # Select the rows where date2 equals this row's date1 and take the first
        # matching 'rtn' (.iloc[0] raises IndexError when nothing matches).
        # NOTE(review): the assignment targets the Series yielded by iterrows(),
        # not df itself. The pandas docs warn that, depending on the dtypes, that
        # Series can be a copy, in which case the write never reaches df. Every
        # column here is an integer, which is presumably why the update shows up
        # in df -- confirm against the iterrows() documentation.
        # df.loc[index, 'new'] = ... would write to df directly.
        row['new']=df.loc[df['date2'] ==row['date1'],'rtn'].iloc[0]        

    # State after the lookup loop.
    datatypes = df.dtypes
    print(df.dtypes)
    print(df)

However, when I use datetime dates as opposed to integers, it does not work. (I have included commented-out attempts at converting the datetime to string, float and even int, as above, and they all fail.)

    # Datetime-valued version of the same lookup: for each row, find the row whose
    # 'date2' equals this row's 'date1' and copy that row's 'rtn' into 'new'.
    from datetime import  datetime#, timedelta #, date # 

    import pandas as pd
    import numpy as np

    # Reference instant; it is not used within this snippet itself.
    date_time_ref = datetime(year=2020, month=1, day=1, hour=00, minute=00, second=00)
    # Three distinct timestamps for the 'date1' column.
    date_time_date11 = datetime(year=2024, month=10, day=20, hour=1, minute=00, second=00)
    date_time_date12 = datetime(year=2024, month=10, day=21, hour=13, minute=00, second=00)
    date_time_date13 = datetime(year=2024, month=10, day=22, hour=1, minute=00, second=00)
    # The 'date2' values reuse the very same datetime objects in reverse order
    # (date21 = date13, date22 = date12, date23 = date11).
    date_time_date23 = date_time_date11  #this is to ensure the test dates are identical 
    date_time_date22 = date_time_date12  #this is to ensure the test dates are identical 
    date_time_date21 = date_time_date13  #this is to ensure the test dates are identical 

    # Sanity checks on the raw datetime objects: date11 vs date21 differ
    # (date21 is date13), while date11 vs date23 are the same object.
    print(f'Test {date_time_date11 == date_time_date21}')
    print(f'Test {date_time_date11 == date_time_date23}')

    # Two datetime columns plus two integer columns; 'new' starts as placeholder 99.
    df = pd.DataFrame({'date1': [date_time_date11, date_time_date12, date_time_date13], 'date2': [date_time_date21, date_time_date22, date_time_date23],  'rtn':[21,22,23],'new':[99,99,99]})


    # State before the lookup loop ('datatypes' is not used again in this snippet).
    datatypes = df.dtypes
    print(df.dtypes)
    print(df)

    for index, row in df.iterrows():
        # Select the rows where date2 equals this row's date1 and take the first
        # matching 'rtn' (.iloc[0] raises IndexError when nothing matches).
        # NOTE(review): the assignment targets the Series yielded by iterrows(),
        # not df itself. The pandas docs warn that, depending on the dtypes, that
        # Series can be a copy, in which case the write never reaches df. This
        # frame mixes datetime and integer columns, so the row is presumably such
        # a copy and 'new' stays at 99 even though the lookup on the right-hand
        # side succeeds -- confirm against the iterrows() documentation.
        # df.loc[index, 'new'] = ... would write to df directly.
        row['new']=df.loc[df['date2'] ==row['date1'],'rtn'].iloc[0]        

    # State after the lookup loop.
    datatypes = df.dtypes
    print(df.dtypes)
    print(df)

    # Diagnostics: is each date1 value present anywhere in date2, and are the
    # two columns equal row by row?
    print(df['date1'].isin(df['date2']) )
    print(f'date1 \n{df.date1 == df.date2}')

I have tried the following conversions of the datetime columns above, without any change to the result:

    # Catalogue of conversions tried on the 'date1'/'date2' columns before running
    # the lookup loop.
    # NOTE(review): these read as independent alternatives, each presumably applied
    # on its own to the original datetime columns rather than run top to bottom
    # (e.g. after attempt 1 the columns hold strings) -- confirm with the author.
    # NOTE(review): every attempt only changes the values being compared; none of
    # them changes how a result would be written back into the frame.
    df = df.reset_index()  # make sure indexes pair with number of rows

    # 1. convert to strings for comparison (formatted down to the hour)
    df['date1'] = df['date1'].dt.strftime('%Y-%m-%d %H') #':%M:%S')
    df['date2'] = df['date2'].dt.strftime('%Y-%m-%d %H') #:%M:%S')

    # 2. convert the datetime column to an ordinal
    # The results of these two calls are not assigned, so df is left unchanged.
    df['date1'].apply(lambda x:x.toordinal())
    df['date2'].apply(lambda x:x.toordinal())

    # 3. convert the datetime column to an integer, then divide the resulting
    #    integer by the number of nanoseconds in a second
    df['date1'] = df['date1'].astype(int)/ 10**9
    df['date2'] = df['date2'].astype(int)/ 10**9

    # 4. convert datetime to seconds
    # The results of these two calls are not assigned, so df is left unchanged.
    pd.to_timedelta(df.date1).dt.total_seconds()
    pd.to_timedelta(df.date2).dt.total_seconds()

    # 5. convert datetime to float64: offset from date_time_ref, expressed in hours
    df['date1'] = df['date1'] - date_time_ref
    df['date2'] = df['date2'] - date_time_ref
    df['date1'] = df['date1'] / np.timedelta64(1, 'h')
    df['date2'] = df['date2'] / np.timedelta64(1, 'h')

    # 6. convert to float64 above to int64
    df['date1'] = df['date1'].astype(np.int64)
    df['date2'] = df['date2'].astype(np.int64)

Many thanks in advance … why the code works for one and not the other is a mystery to me 🙁



You need to sign in to view this answer

Exit mobile version