31  Unión de dataframes

import pandas as pd
# Example time series: ten consecutive daily readings starting 2023-01-01.
dates = pd.date_range('2023-01-01', periods=10)
temps = pd.Series([22, 21, 23, 24, 22, 23, 25, 24, 26, 27], index=dates)
rain = pd.Series([0, 1, 0, 0, 2, 0, 1, 3, 0, 1], index=dates)

# Turn each series into a two-column DataFrame: the datetime index is named
# 'date' and moved into a regular column, and the values get their own
# column name. The result carries a default 0..9 integer index.
df_temps = temps.rename_axis('date').reset_index(name='temperature')
df_rain = rain.rename_axis('date').reset_index(name='precipitation')

# Display the temperature DataFrame (notebook cell output).
df_temps
date temperature
0 2023-01-01 22
1 2023-01-02 21
2 2023-01-03 23
3 2023-01-04 24
4 2023-01-05 22
5 2023-01-06 23
6 2023-01-07 25
7 2023-01-08 24
8 2023-01-09 26
9 2023-01-10 27
# Display the precipitation DataFrame (notebook cell output follows).
df_rain
date precipitation
0 2023-01-01 0
1 2023-01-02 1
2 2023-01-03 0
3 2023-01-04 0
4 2023-01-05 2
5 2023-01-06 0
6 2023-01-07 1
7 2023-01-08 3
8 2023-01-09 0
9 2023-01-10 1
# Use merge to combine both frames on the column they share ('date');
# rows are matched by equal 'date' values.
df_temps.merge(df_rain, on='date')
date temperature precipitation
0 2023-01-01 22 0
1 2023-01-02 21 1
2 2023-01-03 23 0
3 2023-01-04 24 0
4 2023-01-05 22 2
5 2023-01-06 23 0
6 2023-01-07 25 1
7 2023-01-08 24 3
8 2023-01-09 26 0
9 2023-01-10 27 1
# Promote 'date' from a regular column to the index of each frame.
# Rebinding the names leaves df_temps / df_rain indexed by date from here on.
df_temps = df_temps.set_index('date')
df_rain = df_rain.set_index('date')

# join aligns rows on the index, so no key column has to be named;
# 'left' is join's default and is spelled out here for clarity.
df_temps.join(df_rain, how='left')
temperature precipitation
date
2023-01-01 22 0
2023-01-02 21 1
2023-01-03 23 0
2023-01-04 24 0
2023-01-05 22 2
2023-01-06 23 0
2023-01-07 25 1
2023-01-08 24 3
2023-01-09 26 0
2023-01-10 27 1

# Rebuild the DataFrames from the series so that 'date' is a regular column
# again and each frame has a default integer index.
df_temps = temps.rename_axis('date').reset_index(name='temperature')
df_rain = rain.rename_axis('date').reset_index(name='precipitation')

# Concatenate side by side. Rows are aligned on the integer index, and the
# 'date' column is kept from both frames, so it appears twice in the result.
pd.concat((df_temps, df_rain), axis='columns')
date temperature date precipitation
0 2023-01-01 22 2023-01-01 0
1 2023-01-02 21 2023-01-02 1
2 2023-01-03 23 2023-01-03 0
3 2023-01-04 24 2023-01-04 0
4 2023-01-05 22 2023-01-05 2
5 2023-01-06 23 2023-01-06 0
6 2023-01-07 25 2023-01-07 1
7 2023-01-08 24 2023-01-08 3
8 2023-01-09 26 2023-01-09 0
9 2023-01-10 27 2023-01-10 1

# Rebuild the DataFrames from the series so that 'date' is a regular column
# again and each frame has a default integer index.
df_temps = temps.rename_axis('date').reset_index(name='temperature')
df_rain = rain.rename_axis('date').reset_index(name='precipitation')

# Stack the frames vertically. Columns are matched by name, so each half has
# NaN in the column that only the other frame provides, and the original
# 0..9 index labels are repeated.
pd.concat((df_temps, df_rain), axis='index')
date temperature precipitation
0 2023-01-01 22.0 NaN
1 2023-01-02 21.0 NaN
2 2023-01-03 23.0 NaN
3 2023-01-04 24.0 NaN
4 2023-01-05 22.0 NaN
5 2023-01-06 23.0 NaN
6 2023-01-07 25.0 NaN
7 2023-01-08 24.0 NaN
8 2023-01-09 26.0 NaN
9 2023-01-10 27.0 NaN
0 2023-01-01 NaN 0.0
1 2023-01-02 NaN 1.0
2 2023-01-03 NaN 0.0
3 2023-01-04 NaN 0.0
4 2023-01-05 NaN 2.0
5 2023-01-06 NaN 0.0
6 2023-01-07 NaN 1.0
7 2023-01-08 NaN 3.0
8 2023-01-09 NaN 0.0
9 2023-01-10 NaN 1.0
# Promote 'date' from a regular column to the index of each frame.
# Rebinding the names leaves df_temps / df_rain indexed by date from here on.
df_temps = df_temps.set_index('date')
df_rain = df_rain.set_index('date')

# With 'date' as the shared index, a column-wise concat aligns rows by date
# and yields one row per date with both measurements.
pd.concat((df_temps, df_rain), axis='columns')
temperature precipitation
date
2023-01-01 22 0
2023-01-02 21 1
2023-01-03 23 0
2023-01-04 24 0
2023-01-05 22 2
2023-01-06 23 0
2023-01-07 25 1
2023-01-08 24 3
2023-01-09 26 0
2023-01-10 27 1

Para aprender más:

  1. Revisa las opciones de left, right en merge
  2. Revisa las opciones de on y how en join
  3. Revisa las opciones de ignore_index, join en concat
  4. Averigua qué pasa cuando faltan datos en alguna de las series o dataframes