import pandas as pd
# 31. Unión de dataframes
# Example time series: ten consecutive daily timestamps starting 2023-01-01,
# with one temperature reading and one precipitation reading per day.
dates = pd.date_range('2023-01-01', periods=10)
temps = pd.Series([22, 21, 23, 24, 22, 23, 25, 24, 26, 27], index=dates)
rain = pd.Series([0, 1, 0, 0, 2, 0, 1, 3, 0, 1], index=dates)

# Build DataFrames from the series. `.values` drops each series' date index,
# so 'date' is an ordinary column and both frames get a default 0..9 index.
df_temps = pd.DataFrame({'date': dates, 'temperature': temps.values})
df_rain = pd.DataFrame({'date': dates, 'precipitation': rain.values})

# Notebook cell: evaluating the bare name displays the temperature frame.
df_temps
date | temperature | |
---|---|---|
0 | 2023-01-01 | 22 |
1 | 2023-01-02 | 21 |
2 | 2023-01-03 | 23 |
3 | 2023-01-04 | 24 |
4 | 2023-01-05 | 22 |
5 | 2023-01-06 | 23 |
6 | 2023-01-07 | 25 |
7 | 2023-01-08 | 24 |
8 | 2023-01-09 | 26 |
9 | 2023-01-10 | 27 |
# Notebook cell: evaluating the bare name displays the precipitation frame.
df_rain
date | precipitation | |
---|---|---|
0 | 2023-01-01 | 0 |
1 | 2023-01-02 | 1 |
2 | 2023-01-03 | 0 |
3 | 2023-01-04 | 0 |
4 | 2023-01-05 | 2 |
5 | 2023-01-06 | 0 |
6 | 2023-01-07 | 1 |
7 | 2023-01-08 | 3 |
8 | 2023-01-09 | 0 |
9 | 2023-01-10 | 1 |
# Use merge to combine both frames on the column they have in common.
# The result has one 'date' column plus 'temperature' and 'precipitation'.
pd.merge(df_temps, df_rain, on='date')
date | temperature | precipitation | |
---|---|---|---|
0 | 2023-01-01 | 22 | 0 |
1 | 2023-01-02 | 21 | 1 |
2 | 2023-01-03 | 23 | 0 |
3 | 2023-01-04 | 24 | 0 |
4 | 2023-01-05 | 22 | 2 |
5 | 2023-01-06 | 23 | 0 |
6 | 2023-01-07 | 25 | 1 |
7 | 2023-01-08 | 24 | 3 |
8 | 2023-01-09 | 26 | 0 |
9 | 2023-01-10 | 27 | 1 |
# Move 'date' into the index of both frames (in place), because join aligns
# rows on the index rather than on a column.
df_temps.set_index('date', inplace=True)
df_rain.set_index('date', inplace=True)
df_temps.join(df_rain)
temperature | precipitation | |
---|---|---|
date | ||
2023-01-01 | 22 | 0 |
2023-01-02 | 21 | 1 |
2023-01-03 | 23 | 0 |
2023-01-04 | 24 | 0 |
2023-01-05 | 22 | 2 |
2023-01-06 | 23 | 0 |
2023-01-07 | 25 | 1 |
2023-01-08 | 24 | 3 |
2023-01-09 | 26 | 0 |
2023-01-10 | 27 | 1 |
# Rebuild the DataFrames from the series so 'date' is a regular column again.
df_temps = pd.DataFrame({'date': dates, 'temperature': temps.values})
df_rain = pd.DataFrame({'date': dates, 'precipitation': rain.values})

# Concatenate side by side (axis=1): columns are appended as-is, so the
# result carries two 'date' columns.
pd.concat([df_temps, df_rain], axis=1)
date | temperature | date | precipitation | |
---|---|---|---|---|
0 | 2023-01-01 | 22 | 2023-01-01 | 0 |
1 | 2023-01-02 | 21 | 2023-01-02 | 1 |
2 | 2023-01-03 | 23 | 2023-01-03 | 0 |
3 | 2023-01-04 | 24 | 2023-01-04 | 0 |
4 | 2023-01-05 | 22 | 2023-01-05 | 2 |
5 | 2023-01-06 | 23 | 2023-01-06 | 0 |
6 | 2023-01-07 | 25 | 2023-01-07 | 1 |
7 | 2023-01-08 | 24 | 2023-01-08 | 3 |
8 | 2023-01-09 | 26 | 2023-01-09 | 0 |
9 | 2023-01-10 | 27 | 2023-01-10 | 1 |
# Rebuild the DataFrames from the series so 'date' is a regular column again.
df_temps = pd.DataFrame({'date': dates, 'temperature': temps.values})
df_rain = pd.DataFrame({'date': dates, 'precipitation': rain.values})

# Concatenate vertically (axis=0): rows are stacked, and a column missing
# from one frame is filled with NaN for that frame's rows.
pd.concat([df_temps, df_rain], axis=0)
date | temperature | precipitation | |
---|---|---|---|
0 | 2023-01-01 | 22.0 | NaN |
1 | 2023-01-02 | 21.0 | NaN |
2 | 2023-01-03 | 23.0 | NaN |
3 | 2023-01-04 | 24.0 | NaN |
4 | 2023-01-05 | 22.0 | NaN |
5 | 2023-01-06 | 23.0 | NaN |
6 | 2023-01-07 | 25.0 | NaN |
7 | 2023-01-08 | 24.0 | NaN |
8 | 2023-01-09 | 26.0 | NaN |
9 | 2023-01-10 | 27.0 | NaN |
0 | 2023-01-01 | NaN | 0.0 |
1 | 2023-01-02 | NaN | 1.0 |
2 | 2023-01-03 | NaN | 0.0 |
3 | 2023-01-04 | NaN | 0.0 |
4 | 2023-01-05 | NaN | 2.0 |
5 | 2023-01-06 | NaN | 0.0 |
6 | 2023-01-07 | NaN | 1.0 |
7 | 2023-01-08 | NaN | 3.0 |
8 | 2023-01-09 | NaN | 0.0 |
9 | 2023-01-10 | NaN | 1.0 |
# With 'date' as the index of both frames, a column-wise concat aligns rows
# by date and yields a single date index with both measurement columns.
df_temps.set_index('date', inplace=True)
df_rain.set_index('date', inplace=True)
pd.concat([df_temps, df_rain], axis=1)
temperature | precipitation | |
---|---|---|
date | ||
2023-01-01 | 22 | 0 |
2023-01-02 | 21 | 1 |
2023-01-03 | 23 | 0 |
2023-01-04 | 24 | 0 |
2023-01-05 | 22 | 2 |
2023-01-06 | 23 | 0 |
2023-01-07 | 25 | 1 |
2023-01-08 | 24 | 3 |
2023-01-09 | 26 | 0 |
2023-01-10 | 27 | 1 |
Para aprender más:
- Revisa las opciones de left, right en merge
- Revisa las opciones de on y how en join
- Revisa las opciones de ignore_index, join en concat
- Averigua qué pasa cuando faltan datos en alguna de las series o dataframes