46  Multi-índices en las columnas de series temporales

En el análisis de datos, la capacidad de organizar la información de manera jerárquica es esencial para una comprensión más profunda y una exploración eficiente. En esta sesión veremos cómo crear un multiíndice (MultiIndex) con series temporales, tanto en las columnas como en el índice de filas de un DataFrame. Al crear un multiíndice establecemos niveles jerárquicos que nos permiten acceder a la información de manera estructurada y organizada. Veremos cómo esta técnica facilita el acceso y la exploración de los datos, especialmente cuando lidiamos con conjuntos de datos complejos.

import pandas as pd
import matplotlib.pyplot as plt
# Load the "blanco" CSV; column 0 becomes the index and is parsed as dates.
f = '../data/Ti_blanco.csv'
Ti_b = pd.read_csv(f,index_col=0,parse_dates=True)

# Load the "negro" CSV the same way (note that `f` is rebound here).
f = '../data/Ti_negro.csv'
Ti_n = pd.read_csv(f,index_col=0,parse_dates=True)
# Bare expression: shows the frame as the notebook cell output.
Ti_n
Ti_C Ti_R1 Ti_R2 Ti_S
date
2006-01-01 00:10:00 19.038413 19.047696 19.087096 19.129994
2006-01-01 00:20:00 19.035998 19.045313 19.084383 19.128110
2006-01-01 00:30:00 19.033880 19.043235 19.082035 19.126533
2006-01-01 00:40:00 19.031973 19.041355 19.079928 19.125138
2006-01-01 00:50:00 19.030158 19.039552 19.077921 19.123787
... ... ... ... ...
2006-12-31 23:20:00 24.137526 23.391216 23.667201 24.646797
2006-12-31 23:30:00 24.064361 23.323456 23.589420 24.567420
2006-12-31 23:40:00 23.990232 23.257064 23.512465 24.488597
2006-12-31 23:50:00 23.917165 23.191858 23.436305 24.410237
2007-01-01 00:00:00 23.846178 23.127226 23.360837 24.332185

52560 rows × 4 columns

# Bare expression: shows the "blanco" frame as the notebook cell output.
Ti_b
Ti_C Ti_R1 Ti_R2 Ti_S
date
2006-01-01 00:10:00 19.014610 19.024472 19.065057 19.106503
2006-01-01 00:20:00 19.012693 19.022505 19.062782 19.104997
2006-01-01 00:30:00 19.011030 19.020820 19.060846 19.103773
2006-01-01 00:40:00 19.009526 19.019312 19.059129 19.102714
2006-01-01 00:50:00 19.008070 19.017864 19.057494 19.101684
... ... ... ... ...
2006-12-31 23:20:00 19.589577 19.517255 19.780842 19.902851
2006-12-31 23:30:00 19.572531 19.500154 19.757842 19.882044
2006-12-31 23:40:00 19.555469 19.482987 19.734803 19.861207
2006-12-31 23:50:00 19.538364 19.465734 19.711720 19.840302
2007-01-01 00:00:00 19.521151 19.448329 19.688533 19.819254

52560 rows × 4 columns

# Join both frames side by side (axis=1); `keys` adds an outer column level
# ('blanco' / 'negro') above the original column names, i.e. a column MultiIndex.
pd.concat([Ti_b,Ti_n],axis=1,keys=['blanco','negro'])
blanco negro
Ti_C Ti_R1 Ti_R2 Ti_S Ti_C Ti_R1 Ti_R2 Ti_S
date
2006-01-01 00:10:00 19.014610 19.024472 19.065057 19.106503 19.038413 19.047696 19.087096 19.129994
2006-01-01 00:20:00 19.012693 19.022505 19.062782 19.104997 19.035998 19.045313 19.084383 19.128110
2006-01-01 00:30:00 19.011030 19.020820 19.060846 19.103773 19.033880 19.043235 19.082035 19.126533
2006-01-01 00:40:00 19.009526 19.019312 19.059129 19.102714 19.031973 19.041355 19.079928 19.125138
2006-01-01 00:50:00 19.008070 19.017864 19.057494 19.101684 19.030158 19.039552 19.077921 19.123787
... ... ... ... ... ... ... ... ...
2006-12-31 23:20:00 19.589577 19.517255 19.780842 19.902851 24.137526 23.391216 23.667201 24.646797
2006-12-31 23:30:00 19.572531 19.500154 19.757842 19.882044 24.064361 23.323456 23.589420 24.567420
2006-12-31 23:40:00 19.555469 19.482987 19.734803 19.861207 23.990232 23.257064 23.512465 24.488597
2006-12-31 23:50:00 19.538364 19.465734 19.711720 19.840302 23.917165 23.191858 23.436305 24.410237
2007-01-01 00:00:00 19.521151 19.448329 19.688533 19.819254 23.846178 23.127226 23.360837 24.332185

52560 rows × 8 columns

# Show the signature and docstring; calling from_product() with no arguments
# raises TypeError because `iterables` is a required parameter.
help(pd.MultiIndex.from_product)
TypeError: MultiIndex.from_product() missing 1 required positional argument: 'iterables'
# Build a MultiIndex that pairs the category label with the existing datetime index.
multi_indice = pd.MultiIndex.from_product([['blanco'], Ti_b.index], names=['Categoría', 'Fecha'])

multi_indice
# Work on a copy: with a plain alias (`Ti_bm = Ti_b`) the assignment to
# `.index` below would also replace the index of `Ti_b` itself.
Ti_bm = Ti_b.copy()
Ti_bm.index = multi_indice
Ti_bm
NameError: name 'multi_indice' is not defined
# Reload the "blanco" CSV so Ti_b has its datetime index again.
f = '../data/Ti_blanco.csv'
Ti_b = pd.read_csv(f,index_col=0,parse_dates=True)
# Levels are (category, timestamp), named in that same order.
multi_indice = pd.MultiIndex.from_product([['blanco'], Ti_b.index], names=['Categoría', 'Fecha'])
# Copy before re-indexing so that Ti_b keeps its datetime index.
Ti_bm = Ti_b.copy()
Ti_bm.index = multi_indice
Ti_bm
Ti_C Ti_R1 Ti_R2 Ti_S
Categoría Fecha
blanco 2006-01-01 00:10:00 19.014610 19.024472 19.065057 19.106503
2006-01-01 00:20:00 19.012693 19.022505 19.062782 19.104997
2006-01-01 00:30:00 19.011030 19.020820 19.060846 19.103773
2006-01-01 00:40:00 19.009526 19.019312 19.059129 19.102714
2006-01-01 00:50:00 19.008070 19.017864 19.057494 19.101684
... ... ... ... ...
2006-12-31 23:20:00 19.589577 19.517255 19.780842 19.902851
2006-12-31 23:30:00 19.572531 19.500154 19.757842 19.882044
2006-12-31 23:40:00 19.555469 19.482987 19.734803 19.861207
2006-12-31 23:50:00 19.538364 19.465734 19.711720 19.840302
2007-01-01 00:00:00 19.521151 19.448329 19.688533 19.819254

52560 rows × 4 columns

# Reload the "blanco" CSV so Ti_b has its datetime index again.
f = '../data/Ti_blanco.csv'
Ti_b = pd.read_csv(f,index_col=0,parse_dates=True)
# Levels are (timestamp, category) here, so the names are listed in that same order.
multi_indice = pd.MultiIndex.from_product([Ti_b.index, ['blanco']], names=['Fecha', 'Categoría'])
# Copy before re-indexing so that Ti_b keeps its datetime index.
Ti_bm = Ti_b.copy()
Ti_bm.index = multi_indice
# Path of the "blanco" CSV, passed to agrega_multi below.
f = '../data/Ti_blanco.csv'
def agrega_multi(f,multi):
    """Load a time-series CSV, average it per day and tag it with a category.

    Parameters
    ----------
    f : str, path or file-like
        CSV source handed to ``pd.read_csv``; its first column is used as
        the index and parsed as dates.
    multi : hashable
        Label stored in the second index level of every row
        (for example ``'blanco'``).

    Returns
    -------
    pandas.DataFrame
        Daily means indexed by a two-level MultiIndex whose levels are
        ``Fecha`` (the day) and ``Categoría`` (the ``multi`` label).
    """
    diario = pd.read_csv(f, index_col=0, parse_dates=True).resample('D').mean()
    # The level order is (date, category); the names must be listed in that
    # same order, otherwise the date level ends up labelled "Categoría".
    diario.index = pd.MultiIndex.from_product(
        [diario.index, [multi]], names=['Fecha', 'Categoría']
    )
    return diario

# Daily means of the "blanco" case, tagged with the 'blanco' label.
blanco = agrega_multi(f,'blanco')
# NOTE(review): the next two lines repeat the load just above with the same
# path; presumably a separate notebook cell — confirm before removing.
f = '../data/Ti_blanco.csv'
blanco = agrega_multi(f,'blanco')

# Same processing for the "negro" case.
f = '../data/Ti_negro.csv'
negro  = agrega_multi(f,'negro')
# Stack both frames row-wise: all 'negro' rows first, then all 'blanco' rows.
casos = pd.concat([negro,blanco])
# Bare expression: shows the combined frame as the notebook cell output.
casos
Ti_C Ti_R1 Ti_R2 Ti_S
Categoría Fecha
2006-01-01 negro 18.993764 18.999711 19.034391 19.083183
2006-01-02 negro 20.104855 20.078903 20.250677 20.205274
2006-01-03 negro 19.309793 19.118780 18.991211 19.123250
2006-01-04 negro 17.480913 16.588023 16.372463 17.137445
2006-01-05 negro 18.980579 17.768470 17.668538 18.643465
... ... ... ... ... ...
2006-12-28 blanco 14.165858 14.000331 14.035546 13.920070
2006-12-29 blanco 15.896368 15.731774 15.864342 15.855534
2006-12-30 blanco 17.800614 17.556038 17.730885 17.826264
2006-12-31 blanco 18.482376 18.312972 18.421315 18.600672
2007-01-01 blanco 19.521151 19.448329 19.688533 19.819254

732 rows × 4 columns

# Select by both index levels: the partial date string '2006-01' on the first
# level and the 'blanco' label on the second level.
casos.loc['2006-01','blanco']
Ti_C Ti_R1 Ti_R2 Ti_S
Categoría
2006-01-01 18.961551 18.968034 19.002049 19.050950
2006-01-02 19.413483 19.425501 19.520059 19.542445
2006-01-03 18.138849 18.032331 17.872337 18.046172
2006-01-04 14.431871 14.147740 13.754557 14.007418
2006-01-05 14.295440 14.057466 13.901701 13.993210
2006-01-06 15.620154 15.377971 15.416920 15.425257
2006-01-07 16.198036 16.104297 16.049916 16.044580
2006-01-08 11.876301 11.592555 11.122067 11.163917
2006-01-09 10.507727 10.213770 9.851995 9.714301
2006-01-10 13.602409 13.486783 13.594123 13.352994
2006-01-11 14.092676 13.894091 13.824781 13.781661
2006-01-12 15.369566 15.237519 15.217131 15.146219
2006-01-13 13.955472 13.684138 13.461462 13.520220
2006-01-14 13.934886 13.618162 13.469840 13.552627
2006-01-15 14.817750 14.560462 14.529891 14.595354
2006-01-16 16.393359 16.239772 16.396078 16.424494
2006-01-17 18.052838 17.857875 18.041275 18.168993
2006-01-18 19.496338 19.301262 19.477641 19.735577
2006-01-19 18.503936 18.204900 18.066650 18.486581
2006-01-20 14.183292 13.769410 13.327532 13.635025
2006-01-21 15.626780 15.475040 15.454669 15.371909
2006-01-22 15.983414 15.810804 15.825034 15.827421
2006-01-23 17.513464 17.322074 17.450166 17.513643
2006-01-24 19.045467 18.870103 19.011692 19.106406
2006-01-25 17.247607 16.939827 16.725375 17.002644
2006-01-26 17.874866 17.771731 17.882630 17.955168
2006-01-27 18.996153 18.947590 19.051278 19.158464
2006-01-28 16.146461 15.844702 15.577670 15.841123
2006-01-29 17.558852 17.435669 17.569172 17.670580
2006-01-30 18.897609 18.699047 18.844758 19.049960
2006-01-31 18.736328 18.485041 18.501673 18.882997