import pandas as pd16 Localizar y reemplazar información en Pandas
En esta sesión veremos el uso de la función .loc[ ]. Aprenderás a localizar y modificar información dentro de un DataFrame de manera efectiva. Descubrirás cómo seleccionar datos concretos, segmentar secciones específicas y aplicar filtros para ajustarse a tus requerimientos de análisis de datos.
Utilizala para extraer subconjuntos de filas, eligiendo rangos específicos basados en etiquetas. Establece condiciones para acceder a segmentos del DataFrame que satisfagan criterios definidos, asegurando así la obtención de datos precisos y pertinentes. Prepara tus datos para análisis futuros con .loc[ ], una herramienta esencial para seleccionar, filtrar y modificar la información clave de tu DataFrame.
f = "../data/Cuernavaca_Enero_comas.csv"
cuerna = pd.read_csv(f,index_col=0,parse_dates=True)
cuerna.head()| To | RH | P | Ws | Wd | Ig | Ib | Id | |
|---|---|---|---|---|---|---|---|---|
| tiempo | ||||||||
| 2012-01-01 00:00:00 | 19.3 | 58 | 87415 | 0.0 | 26 | 0 | 0 | 0 |
| 2012-01-01 01:00:00 | 18.6 | 59 | 87602 | 0.0 | 26 | 0 | 0 | 0 |
| 2012-01-01 02:00:00 | 17.9 | 61 | 87788 | 0.0 | 30 | 0 | 0 | 0 |
| 2012-01-01 03:00:00 | 17.3 | 66 | 87554 | 0.0 | 30 | 0 | 0 | 0 |
| 2012-01-01 04:00:00 | 16.6 | 71 | 87321 | 0.0 | 27 | 0 | 0 | 0 |
cuerna.loc["2012-1-2"]| To | RH | P | Ws | Wd | Ig | Ib | Id | |
|---|---|---|---|---|---|---|---|---|
| tiempo | ||||||||
| 2012-01-02 00:00:00 | 15.1 | 40 | 87190 | 1.0 | 91 | 0 | 0 | 0 |
| 2012-01-02 01:00:00 | 14.7 | 44 | 87280 | 1.0 | 78 | 0 | 0 | 0 |
| 2012-01-02 02:00:00 | 14.2 | 47 | 87319 | 1.0 | 330 | 0 | 0 | 0 |
| 2012-01-02 03:00:00 | 14.2 | 49 | 87237 | 0.8 | 69 | 0 | 0 | 0 |
| 2012-01-02 04:00:00 | 14.9 | 51 | 87179 | 0.5 | 67 | 0 | 0 | 0 |
| 2012-01-02 05:00:00 | 16.1 | 51 | 87147 | 0.2 | 360 | 0 | 0 | 0 |
| 2012-01-02 06:00:00 | 15.0 | 55 | 87006 | 0.0 | 71 | 0 | 0 | 0 |
| 2012-01-02 07:00:00 | 17.0 | 52 | 87095 | 0.0 | 66 | 14 | 42 | 11 |
| 2012-01-02 08:00:00 | 19.0 | 49 | 87185 | 0.0 | 79 | 148 | 399 | 44 |
| 2012-01-02 09:00:00 | 23.0 | 36 | 87360 | 2.1 | 160 | 347 | 679 | 68 |
| 2012-01-02 10:00:00 | 23.5 | 35 | 87381 | 1.5 | 171 | 551 | 882 | 78 |
| 2012-01-02 11:00:00 | 24.0 | 34 | 87403 | 1.0 | 130 | 699 | 937 | 86 |
| 2012-01-02 12:00:00 | 25.5 | 30 | 87466 | 1.0 | 191 | 783 | 959 | 90 |
| 2012-01-02 13:00:00 | 27.0 | 26 | 87530 | 1.0 | 150 | 795 | 958 | 93 |
| 2012-01-02 14:00:00 | 27.0 | 24 | 87530 | 1.0 | 205 | 681 | 768 | 157 |
| 2012-01-02 15:00:00 | 27.0 | 23 | 87530 | 1.0 | 150 | 553 | 712 | 154 |
| 2012-01-02 16:00:00 | 26.0 | 26 | 87488 | 0.0 | 212 | 423 | 778 | 82 |
| 2012-01-02 17:00:00 | 25.0 | 30 | 87445 | 0.0 | 216 | 213 | 566 | 58 |
| 2012-01-02 18:00:00 | 19.4 | 45 | 86992 | 0.4 | 229 | 0 | 0 | 0 |
| 2012-01-02 19:00:00 | 18.4 | 47 | 86893 | 0.5 | 239 | 0 | 0 | 0 |
| 2012-01-02 20:00:00 | 17.5 | 49 | 86811 | 0.6 | 320 | 0 | 0 | 0 |
| 2012-01-02 21:00:00 | 15.9 | 53 | 87027 | 0.6 | 244 | 0 | 0 | 0 |
| 2012-01-02 22:00:00 | 14.3 | 56 | 87121 | 0.9 | 247 | 0 | 0 | 0 |
| 2012-01-02 23:00:00 | 12.6 | 60 | 87290 | 1.0 | 340 | 0 | 0 | 0 |
cuerna.loc["2012-01-10":"2012-01-17"]| To | RH | P | Ws | Wd | Ig | Ib | Id | |
|---|---|---|---|---|---|---|---|---|
| tiempo | ||||||||
| 2012-01-10 00:00:00 | 16.4 | 42 | 87080 | 1.0 | 16 | 0 | 0 | 0 |
| 2012-01-10 01:00:00 | 15.9 | 46 | 87224 | 1.0 | 10 | 0 | 0 | 0 |
| 2012-01-10 02:00:00 | 15.5 | 50 | 87368 | 1.0 | 270 | 0 | 0 | 0 |
| 2012-01-10 03:00:00 | 15.4 | 52 | 87277 | 0.8 | 10 | 0 | 0 | 0 |
| 2012-01-10 04:00:00 | 15.2 | 54 | 87187 | 0.5 | 13 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2012-01-17 19:00:00 | 20.9 | 34 | 87273 | 0.2 | 233 | 0 | 0 | 0 |
| 2012-01-17 20:00:00 | 18.8 | 37 | 87186 | 0.2 | 279 | 0 | 0 | 0 |
| 2012-01-17 21:00:00 | 18.3 | 38 | 87162 | 0.3 | 314 | 0 | 0 | 0 |
| 2012-01-17 22:00:00 | 17.9 | 39 | 87137 | 0.4 | 337 | 0 | 0 | 0 |
| 2012-01-17 23:00:00 | 17.4 | 41 | 87112 | 0.5 | 348 | 0 | 0 | 0 |
192 rows × 8 columns
#Si queremos seleccionar donde ocurre algo, por ejemplo donde Ig > 0
cuerna.Ig>0.tiempo
2012-01-01 00:00:00 False
2012-01-01 01:00:00 False
2012-01-01 02:00:00 False
2012-01-01 03:00:00 False
2012-01-01 04:00:00 False
...
2012-01-31 19:00:00 False
2012-01-31 20:00:00 False
2012-01-31 21:00:00 False
2012-01-31 22:00:00 False
2012-01-31 23:00:00 False
Name: Ig, Length: 744, dtype: bool
cuerna.loc[cuerna.Ig>0]| To | RH | P | Ws | Wd | Ig | Ib | Id | |
|---|---|---|---|---|---|---|---|---|
| tiempo | ||||||||
| 2012-01-01 07:00:00 | 18.0 | 70 | 87140 | 0.0 | 34 | 20 | 151 | 11 |
| 2012-01-01 08:00:00 | 19.0 | 68 | 87185 | 0.0 | 61 | 164 | 522 | 37 |
| 2012-01-01 09:00:00 | 20.0 | 60 | 87229 | 0.0 | 95 | 369 | 812 | 58 |
| 2012-01-01 10:00:00 | 20.0 | 64 | 87229 | 1.0 | 108 | 568 | 931 | 68 |
| 2012-01-01 11:00:00 | 20.0 | 68 | 87229 | 2.1 | 160 | 717 | 981 | 75 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2012-01-31 13:00:00 | 25.0 | 19 | 87445 | 2.6 | 180 | 878 | 990 | 90 |
| 2012-01-31 14:00:00 | 26.0 | 20 | 87488 | 3.1 | 240 | 827 | 978 | 88 |
| 2012-01-31 15:00:00 | 26.0 | 21 | 87488 | 3.1 | 190 | 705 | 941 | 84 |
| 2012-01-31 16:00:00 | 25.0 | 22 | 87445 | 2.6 | 210 | 523 | 872 | 75 |
| 2012-01-31 17:00:00 | 23.0 | 27 | 87360 | 0.0 | 234 | 301 | 722 | 59 |
341 rows × 8 columns
mascara = cuerna.Ig>0
cuerna.loc[mascara]| To | RH | P | Ws | Wd | Ig | Ib | Id | |
|---|---|---|---|---|---|---|---|---|
| tiempo | ||||||||
| 2012-01-01 07:00:00 | 18.0 | 70 | 87140 | 0.0 | 34 | 20 | 151 | 11 |
| 2012-01-01 08:00:00 | 19.0 | 68 | 87185 | 0.0 | 61 | 164 | 522 | 37 |
| 2012-01-01 09:00:00 | 20.0 | 60 | 87229 | 0.0 | 95 | 369 | 812 | 58 |
| 2012-01-01 10:00:00 | 20.0 | 64 | 87229 | 1.0 | 108 | 568 | 931 | 68 |
| 2012-01-01 11:00:00 | 20.0 | 68 | 87229 | 2.1 | 160 | 717 | 981 | 75 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2012-01-31 13:00:00 | 25.0 | 19 | 87445 | 2.6 | 180 | 878 | 990 | 90 |
| 2012-01-31 14:00:00 | 26.0 | 20 | 87488 | 3.1 | 240 | 827 | 978 | 88 |
| 2012-01-31 15:00:00 | 26.0 | 21 | 87488 | 3.1 | 190 | 705 | 941 | 84 |
| 2012-01-31 16:00:00 | 25.0 | 22 | 87445 | 2.6 | 210 | 523 | 872 | 75 |
| 2012-01-31 17:00:00 | 23.0 | 27 | 87360 | 0.0 | 234 | 301 | 722 | 59 |
341 rows × 8 columns
mascara = (cuerna.Ig>0) & (cuerna.Ws != 0)
cuerna.loc[mascara]| To | RH | P | Ws | Wd | Ig | Ib | Id | |
|---|---|---|---|---|---|---|---|---|
| tiempo | ||||||||
| 2012-01-01 10:00:00 | 20.0 | 64 | 87229 | 1.0 | 108 | 568 | 931 | 68 |
| 2012-01-01 11:00:00 | 20.0 | 68 | 87229 | 2.1 | 160 | 717 | 981 | 75 |
| 2012-01-01 12:00:00 | 21.0 | 60 | 87273 | 1.8 | 135 | 800 | 999 | 79 |
| 2012-01-01 13:00:00 | 22.0 | 53 | 87316 | 1.5 | 160 | 810 | 998 | 80 |
| 2012-01-01 14:00:00 | 21.7 | 53 | 87302 | 1.3 | 164 | 747 | 977 | 79 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2012-01-31 09:00:00 | 20.0 | 30 | 87229 | 3.1 | 150 | 377 | 786 | 62 |
| 2012-01-31 13:00:00 | 25.0 | 19 | 87445 | 2.6 | 180 | 878 | 990 | 90 |
| 2012-01-31 14:00:00 | 26.0 | 20 | 87488 | 3.1 | 240 | 827 | 978 | 88 |
| 2012-01-31 15:00:00 | 26.0 | 21 | 87488 | 3.1 | 190 | 705 | 941 | 84 |
| 2012-01-31 16:00:00 | 25.0 | 22 | 87445 | 2.6 | 210 | 523 | 872 | 75 |
181 rows × 8 columns
cuerna.loc[cuerna.Ig==0,"Ig"] = -999cuerna.Ig.plot()
cuerna.head()| To | RH | P | Ws | Wd | Ig | Ib | Id | |
|---|---|---|---|---|---|---|---|---|
| tiempo | ||||||||
| 2012-01-01 00:00:00 | 19.3 | 58 | 87415 | 0.0 | 26 | -999 | 0 | 0 |
| 2012-01-01 01:00:00 | 18.6 | 59 | 87602 | 0.0 | 26 | -999 | 0 | 0 |
| 2012-01-01 02:00:00 | 17.9 | 61 | 87788 | 0.0 | 30 | -999 | 0 | 0 |
| 2012-01-01 03:00:00 | 17.3 | 66 | 87554 | 0.0 | 30 | -999 | 0 | 0 |
| 2012-01-01 04:00:00 | 16.6 | 71 | 87321 | 0.0 | 27 | -999 | 0 | 0 |
# .loc no sirve para localizar columnas, eso se hace directamente
# cuerna.loc["To"]cuerna["To"]tiempo
2012-01-01 00:00:00 19.3
2012-01-01 01:00:00 18.6
2012-01-01 02:00:00 17.9
2012-01-01 03:00:00 17.3
2012-01-01 04:00:00 16.6
...
2012-01-31 19:00:00 17.9
2012-01-31 20:00:00 16.9
2012-01-31 21:00:00 16.5
2012-01-31 22:00:00 16.3
2012-01-31 23:00:00 16.2
Name: To, Length: 744, dtype: float64